[LPT] Mixed precision GPU Plugin tests (#4498)
This commit is contained in:
parent
7d0cc01c1b
commit
e2ada66826
@ -72,7 +72,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
|
||||
{ {-128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true },
|
||||
{ {0.2f}, ngraph::element::f32, {}, false }
|
||||
},
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"FP32"
|
||||
},
|
||||
|
||||
@ -126,7 +126,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
|
||||
{},
|
||||
{ {0.2f}, ngraph::element::f32, {}, false }
|
||||
},
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"U8"
|
||||
},
|
||||
|
||||
@ -177,7 +177,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
|
||||
{ {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true },
|
||||
{ {0.2f}, ngraph::element::f32, {}, false }
|
||||
},
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"FP32"
|
||||
},
|
||||
|
||||
@ -228,7 +228,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
|
||||
{},
|
||||
{ {0.2f}, ngraph::element::f32, {}, false }
|
||||
},
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"U8"
|
||||
},
|
||||
};
|
||||
|
@ -27,7 +27,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
|
||||
false,
|
||||
{},
|
||||
false,
|
||||
"output",
|
||||
"Convolution",
|
||||
"FP32"
|
||||
},
|
||||
{
|
||||
@ -35,7 +35,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
|
||||
false,
|
||||
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
|
||||
false,
|
||||
"output",
|
||||
"Convolution",
|
||||
"FP32"
|
||||
},
|
||||
{
|
||||
@ -43,7 +43,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
|
||||
false,
|
||||
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
|
||||
false,
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"U8"
|
||||
},
|
||||
{
|
||||
@ -51,7 +51,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
|
||||
false,
|
||||
{ 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
|
||||
false,
|
||||
"output",
|
||||
"Convolution",
|
||||
"FP32"
|
||||
},
|
||||
{
|
||||
@ -59,7 +59,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
|
||||
false,
|
||||
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f } },
|
||||
false,
|
||||
"output",
|
||||
"Convolution",
|
||||
"FP32"
|
||||
},
|
||||
{
|
||||
@ -67,7 +67,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
|
||||
false,
|
||||
{ 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
|
||||
false,
|
||||
"output",
|
||||
"Convolution",
|
||||
"FP32"
|
||||
},
|
||||
{
|
||||
@ -75,7 +75,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
|
||||
true,
|
||||
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
|
||||
false,
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"U8"
|
||||
},
|
||||
{
|
||||
@ -83,7 +83,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
|
||||
true,
|
||||
{ 255ul, ngraph::Shape { 1 }, { 0.f }, { 254.f }, { -18.7f }, { 18.7f } },
|
||||
false,
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"U8"
|
||||
},
|
||||
};
|
||||
|
@ -12,8 +12,9 @@ using namespace LayerTestsDefinitions;
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
||||
namespace {
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||
InferenceEngine::Precision::FP32
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -21,7 +21,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
|
||||
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
|
||||
{ {}, {}, {} },
|
||||
"matMul/FC",
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
},
|
||||
// 3D with dequantize on weights
|
||||
@ -31,7 +31,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 2, 2.f), ngraph::element::i8, ngraph::Shape{ 2, 4 } },
|
||||
{},
|
||||
{ ngraph::element::f32, {}, {0.1f} },
|
||||
"matMul/FC",
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
},
|
||||
// 3D with different values
|
||||
@ -41,7 +41,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
|
||||
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
|
||||
{ {}, {}, {} },
|
||||
"matMul/FC",
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
},
|
||||
// 4D with different values
|
||||
@ -51,7 +51,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
|
||||
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
|
||||
{ {}, {}, {} },
|
||||
"matMul/FC",
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
},
|
||||
// 4D with Dq on weights
|
||||
@ -61,7 +61,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 2, 2.f), ngraph::element::i8, ngraph::Shape{ 2, 4 } },
|
||||
{},
|
||||
{ ngraph::element::f32, {}, {{0.1f, 0.01}, ngraph::element::f32, ngraph::Shape{ 2, 1 }} },
|
||||
"matMul/FC",
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
},
|
||||
// 3D with the same values
|
||||
@ -71,7 +71,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 4, 2.f), ngraph::element::f32, ngraph::Shape{ 4, 4 } },
|
||||
{ 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-128.f}, {127.f} },
|
||||
{ {}, {}, {} },
|
||||
"matMul/FC",
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
},
|
||||
// 2D with subtract on activations
|
||||
@ -81,7 +81,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>{1, 2, 3, 4, 5, 6}, ngraph::element::f32, ngraph::Shape{ 2, 3 } },
|
||||
{ 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-12.8f}, {12.7f} },
|
||||
{ {}, {}, {} },
|
||||
"matMul/1",
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
},
|
||||
// 2D with subtract on activations & Dq on weights
|
||||
@ -91,7 +91,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>{1, 2, 3, 4, 5, 6}, ngraph::element::i8, ngraph::Shape{ 2, 3 } },
|
||||
{},
|
||||
{ ngraph::element::f32, {}, {0.1f} },
|
||||
"matMul/1",
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
}
|
||||
};
|
||||
|
@ -13,7 +13,7 @@ using namespace InferenceEngine::details;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
//ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<LayerTestsDefinitions::AddTestValues> params = {
|
||||
|
@ -14,7 +14,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -13,7 +13,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
@ -72,7 +72,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
|
||||
{ {-128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true },
|
||||
{ {0.2f}, ngraph::element::f32, {}, false }
|
||||
},
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"U8"
|
||||
},
|
||||
|
||||
@ -126,7 +126,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
|
||||
{},
|
||||
{ {0.2f}, ngraph::element::f32, {}, false }
|
||||
},
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"U8"
|
||||
},
|
||||
|
||||
@ -177,7 +177,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
|
||||
{ {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::i8, true },
|
||||
{ {0.2f}, ngraph::element::f32, {}, false }
|
||||
},
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"U8"
|
||||
},
|
||||
|
||||
@ -228,7 +228,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionQDqTransformationParam> para
|
||||
{},
|
||||
{ {0.2f}, ngraph::element::f32, {}, false }
|
||||
},
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"U8"
|
||||
},
|
||||
};
|
||||
|
@ -13,7 +13,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
@ -27,30 +27,32 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
|
||||
false,
|
||||
{},
|
||||
false,
|
||||
"output",
|
||||
""
|
||||
"Convolution",
|
||||
"FP32"
|
||||
},
|
||||
{
|
||||
{},
|
||||
false,
|
||||
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
|
||||
false,
|
||||
"output",
|
||||
""
|
||||
"Convolution",
|
||||
"FP32"
|
||||
},
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
|
||||
false,
|
||||
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
|
||||
false,
|
||||
"output_original",
|
||||
"Convolution",
|
||||
"U8"
|
||||
},
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -12.75f }, { 6.375f } },
|
||||
true,
|
||||
{ 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
|
||||
false
|
||||
false,
|
||||
"Convolution",
|
||||
"U8"
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -14,7 +14,7 @@ using namespace ngraph::opset1;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> precisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<DepthToSpace::DepthToSpaceMode> modes = {
|
||||
|
@ -13,7 +13,8 @@ using namespace InferenceEngine::details;
|
||||
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> precisions = {
|
||||
ngraph::element::f32
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -13,7 +13,8 @@ using namespace InferenceEngine::details;
|
||||
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> precisions = {
|
||||
ngraph::element::f32
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -13,7 +13,7 @@ using namespace ngraph::pass::low_precision;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -14,7 +14,7 @@ using namespace ngraph::pass::low_precision;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -14,7 +14,7 @@ using namespace ngraph::pass::low_precision;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -12,8 +12,9 @@ using namespace LayerTestsDefinitions;
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
||||
namespace {
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||
InferenceEngine::Precision::FP32
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16,
|
||||
};
|
||||
|
||||
const std::vector<LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -13,7 +13,7 @@ using namespace InferenceEngine::details;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<MatMulShapes> shapes = {
|
||||
|
@ -9,7 +9,8 @@ using namespace InferenceEngine::details;
|
||||
|
||||
namespace {
|
||||
const std::vector<element::Type> precisions = {
|
||||
element::f32
|
||||
element::f32,
|
||||
// element::f16 // TODO: temporarily commented due to failing in GPU Plugin on constant folding stage
|
||||
};
|
||||
|
||||
const std::vector< ngraph::Shape > inputAndQuantizationShapes = {
|
||||
|
@ -14,6 +14,7 @@ using namespace ngraph::pass::low_precision;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -13,6 +13,7 @@ using namespace ngraph::pass::low_precision;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::Shape> dimensions = {
|
||||
|
@ -12,7 +12,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -9,7 +9,8 @@ using namespace InferenceEngine::details;
|
||||
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> precisions = {
|
||||
ngraph::element::f32
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<std::pair<ngraph::Shape, ngraph::Shape>> shapes = {
|
||||
|
@ -12,7 +12,7 @@ using namespace InferenceEngine::details;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> precisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
std::vector<MatMulTransformationTestValues> testValues = {
|
||||
|
@ -10,7 +10,10 @@ using namespace LayerTestsDefinitions;
|
||||
using namespace InferenceEngine::details;
|
||||
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> precisions = { ngraph::element::f32 };
|
||||
const std::vector<ngraph::element::Type> precisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
//transpose_a = false, transpose_b = true
|
||||
std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
@ -20,7 +23,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
|
||||
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
|
||||
{ {}, {}, {} },
|
||||
"result_result",
|
||||
"FullyConnected",
|
||||
"FP32"
|
||||
},
|
||||
{
|
||||
@ -29,7 +32,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 2, 2.f), ngraph::element::i8, ngraph::Shape{ 2, 4 } },
|
||||
{},
|
||||
{ ngraph::element::f32, {}, {0.1f} },
|
||||
"result_result",
|
||||
"FullyConnected",
|
||||
"FP32"
|
||||
},
|
||||
{
|
||||
@ -38,7 +41,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
|
||||
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
|
||||
{ {}, {}, {} },
|
||||
"result_result",
|
||||
"FullyConnected",
|
||||
"FP32"
|
||||
},
|
||||
{
|
||||
@ -47,7 +50,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
|
||||
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
|
||||
{ {}, {}, {} },
|
||||
"matMul",
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
},
|
||||
{
|
||||
@ -56,7 +59,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 2, 2.f), ngraph::element::i8, ngraph::Shape{ 2, 4 } },
|
||||
{},
|
||||
{ ngraph::element::f32, {}, {{0.1f, 0.01}, ngraph::element::f32, ngraph::Shape{ 2, 1 }} },
|
||||
"matMul",
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
},
|
||||
{
|
||||
@ -65,7 +68,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>(4 * 4, 2.f), ngraph::element::f32, ngraph::Shape{ 4, 4 } },
|
||||
{ 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-128.f}, {127.f} },
|
||||
{ {}, {}, {} },
|
||||
"result_result",
|
||||
"FullyConnected",
|
||||
"FP32"
|
||||
},
|
||||
{
|
||||
@ -74,7 +77,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>{1, 2, 3, 4, 5, 6}, ngraph::element::f32, ngraph::Shape{ 2, 3 } },
|
||||
{ 256ul, {{1}, {1}, {1}, {1}}, {-128.f}, {127.f}, {-12.8f}, {12.7f} },
|
||||
{ {}, {}, {} },
|
||||
"matMul",
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
},
|
||||
{
|
||||
@ -83,6 +86,8 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
|
||||
{ std::vector<float>{1, 2, 3, 4, 5, 6}, ngraph::element::i8, ngraph::Shape{ 2, 3 } },
|
||||
{},
|
||||
{ ngraph::element::f32, {}, {0.1f} },
|
||||
"FullyConnected",
|
||||
"U8"
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -12,6 +12,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues> params = {
|
||||
|
@ -9,7 +9,8 @@ using namespace InferenceEngine::details;
|
||||
|
||||
namespace {
|
||||
const std::vector<element::Type> precisions = {
|
||||
element::f32
|
||||
element::f32,
|
||||
element::f16
|
||||
};
|
||||
|
||||
const std::vector< ngraph::Shape > inputShapes = {
|
||||
|
@ -12,7 +12,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
//ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
|
||||
|
@ -12,7 +12,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<MultiplyWithOneParentTransformationValues> values = {
|
||||
|
@ -9,7 +9,8 @@ using namespace InferenceEngine::details;
|
||||
|
||||
namespace {
|
||||
const std::vector<element::Type> precisions = {
|
||||
element::f32
|
||||
element::f32,
|
||||
element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::Shape> inputAndQuantizationShapes = {
|
||||
|
@ -13,7 +13,7 @@ using namespace InferenceEngine::details;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> precisions = {
|
||||
ngraph::element::f32,
|
||||
//ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<std::pair<ngraph::Shape, ngraph::Shape> > inputAndQuantizationShapes = {
|
||||
|
@ -12,7 +12,8 @@ using namespace InferenceEngine::details;
|
||||
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> precisions = {
|
||||
ngraph::element::f32
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
std::vector<PReluTestValues> testValues = {
|
||||
|
@ -12,7 +12,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
// ngraph::element::f16 // TODO: enable f16 test inference (change ngraph function + fp32 to fp16 replacements)
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -13,7 +13,7 @@ using namespace InferenceEngine::details;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> precisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
std::vector<ReluTestValues> testValues = {
|
||||
|
@ -11,8 +11,8 @@ using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -15,7 +15,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -13,6 +13,7 @@ using namespace ngraph::pass::low_precision;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
|
||||
|
@ -14,7 +14,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -13,6 +13,7 @@ using namespace ngraph::pass::low_precision;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -13,6 +13,7 @@ using namespace ngraph::pass::low_precision;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -12,7 +12,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> precisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<TransposeTransformationTestValues> testValues = {
|
||||
|
@ -13,6 +13,7 @@ using namespace ngraph::pass::low_precision;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
|
||||
|
@ -15,7 +15,7 @@ using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
// ngraph::element::f16
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
|
@ -49,8 +49,8 @@ inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeWithNotOpti
|
||||
|
||||
// ngraph::builder::subgraph::FakeQuantizeOnData
|
||||
typedef std::tuple<
|
||||
InferenceEngine::Precision,
|
||||
InferenceEngine::SizeVector,
|
||||
ngraph::element::Type,
|
||||
ngraph::Shape,
|
||||
std::string,
|
||||
ngraph::pass::low_precision::LayerTransformation::Params,
|
||||
FakeQuantizeWithNotOptimalTransformationTestValues> FakeQuantizeTransformationParams;
|
||||
|
@ -59,7 +59,7 @@ void ConvolutionQDqTransformation::Run() {
|
||||
LayerTestsCommon::Run();
|
||||
|
||||
const auto params = std::get<4>(GetParam());
|
||||
const auto actualType = getRuntimePrecision(params.layerName);
|
||||
const auto actualType = getRuntimePrecisionByType(params.layerName);
|
||||
EXPECT_EQ(actualType, params.expectedKernelType);
|
||||
}
|
||||
|
||||
|
@ -58,8 +58,12 @@ void ConvolutionTransformation::Run() {
|
||||
LayerTestsCommon::Run();
|
||||
|
||||
const auto params = std::get<4>(GetParam());
|
||||
const auto actualType = getRuntimePrecision(params.layerName);
|
||||
EXPECT_EQ(actualType, params.expectedKernelType);
|
||||
const auto actualPrecision = getRuntimePrecisionByType(params.layerName);
|
||||
auto expectedPrecision = params.expectedKernelType;
|
||||
if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) {
|
||||
expectedPrecision = "FP16";
|
||||
}
|
||||
EXPECT_EQ(actualPrecision, expectedPrecision);
|
||||
}
|
||||
|
||||
void ConvolutionTransformation::validate() {
|
||||
|
@ -16,8 +16,8 @@
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
std::string FakeQuantizeWithNotOptimalTransformation::getTestCaseName(testing::TestParamInfo<FakeQuantizeTransformationParams> obj) {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
InferenceEngine::SizeVector inputShapes;
|
||||
ngraph::element::Type netPrecision;
|
||||
ngraph::Shape inputShapes;
|
||||
std::string targetDevice;
|
||||
ngraph::pass::low_precision::LayerTransformation::Params params;
|
||||
FakeQuantizeWithNotOptimalTransformationTestValues testValues;
|
||||
@ -29,14 +29,14 @@ std::string FakeQuantizeWithNotOptimalTransformation::getTestCaseName(testing::T
|
||||
}
|
||||
|
||||
void FakeQuantizeWithNotOptimalTransformation::SetUp() {
|
||||
InferenceEngine::SizeVector inputShape;
|
||||
InferenceEngine::Precision netPrecision;
|
||||
ngraph::Shape inputShape;
|
||||
ngraph::element::Type netPrecision;
|
||||
ngraph::pass::low_precision::LayerTransformation::Params params;
|
||||
FakeQuantizeWithNotOptimalTransformationTestValues testValues;
|
||||
std::tie(netPrecision, inputShape, targetDevice, params, testValues) = this->GetParam();
|
||||
|
||||
function = ngraph::builder::subgraph::FakeQuantizeAndConvolutionFunction::get(
|
||||
FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision),
|
||||
netPrecision,
|
||||
inputShape,
|
||||
testValues.fqOnData,
|
||||
testValues.convertOnData,
|
||||
@ -52,7 +52,7 @@ void FakeQuantizeWithNotOptimalTransformation::Run() {
|
||||
LayerTestsCommon::Run();
|
||||
|
||||
const auto params = std::get<4>(GetParam());
|
||||
const auto actualType = getRuntimePrecision("output_original");
|
||||
const auto actualType = getRuntimePrecisionByType("Convolution");
|
||||
EXPECT_EQ(actualType, params.expectedPrecision);
|
||||
}
|
||||
|
||||
|
@ -96,9 +96,12 @@ void MatMulWithConstantTransformation::Run() {
|
||||
LayerTestsCommon::Run();
|
||||
|
||||
const auto params = std::get<2>(GetParam());
|
||||
const auto actualType = getRuntimePrecision(params.layerName);
|
||||
|
||||
EXPECT_EQ(actualType, params.expectedKernelType);
|
||||
const auto actualPrecision = getRuntimePrecisionByType(params.layerName);
|
||||
auto expectedPrecision = params.expectedKernelType;
|
||||
if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) {
|
||||
expectedPrecision = "FP16";
|
||||
}
|
||||
EXPECT_EQ(actualPrecision, expectedPrecision);
|
||||
}
|
||||
|
||||
TEST_P(MatMulWithConstantTransformation, CompareWithRefImpl) {
|
||||
|
@ -83,10 +83,10 @@ void MultiplyTransformation::validate() {
|
||||
const auto mul = output->get_input_node_shared_ptr(0);
|
||||
const std::string typeName = mul->get_type_name();
|
||||
ASSERT_EQ("Eltwise", typeName);
|
||||
|
||||
const bool notTransformed = param.expectedPrecisions[0] == param.expectedPrecisions[1];
|
||||
for (size_t i = 0; i < param.expectedPrecisions.size(); ++i) {
|
||||
const auto curPrecision = mul->get_input_element_type(i);
|
||||
const auto expectedPrecision = param.expectedPrecisions[i];
|
||||
const auto expectedPrecision = notTransformed ? precision : param.expectedPrecisions[i];
|
||||
ASSERT_EQ(curPrecision, expectedPrecision);
|
||||
}
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ void NormalizeL2Transformation::validate() {
|
||||
ASSERT_EQ("NormalizeIE", typeName);
|
||||
|
||||
const auto inputPrecision = normalize->get_input_element_type(0);
|
||||
const auto expectedPrecision = shift ? ngraph::element::f32 : ngraph::element::u8;
|
||||
const auto expectedPrecision = shift ? precision : ngraph::element::u8;
|
||||
ASSERT_EQ(inputPrecision, expectedPrecision);
|
||||
}
|
||||
|
||||
|
@ -79,6 +79,7 @@ public:
|
||||
std::map<std::string, std::string>& GetConfiguration();
|
||||
|
||||
std::string getRuntimePrecision(const std::string& layerName);
|
||||
std::string getRuntimePrecisionByType(const std::string& layerType);
|
||||
|
||||
template<class T>
|
||||
static void Compare(const T *expected, const T *actual, std::size_t size, T threshold) {
|
||||
|
@ -358,6 +358,30 @@ std::string LayerTestsCommon::getRuntimePrecision(const std::string& layerName)
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string LayerTestsCommon::getRuntimePrecisionByType(const std::string& layerType) {
|
||||
const auto execGraph = executableNetwork.GetExecGraphInfo();
|
||||
const auto function = execGraph.getFunction();
|
||||
|
||||
for (const auto& op : function->get_ops()) {
|
||||
const auto& rtInfo = op->get_rt_info();
|
||||
const auto& typeIt = rtInfo.find("layerType");
|
||||
|
||||
IE_ASSERT(typeIt != rtInfo.end()) << "Layer is not found for type: " << layerType;
|
||||
|
||||
const auto type = ngraph::as_type_ptr<ngraph::VariantWrapper<std::string>>(typeIt->second)->get();
|
||||
if (type == layerType) {
|
||||
const auto& it = rtInfo.find("runtimePrecision");
|
||||
|
||||
IE_ASSERT(it != rtInfo.end()) << "Runtime precision is not found for node: " << type;
|
||||
|
||||
const auto rtPrecisionPtr = ngraph::as_type_ptr<ngraph::VariantWrapper<std::string>>(it->second);
|
||||
return rtPrecisionPtr->get();
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
/// Stores the reference mode to be used by this test instance.
///
/// @param mode  new value assigned to `refMode`.
void LayerTestsCommon::SetRefMode(RefMode mode) {
    refMode = mode;
}
|
||||
|
@ -109,6 +109,7 @@ public:
|
||||
// Equality comparison; the actual check is delegated to equal().
bool operator==(const DequantizationOperations& value) const noexcept {
    const bool same = equal(value);
    return same;
}
|
||||
// Assigns type to the output precision of convert, subtract and multiply,
// and to the constant precision of subtract and multiply.
void setPrecision(const ngraph::element::Type& type) noexcept;
|
||||
|
||||
Convert convert;
|
||||
Subtract subtract;
|
||||
|
@ -185,6 +185,14 @@ DequantizationOperations::DequantizationOperations(
|
||||
multiply(multiply)
|
||||
{}
|
||||
|
||||
// Overwrites every precision field of the dequantization description with the given type:
// the output precision of convert, subtract and multiply, plus the constant precision of
// subtract and multiply. No other members are modified.
void DequantizationOperations::setPrecision(const ngraph::element::Type& type) noexcept {
    // Convert: only an output precision is set.
    convert.outPrecision = type;

    // Subtract: output and constant precision.
    subtract.outPrecision = type;
    subtract.constantPrecision = type;

    // Multiply: output and constant precision.
    multiply.outPrecision = type;
    multiply.constantPrecision = type;
}
|
||||
|
||||
bool DequantizationOperations::empty() const noexcept {
|
||||
return convert.empty() && subtract.empty() && multiply.empty();
|
||||
}
|
||||
|
@ -147,7 +147,7 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginalWithIncorrectW
|
||||
fakeQuantizeOnWeights.outputLowValues, fakeQuantizeOnWeights.outputHighValues);
|
||||
|
||||
const auto subtract = isCorrect ? nullptr : std::make_shared<DequantizationSubtract>(fqOnWeights,
|
||||
std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, Shape{ 1, 1, 1, 1 }, 3.0f));
|
||||
std::make_shared<ngraph::opset1::Constant>(precision, Shape{ 1, 1, 1, 1 }, 3.0f));
|
||||
|
||||
const auto convolution = std::make_shared<ngraph::opset1::Convolution>(
|
||||
fakeQuantizeOnData.empty() ? input : fqOnData,
|
||||
|
@ -54,8 +54,9 @@ std::shared_ptr<ngraph::Function> FuseConvertFunction::getWithFQ(
|
||||
ngraph::Shape(inputShape));
|
||||
parent = input1;
|
||||
}
|
||||
|
||||
const std::shared_ptr<Node> dequantizationOp = makeDequantization(parent, dequantization);
|
||||
auto deqStructure = dequantization;
|
||||
deqStructure.multiply.outPrecision = inputPrecision;
|
||||
const std::shared_ptr<Node> dequantizationOp = makeDequantization(parent, deqStructure);
|
||||
|
||||
std::shared_ptr<op::Parameter> input2 = std::make_shared<ngraph::opset1::Parameter>(
|
||||
inputPrecision,
|
||||
@ -68,7 +69,7 @@ std::shared_ptr<ngraph::Function> FuseConvertFunction::getWithFQ(
|
||||
// just some non-transparent layer
|
||||
const auto power = std::make_shared<opset1::Power>(
|
||||
fakeQuantizeOnActivations,
|
||||
std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{2.f}));
|
||||
std::make_shared<opset1::Constant>(inputPrecision, Shape{}, std::vector<float>{2.f}));
|
||||
|
||||
const auto add = std::make_shared<opset1::Add>(
|
||||
dequantizationOp,
|
||||
|
@ -70,7 +70,7 @@ std::shared_ptr<Node> createWeightsOriginal(
|
||||
ngraph::opset1::Constant::create(
|
||||
element::i64,
|
||||
Shape{ 5 },
|
||||
std::vector<size_t>({ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, 7, 7 })),
|
||||
std::vector<size_t>({ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, kernelSize, kernelSize })),
|
||||
true);
|
||||
}
|
||||
|
||||
@ -146,7 +146,7 @@ std::shared_ptr<ngraph::Function> GroupConvolutionFunction::getOriginal(
|
||||
// TODO: pass as argument
|
||||
//const size_t groupCount = 3ul;
|
||||
const size_t outputChannelsCount = outputShape[1];
|
||||
const size_t kernelSize = 7ul;
|
||||
const size_t kernelSize = 5ul;
|
||||
const size_t inputChannelsCount = inputShape[1];
|
||||
|
||||
std::vector<float> weightsValues = { 1.f };
|
||||
|
@ -313,12 +313,15 @@ std::shared_ptr<ngraph::Function> MatMulFunction::getOriginal(
|
||||
const auto dequantizationOnData = makeFakeQuantize(input, precision, fqOnData);
|
||||
|
||||
const std::shared_ptr<ngraph::Node> weightsConst = std::make_shared<ngraph::opset1::Constant>(
|
||||
weights.outPrecision,
|
||||
weights.outPrecision.is_real() ? precision : weights.outPrecision,
|
||||
weights.shape,
|
||||
weights.values);
|
||||
|
||||
const std::shared_ptr<ngraph::Node> fakeQuantize = fqOnWeights.empty() ? nullptr : makeFakeQuantize(weightsConst, precision, fqOnWeights);
|
||||
const auto dequantizationOnWeights = makeDequantization(fakeQuantize == nullptr ? weightsConst : fakeQuantize, deqOnWeights);
|
||||
|
||||
auto deqStructure = deqOnWeights;
|
||||
deqStructure.setPrecision(precision);
|
||||
const auto dequantizationOnWeights = makeDequantization(fakeQuantize == nullptr ? weightsConst : fakeQuantize, deqStructure);
|
||||
|
||||
const std::shared_ptr<ngraph::opset1::MatMul> matMul = std::make_shared<ngraph::opset1::MatMul>(
|
||||
dequantizationOnData,
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include <ngraph_functions/utils/ngraph_helpers.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
@ -61,12 +62,36 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template<ngraph::element::Type_t from, ngraph::element::Type_t to>
|
||||
class ConvertConvertLayerOutputPrecision : public MatcherPass {
|
||||
public:
|
||||
ConvertConvertLayerOutputPrecision() {
|
||||
auto convert = ngraph::pattern::wrap_type<opset1::Convert>();
|
||||
ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) {
|
||||
auto convert = std::dynamic_pointer_cast<ngraph::op::Convert>(m.get_match_root());
|
||||
if (!convert) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (convert->get_convert_element_type() == ngraph::element::Type(from)) {
|
||||
convert->set_convert_element_type(to);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(convert, "ConvertConvertLayerPrecision");
|
||||
register_matcher(m, callback);
|
||||
}
|
||||
};
|
||||
|
||||
// Graph rewrite that bundles the precision-conversion matchers for one from -> to pair.
// The constructor registers, in this order, ConvertConstantsPrecision,
// ConvertParametersPrecision and ConvertConvertLayerOutputPrecision, each instantiated
// with the same <from, to> template arguments.
template<ngraph::element::Type_t from, ngraph::element::Type_t to>
|
||||
class ConvertPrecision : public ngraph::pass::GraphRewrite {
|
||||
public:
|
||||
ConvertPrecision() {
|
||||
add_matcher<ConvertConstantsPrecision<from, to>>();
|
||||
add_matcher<ConvertParametersPrecision<from, to>>();
|
||||
add_matcher<ConvertConvertLayerOutputPrecision<from, to>>();
|
||||
}
|
||||
};
|
||||
} // namespace pass
|
||||
|
Loading…
Reference in New Issue
Block a user