[CPU] Extend AMX deconv int8 to support oscale+eltwise+eltwise post ops pattern (#14026)

* Extend AMX deconv int8 to support oscale+eltwise+eltwise post ops pattern.

* Add deconv AMX test cases.

* Add multi-eltwise test on deconv AMX int8.

* Applied review comments.

* Update onednn submodule.
This commit is contained in:
Luwei Zhou
2022-12-06 22:32:02 +08:00
committed by GitHub
parent 3b86f12c37
commit 79f04a704c
3 changed files with 89 additions and 1 deletion

View File

@@ -297,12 +297,20 @@ const std::vector<std::vector<ptrdiff_t>> padBegins2d = { {0, 0} };
const std::vector<std::vector<ptrdiff_t>> padEnds2d = { {0, 0} };
const std::vector<InferenceEngine::SizeVector> dilations2d = { {1, 1} };
const std::vector<InferenceEngine::SizeVector> deconvAmxKernels2d = { {3, 3}, {2, 2}};
const std::vector<InferenceEngine::SizeVector> deconvAmxStrides2d = { {2, 2}};
/* ============= Deconvolution params (3D) ============= */
const std::vector<InferenceEngine::SizeVector> kernels3d = { {3, 3, 3}, {1, 1, 1} };
const std::vector<InferenceEngine::SizeVector> strides3d = { {1, 1, 1}, {2, 2, 2} };
const std::vector<std::vector<ptrdiff_t>> padBegins3d = { {0, 0, 0} };
const std::vector<std::vector<ptrdiff_t>> padEnds3d = { {0, 0, 0} };
const std::vector<InferenceEngine::SizeVector> dilations3d = { {1, 1, 1} };
const std::vector<InferenceEngine::SizeVector> deconvAmxKernels3d = { {3, 3, 3}, {2, 2, 2} };
const std::vector<InferenceEngine::SizeVector> deconvAmxStrides3d = { {2, 2, 2} };
/* ============= */
/* INSTANCES */
@@ -488,6 +496,7 @@ const std::vector<DeconvInputData> Blocked_2D_inputs_smoke = {
}
};
const auto convParams_ExplicitPadding_Blocked_2D_nightly = ::testing::Combine(
::testing::ValuesIn(kernels2d),
// Using 7x7 with stride 1 is too small to generate a 15x15 output. It needs a big negative pad which will result
@@ -530,6 +539,17 @@ const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine(
::testing::ValuesIn(emptyOutputPadding)
);
const auto convParams_ExplicitPadding_AMX_2D = ::testing::Combine(
::testing::ValuesIn(deconvAmxKernels2d),
::testing::ValuesIn(deconvAmxStrides2d),
::testing::ValuesIn(padBegins2d),
::testing::ValuesIn(padEnds2d),
::testing::ValuesIn(dilations2d),
::testing::ValuesIn(numOutChannels_Blocked),
::testing::Values(ngraph::op::PadType::EXPLICIT),
::testing::ValuesIn(emptyOutputPadding)
);
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest,
::testing::Combine(
convParams_ExplicitPadding_Blocked_2D,
@@ -550,6 +570,26 @@ INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Blocked_BF16, DeconvolutionLayerCPUTest
::testing::Values(cpuBF16PluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_NSPC_BF16_AMX_NO_FUSING, DeconvolutionLayerCPUTest,
::testing::Combine(
convParams_ExplicitPadding_AMX_2D,
::testing::ValuesIn(Blocked_2D_inputs_smoke),
::testing::Values(ElementType::f32),
::testing::ValuesIn({emptyFusingSpec}),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_nspc_amx})),
::testing::Values(cpuBF16PluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_NSPC_INT8_AMX, DeconvolutionLayerCPUTest,
::testing::Combine(
convParams_ExplicitPadding_AMX_2D,
::testing::ValuesIn(Blocked_2D_inputs_smoke),
::testing::Values(ElementType::i8),
::testing::ValuesIn({emptyFusingSpec, fusingClampRoundAddRelu}),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_nspc_amx})),
::testing::Values(cpuEmptyPluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest,
::testing::Combine(
convParams_ExplicitPadding_Blocked_2D_nightly,
@@ -624,6 +664,17 @@ const auto convParams_ExplicitPadding_Blocked_3D = ::testing::Combine(
::testing::ValuesIn(emptyOutputPadding)
);
const auto convParams_ExplicitPadding_AMX_3D = ::testing::Combine(
::testing::ValuesIn(deconvAmxKernels3d),
::testing::ValuesIn(deconvAmxStrides3d),
::testing::ValuesIn(padBegins3d),
::testing::ValuesIn(padEnds3d),
::testing::ValuesIn(dilations3d),
::testing::Values(32),
::testing::Values(ngraph::op::PadType::EXPLICIT),
::testing::ValuesIn(emptyOutputPadding)
);
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest,
::testing::Combine(
convParams_ExplicitPadding_Blocked_3D,
@@ -644,6 +695,27 @@ INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Blocked_BF16, DeconvolutionLayerCPUTest
::testing::Values(cpuBF16PluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_NSPC_BF16_AMX_NO_FUSING, DeconvolutionLayerCPUTest,
::testing::Combine(
convParams_ExplicitPadding_AMX_3D,
::testing::ValuesIn(Blocked_3D_inputs_smoke),
::testing::Values(ElementType::f32),
::testing::ValuesIn({emptyFusingSpec}),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D_nspc_amx})),
::testing::Values(cpuBF16PluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_NSPC_INT8_AMX, DeconvolutionLayerCPUTest,
::testing::Combine(
convParams_ExplicitPadding_AMX_3D,
::testing::ValuesIn(Blocked_3D_inputs_smoke),
::testing::Values(ElementType::i8),
::testing::ValuesIn({emptyFusingSpec, fusingClampRoundAddRelu}),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D_nspc_amx})),
::testing::Values(cpuEmptyPluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest,
::testing::Combine(
convParams_ExplicitPadding_Blocked_3D_nightly,

View File

@@ -244,6 +244,22 @@ const auto fusingScaleShift = fusingSpecificParams{ std::make_shared<postNodesMg
return std::make_shared<ngraph::opset1::Add>(cfg.input, constNode);
}, "Add(PerChannel)"}}), {"Add"} };
const auto fusingClampRoundAddRelu = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](postNodeConfig& cfg){
return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Clamp, {}, {3.0f, 6.0f});
}, "Clamp"},
{[](postNodeConfig& cfg){
return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::RoundHalfToEven);
}, "RoundHalfToEven"},
{[](postNodeConfig& cfg){
ngraph::Shape secondMultInShape(1, 1);
auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Add>(cfg.input, secondMultInput);
}, "AddPerTensor"},
{[](postNodeConfig& cfg){
return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Relu);
}, "Relu"}}), {"Clamp", "Round", "Add", "Relu"}};
const auto fusingScaleShiftAndFakeQuantizePerChannel = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](postNodeConfig& cfg) {
ngraph::Shape newShape = generatePerChannelShape(cfg.target);