[CPU] Extend AMX deconv int8 to support oscale+eltwise+eltwise post ops pattern (#14026)
* Extend AMX deconv int8 to support oscale+eltwise+eltwise post ops pattern. * Add deconv AMX test cases. * Add multi-eltwise test on deconv AMX int8. * Applied review comments. * Update onednn submodule.
This commit is contained in:
@@ -297,12 +297,20 @@ const std::vector<std::vector<ptrdiff_t>> padBegins2d = { {0, 0} };
|
||||
// End-padding and dilation value lists for the 2D parameter sets; each list holds a single entry.
const std::vector<std::vector<ptrdiff_t>> padEnds2d = { {0, 0} };
|
||||
const std::vector<InferenceEngine::SizeVector> dilations2d = { {1, 1} };
|
||||
|
||||
|
||||
// Kernel sizes and strides used by the AMX-specific 2D parameter set
// (consumed by convParams_ExplicitPadding_AMX_2D): 3x3 and 2x2 kernels, stride 2x2 only.
const std::vector<InferenceEngine::SizeVector> deconvAmxKernels2d = { {3, 3}, {2, 2}};
|
||||
const std::vector<InferenceEngine::SizeVector> deconvAmxStrides2d = { {2, 2}};
|
||||
|
||||
/* ============= Deconvolution params (3D) ============= */
|
||||
// Kernel, stride, padding and dilation value lists for the 3D parameter sets.
const std::vector<InferenceEngine::SizeVector> kernels3d = { {3, 3, 3}, {1, 1, 1} };
|
||||
const std::vector<InferenceEngine::SizeVector> strides3d = { {1, 1, 1}, {2, 2, 2} };
|
||||
const std::vector<std::vector<ptrdiff_t>> padBegins3d = { {0, 0, 0} };
|
||||
const std::vector<std::vector<ptrdiff_t>> padEnds3d = { {0, 0, 0} };
|
||||
const std::vector<InferenceEngine::SizeVector> dilations3d = { {1, 1, 1} };
|
||||
|
||||
// Kernel sizes and strides used by the AMX-specific 3D parameter set
// (consumed by convParams_ExplicitPadding_AMX_3D): 3x3x3 and 2x2x2 kernels, stride 2x2x2 only.
const std::vector<InferenceEngine::SizeVector> deconvAmxKernels3d = { {3, 3, 3}, {2, 2, 2} };
|
||||
const std::vector<InferenceEngine::SizeVector> deconvAmxStrides3d = { {2, 2, 2} };
|
||||
|
||||
/* ============= */
|
||||
|
||||
/* INSTANCES */
|
||||
@@ -488,6 +496,7 @@ const std::vector<DeconvInputData> Blocked_2D_inputs_smoke = {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
const auto convParams_ExplicitPadding_Blocked_2D_nightly = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels2d),
|
||||
// Using 7x7 with stride 1 is too small to generate 15x15 output. It needs a big negative pad which will result
|
||||
@@ -530,6 +539,17 @@ const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine(
|
||||
::testing::ValuesIn(emptyOutputPadding)
|
||||
);
|
||||
|
||||
// Parameter generator for the 2D AMX deconvolution test instances.
// Cross product of: deconvAmxKernels2d x deconvAmxStrides2d x padBegins2d x padEnds2d x
// dilations2d x numOutChannels_Blocked x explicit pad type x emptyOutputPadding.
// The argument order is positional and must match what the test's parameter tuple expects.
const auto convParams_ExplicitPadding_AMX_2D = ::testing::Combine(
|
||||
::testing::ValuesIn(deconvAmxKernels2d),
|
||||
::testing::ValuesIn(deconvAmxStrides2d),
|
||||
::testing::ValuesIn(padBegins2d),
|
||||
::testing::ValuesIn(padEnds2d),
|
||||
::testing::ValuesIn(dilations2d),
|
||||
::testing::ValuesIn(numOutChannels_Blocked),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT),
|
||||
::testing::ValuesIn(emptyOutputPadding)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
convParams_ExplicitPadding_Blocked_2D,
|
||||
@@ -550,6 +570,26 @@ INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Blocked_BF16, DeconvolutionLayerCPUTest
|
||||
::testing::Values(cpuBF16PluginConfig)),
|
||||
DeconvolutionLayerCPUTest::getTestCaseName);
|
||||
|
||||
// Instantiates DeconvolutionLayerCPUTest for the 2D AMX parameter set without any fused post-ops:
// convParams_ExplicitPadding_AMX_2D x Blocked_2D_inputs_smoke, element type f32, emptyFusingSpec only,
// CPU configs obtained from filterCPUInfoForDevice({conv_avx512_2D_nspc_amx}), run with cpuBF16PluginConfig.
// NOTE(review): the element type is f32 while the suite name says BF16; presumably BF16 execution is
// requested through cpuBF16PluginConfig — confirm against that config's definition.
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_NSPC_BF16_AMX_NO_FUSING, DeconvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
convParams_ExplicitPadding_AMX_2D,
|
||||
::testing::ValuesIn(Blocked_2D_inputs_smoke),
|
||||
::testing::Values(ElementType::f32),
|
||||
::testing::ValuesIn({emptyFusingSpec}),
|
||||
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_nspc_amx})),
|
||||
::testing::Values(cpuBF16PluginConfig)),
|
||||
DeconvolutionLayerCPUTest::getTestCaseName);
|
||||
|
||||
// Instantiates DeconvolutionLayerCPUTest for the 2D AMX parameter set with i8 element type:
// convParams_ExplicitPadding_AMX_2D x Blocked_2D_inputs_smoke, run once without fusing (emptyFusingSpec)
// and once with the fusingClampRoundAddRelu post-op chain, on the CPU configs obtained from
// filterCPUInfoForDevice({conv_avx512_2D_nspc_amx}), using cpuEmptyPluginConfig.
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_NSPC_INT8_AMX, DeconvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
convParams_ExplicitPadding_AMX_2D,
|
||||
::testing::ValuesIn(Blocked_2D_inputs_smoke),
|
||||
::testing::Values(ElementType::i8),
|
||||
::testing::ValuesIn({emptyFusingSpec, fusingClampRoundAddRelu}),
|
||||
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_nspc_amx})),
|
||||
::testing::Values(cpuEmptyPluginConfig)),
|
||||
DeconvolutionLayerCPUTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
convParams_ExplicitPadding_Blocked_2D_nightly,
|
||||
@@ -624,6 +664,17 @@ const auto convParams_ExplicitPadding_Blocked_3D = ::testing::Combine(
|
||||
::testing::ValuesIn(emptyOutputPadding)
|
||||
);
|
||||
|
||||
// Parameter generator for the 3D AMX deconvolution test instances.
// Cross product of: deconvAmxKernels3d x deconvAmxStrides3d x padBegins3d x padEnds3d x
// dilations3d x a single literal 32 x explicit pad type x emptyOutputPadding.
// The argument order is positional and must match what the test's parameter tuple expects.
const auto convParams_ExplicitPadding_AMX_3D = ::testing::Combine(
|
||||
::testing::ValuesIn(deconvAmxKernels3d),
|
||||
::testing::ValuesIn(deconvAmxStrides3d),
|
||||
::testing::ValuesIn(padBegins3d),
|
||||
::testing::ValuesIn(padEnds3d),
|
||||
::testing::ValuesIn(dilations3d),
|
||||
// NOTE(review): convParams_ExplicitPadding_AMX_2D fills this slot from numOutChannels_Blocked;
// here it is a single hard-coded value — confirm the narrower coverage is intentional.
::testing::Values(32),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT),
|
||||
::testing::ValuesIn(emptyOutputPadding)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
convParams_ExplicitPadding_Blocked_3D,
|
||||
@@ -644,6 +695,27 @@ INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Blocked_BF16, DeconvolutionLayerCPUTest
|
||||
::testing::Values(cpuBF16PluginConfig)),
|
||||
DeconvolutionLayerCPUTest::getTestCaseName);
|
||||
|
||||
|
||||
// Instantiates DeconvolutionLayerCPUTest for the 3D AMX parameter set without any fused post-ops:
// convParams_ExplicitPadding_AMX_3D x Blocked_3D_inputs_smoke, element type f32, emptyFusingSpec only,
// CPU configs obtained from filterCPUInfoForDevice({conv_avx512_3D_nspc_amx}), run with cpuBF16PluginConfig.
// NOTE(review): the element type is f32 while the suite name says BF16; presumably BF16 execution is
// requested through cpuBF16PluginConfig — confirm against that config's definition.
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_NSPC_BF16_AMX_NO_FUSING, DeconvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
convParams_ExplicitPadding_AMX_3D,
|
||||
::testing::ValuesIn(Blocked_3D_inputs_smoke),
|
||||
::testing::Values(ElementType::f32),
|
||||
::testing::ValuesIn({emptyFusingSpec}),
|
||||
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D_nspc_amx})),
|
||||
::testing::Values(cpuBF16PluginConfig)),
|
||||
DeconvolutionLayerCPUTest::getTestCaseName);
|
||||
|
||||
// Instantiates DeconvolutionLayerCPUTest for the 3D AMX parameter set with i8 element type:
// convParams_ExplicitPadding_AMX_3D x Blocked_3D_inputs_smoke, run once without fusing (emptyFusingSpec)
// and once with the fusingClampRoundAddRelu post-op chain, on the CPU configs obtained from
// filterCPUInfoForDevice({conv_avx512_3D_nspc_amx}), using cpuEmptyPluginConfig.
INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_NSPC_INT8_AMX, DeconvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
convParams_ExplicitPadding_AMX_3D,
|
||||
::testing::ValuesIn(Blocked_3D_inputs_smoke),
|
||||
::testing::Values(ElementType::i8),
|
||||
::testing::ValuesIn({emptyFusingSpec, fusingClampRoundAddRelu}),
|
||||
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D_nspc_amx})),
|
||||
::testing::Values(cpuEmptyPluginConfig)),
|
||||
DeconvolutionLayerCPUTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
convParams_ExplicitPadding_Blocked_3D_nightly,
|
||||
|
||||
@@ -244,6 +244,22 @@ const auto fusingScaleShift = fusingSpecificParams{ std::make_shared<postNodesMg
|
||||
return std::make_shared<ngraph::opset1::Add>(cfg.input, constNode);
|
||||
}, "Add(PerChannel)"}}), {"Add"} };
|
||||
|
||||
// Fusing spec that appends four post nodes, in order, to the tested node's output:
//   Clamp -> RoundHalfToEven -> Add (constant operand) -> Relu.
// The trailing string list names the operations expected to be fused.
const auto fusingClampRoundAddRelu = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
        // 1) Clamp activation built with the constant arguments {3.0f, 6.0f}
        //    (presumably the lower/upper clamp bounds — confirm against makeActivation).
        {[](postNodeConfig& cfg) {
            return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Clamp, {}, {3.0f, 6.0f});
        }, "Clamp"},
        // 2) Round-half-to-even activation.
        {[](postNodeConfig& cfg) {
            return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::RoundHalfToEven);
        }, "RoundHalfToEven"},
        // 3) Add with a generated constant operand.
        {[](postNodeConfig& cfg) {
            // Parenthesised (1, 1) is intentional and must not become {1, 1}: presumably this is the
            // vector-style (count, value) constructor, i.e. one dimension of extent 1 — confirm
            // against ngraph::Shape.
            ngraph::Shape perTensorShape(1, 1);
            // Empty data vector plus `true`: presumably asks the builder to generate the values — confirm.
            auto addend = ngraph::builder::makeConstant(cfg.type, perTensorShape, std::vector<float>{}, true);
            return std::make_shared<ngraph::opset1::Add>(cfg.input, addend);
        }, "AddPerTensor"},
        // 4) Relu activation.
        {[](postNodeConfig& cfg) {
            return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Relu);
        }, "Relu"}}), {"Clamp", "Round", "Add", "Relu"}};
|
||||
|
||||
const auto fusingScaleShiftAndFakeQuantizePerChannel = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
|
||||
{[](postNodeConfig& cfg) {
|
||||
ngraph::Shape newShape = generatePerChannelShape(cfg.target);
|
||||
|
||||
2
src/plugins/intel_cpu/thirdparty/onednn
vendored
2
src/plugins/intel_cpu/thirdparty/onednn
vendored
Submodule src/plugins/intel_cpu/thirdparty/onednn updated: 70419193f0...fb24b408cc
Reference in New Issue
Block a user