[FIX][CPU] Convert CF: added check on the number of Convert children (#19022)

* [FIX][CPU] Convert CF: added check on the number of Convert children

* code style fix

* WIP: [CPU][TESTS] Convert CF: Added subgraph test with two outputs for Convert node

* MatMulDecompressConvertTest refactoring + instances fixes

* removed KeepConstAndDecompressionForMatMul pass and added cpu callback for KeepConstAndDecompression

* MatMulDecompressConvertTest2: added graphs and small problem description

* small review fix
This commit is contained in:
Anton Voronov 2023-08-11 12:10:00 +04:00 committed by GitHub
parent 85609d4881
commit 2a3132941c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 208 additions and 97 deletions

View File

@ -14,7 +14,6 @@ namespace pass {
class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding;
class TRANSFORMATIONS_API DisableDecompressionConvertConstantFolding;
class TRANSFORMATIONS_API KeepConstAndDecompression;
class TRANSFORMATIONS_API KeepConstAndDecompressionForMatMul;
} // namespace pass
} // namespace ov
@ -48,14 +47,3 @@ public:
OPENVINO_RTTI("KeepConstAndDecompression", "0");
KeepConstAndDecompression();
};
/**
* @ingroup ie_transformation_common_api
* @brief Disables ConstantFolding for Convert operation (just before MatMul operation only) and prevents conversion
* of f16 Consts to f32.
*/
// Matcher pass declared below: disables ConstantFolding for a decompression
// Convert feeding a MatMul and keeps its f16 constant from being converted to
// f32 (see the @brief above).
// NOTE(review): this pass is removed by this commit in favor of a CPU-plugin
// callback on KeepConstAndDecompression (see the Transformations::PreLpt hunk).
class ov::pass::KeepConstAndDecompressionForMatMul : public MatcherPass {
public:
OPENVINO_RTTI("KeepConstAndDecompressionForMatMul", "0");
KeepConstAndDecompressionForMatMul();
};

View File

@ -59,6 +59,10 @@ pass::KeepConstAndDecompression::KeepConstAndDecompression() {
ov::is_shape_subgraph(node->shared_from_this()))
return false;
if (transformation_callback(node)) {
return false;
}
disable_constant_folding(node);
if (!is_type<ov::op::v0::Constant>(node->input_value(0).get_node_shared_ptr()))
@ -70,28 +74,3 @@ pass::KeepConstAndDecompression::KeepConstAndDecompression() {
auto m = std::make_shared<pattern::Matcher>(node_pattern, matcher_name);
register_matcher(m, callback);
}
// Matches any MatMul and, when its second input is a decompression Convert fed
// by a Constant, annotates that subgraph: the Convert is excluded from
// ConstantFolding and the constant is marked to stay in fp16.
// The callback always returns false — the pass only attaches runtime-info
// attributes and never mutates the graph structure.
pass::KeepConstAndDecompressionForMatMul::KeepConstAndDecompressionForMatMul() {
MATCHER_SCOPE(KeepConstAndDecompressionForMatMul);
auto matmul = pass::pattern::wrap_type<ov::op::v0::MatMul>();
matcher_pass_callback callback = [=](pass::pattern::Matcher& m) {
auto node = m.get_match_root();
// input to matmul is decompression Convert
const auto& inp_convert = node->input_value(1).get_node_shared_ptr();
if (!is_type<ov::op::v0::Convert>(inp_convert) || !is_decompression(inp_convert))
return false;
// keep the Convert alive so it reaches the plugin graph
disable_constant_folding(inp_convert);
// only a Convert placed directly on a Constant qualifies as weight decompression
if (!is_type<ov::op::v0::Constant>(inp_convert->input_value(0).get_node_shared_ptr()))
return false;
// prevent the f16 weights constant from being up-converted to f32
enable_keep_fp16_const(inp_convert->input_value(0).get_node_shared_ptr());
return false;
};
auto m = std::make_shared<pass::pattern::Matcher>(matmul, matcher_name);
this->register_matcher(m, callback);
}

View File

@ -203,7 +203,14 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
manager.set_per_pass_validation(false);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::InitNodeInfo);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkShapeOfSubgraphs);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompressionForMatMul);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
CPU_SET_CALLBACK_COMMON(manager,
[](const_node_ptr &node) -> bool {
const auto outputs = node->get_output_target_inputs(0);
return outputs.size() != 1 || !is_type<ov::op::v0::MatMul>(outputs.begin()->get_node());
},
ov::pass::KeepConstAndDecompression);
const bool useLpt = !defaultPrecisions.empty();
if (useLpt) {
@ -434,7 +441,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
AUGRUCell node (see AUGRUCellFusion pass). In such cases, some constant paths will be unfolded, which can lead to crashes in the plugin. To avoid this,
we re-mark decompression converts again and finally do CF for those constant paths that are not inputs to MatMul node */
CPU_REGISTER_PASS_COMMON(manager, ov::pass::EnableDecompressionConvertConstantFolding);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompressionForMatMul);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConstantFolding);
manager.run_passes(model);

View File

@ -140,7 +140,7 @@ protected:
std::swap(*(shape.end() - 1), *(shape.end() - 2));
}
void CheckFCWeightsPrecision() const {
void CheckFCWeightsPrecision(ElementType expectedWeiElemType) const {
auto getExecValue = [](const ov::Node::RTMap& rtInfo, const std::string &paramName) -> std::string {
auto it = rtInfo.find(paramName);
IE_ASSERT(rtInfo.end() != it);
@ -153,7 +153,7 @@ protected:
if (getExecValue(fcNode->get_rt_info(), ExecGraphInfoSerialization::LAYER_TYPE) == "FullyConnected") {
const auto &constNode = fcNode->get_input_node_shared_ptr(1);
element::Type expectedType(getExecValue(constNode->get_rt_info(), ExecGraphInfoSerialization::OUTPUT_PRECISIONS));
ASSERT_EQ(expectedType, weiConstElemType);
ASSERT_EQ(expectedType, expectedWeiElemType);
}
}
}
@ -163,6 +163,7 @@ protected:
std::vector<InputShape> inputShapes;
std::pair<bool, bool> transpose;
ElementType weiConstElemType;
std::map<std::string, std::string> additionalConfig;
CPUSpecificParams cpuParams;
@ -174,8 +175,8 @@ protected:
bool transpA = transpose.first;
bool transpB = transpose.second;
if (transpA) transposesCount++;
if (!transpB) transposesCount++;
if (transpA) transposeCount++;
if (!transpB) transposeCount++;
if (transpA) {
transposeShape(inputDynamicShapes[0]);
@ -214,6 +215,8 @@ protected:
inputB = std::make_shared<opset1::Convert>(inputB, convertOutType);
mark_as_decompression(inputB);
}
expectedWeiConstElemType = weiConstElemType;
auto matMul = builder::makeMatMul(paramOuts[0], inputB, transpA, transpB);
function = CPUTestsBase::makeNgraphFunction(netType, params, matMul, cpuNodeType);
@ -221,15 +224,16 @@ protected:
void CheckExecutionGraph() {
CheckPluginRelatedResults(compiledModel, "FullyConnected");
CheckNumberOfNodesWithType(compiledModel, "FullyConnected", 1);
CheckNumberOfNodesWithType(compiledModel, "Transpose", transposesCount);
CheckNumberOfNodesWithType(compiledModel, "FullyConnected", fullyConnectedCount);
CheckNumberOfNodesWithType(compiledModel, "Transpose", transposeCount);
CheckNumberOfNodesWithType(compiledModel, "Convert", 0);
CheckNumberOfNodesWithType(compiledModel, "Reorder", 0);
CheckFCWeightsPrecision();
CheckFCWeightsPrecision(expectedWeiConstElemType);
}
size_t transposesCount = 0;
ElementType weiConstElemType = ElementType::f32;
size_t fullyConnectedCount = 1;
size_t transposeCount = 0;
ElementType expectedWeiConstElemType = ElementType::f32;
};
TEST_P(MatMulDecompressConvertTest, CompareWithRefs) {
@ -273,14 +277,7 @@ const std::vector<std::vector<InputShape>> inputShapes3D = {
},
};
// Returns the plugin configurations to instantiate the tests with.
// When MLAS is compiled in, the non-MLAS runs are skipped entirely, so the
// returned vector is empty in that case; otherwise a single default (empty)
// configuration is used.
std::vector<std::map<std::string, std::string>> filterAdditionalConfig() {
    std::vector<std::map<std::string, std::string>> configs;
#ifndef OV_CPU_WITH_MLAS
    configs.emplace_back();  // default plugin configuration
#endif
    return configs;
}
std::map<std::string, std::string> emptyConfig = {/* empty config */};
std::vector<std::map<std::string, std::string>> filterAdditionalConfig_BF16() {
std::vector<std::map<std::string, std::string>> additionalConfig;
@ -290,23 +287,25 @@ std::vector<std::map<std::string, std::string>> filterAdditionalConfig_BF16() {
return additionalConfig;
}
// MLAS-specific runs need no extra plugin options: a single empty config.
std::vector<std::map<std::string, std::string>> filterAdditionalConfig_MLAS() {
    return {std::map<std::string, std::string>{}};
}
std::vector<CPUSpecificParams> filterSpecificParams() {
std::vector<CPUSpecificParams> filterSpecificParams(bool trySetMlas) {
std::vector<CPUSpecificParams> specificParams;
if (with_cpu_x86_avx512_core()) {
specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx512"}, "brgemm_avx512"});
} else if (with_cpu_x86_avx2()) {
specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx2"}, "brgemm_avx2"});
if (trySetMlas) {
#ifdef OV_CPU_WITH_MLAS
specificParams.push_back(CPUSpecificParams{{}, {}, {"gemm_mlas"}, "gemm_mlas"});
#endif
}
// try set onednn jit params if we can't or shouldn't use mlas
if (specificParams.empty()) {
if (with_cpu_x86_avx512_core()) {
specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx512"}, "brgemm_avx512"});
} else if (with_cpu_x86_avx2()) {
specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx2"}, "brgemm_avx2"});
}
}
return specificParams;
}
std::vector<CPUSpecificParams> filterSpecificParams_BF16() {
std::vector<CPUSpecificParams> specificParams;
specificParams.push_back(CPUSpecificParams{{}, {}, {"jit_gemm"}, "jit_gemm"});
@ -314,34 +313,25 @@ std::vector<CPUSpecificParams> filterSpecificParams_BF16() {
}
std::vector<CPUSpecificParams> filterSpecificParams_MLAS() {
std::vector<CPUSpecificParams> specificParams;
specificParams.push_back(CPUSpecificParams{{}, {}, {"gemm_mlas"}, "gemm_mlas"});
return specificParams;
}
#ifdef OV_CPU_WITH_MLAS
const auto testParams2D_MLAS_smoke = ::testing::Combine(
const auto testParams2D_FP32_smoke = ::testing::Combine(
::testing::ValuesIn(inputShapes2D),
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f32),
::testing::ValuesIn(filterAdditionalConfig_MLAS()),
::testing::ValuesIn(filterSpecificParams_MLAS()));
::testing::Values(emptyConfig),
::testing::ValuesIn(filterSpecificParams(true)));
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_MLAS, MatMulDecompressConvertTest, testParams2D_MLAS_smoke,
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP32, MatMulDecompressConvertTest, testParams2D_FP32_smoke,
MatMulDecompressConvertTest::getTestCaseName);
#endif
const auto testParams2D_smoke = ::testing::Combine(
const auto testParams2D_FP16_smoke = ::testing::Combine(
::testing::ValuesIn(inputShapes2D),
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f32, ElementType::f16),
::testing::ValuesIn(filterAdditionalConfig()),
::testing::ValuesIn(filterSpecificParams()));
::testing::Values(ElementType::f16),
::testing::Values(emptyConfig),
::testing::ValuesIn(filterSpecificParams(false)));
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D, MatMulDecompressConvertTest, testParams2D_smoke,
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP16, MatMulDecompressConvertTest, testParams2D_FP16_smoke,
MatMulDecompressConvertTest::getTestCaseName);
@ -356,27 +346,25 @@ INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_BF16, MatMulDecompressConvertTest, testPara
MatMulDecompressConvertTest::getTestCaseName);
#ifdef OV_CPU_WITH_MLAS
const auto testParams3D_MLAS_smoke = ::testing::Combine(
const auto testParams3D_FP32_smoke = ::testing::Combine(
::testing::ValuesIn(inputShapes3D),
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f32),
::testing::ValuesIn(filterAdditionalConfig_MLAS()),
::testing::ValuesIn(filterSpecificParams_MLAS()));
::testing::Values(emptyConfig),
::testing::ValuesIn(filterSpecificParams(true)));
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_MLAS, MatMulDecompressConvertTest, testParams3D_MLAS_smoke,
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_FP32, MatMulDecompressConvertTest, testParams3D_FP32_smoke,
MatMulDecompressConvertTest::getTestCaseName);
#endif
const auto testParams3D_smoke = ::testing::Combine(
const auto testParams3D_FP16_smoke = ::testing::Combine(
::testing::ValuesIn(inputShapes3D),
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f32, ElementType::f16),
::testing::ValuesIn(filterAdditionalConfig()),
::testing::ValuesIn(filterSpecificParams()));
::testing::Values(ElementType::f16),
::testing::Values(emptyConfig),
::testing::ValuesIn(filterSpecificParams(false)));
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D, MatMulDecompressConvertTest, testParams3D_smoke,
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_FP16, MatMulDecompressConvertTest, testParams3D_FP16_smoke,
MatMulDecompressConvertTest::getTestCaseName);
@ -392,4 +380,153 @@ INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_BF16, MatMulDecompressConvertTest, testPara
} // namespace
/* If a Convert has 2 or more consumers, there is a problem with memory allocation in the CPU plug-in (see Edge::init() method).
Maybe we could just remove the check (edgePtr->getParent()->isConstant() && !edgePtr->getChild()->isConstant()) and everything would be OK,
but that solution needs additional verification. For now, in these cases we do not perform CF on the CPU side; it should be done
on the ngraph side instead.
* Graph before:
------------ ------------ ------------
|Input(f32)| |Input(f16)| |Input(f32)|
------------ ------------ ------------
| | |
| --------------------------------- |
| |Convert(decompression f16->f32)| |
| --------------------------------- |
| | | |
----------------------- -----------------------
| MatMul | | MatMul |
----------------------- -----------------------
| |
---------------------------------
| Concat |
---------------------------------
|
--------
|Output|
--------
* Exec graph:
------------ -------------------------------- ------------
|Input(f32)| | Input(f32) | |Input(f32)|
------------ -------------------------------- ------------
| | | |
----------------------- -----------------------
| MatMul | | MatMul |
----------------------- -----------------------
| |
---------------------------------
| Concat |
---------------------------------
|
--------
|Output|
--------
*/
using MatMulDecompressConvertParams2 = std::tuple<
std::vector<InputShape>, // input shapes
std::pair<bool, bool>, // transposeA, transposeB
ElementType, // weights precision
std::map<std::string, std::string>, // additional config
CPUSpecificParams
>;
// Builds the subgraph from the diagrams above: two MatMuls that SHARE one
// weights path (Constant [-> decompression Convert]) and feed a common Concat.
// The shared Convert has two consumers, so it must be constant-folded on the
// ngraph side — the execution-graph checks below assert exactly that.
class MatMulDecompressConvertTest2 : public MatMulDecompressConvertTest {
protected:
void SetUp() override {
targetDevice = ov::test::utils::DEVICE_CPU;
std::vector<InputShape> inputShapes;
std::pair<bool, bool> transpose;
ElementType weiConstElemType;
std::map<std::string, std::string> additionalConfig;
CPUSpecificParams cpuParams;
std::tie(inputShapes, transpose, weiConstElemType, additionalConfig, cpuParams) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
init_input_shapes(inputShapes);
bool transpA = transpose.first;
bool transpB = transpose.second;
// two MatMuls -> two FullyConnected nodes expected in the exec graph
fullyConnectedCount = 2;
// transposing A affects both data inputs; transposing B is shared weights
if (transpA) transposeCount += 2;
if (!transpB) transposeCount++;
if (transpA) {
transposeShape(inputDynamicShapes[0]);
for (auto& shapes : targetStaticShapes) {
transposeShape(shapes[0]);
}
transposeShape(inputDynamicShapes[1]);
for (auto& shapes : targetStaticShapes) {
transposeShape(shapes[1]);
}
}
if (transpB) {
transposeShape(inputDynamicShapes[2]);
for (auto& shapes : targetStaticShapes) {
transposeShape(shapes[2]);
}
}
// shapes: [0], [1] — data inputs of the two MatMuls; [2] — shared weights
const auto& inShapeFC0 = inputDynamicShapes[0];
const auto& inShapeFC1 = inputDynamicShapes[1];
const auto& inShapeWeights = inputDynamicShapes[2];
configuration.insert(additionalConfig.begin(), additionalConfig.end());
ElementType netType = ElementType::f32;
ElementType convertOutType = ElementType::f32;
// NOTE(review): map::operator[] default-inserts the key when absent; the
// comparison then sees "" != YES, which keeps the f32 path — confirm intended
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
convertOutType = inType = outType = netType = ElementType::bf16;
weiConstElemType = (weiConstElemType != ElementType::f32) ? weiConstElemType : ElementType::bf16;
} else {
inType = outType = netType;
}
std::string cpuNodeType = "FullyConnected";
selectedType = makeSelectedTypeStr(selectedType, outType);
auto params = builder::makeDynamicParams(inType, {inShapeFC0, inShapeFC1});
auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<opset1::Parameter>(params));
std::shared_ptr<Node> inputWeights = builder::makeConstant<float>(weiConstElemType, inShapeWeights.get_shape(), {}, true);
if (weiConstElemType == ElementType::f16) {
// wrap the f16 constant into a decompression Convert shared by both MatMuls
inputWeights = std::make_shared<opset1::Convert>(inputWeights, convertOutType);
mark_as_decompression(inputWeights);
}
// In this test, convert must be folded on the ngraph side, so the constant with fp32 precision is expected
expectedWeiConstElemType = ElementType::f32;
auto matMul0 = builder::makeMatMul(paramOuts[0], inputWeights, transpA, transpB);
auto matMul1 = builder::makeMatMul(paramOuts[1], inputWeights, transpA, transpB);
auto concat = builder::makeConcat({matMul0, matMul1}, 0);
function = CPUTestsBase::makeNgraphFunction(netType, params, concat, cpuNodeType);
}
};
// Runs the two-consumer-Convert subgraph and validates the execution graph
// (node counts and weights precision) via the inherited CheckExecutionGraph().
TEST_P(MatMulDecompressConvertTest2, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
run();
CheckExecutionGraph();
}
namespace {
const auto testParams2D_FP16_2_smoke = ::testing::Combine(
::testing::Values(static_shapes_to_test_representation({{2, 3}, {2, 3}, {3, 4}})),
::testing::Values(std::pair<bool, bool>{false, true}),
::testing::Values(ElementType::f16),
::testing::Values(emptyConfig),
::testing::ValuesIn(filterSpecificParams(true)));
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP16_2, MatMulDecompressConvertTest2, testParams2D_FP16_2_smoke,
MatMulDecompressConvertTest2::getTestCaseName);
} // namespace
} // namespace SubgraphTestsDefinitions