[FIX][CPU] Convert CF: added convert childs number check (#19022)
* [FIX][CPU] Convert CF: added convert childs number check * code style fix * WIP: [CPU][TESTS] Convert CF: Added subgraph test with two outputs for Convert node * MatMulDecompressConvertTest refactoring + instances fixes * removed KeepConstAndDecompressionForMatMul pass and added cpu callback for KeepConstAndDecompression * MatMulDecompressConvertTest2: added graphs and small problem description * small review fix
This commit is contained in:
parent
85609d4881
commit
2a3132941c
@ -14,7 +14,6 @@ namespace pass {
|
||||
class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding;
|
||||
class TRANSFORMATIONS_API DisableDecompressionConvertConstantFolding;
|
||||
class TRANSFORMATIONS_API KeepConstAndDecompression;
|
||||
class TRANSFORMATIONS_API KeepConstAndDecompressionForMatMul;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ov
|
||||
@ -48,14 +47,3 @@ public:
|
||||
OPENVINO_RTTI("KeepConstAndDecompression", "0");
|
||||
KeepConstAndDecompression();
|
||||
};
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief Disables ConstantFolding for Convert operation (just before MatMul operation only) and prevents conversion
|
||||
* of f16 Consts to f32.
|
||||
*/
|
||||
class ov::pass::KeepConstAndDecompressionForMatMul : public MatcherPass {
|
||||
public:
|
||||
OPENVINO_RTTI("KeepConstAndDecompressionForMatMul", "0");
|
||||
KeepConstAndDecompressionForMatMul();
|
||||
};
|
||||
|
@ -59,6 +59,10 @@ pass::KeepConstAndDecompression::KeepConstAndDecompression() {
|
||||
ov::is_shape_subgraph(node->shared_from_this()))
|
||||
return false;
|
||||
|
||||
if (transformation_callback(node)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
disable_constant_folding(node);
|
||||
|
||||
if (!is_type<ov::op::v0::Constant>(node->input_value(0).get_node_shared_ptr()))
|
||||
@ -70,28 +74,3 @@ pass::KeepConstAndDecompression::KeepConstAndDecompression() {
|
||||
auto m = std::make_shared<pattern::Matcher>(node_pattern, matcher_name);
|
||||
register_matcher(m, callback);
|
||||
}
|
||||
|
||||
pass::KeepConstAndDecompressionForMatMul::KeepConstAndDecompressionForMatMul() {
|
||||
MATCHER_SCOPE(KeepConstAndDecompressionForMatMul);
|
||||
auto matmul = pass::pattern::wrap_type<ov::op::v0::MatMul>();
|
||||
|
||||
matcher_pass_callback callback = [=](pass::pattern::Matcher& m) {
|
||||
auto node = m.get_match_root();
|
||||
|
||||
// input to matmul is decompression Convert
|
||||
const auto& inp_convert = node->input_value(1).get_node_shared_ptr();
|
||||
if (!is_type<ov::op::v0::Convert>(inp_convert) || !is_decompression(inp_convert))
|
||||
return false;
|
||||
|
||||
disable_constant_folding(inp_convert);
|
||||
|
||||
if (!is_type<ov::op::v0::Constant>(inp_convert->input_value(0).get_node_shared_ptr()))
|
||||
return false;
|
||||
enable_keep_fp16_const(inp_convert->input_value(0).get_node_shared_ptr());
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<pass::pattern::Matcher>(matmul, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
@ -203,7 +203,14 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
|
||||
manager.set_per_pass_validation(false);
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::InitNodeInfo);
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkShapeOfSubgraphs);
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompressionForMatMul);
|
||||
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
|
||||
CPU_SET_CALLBACK_COMMON(manager,
|
||||
[](const_node_ptr &node) -> bool {
|
||||
const auto outputs = node->get_output_target_inputs(0);
|
||||
return outputs.size() != 1 || !is_type<ov::op::v0::MatMul>(outputs.begin()->get_node());
|
||||
},
|
||||
ov::pass::KeepConstAndDecompression);
|
||||
|
||||
const bool useLpt = !defaultPrecisions.empty();
|
||||
if (useLpt) {
|
||||
@ -434,7 +441,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
|
||||
AUGRUCell node (see AUGRUCellFusion pass). In such cases, some constant paths will be unfolded, which can lead to crashes in the plugin. To avoid this,
|
||||
we re-mark decompression converts and finally do CF for those constant paths that are not inputs to a MatMul node */
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::EnableDecompressionConvertConstantFolding);
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompressionForMatMul);
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
|
||||
CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConstantFolding);
|
||||
|
||||
manager.run_passes(model);
|
||||
|
@ -140,7 +140,7 @@ protected:
|
||||
std::swap(*(shape.end() - 1), *(shape.end() - 2));
|
||||
}
|
||||
|
||||
void CheckFCWeightsPrecision() const {
|
||||
void CheckFCWeightsPrecision(ElementType expectedWeiElemType) const {
|
||||
auto getExecValue = [](const ov::Node::RTMap& rtInfo, const std::string &paramName) -> std::string {
|
||||
auto it = rtInfo.find(paramName);
|
||||
IE_ASSERT(rtInfo.end() != it);
|
||||
@ -153,7 +153,7 @@ protected:
|
||||
if (getExecValue(fcNode->get_rt_info(), ExecGraphInfoSerialization::LAYER_TYPE) == "FullyConnected") {
|
||||
const auto &constNode = fcNode->get_input_node_shared_ptr(1);
|
||||
element::Type expectedType(getExecValue(constNode->get_rt_info(), ExecGraphInfoSerialization::OUTPUT_PRECISIONS));
|
||||
ASSERT_EQ(expectedType, weiConstElemType);
|
||||
ASSERT_EQ(expectedType, expectedWeiElemType);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -163,6 +163,7 @@ protected:
|
||||
|
||||
std::vector<InputShape> inputShapes;
|
||||
std::pair<bool, bool> transpose;
|
||||
ElementType weiConstElemType;
|
||||
std::map<std::string, std::string> additionalConfig;
|
||||
CPUSpecificParams cpuParams;
|
||||
|
||||
@ -174,8 +175,8 @@ protected:
|
||||
bool transpA = transpose.first;
|
||||
bool transpB = transpose.second;
|
||||
|
||||
if (transpA) transposesCount++;
|
||||
if (!transpB) transposesCount++;
|
||||
if (transpA) transposeCount++;
|
||||
if (!transpB) transposeCount++;
|
||||
|
||||
if (transpA) {
|
||||
transposeShape(inputDynamicShapes[0]);
|
||||
@ -214,6 +215,8 @@ protected:
|
||||
inputB = std::make_shared<opset1::Convert>(inputB, convertOutType);
|
||||
mark_as_decompression(inputB);
|
||||
}
|
||||
expectedWeiConstElemType = weiConstElemType;
|
||||
|
||||
auto matMul = builder::makeMatMul(paramOuts[0], inputB, transpA, transpB);
|
||||
|
||||
function = CPUTestsBase::makeNgraphFunction(netType, params, matMul, cpuNodeType);
|
||||
@ -221,15 +224,16 @@ protected:
|
||||
|
||||
void CheckExecutionGraph() {
|
||||
CheckPluginRelatedResults(compiledModel, "FullyConnected");
|
||||
CheckNumberOfNodesWithType(compiledModel, "FullyConnected", 1);
|
||||
CheckNumberOfNodesWithType(compiledModel, "Transpose", transposesCount);
|
||||
CheckNumberOfNodesWithType(compiledModel, "FullyConnected", fullyConnectedCount);
|
||||
CheckNumberOfNodesWithType(compiledModel, "Transpose", transposeCount);
|
||||
CheckNumberOfNodesWithType(compiledModel, "Convert", 0);
|
||||
CheckNumberOfNodesWithType(compiledModel, "Reorder", 0);
|
||||
CheckFCWeightsPrecision();
|
||||
CheckFCWeightsPrecision(expectedWeiConstElemType);
|
||||
}
|
||||
|
||||
size_t transposesCount = 0;
|
||||
ElementType weiConstElemType = ElementType::f32;
|
||||
size_t fullyConnectedCount = 1;
|
||||
size_t transposeCount = 0;
|
||||
ElementType expectedWeiConstElemType = ElementType::f32;
|
||||
};
|
||||
|
||||
TEST_P(MatMulDecompressConvertTest, CompareWithRefs) {
|
||||
@ -273,14 +277,7 @@ const std::vector<std::vector<InputShape>> inputShapes3D = {
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
std::vector<std::map<std::string, std::string>> filterAdditionalConfig() {
|
||||
std::vector<std::map<std::string, std::string>> additionalConfig;
|
||||
#ifndef OV_CPU_WITH_MLAS
|
||||
additionalConfig.push_back(std::map<std::string, std::string>{/* empty config */});
|
||||
#endif
|
||||
return additionalConfig;
|
||||
}
|
||||
std::map<std::string, std::string> emptyConfig = {/* empty config */};
|
||||
|
||||
std::vector<std::map<std::string, std::string>> filterAdditionalConfig_BF16() {
|
||||
std::vector<std::map<std::string, std::string>> additionalConfig;
|
||||
@ -290,22 +287,24 @@ std::vector<std::map<std::string, std::string>> filterAdditionalConfig_BF16() {
|
||||
return additionalConfig;
|
||||
}
|
||||
|
||||
std::vector<std::map<std::string, std::string>> filterAdditionalConfig_MLAS() {
|
||||
std::vector<std::map<std::string, std::string>> additionalConfig;
|
||||
additionalConfig.push_back(std::map<std::string, std::string>{/* empty config */});
|
||||
return additionalConfig;
|
||||
}
|
||||
|
||||
std::vector<CPUSpecificParams> filterSpecificParams() {
|
||||
std::vector<CPUSpecificParams> filterSpecificParams(bool trySetMlas) {
|
||||
std::vector<CPUSpecificParams> specificParams;
|
||||
if (trySetMlas) {
|
||||
#ifdef OV_CPU_WITH_MLAS
|
||||
specificParams.push_back(CPUSpecificParams{{}, {}, {"gemm_mlas"}, "gemm_mlas"});
|
||||
#endif
|
||||
}
|
||||
// try to set oneDNN JIT params if we can't or shouldn't use MLAS
|
||||
if (specificParams.empty()) {
|
||||
if (with_cpu_x86_avx512_core()) {
|
||||
specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx512"}, "brgemm_avx512"});
|
||||
} else if (with_cpu_x86_avx2()) {
|
||||
specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx2"}, "brgemm_avx2"});
|
||||
}
|
||||
return specificParams;
|
||||
}
|
||||
|
||||
return specificParams;
|
||||
}
|
||||
|
||||
std::vector<CPUSpecificParams> filterSpecificParams_BF16() {
|
||||
std::vector<CPUSpecificParams> specificParams;
|
||||
@ -314,34 +313,25 @@ std::vector<CPUSpecificParams> filterSpecificParams_BF16() {
|
||||
}
|
||||
|
||||
|
||||
std::vector<CPUSpecificParams> filterSpecificParams_MLAS() {
|
||||
std::vector<CPUSpecificParams> specificParams;
|
||||
specificParams.push_back(CPUSpecificParams{{}, {}, {"gemm_mlas"}, "gemm_mlas"});
|
||||
return specificParams;
|
||||
}
|
||||
|
||||
|
||||
#ifdef OV_CPU_WITH_MLAS
|
||||
const auto testParams2D_MLAS_smoke = ::testing::Combine(
|
||||
const auto testParams2D_FP32_smoke = ::testing::Combine(
|
||||
::testing::ValuesIn(inputShapes2D),
|
||||
::testing::ValuesIn(transposeParams),
|
||||
::testing::Values(ElementType::f32),
|
||||
::testing::ValuesIn(filterAdditionalConfig_MLAS()),
|
||||
::testing::ValuesIn(filterSpecificParams_MLAS()));
|
||||
::testing::Values(emptyConfig),
|
||||
::testing::ValuesIn(filterSpecificParams(true)));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_MLAS, MatMulDecompressConvertTest, testParams2D_MLAS_smoke,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP32, MatMulDecompressConvertTest, testParams2D_FP32_smoke,
|
||||
MatMulDecompressConvertTest::getTestCaseName);
|
||||
#endif
|
||||
|
||||
|
||||
const auto testParams2D_smoke = ::testing::Combine(
|
||||
const auto testParams2D_FP16_smoke = ::testing::Combine(
|
||||
::testing::ValuesIn(inputShapes2D),
|
||||
::testing::ValuesIn(transposeParams),
|
||||
::testing::Values(ElementType::f32, ElementType::f16),
|
||||
::testing::ValuesIn(filterAdditionalConfig()),
|
||||
::testing::ValuesIn(filterSpecificParams()));
|
||||
::testing::Values(ElementType::f16),
|
||||
::testing::Values(emptyConfig),
|
||||
::testing::ValuesIn(filterSpecificParams(false)));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D, MatMulDecompressConvertTest, testParams2D_smoke,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP16, MatMulDecompressConvertTest, testParams2D_FP16_smoke,
|
||||
MatMulDecompressConvertTest::getTestCaseName);
|
||||
|
||||
|
||||
@ -356,27 +346,25 @@ INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_BF16, MatMulDecompressConvertTest, testPara
|
||||
MatMulDecompressConvertTest::getTestCaseName);
|
||||
|
||||
|
||||
#ifdef OV_CPU_WITH_MLAS
|
||||
const auto testParams3D_MLAS_smoke = ::testing::Combine(
|
||||
const auto testParams3D_FP32_smoke = ::testing::Combine(
|
||||
::testing::ValuesIn(inputShapes3D),
|
||||
::testing::ValuesIn(transposeParams),
|
||||
::testing::Values(ElementType::f32),
|
||||
::testing::ValuesIn(filterAdditionalConfig_MLAS()),
|
||||
::testing::ValuesIn(filterSpecificParams_MLAS()));
|
||||
::testing::Values(emptyConfig),
|
||||
::testing::ValuesIn(filterSpecificParams(true)));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_MLAS, MatMulDecompressConvertTest, testParams3D_MLAS_smoke,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_FP32, MatMulDecompressConvertTest, testParams3D_FP32_smoke,
|
||||
MatMulDecompressConvertTest::getTestCaseName);
|
||||
#endif
|
||||
|
||||
|
||||
const auto testParams3D_smoke = ::testing::Combine(
|
||||
const auto testParams3D_FP16_smoke = ::testing::Combine(
|
||||
::testing::ValuesIn(inputShapes3D),
|
||||
::testing::ValuesIn(transposeParams),
|
||||
::testing::Values(ElementType::f32, ElementType::f16),
|
||||
::testing::ValuesIn(filterAdditionalConfig()),
|
||||
::testing::ValuesIn(filterSpecificParams()));
|
||||
::testing::Values(ElementType::f16),
|
||||
::testing::Values(emptyConfig),
|
||||
::testing::ValuesIn(filterSpecificParams(false)));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D, MatMulDecompressConvertTest, testParams3D_smoke,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_FP16, MatMulDecompressConvertTest, testParams3D_FP16_smoke,
|
||||
MatMulDecompressConvertTest::getTestCaseName);
|
||||
|
||||
|
||||
@ -392,4 +380,153 @@ INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_BF16, MatMulDecompressConvertTest, testPara
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
/* When a Convert has 2 or more consumers, there is a problem with memory allocation in the CPU plug-in (see the Edge::init() method).
|
||||
Maybe we can just remove the check (edgePtr->getParent()->isConstant() && !edgePtr->getChild()->isConstant()) and everything will be OK,
|
||||
but this solution needs additional verification. For now, in these cases we do not do CF on the CPU side; it should be done
|
||||
on the ngraph side.
|
||||
|
||||
* Graph before:
|
||||
------------ ------------ ------------
|
||||
|Input(f32)| |Input(f16)| |Input(f32)|
|
||||
------------ ------------ ------------
|
||||
| | |
|
||||
| --------------------------------- |
|
||||
| |Convert(decompression f16->f32)| |
|
||||
| --------------------------------- |
|
||||
| | | |
|
||||
----------------------- -----------------------
|
||||
| MatMul | | MatMul |
|
||||
----------------------- -----------------------
|
||||
| |
|
||||
---------------------------------
|
||||
| Concat |
|
||||
---------------------------------
|
||||
|
|
||||
--------
|
||||
|Output|
|
||||
--------
|
||||
|
||||
* Exec graph:
|
||||
------------ -------------------------------- ------------
|
||||
|Input(f32)| | Input(f32) | |Input(f32)|
|
||||
------------ -------------------------------- ------------
|
||||
| | | |
|
||||
----------------------- -----------------------
|
||||
| MatMul | | MatMul |
|
||||
----------------------- -----------------------
|
||||
| |
|
||||
---------------------------------
|
||||
| Concat |
|
||||
---------------------------------
|
||||
|
|
||||
--------
|
||||
|Output|
|
||||
--------
|
||||
*/
|
||||
using MatMulDecompressConvertParams2 = std::tuple<
|
||||
std::vector<InputShape>, // input shapes
|
||||
std::pair<bool, bool>, // transposeA, transposeB
|
||||
ElementType, // weights precision
|
||||
std::map<std::string, std::string>, // additional config
|
||||
CPUSpecificParams
|
||||
>;
|
||||
|
||||
class MatMulDecompressConvertTest2 : public MatMulDecompressConvertTest {
|
||||
protected:
|
||||
void SetUp() override {
|
||||
targetDevice = ov::test::utils::DEVICE_CPU;
|
||||
|
||||
std::vector<InputShape> inputShapes;
|
||||
std::pair<bool, bool> transpose;
|
||||
ElementType weiConstElemType;
|
||||
std::map<std::string, std::string> additionalConfig;
|
||||
CPUSpecificParams cpuParams;
|
||||
|
||||
std::tie(inputShapes, transpose, weiConstElemType, additionalConfig, cpuParams) = this->GetParam();
|
||||
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
|
||||
|
||||
init_input_shapes(inputShapes);
|
||||
|
||||
bool transpA = transpose.first;
|
||||
bool transpB = transpose.second;
|
||||
|
||||
fullyConnectedCount = 2;
|
||||
if (transpA) transposeCount += 2;
|
||||
if (!transpB) transposeCount++;
|
||||
|
||||
if (transpA) {
|
||||
transposeShape(inputDynamicShapes[0]);
|
||||
for (auto& shapes : targetStaticShapes) {
|
||||
transposeShape(shapes[0]);
|
||||
}
|
||||
transposeShape(inputDynamicShapes[1]);
|
||||
for (auto& shapes : targetStaticShapes) {
|
||||
transposeShape(shapes[1]);
|
||||
}
|
||||
}
|
||||
if (transpB) {
|
||||
transposeShape(inputDynamicShapes[2]);
|
||||
for (auto& shapes : targetStaticShapes) {
|
||||
transposeShape(shapes[2]);
|
||||
}
|
||||
}
|
||||
|
||||
const auto& inShapeFC0 = inputDynamicShapes[0];
|
||||
const auto& inShapeFC1 = inputDynamicShapes[1];
|
||||
const auto& inShapeWeights = inputDynamicShapes[2];
|
||||
|
||||
configuration.insert(additionalConfig.begin(), additionalConfig.end());
|
||||
|
||||
ElementType netType = ElementType::f32;
|
||||
ElementType convertOutType = ElementType::f32;
|
||||
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
|
||||
convertOutType = inType = outType = netType = ElementType::bf16;
|
||||
weiConstElemType = (weiConstElemType != ElementType::f32) ? weiConstElemType : ElementType::bf16;
|
||||
} else {
|
||||
inType = outType = netType;
|
||||
}
|
||||
|
||||
std::string cpuNodeType = "FullyConnected";
|
||||
selectedType = makeSelectedTypeStr(selectedType, outType);
|
||||
|
||||
auto params = builder::makeDynamicParams(inType, {inShapeFC0, inShapeFC1});
|
||||
auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<opset1::Parameter>(params));
|
||||
std::shared_ptr<Node> inputWeights = builder::makeConstant<float>(weiConstElemType, inShapeWeights.get_shape(), {}, true);
|
||||
if (weiConstElemType == ElementType::f16) {
|
||||
inputWeights = std::make_shared<opset1::Convert>(inputWeights, convertOutType);
|
||||
mark_as_decompression(inputWeights);
|
||||
}
|
||||
// In this test, convert must be folded on the ngraph side, so the constant with fp32 precision is expected
|
||||
expectedWeiConstElemType = ElementType::f32;
|
||||
|
||||
auto matMul0 = builder::makeMatMul(paramOuts[0], inputWeights, transpA, transpB);
|
||||
auto matMul1 = builder::makeMatMul(paramOuts[1], inputWeights, transpA, transpB);
|
||||
|
||||
auto concat = builder::makeConcat({matMul0, matMul1}, 0);
|
||||
|
||||
function = CPUTestsBase::makeNgraphFunction(netType, params, concat, cpuNodeType);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(MatMulDecompressConvertTest2, CompareWithRefs) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED();
|
||||
run();
|
||||
CheckExecutionGraph();
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
const auto testParams2D_FP16_2_smoke = ::testing::Combine(
|
||||
::testing::Values(static_shapes_to_test_representation({{2, 3}, {2, 3}, {3, 4}})),
|
||||
::testing::Values(std::pair<bool, bool>{false, true}),
|
||||
::testing::Values(ElementType::f16),
|
||||
::testing::Values(emptyConfig),
|
||||
::testing::ValuesIn(filterSpecificParams(true)));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP16_2, MatMulDecompressConvertTest2, testParams2D_FP16_2_smoke,
|
||||
MatMulDecompressConvertTest2::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
|
Loading…
Reference in New Issue
Block a user