[CPU Tests] Migrate matmul test cases to API 2.0

This commit is contained in:
River.Li 2023-11-28 11:34:38 +08:00
parent 543db46143
commit b5fe09b3fa
7 changed files with 136 additions and 144 deletions
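The recurring change across these files: string-keyed InferenceEngine config maps become typed ov::AnyMap properties, and InferenceEngine:: helpers move to the ov:: namespace. A minimal before/after sketch of the config pattern (both spellings appear verbatim in the hunks below):

// API 1.0: string-keyed plugin config
std::map<std::string, std::string> old_config = {
    {InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::YES}};

// API 2.0: typed property in an ov::AnyMap
ov::AnyMap new_config = {ov::hint::inference_precision(ov::element::bf16)};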

View File

@ -294,7 +294,7 @@ std::vector<std::string> disabledTestPatterns() {
retVector.emplace_back(R"(.*LoadNetworkCompiledKernelsCacheTest.*CanCreateCacheDirAndDumpBinariesUnicodePath.*)");
#endif
if (!InferenceEngine::with_cpu_x86_avx512_core()) {
if (!ov::with_cpu_x86_avx512_core()) {
// on platforms that do not support bfloat16 we disable the bf16 tests, since there are no bf16 primitives
// and the tests are useless on such platforms
retVector.emplace_back(R"(.*(BF|bf)16.*)");
@ -305,7 +305,7 @@ std::vector<std::string> disabledTestPatterns() {
retVector.emplace_back(R"(.*Snippets.*(MatMul|Matmul).*)");
}
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
if (!InferenceEngine::with_cpu_x86_avx512_core_fp16()) {
if (!ov::with_cpu_x86_avx512_core_fp16()) {
// Skip fp16 tests for platforms that don't support fp16 precision
retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)");
}
@ -318,7 +318,7 @@ std::vector<std::string> disabledTestPatterns() {
retVector.emplace_back(R"(.*EltwiseLayerCPUTest.*IS=\(\[1\.\.10\.2\.5\.6\]_\).*eltwiseOpType=SqDiff.*_configItem=INFERENCE_PRECISION_HINT=f16.*)");
#endif // OV_CPU_ARM_ENABLE_FP16
#endif
if (!InferenceEngine::with_cpu_x86_avx512_core_vnni() && !InferenceEngine::with_cpu_x86_avx512_core_amx_int8()) {
if (!ov::with_cpu_x86_avx512_core_vnni() && !ov::with_cpu_x86_avx512_core_amx_int8()) {
// MatMul in Snippets uses BRGEMM that supports i8 only on platforms with VNNI or AMX instructions
retVector.emplace_back(R"(.*Snippets.*MatMulFQ.*)");
retVector.emplace_back(R"(.*Snippets.*MatMul.*Quantized.*)");
@ -326,11 +326,11 @@ std::vector<std::string> disabledTestPatterns() {
retVector.emplace_back(R"(.*Snippets.*MHAINT8.*)");
retVector.emplace_back(R"(.*Snippets.*MHAQuant.*)");
}
if (!InferenceEngine::with_cpu_x86_avx512_core_amx_int8())
if (!ov::with_cpu_x86_avx512_core_amx_int8())
//TODO: Issue 92895
// on platforms that do not support AMX, we disable i8 input tests
retVector.emplace_back(R"(smoke_LPT/FakeQuantizeWithNotOptimalTransformation.CompareWithRefImpl.*CPU.*i8.*)");
if (!InferenceEngine::with_cpu_x86_avx512_core_amx_bf16() && !InferenceEngine::with_cpu_x86_bfloat16()) {
if (!ov::with_cpu_x86_avx512_core_amx_bf16() && !ov::with_cpu_x86_bfloat16()) {
// ignored on platforms that do not support bf16
retVector.emplace_back(R"(.*smoke_Snippets_EnforcePrecision_bf16.*)");
retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)");

View File

@ -7,8 +7,6 @@
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "transformations/rt_info/decompression.hpp"
using namespace ngraph;
using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ov::test;
@ -82,13 +80,11 @@ namespace SubgraphTestsDefinitions {
--------
*/
using MatMulDecompressConvertParams = std::tuple<
std::vector<InputShape>, // input shapes
std::pair<bool, bool>, // transposeA, transposeB
ElementType, // weights precision
std::map<std::string, std::string>, // additional config
CPUSpecificParams
>;
using MatMulDecompressConvertParams = std::tuple<std::vector<InputShape>, // input shapes
std::pair<bool, bool>, // transposeA, transposeB
ElementType, // weights precision
ov::AnyMap, // additional config
CPUSpecificParams>;
class MatMulDecompressConvertTest : public testing::WithParamInterface<MatMulDecompressConvertParams>,
virtual public SubgraphBaseTest, public CPUTestsBase {
@ -97,7 +93,7 @@ public:
std::vector<InputShape> inputShapes;
std::pair<bool, bool> transpose;
ElementType weiElemType;
std::map<std::string, std::string> additionalConfig;
ov::AnyMap additionalConfig;
CPUSpecificParams cpuParams;
std::tie(inputShapes, transpose, weiElemType, additionalConfig, cpuParams) = obj.param;
@ -124,7 +120,7 @@ public:
result << "config=(";
for (const auto& configEntry : additionalConfig) {
result << configEntry.first << ", " << configEntry.second << ":";
result << configEntry.first << ", " << configEntry.second.as<std::string>() << ":";
}
result << ")";
@ -135,12 +131,12 @@ public:
protected:
template<typename T>
void transposeShape(T& shape) {
void transpose_shape(T& shape) {
OPENVINO_ASSERT(shape.size() > 1);
std::swap(*(shape.end() - 1), *(shape.end() - 2));
}
void CheckFCWeightsPrecision(ElementType expectedWeiElemType) const {
void check_fc_weights_precision(ElementType expectedWeiElemType) const {
auto getExecValue = [](const ov::Node::RTMap& rtInfo, const std::string &paramName) -> std::string {
auto it = rtInfo.find(paramName);
OPENVINO_ASSERT(rtInfo.end() != it);
@ -152,7 +148,7 @@ protected:
for (const auto &fcNode : execFunction->get_ops()) {
if (getExecValue(fcNode->get_rt_info(), ExecGraphInfoSerialization::LAYER_TYPE) == "FullyConnected") {
const auto &constNode = fcNode->get_input_node_shared_ptr(1);
element::Type expectedType(getExecValue(constNode->get_rt_info(), ExecGraphInfoSerialization::OUTPUT_PRECISIONS));
ov::element::Type expectedType(getExecValue(constNode->get_rt_info(), ov::exec_model_info::OUTPUT_PRECISIONS));
ASSERT_EQ(expectedType, expectedWeiElemType);
}
}
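ov::exec_model_info is the API 2.0 home of the execution-graph attribute keys that ExecGraphInfoSerialization used to provide. A standalone sketch of reading them from a compiled model (model path hypothetical, exec_model_info.hpp header path assumed):

#include "openvino/runtime/core.hpp"
#include "openvino/runtime/exec_model_info.hpp"  // assumed header for ov::exec_model_info
#include <iostream>

ov::Core core;
auto compiled = core.compile_model("model.xml", "CPU");    // hypothetical model path
auto runtime_model = compiled.get_runtime_model();         // std::shared_ptr<const ov::Model>
for (const auto& node : runtime_model->get_ops()) {
    const auto& rt = node->get_rt_info();
    auto it = rt.find(ov::exec_model_info::OUTPUT_PRECISIONS);
    if (it != rt.end())
        std::cout << node->get_friendly_name() << ": " << it->second.as<std::string>() << "\n";
}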
@ -164,7 +160,7 @@ protected:
std::vector<InputShape> inputShapes;
std::pair<bool, bool> transpose;
ElementType weiConstElemType;
std::map<std::string, std::string> additionalConfig;
ov::AnyMap additionalConfig;
CPUSpecificParams cpuParams;
std::tie(inputShapes, transpose, weiConstElemType, additionalConfig, cpuParams) = this->GetParam();
@ -179,15 +175,15 @@ protected:
if (!transpB) transposeCount++;
if (transpA) {
transposeShape(inputDynamicShapes[0]);
transpose_shape(inputDynamicShapes[0]);
for (auto& shapes : targetStaticShapes) {
transposeShape(shapes[0]);
transpose_shape(shapes[0]);
}
}
if (transpB) {
transposeShape(inputDynamicShapes[1]);
transpose_shape(inputDynamicShapes[1]);
for (auto& shapes : targetStaticShapes) {
transposeShape(shapes[1]);
transpose_shape(shapes[1]);
}
}
@ -198,7 +194,8 @@ protected:
ElementType netType = ElementType::f32;
ElementType convertOutType = ElementType::f32;
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
auto it = additionalConfig.find(ov::hint::inference_precision.name());
if (it != additionalConfig.end() && it->second.as<ov::element::Type>() == ov::element::bf16) {
convertOutType = inType = outType = netType = ElementType::bf16;
weiConstElemType = (weiConstElemType != ElementType::f32) ? weiConstElemType : ElementType::bf16;
} else {
@ -209,9 +206,9 @@ protected:
selectedType = makeSelectedTypeStr(selectedType, outType);
ov::ParameterVector params{std::make_shared<ov::op::v0::Parameter>(inType, inShapeA)};
std::shared_ptr<Node> inputB = builder::makeConstant<float>(weiConstElemType, inShapeB.get_shape(), {}, true);
std::shared_ptr<ov::Node> inputB = ngraph::builder::makeConstant<float>(weiConstElemType, inShapeB.get_shape(), {}, true);
if (weiConstElemType == ElementType::f16) {
inputB = std::make_shared<opset1::Convert>(inputB, convertOutType);
inputB = std::make_shared<ov::op::v0::Convert>(inputB, convertOutType);
mark_as_decompression(inputB);
}
expectedWeiConstElemType = weiConstElemType;
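mark_as_decompression comes from transformations/rt_info/decompression.hpp (included at the top of this file) and tags the Convert as a weight-decompression step so it is not constant-folded away, which is what check_fc_weights_precision later relies on. A minimal sketch of the pattern, with a hypothetical weights shape:

auto weights = ngraph::builder::makeConstant<float>(ov::element::f16, ov::Shape{16, 32}, {}, true);
auto convert = std::make_shared<ov::op::v0::Convert>(weights, ov::element::f32);
mark_as_decompression(convert);  // keeps the f16 constant + Convert pair in the executed graph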
@ -221,13 +218,13 @@ protected:
function = CPUTestsBase::makeNgraphFunction(netType, params, matMul, cpuNodeType);
}
void CheckExecutionGraph() {
void check_execution_graph() {
CheckPluginRelatedResults(compiledModel, "FullyConnected");
CheckNumberOfNodesWithType(compiledModel, "FullyConnected", fullyConnectedCount);
CheckNumberOfNodesWithType(compiledModel, "Transpose", transposeCount);
CheckNumberOfNodesWithType(compiledModel, "Convert", 0);
CheckNumberOfNodesWithType(compiledModel, "Reorder", 0);
CheckFCWeightsPrecision(expectedWeiConstElemType);
check_fc_weights_precision(expectedWeiConstElemType);
}
size_t fullyConnectedCount = 1;
@ -238,7 +235,7 @@ protected:
TEST_P(MatMulDecompressConvertTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
run();
CheckExecutionGraph();
check_execution_graph();
}
namespace {
@ -276,17 +273,17 @@ const std::vector<std::vector<InputShape>> inputShapes3D = {
},
};
std::map<std::string, std::string> emptyConfig = {/* empty config */};
ov::AnyMap emptyConfig = {/* empty config */};
std::vector<std::map<std::string, std::string>> filterAdditionalConfig_BF16() {
std::vector<std::map<std::string, std::string>> additionalConfig;
if (with_cpu_x86_avx512_core()) {
additionalConfig.push_back({{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}});
std::vector<ov::AnyMap> filter_additional_config_bf16() {
std::vector<ov::AnyMap> additionalConfig;
if (ov::with_cpu_x86_avx512_core()) {
additionalConfig.push_back({{ov::hint::inference_precision(ov::element::bf16)}});
}
return additionalConfig;
}
std::vector<CPUSpecificParams> filterSpecificParams(bool trySetMlas) {
std::vector<CPUSpecificParams> filter_specific_params(bool trySetMlas) {
std::vector<CPUSpecificParams> specificParams;
if (trySetMlas) {
#ifdef OV_CPU_WITH_MLAS
@ -295,9 +292,9 @@ std::vector<CPUSpecificParams> filterSpecificParams(bool trySetMlas) {
}
// try to set oneDNN JIT params if we can't or shouldn't use MLAS
if (specificParams.empty()) {
if (with_cpu_x86_avx512_core()) {
if (ov::with_cpu_x86_avx512_core()) {
specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx512"}, "brgemm_avx512"});
} else if (with_cpu_x86_avx2()) {
} else if (ov::with_cpu_x86_avx2()) {
specificParams.push_back(CPUSpecificParams{{}, {}, {"brgemm_avx2"}, "brgemm_avx2"});
}
}
@ -305,7 +302,7 @@ std::vector<CPUSpecificParams> filterSpecificParams(bool trySetMlas) {
return specificParams;
}
std::vector<CPUSpecificParams> filterSpecificParams_BF16() {
std::vector<CPUSpecificParams> filter_specific_params_bf16() {
std::vector<CPUSpecificParams> specificParams;
specificParams.push_back(CPUSpecificParams{{}, {}, {"jit_gemm"}, "jit_gemm"});
return specificParams;
@ -317,7 +314,7 @@ const auto testParams2D_FP32_smoke = ::testing::Combine(
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f32),
::testing::Values(emptyConfig),
::testing::ValuesIn(filterSpecificParams(true)));
::testing::ValuesIn(filter_specific_params(true)));
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP32, MatMulDecompressConvertTest, testParams2D_FP32_smoke,
MatMulDecompressConvertTest::getTestCaseName);
@ -328,7 +325,7 @@ const auto testParams2D_FP16_smoke = ::testing::Combine(
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f16),
::testing::Values(emptyConfig),
::testing::ValuesIn(filterSpecificParams(false)));
::testing::ValuesIn(filter_specific_params(false)));
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP16, MatMulDecompressConvertTest, testParams2D_FP16_smoke,
MatMulDecompressConvertTest::getTestCaseName);
@ -338,8 +335,8 @@ const auto testParams2D_BF16_smoke = ::testing::Combine(
::testing::ValuesIn(inputShapes2D),
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f32, ElementType::f16),
::testing::ValuesIn(filterAdditionalConfig_BF16()),
::testing::ValuesIn(filterSpecificParams_BF16()));
::testing::ValuesIn(filter_additional_config_bf16()),
::testing::ValuesIn(filter_specific_params_bf16()));
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_BF16, MatMulDecompressConvertTest, testParams2D_BF16_smoke,
MatMulDecompressConvertTest::getTestCaseName);
@ -350,7 +347,7 @@ const auto testParams3D_FP32_smoke = ::testing::Combine(
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f32),
::testing::Values(emptyConfig),
::testing::ValuesIn(filterSpecificParams(true)));
::testing::ValuesIn(filter_specific_params(true)));
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_FP32, MatMulDecompressConvertTest, testParams3D_FP32_smoke,
MatMulDecompressConvertTest::getTestCaseName);
@ -361,7 +358,7 @@ const auto testParams3D_FP16_smoke = ::testing::Combine(
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f16),
::testing::Values(emptyConfig),
::testing::ValuesIn(filterSpecificParams(false)));
::testing::ValuesIn(filter_specific_params(false)));
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_FP16, MatMulDecompressConvertTest, testParams3D_FP16_smoke,
MatMulDecompressConvertTest::getTestCaseName);
@ -371,8 +368,8 @@ const auto testParams3D_BF16_smoke = ::testing::Combine(
::testing::ValuesIn(inputShapes3D),
::testing::ValuesIn(transposeParams),
::testing::Values(ElementType::f32, ElementType::f16),
::testing::ValuesIn(filterAdditionalConfig_BF16()),
::testing::ValuesIn(filterSpecificParams_BF16()));
::testing::ValuesIn(filter_additional_config_bf16()),
::testing::ValuesIn(filter_specific_params_bf16()));
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_BF16, MatMulDecompressConvertTest, testParams3D_BF16_smoke,
MatMulDecompressConvertTest::getTestCaseName);
@ -382,7 +379,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_BF16, MatMulDecompressConvertTest, testPara
/* If a Convert has 2 or more consumers, there is a problem with memory allocation in the CPU plug-in (see the Edge::init()
method). Maybe we could just remove the check (edgePtr->getParent()->isConstant() && !edgePtr->getChild()->isConstant())
and everything would be OK, but this solution needs additional verification. For now, in these cases we will not be
doing CF on the CPU side and it should be done on the ngraph side.
doing CF on the CPU side and it should be done on the graph side.
* Graph before:
------------ ------------ ------------
@ -422,13 +419,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_BF16, MatMulDecompressConvertTest, testPara
|Output|
--------
*/
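A minimal sketch of the shape this comment describes: one decompression Convert feeding two MatMuls, so the Convert has two consumers and cannot be folded by the plugin (element types from the test below, shapes hypothetical):

ov::ParameterVector params{std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 16}),
                           std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 16})};
auto weights = ngraph::builder::makeConstant<float>(ov::element::f16, ov::Shape{16, 16}, {}, true);
auto convert = std::make_shared<ov::op::v0::Convert>(weights, ov::element::f32);  // two consumers
auto matMul0 = std::make_shared<ov::op::v0::MatMul>(params[0], convert, false, false);
auto matMul1 = std::make_shared<ov::op::v0::MatMul>(params[1], convert, false, false);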
using MatMulDecompressConvertParams2 = std::tuple<
std::vector<InputShape>, // input shapes
std::pair<bool, bool>, // transposeA, transposeB
ElementType, // weights precision
std::map<std::string, std::string>, // additional config
CPUSpecificParams
>;
using MatMulDecompressConvertParams2 = std::tuple<std::vector<InputShape>, // input shapes
std::pair<bool, bool>, // transposeA, transposeB
ElementType, // weights precision
ov::AnyMap, // additional property
CPUSpecificParams>;
class MatMulDecompressConvertTest2 : public MatMulDecompressConvertTest {
protected:
@ -438,7 +433,7 @@ protected:
std::vector<InputShape> inputShapes;
std::pair<bool, bool> transpose;
ElementType weiConstElemType;
std::map<std::string, std::string> additionalConfig;
ov::AnyMap additionalConfig;
CPUSpecificParams cpuParams;
std::tie(inputShapes, transpose, weiConstElemType, additionalConfig, cpuParams) = this->GetParam();
@ -454,19 +449,19 @@ protected:
if (!transpB) transposeCount++;
if (transpA) {
transposeShape(inputDynamicShapes[0]);
transpose_shape(inputDynamicShapes[0]);
for (auto& shapes : targetStaticShapes) {
transposeShape(shapes[0]);
transpose_shape(shapes[0]);
}
transposeShape(inputDynamicShapes[1]);
transpose_shape(inputDynamicShapes[1]);
for (auto& shapes : targetStaticShapes) {
transposeShape(shapes[1]);
transpose_shape(shapes[1]);
}
}
if (transpB) {
transposeShape(inputDynamicShapes[2]);
transpose_shape(inputDynamicShapes[2]);
for (auto& shapes : targetStaticShapes) {
transposeShape(shapes[2]);
transpose_shape(shapes[2]);
}
}
@ -478,7 +473,8 @@ protected:
ElementType netType = ElementType::f32;
ElementType convertOutType = ElementType::f32;
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
auto it = additionalConfig.find(ov::hint::inference_precision.name());
if (it != additionalConfig.end() && it->second.as<ov::element::Type>() == ov::element::bf16) {
convertOutType = inType = outType = netType = ElementType::bf16;
weiConstElemType = (weiConstElemType != ElementType::f32) ? weiConstElemType : ElementType::bf16;
} else {
@ -492,12 +488,12 @@ protected:
for (auto&& shape : {inShapeFC0, inShapeFC1}) {
params.push_back(std::make_shared<ov::op::v0::Parameter>(inType, shape));
}
std::shared_ptr<Node> inputWeights = builder::makeConstant<float>(weiConstElemType, inShapeWeights.get_shape(), {}, true);
std::shared_ptr<ov::Node> inputWeights = ngraph::builder::makeConstant<float>(weiConstElemType, inShapeWeights.get_shape(), {}, true);
if (weiConstElemType == ElementType::f16) {
inputWeights = std::make_shared<opset1::Convert>(inputWeights, convertOutType);
inputWeights = std::make_shared<ov::op::v0::Convert>(inputWeights, convertOutType);
mark_as_decompression(inputWeights);
}
// In this test, convert must be folded on the ngraph side, so the constant with fp32 precision is expected
// In this test, convert must be folded on the graph side, so the constant with fp32 precision is expected
expectedWeiConstElemType = ElementType::f32;
auto matMul0 = std::make_shared<ov::op::v0::MatMul>(params[0], inputWeights, transpA, transpB);
@ -512,7 +508,7 @@ protected:
TEST_P(MatMulDecompressConvertTest2, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
run();
CheckExecutionGraph();
check_execution_graph();
}
namespace {
@ -522,7 +518,7 @@ const auto testParams2D_FP16_2_smoke = ::testing::Combine(
::testing::Values(std::pair<bool, bool>{false, true}),
::testing::Values(ElementType::f16),
::testing::Values(emptyConfig),
::testing::ValuesIn(filterSpecificParams(true)));
::testing::ValuesIn(filter_specific_params(true)));
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_FP16_2, MatMulDecompressConvertTest2, testParams2D_FP16_2_smoke,
MatMulDecompressConvertTest2::getTestCaseName);

View File

@ -6,18 +6,18 @@
#include "test_utils/fusing_test_utils.hpp"
#include "ov_models/builders.hpp"
#include "common_test_utils/common_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include <algorithm>
#include <cassert>
using namespace ngraph;
using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ov::test;
namespace SubgraphTestsDefinitions {
using ElementType = ov::element::Type_t;
using MatmulBrgemmInt8TestParams = std::tuple<SizeVector, // input shape
using MatmulBrgemmInt8TestParams = std::tuple<ov::Shape, // input shape
bool, // true: FullyConnected false: Matmul
ElementType, // input u8/s8
ElementType, // output f32/u8/s8
@ -30,10 +30,10 @@ using MatmulBrgemmInt8TestParams = std::tuple<SizeVector, // input shape
// (u8/s8 + s8)->f32
// (u8/s8 + s8)->u8/s8
class MatmulBrgemmInt8Test : public testing::WithParamInterface<MatmulBrgemmInt8TestParams>, public CpuTestWithFusing,
virtual public LayerTestsUtils::LayerTestsCommon {
virtual public ov::test::SubgraphBaseStaticTest {
public:
static std::string getTestCaseName(testing::TestParamInfo<MatmulBrgemmInt8TestParams> obj) {
SizeVector supportedInputShapes;
ov::Shape supportedInputShapes;
bool isFC;
ElementType inType;
ElementType outType;
@ -41,7 +41,7 @@ public:
std::tie(supportedInputShapes, isFC, inType, outType, cpuParams) = obj.param;
std::ostringstream result;
result << "IS=" << ov::test::utils::vec2str(supportedInputShapes) << "_";
result << "IS=" << supportedInputShapes.to_string() << "_";
result << (isFC ? "FullyConnected" : "MatMul") << "_";
result << "InputType=" << inType << "_";
result << "OutputType=" << outType << "_";
@ -57,16 +57,16 @@ protected:
ElementType outType;
void SetUp() override {
targetDevice = ov::test::utils::DEVICE_CPU;
SizeVector inShapes;
ov::Shape inShapes;
CPUSpecificParams cpuParams;
std::tie(inShapes, isFC, inType, outType, cpuParams) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
const auto ngPrec = element::f32;
const auto ngPrec = ov::element::f32;
ov::ParameterVector inputParams {std::make_shared<ov::op::v0::Parameter>(ngPrec, ov::Shape(inShapes))};
std::shared_ptr<Node> fq1;
std::shared_ptr<Node> matMul;
std::shared_ptr<Node> nodeBeforeConv;
std::shared_ptr<ov::Node> fq1;
std::shared_ptr<ov::Node> matMul;
std::shared_ptr<ov::Node> nodeBeforeConv;
selectedType = makeSelectedTypeStr(selectedType, ElementType::i8);
if (inType == ElementType::u8)
fq1 = ngraph::builder::makeFakeQuantize(inputParams[0], ngPrec, 256, {}, {0.0f}, {2.55f}, {0.0f}, {2.55f});
@ -74,15 +74,15 @@ protected:
fq1 = ngraph::builder::makeFakeQuantize(inputParams[0], ngPrec, 256, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f});
if (isFC) {
ngraph::Shape weightShape = inShapes;
ov::Shape weightShape = inShapes;
std::swap(weightShape[0], weightShape[1]);
auto weightsNode = ngraph::builder::makeConstant(ngPrec, weightShape, std::vector<float>{0.0f}, true);
auto fq2 = ngraph::builder::makeFakeQuantize(weightsNode, ngPrec, 256, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f});
auto fc = std::make_shared<ngraph::opset1::MatMul>(fq1, fq2, false, false);
auto fc = std::make_shared<ov::op::v0::MatMul>(fq1, fq2, false, false);
fc->get_rt_info() = getCPUInfo();
fc->set_friendly_name(nameMatmul);
auto biasWeightsNode = ngraph::builder::makeConstant(ngPrec, {}, std::vector<float>{0.0f}, true);
matMul = std::make_shared<ngraph::opset1::Add>(fc, biasWeightsNode);
matMul = std::make_shared<ov::op::v1::Add>(fc, biasWeightsNode);
} else {
auto fq2 = ngraph::builder::makeFakeQuantize(inputParams[0], ngPrec, 256, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f});
matMul = std::make_shared<ov::op::v0::MatMul>(fq1, fq2, false, true);
@ -98,7 +98,7 @@ protected:
// matmul->fq->matmul covers the x8*s8->x8 case
auto filterWeightsShape = matMul->get_output_shape(0);
auto filterWeightsNode = ngraph::builder::makeConstant(element::f32, filterWeightsShape, std::vector<float>{}, true);
auto filterWeightsNode = ngraph::builder::makeConstant(ov::element::f32, filterWeightsShape, std::vector<float>{}, true);
auto fq3 = ngraph::builder::makeFakeQuantize(filterWeightsNode, ngPrec, 256, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f});
// only the avx2 matmul implementation supports s8*s8 input
auto matMul2 = std::make_shared<ov::op::v0::MatMul>(nodeBeforeConv, fq3, false, false);
@ -106,7 +106,7 @@ protected:
function = makeNgraphFunction(ngPrec, inputParams, matMul2, "MatmulBrgemmInt8");
}
void CheckNode(std::shared_ptr<const ov::Model> function, const std::string& nodeName) {
void check_node(std::shared_ptr<const ov::Model> function, const std::string& nodeName) {
ASSERT_NE(nullptr, function);
for (const auto &node : function->get_ops()) {
const auto & rtInfo = node->get_rt_info();
@ -127,18 +127,17 @@ protected:
TEST_P(MatmulBrgemmInt8Test, CompareWithRefs) {
// only cover avx2_vnni
if (InferenceEngine::with_cpu_x86_avx512_core() || !InferenceEngine::with_cpu_x86_avx2_vnni())
if (ov::with_cpu_x86_avx512_core() || !ov::with_cpu_x86_avx2_vnni())
GTEST_SKIP();
Run();
InferenceEngine::CNNNetwork execGraphInfo = executableNetwork.GetExecGraphInfo();
auto exec = execGraphInfo.getFunction();
CheckNode(exec, nameMatmul);
run();
auto exec = compiledModel.get_runtime_model();
check_node(exec, nameMatmul);
}
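This hunk shows the fixture migration in one place; as a quick reference, the API 1.0 pieces replaced across this commit and their API 2.0 counterparts:

// LayerTestsUtils::LayerTestsCommon           ->  ov::test::SubgraphBaseStaticTest
// Run()                                       ->  run()
// executableNetwork (ExecutableNetwork)       ->  compiledModel (ov::CompiledModel)
// GetExecGraphInfo().getFunction()            ->  get_runtime_model()
// InferenceEngine::with_cpu_x86_*()           ->  ov::with_cpu_x86_*()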
namespace {
const std::vector<SizeVector> supportedInputShapes = {
const std::vector<ov::Shape> supportedInputShapes = {
{16, 32},
{17, 15},
};

View File

@ -4,37 +4,35 @@
#include "test_utils/cpu_test_utils.hpp"
#include "ov_models/builders.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
using namespace ngraph;
using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ov::test;
namespace SubgraphTestsDefinitions {
using MatmulStridedInputsOutputsTestParams = Precision;
using MatmulStridedInputsOutputsTestParams = ov::element::Type;
class MatmulStridedInputsOutputsTest : public testing::WithParamInterface<MatmulStridedInputsOutputsTestParams>,
public CPUTestsBase,
virtual public LayerTestsUtils::LayerTestsCommon {
virtual public SubgraphBaseStaticTest {
public:
static std::string getTestCaseName(testing::TestParamInfo<MatmulStridedInputsOutputsTestParams> obj) {
Precision netPrecision;
ov::element::Type netPrecision;
netPrecision = obj.param;
std::ostringstream result;
result << "netPRC=" << netPrecision.name() << "_";
result << "netPRC=" << netPrecision.to_string() << "_";
return result.str();
}
protected:
void SetUp() override {
targetDevice = ov::test::utils::DEVICE_CPU;
Precision netPrecision;
netPrecision = this->GetParam();
const auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
targetDevice = utils::DEVICE_CPU;
const auto ngPrec = this->GetParam();
SizeVector splitShape{1, 2, 1, 16};
ov::Shape splitShape{1, 2, 1, 16};
ov::ParameterVector splitInputParams {std::make_shared<ov::op::v0::Parameter>(ngPrec, ov::Shape(splitShape))};
auto split_axis_op = std::make_shared<ov::op::v0::Constant>(ov::element::Type_t::i64, ov::Shape{}, std::vector<int64_t>{1});
auto split = std::make_shared<ov::op::v1::Split>(splitInputParams[0], split_axis_op, 2);
@ -42,19 +40,20 @@ protected:
std::vector<ov::Shape> concatShapes{{1, 1, 8, 8}, {1, 1, 8, 8}};
ov::ParameterVector concatInputParams {std::make_shared<ov::op::v0::Parameter>(ngPrec, concatShapes[0]),
std::make_shared<ov::op::v0::Parameter>(ngPrec, concatShapes[1])};
const auto concatOutputNodes = helpers::convert2OutputVector(helpers::castOps2Nodes<op::Parameter>(concatInputParams));
const auto concatOutputNodes = ov::test::utils::convert2OutputVector(
ov::test::utils::castOps2Nodes<ov::op::v0::Parameter>(concatInputParams));
const auto concat = std::make_shared<ov::op::v0::Concat>(concatOutputNodes, 2);
const auto matMul1 = std::make_shared<ov::op::v0::MatMul>(split->output(0), concat, false, false);
SizeVector matmulShape{1, 1, 16, 8};
ov::Shape matmulShape{1, 1, 16, 8};
ov::ParameterVector matmulInputParams {std::make_shared<ov::op::v0::Parameter>(ngPrec, ov::Shape(matmulShape))};
const auto matMul2 = std::make_shared<ov::op::v0::MatMul>(split->output(1), matmulInputParams[0], false, false);
const auto concatMatMuls = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{matMul1, matMul2}, 2 /* 3rd axis */);
ngraph::ParameterVector inputParams = {splitInputParams[0], concatInputParams[0], concatInputParams[1], matmulInputParams[0]};
ov::ParameterVector inputParams = {splitInputParams[0], concatInputParams[0], concatInputParams[1], matmulInputParams[0]};
function = makeNgraphFunction(ngPrec, inputParams, concatMatMuls, "MatmulStridedInputsOutputs");
}
};
@ -84,14 +83,14 @@ protected:
*/
TEST_P(MatmulStridedInputsOutputsTest, CompareWithRefs) {
Run();
run();
}
namespace {
INSTANTIATE_TEST_SUITE_P(smoke_Check, MatmulStridedInputsOutputsTest,
::testing::Values(Precision::FP32,
Precision::BF16),
::testing::Values(ov::element::f32,
ov::element::bf16),
MatmulStridedInputsOutputsTest::getTestCaseName);
} // namespace
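This file swaps InferenceEngine::Precision for ov::element::Type end to end; a minimal illustration of the replacement used in getTestCaseName above:

ov::element::Type netPrecision = ov::element::bf16;
std::string name = "netPRC=" + netPrecision.to_string() + "_";  // "netPRC=bf16_", was Precision::name()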

View File

@ -7,8 +7,6 @@
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "transformations/rt_info/decompression.hpp"
using namespace ngraph;
using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ov::test;
@ -58,7 +56,7 @@ using MatmulWeightsDecompressionParams = std::tuple<ShapeParams,
bool, // transpose on weights
bool, // decompression subtract
bool, // reshape on decompression constants
std::map<std::string, std::string>, // additional config
ov::AnyMap, // additional config
fusingSpecificParams,
bool>; // should use decompression implementation
@ -73,7 +71,7 @@ public:
bool transpose;
bool decompression_sub;
bool reshape_on_decompression;
std::map<std::string, std::string> additional_config;
ov::AnyMap additional_config;
fusingSpecificParams fusing_params;
bool should_fuse;
@ -99,7 +97,7 @@ public:
result << "config=(";
for (const auto& configEntry : additional_config) {
result << configEntry.first << ", " << configEntry.second << ":";
result << configEntry.first << ", " << configEntry.second.as<std::string>() << ":";
}
result << ")";
result << CpuTestWithFusing::getTestCaseName(fusing_params);
@ -145,7 +143,7 @@ protected:
auto weights = ngraph::builder::makeConstant<int8_t>(weights_precision, transformed_weights_shape, {}, true, 7);
weights->set_friendly_name("Compressed_weights");
auto weights_convert = std::make_shared<ngraph::opset1::Convert>(weights, decompression_precision);
auto weights_convert = std::make_shared<ov::op::v0::Convert>(weights, decompression_precision);
std::shared_ptr<ov::Node> mul_parent = weights_convert;
auto output_channels = *weights_shape.rbegin();
@ -166,7 +164,7 @@ protected:
scaleshift_const_shape.erase(std::remove(scaleshift_const_shape.begin(), scaleshift_const_shape.end(), 1), scaleshift_const_shape.end());
if (add_subtract) {
auto shift_const = ngraph::builder::makeConstant<uint8_t>(weights_precision, scaleshift_const_shape, {}, true, 7);
std::shared_ptr<ov::Node> shift_convert = std::make_shared<ngraph::opset1::Convert>(shift_const, decompression_precision);
std::shared_ptr<ov::Node> shift_convert = std::make_shared<ov::op::v0::Convert>(shift_const, decompression_precision);
if (reshape_on_decompression_constant) {
auto shift_reshape_const = ov::opset10::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape);
auto shift_reshape = std::make_shared<ov::opset10::Reshape>(shift_convert, shift_reshape_const, false);
@ -234,7 +232,7 @@ protected:
bool transpose_weights;
bool decompression_sub;
bool reshape_on_decompression;
std::map<std::string, std::string> additional_config;
ov::AnyMap additional_config;
fusingSpecificParams fusing_params;
bool should_fuse;
@ -252,7 +250,7 @@ protected:
std::tie(postOpMgrPtr, fusedOps) = fusing_params;
init_input_shapes({shape_params.data_shape, {{}, {{shape_params.weights_shape}}}});
ElementType netType = element::f32;
ElementType netType = ov::element::f32;
inType = outType = netType;
function = initSubgraph(inputDynamicShapes[0],
@ -266,7 +264,7 @@ protected:
reshape_on_decompression);
}
void checkResults() {
void check_results() {
const auto& test_param = GetParam();
const auto& weights_precision = std::get<1>(test_param);
@ -290,19 +288,19 @@ protected:
TEST_P(MatmulWeightsDecompression, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
checkResults();
check_results();
}
namespace {
std::vector<std::map<std::string, std::string>> filterAdditionalConfigBasic() {
std::vector<std::map<std::string, std::string>> additional_config = {CPUTestUtils::cpuEmptyPluginConfig};
std::vector<ov::AnyMap> filter_additional_config_basic() {
std::vector<ov::AnyMap> additional_config = {CPUTestUtils::empty_plugin_config};
return additional_config;
}
std::vector<std::map<std::string, std::string>> filterAdditionalConfigAMX() {
std::vector<std::map<std::string, std::string>> additional_config = {};
if (with_cpu_x86_avx512_core_amx())
additional_config.push_back({{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}});
std::vector<ov::AnyMap> filter_additional_config_amx() {
std::vector<ov::AnyMap> additional_config = {};
if (ov::with_cpu_x86_avx512_core_amx())
additional_config.push_back({{ov::hint::inference_precision(ov::element::bf16)}});
return additional_config;
}
@ -345,7 +343,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic,
::testing::Values(true),
::testing::Values(true),
::testing::Values(true),
::testing::ValuesIn(filterAdditionalConfigBasic()),
::testing::ValuesIn(filter_additional_config_basic()),
::testing::ValuesIn(fusing_params),
::testing::Values(true)),
MatmulWeightsDecompression::getTestCaseName);
@ -358,7 +356,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_amx,
::testing::Values(true),
::testing::Values(true),
::testing::Values(true),
::testing::ValuesIn(filterAdditionalConfigAMX()),
::testing::ValuesIn(filter_additional_config_amx()),
::testing::ValuesIn(fusing_params),
::testing::Values(true)),
MatmulWeightsDecompression::getTestCaseName);
@ -387,7 +385,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_basic,
::testing::ValuesIn(transpose_weights),
::testing::ValuesIn(add_decompression_sub),
::testing::ValuesIn(reshape_on_decompression),
::testing::ValuesIn(filterAdditionalConfigBasic()),
::testing::ValuesIn(filter_additional_config_basic()),
::testing::Values(emptyFusingSpec),
::testing::Values(true)),
MatmulWeightsDecompression::getTestCaseName);
@ -400,7 +398,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_amx,
::testing::ValuesIn(transpose_weights),
::testing::ValuesIn(add_decompression_sub),
::testing::ValuesIn(reshape_on_decompression),
::testing::ValuesIn(filterAdditionalConfigAMX()),
::testing::ValuesIn(filter_additional_config_amx()),
::testing::Values(emptyFusingSpec),
::testing::Values(true)),
MatmulWeightsDecompression::getTestCaseName);

View File

@ -128,13 +128,13 @@ public:
const std::vector<cpu_memory_format_t>& outFmts,
const std::vector<std::string>& priority);
//TODO: change to setter method
static std::string makeSelectedTypeStr(std::string implString, ngraph::element::Type_t elType);
static std::string makeSelectedTypeStr(std::string implString, ov::element::Type_t elType);
void updateSelectedType(const std::string& primitiveType, const ov::element::Type netType, const ov::AnyMap& config);
CPUInfo getCPUInfo() const;
std::shared_ptr<ngraph::Function> makeNgraphFunction(const ngraph::element::Type &ngPrc,
ngraph::ParameterVector &params,
const std::shared_ptr<ngraph::Node> &lastNode,
std::shared_ptr<ov::Model> makeNgraphFunction(const ov::element::Type &ngPrc,
ov::ParameterVector &params,
const std::shared_ptr<ov::Node> &lastNode,
std::string name);
void CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, const std::set<std::string>& nodeType) const;
@ -153,9 +153,9 @@ protected:
* @param lastNode The last node of the initial graph.
* @return The last node of the modified graph.
*/
virtual std::shared_ptr<ngraph::Node> modifyGraph(const ngraph::element::Type &ngPrc,
ngraph::ParameterVector &params,
const std::shared_ptr<ngraph::Node> &lastNode);
virtual std::shared_ptr<ov::Node> modifyGraph(const ov::element::Type& ngPrc,
ov::ParameterVector& params,
const std::shared_ptr<ov::Node>& lastNode);
virtual bool primTypeCheck(std::string primType) const;

View File

@ -25,10 +25,10 @@ std::string CpuTestWithFusing::getTestCaseName(fusingSpecificParams params) {
return result.str();
}
std::shared_ptr<ngraph::Node>
CpuTestWithFusing::modifyGraph(const ngraph::element::Type &ngPrc, ngraph::ParameterVector &params, const std::shared_ptr<ngraph::Node> &lastNode) {
std::shared_ptr<ov::Node>
CpuTestWithFusing::modifyGraph(const ov::element::Type &ngPrc, ov::ParameterVector &params, const std::shared_ptr<ov::Node> &lastNode) {
CPUTestsBase::modifyGraph(ngPrc, params, lastNode);
std::shared_ptr<ngraph::Node> retNode = lastNode;
std::shared_ptr<ov::Node> retNode = lastNode;
if (postOpMgrPtr) {
retNode = postOpMgrPtr->addPostOps(ngPrc, params, lastNode);
}
@ -42,7 +42,7 @@ void CpuTestWithFusing::CheckFusingResults(const std::shared_ptr<const ov::Model
for (const auto & op : function->get_ops()) {
const auto &rtInfo = op->get_rt_info();
auto getExecValue = [](const std::string &paramName, const ngraph::Node::RTMap& rtInfo) -> std::string {
auto getExecValue = [](const std::string &paramName, const ov::Node::RTMap& rtInfo) -> std::string {
auto it = rtInfo.find(paramName);
OPENVINO_ASSERT(rtInfo.end() != it);
return it->second.as<std::string>();
@ -76,8 +76,8 @@ void CpuTestWithFusing::CheckPluginRelatedResultsImpl(const std::shared_ptr<cons
CheckFusingResults(function, nodeType);
}
std::shared_ptr<ngraph::Node>
postFunctionMgr::addPostOps(const ngraph::element::Type &ngPrc, ngraph::ParameterVector &params, const std::shared_ptr<ngraph::Node> &lastNode) const {
std::shared_ptr<ov::Node>
postFunctionMgr::addPostOps(const ov::element::Type &ngPrc, ov::ParameterVector &params, const std::shared_ptr<ov::Node> &lastNode) const {
auto clonedPostFunction = ngraph::clone_function(*_pFunction);
clonedPostFunction->set_friendly_name(_pFunction->get_friendly_name());
clonedPostFunction->replace_node(clonedPostFunction->get_parameters()[0], lastNode);
@ -90,9 +90,9 @@ std::string postFunctionMgr::getFusedOpsNames() const {
postNodesMgr::postNodesMgr(std::vector<postNodeBuilder> postNodes) : _postNodes(std::move(postNodes)) {}
std::shared_ptr<ngraph::Node>
postNodesMgr::addPostOps(const ngraph::element::Type &ngPrc, ngraph::ParameterVector &params, const std::shared_ptr<ngraph::Node> &lastNode) const {
std::shared_ptr<ngraph::Node> tmpNode = lastNode;
std::shared_ptr<ov::Node>
postNodesMgr::addPostOps(const ov::element::Type &ngPrc, ov::ParameterVector &params, const std::shared_ptr<ov::Node> &lastNode) const {
std::shared_ptr<ov::Node> tmpNode = lastNode;
postNodeConfig cfg{lastNode, tmpNode, ngPrc, params};