[CPU] Gemm node: support U8 and I8 input precisions and add tests
parent 92e5e010b9
commit 629ca3a5d8
@@ -11,6 +11,7 @@
 #include <cmath>
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
+#include "ie_parallel.hpp"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@@ -118,29 +119,38 @@ void MKLDNNGemmNode::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;

-    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
+    auto inPrec0 = getCnnLayer()->insData[0].lock()->getPrecision();
+    auto inPrec1 = getCnnLayer()->insData[1].lock()->getPrecision();
+    if ((inPrec0 != Precision::U8 && inPrec0 != Precision::I8) || inPrec1 != Precision::I8 || isThreeInputs) {
+        inPrec0 = Precision::FP32;
+        inPrec1 = Precision::FP32;
+    }
+
+    auto inputDataType0 = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrec0);
+    auto inputDataType1 = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrec1);
     auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);

-    auto same = [&] (memory::format fmt) -> PrimitiveDescInfo {
-        InferenceEngine::LayerConfig config;
-        config.dynBatchSupport = true;
-        for (size_t i = 0; i < getParentEdges().size(); i++) {
-            InferenceEngine::DataConfig dataConfig;
-            dataConfig.inPlace = -1;
-            dataConfig.constant = false;
-            dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType, fmt);
-            config.inConfs.push_back(dataConfig);
-        }
+    InferenceEngine::LayerConfig config;
+    config.dynBatchSupport = true;
+
+    auto createDataConfig = [](const MKLDNNDims& dims, memory::data_type dataType) -> InferenceEngine::DataConfig {
         InferenceEngine::DataConfig dataConfig;
-            dataConfig.inPlace = -1;
-            dataConfig.constant = false;
-            dataConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmt);
-            config.outConfs.push_back(dataConfig);
-        return {config, impl_desc_type::gemm_any, fmt};
+        dataConfig.inPlace = -1;
+        dataConfig.constant = false;
+        dataConfig.desc = MKLDNNMemoryDesc(dims, dataType, MKLDNNMemory::GetPlainFormat(dims));
+        return dataConfig;
     };

-    supportedPrimitiveDescriptors.push_back(same(memory::any));
+    config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getDims(), inputDataType0));
+    config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getDims(), inputDataType1));
+    if (isThreeInputs) {
+        auto inputDataType2 = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
+        config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getDims(), inputDataType2));
+    }
+
+    config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getDims(), outputDataType));
+
+    supportedPrimitiveDescriptors.push_back(PrimitiveDescInfo(config, impl_desc_type::gemm_any, MKLDNNMemory::GetPlainFormat(getChildEdgeAt(0)->getDims())));
 }

 void MKLDNNGemmNode::initOptimalPrimitiveDescriptor() {
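Aside: the precision rule this hunk introduces can be summarized in a small free function. This is an illustrative sketch only, not part of the commit; the helper name is hypothetical and it assumes Inference Engine's ie_precision.hpp header.

// Sketch: the quantized GEMM path is taken only for a two-input U8/I8 x I8 Gemm;
// everything else, including the three-input (bias) variant, falls back to FP32.
#include <utility>
#include <ie_precision.hpp>

using InferenceEngine::Precision;

inline std::pair<Precision, Precision> selectGemmInputPrecisions(Precision in0, Precision in1, bool isThreeInputs) {
    const bool quantized = (in0 == Precision::U8 || in0 == Precision::I8) &&
                           (in1 == Precision::I8) && !isThreeInputs;
    if (!quantized)
        return {Precision::FP32, Precision::FP32};  // plain sgemm path
    return {in0, in1};                              // u8s8 / s8s8 integer GEMM path
}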
@@ -157,16 +167,6 @@ void MKLDNNGemmNode::initOptimalPrimitiveDescriptor() {
     if (!selectedPD) {
         return;
     }
-
-    // Only FP32 is supported for now
-    auto& selectedConfig = getSelectedPrimitiveDescriptor()->getConfig();
-    for (auto &inConf : selectedConfig.inConfs) {
-        inConf.desc.setPrecision(Precision::FP32);
-    }
-
-    for (auto &outConf : selectedConfig.outConfs) {
-        outConf.desc.setPrecision(Precision::FP32);
-    }
 }

 void MKLDNNGemmNode::createPrimitive() {
@@ -187,17 +187,44 @@ void MKLDNNGemmNode::createPrimitive() {
     }
 }

-void MKLDNNGemmNode::execute(mkldnn::stream strm) {
+inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const float *A, int lda,
+                         const float *B, int ldb, float beta, float *C, int ldc) {
+    mkldnn_sgemm(transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
+}
+
+inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const uint8_t *A, int lda,
+                         const int8_t *B, int ldb, float beta, float *C, int ldc) {
+    const int32_t co = 0;
+    int32_t *Ci = reinterpret_cast<int32_t *>(C);
+    mkldnn_gemm_u8s8s32(transa, transb, 'F', M, N, K, alpha, A, lda, 0, B, ldb, 0, beta, Ci, ldc, &co);
+    parallel_for(M * N, [&](size_t i) {
+        C[i] = Ci[i];
+    });
+}
+
+inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const int8_t *A, int lda,
+                         const int8_t *B, int ldb, float beta, float *C, int ldc) {
+    const int32_t co = 0;
+    int32_t *Ci = reinterpret_cast<int32_t *>(C);
+    mkldnn_gemm_s8s8s32(transa, transb, 'F', M, N, K, alpha, A, lda, 0, B, ldb, 0, beta, Ci, ldc, &co);
+    parallel_for(M * N, [&](size_t i) {
+        C[i] = Ci[i];
+    });
+}
+
+template<typename T0, typename T1>
+void MKLDNNGemmNode::process_data() {
     auto inDims0 = getParentEdgeAt(0)->getDims();
     auto inDims1 = getParentEdgeAt(1)->getDims();
     auto outDims = getChildEdgeAt(0)->getDims();

     auto& srcMemory0 = getParentEdgeAt(0)->getMemory();
     auto& srcMemory1 = getParentEdgeAt(1)->getMemory();
-    const float *src0_ptr = reinterpret_cast<const float*>(srcMemory0.GetData()) +
-                            srcMemory0.GetDescriptor().data.layout_desc.blocking.offset_padding;
-    const float *src1_ptr = reinterpret_cast<const float*>(srcMemory1.GetData()) +
-                            srcMemory1.GetDescriptor().data.layout_desc.blocking.offset_padding;
+    const T0 *src0_ptr = reinterpret_cast<const T0*>(srcMemory0.GetData()) +
+                         srcMemory0.GetDescriptor().data.layout_desc.blocking.offset_padding;
+    const T1 *src1_ptr = reinterpret_cast<const T1*>(srcMemory1.GetData()) +
+                         srcMemory1.GetDescriptor().data.layout_desc.blocking.offset_padding;
     float *dst_ptr = reinterpret_cast<float*>(getChildEdgeAt(0)->getMemory().GetData()) +
                      getChildEdgeAt(0)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;

@@ -218,7 +245,7 @@ void MKLDNNGemmNode::execute(mkldnn::stream strm) {
     if (isThreeInputs) {
         auto& srcMemory2 = getParentEdgeAt(2)->getMemory();
         src2_ptr = reinterpret_cast<const float *>(srcMemory2.GetData()) +
-                        srcMemory2.GetDescriptor().data.layout_desc.blocking.offset_padding;
+                   srcMemory2.GetDescriptor().data.layout_desc.blocking.offset_padding;
     } else {
         src2_ptr = dst_ptr;
     }
@@ -228,8 +255,8 @@ void MKLDNNGemmNode::execute(mkldnn::stream strm) {
     }

     for (int b1 = 0; b1 < MB1; b1++) {
-        const float *a_ptr = src0_ptr;
-        const float *b_ptr = src1_ptr;
+        const T0 *a_ptr = src0_ptr;
+        const T1 *b_ptr = src1_ptr;
         const float *c_ptr = src2_ptr;
         float *d_ptr = dst_ptr;

@@ -239,7 +266,7 @@ void MKLDNNGemmNode::execute(mkldnn::stream strm) {
                 c_ptr += cOffsets[0];
             }

-            mkldnn_sgemm(transa, transb, M, N, K, alpha, a_ptr, lda, b_ptr, ldb, beta, d_ptr, ldc);
+            process_gemm(transa, transb, M, N, K, alpha, a_ptr, lda, b_ptr, ldb, beta, d_ptr, ldc);

             a_ptr += aOffsets[0];
             b_ptr += bOffsets[0];
@@ -256,6 +283,22 @@ void MKLDNNGemmNode::execute(mkldnn::stream strm) {
     }
 }

+void MKLDNNGemmNode::execute(mkldnn::stream strm) {
+    switch (getParentEdgeAt(0)->getDesc().getPrecision()) {
+        case Precision::FP32:
+            process_data<float, float>();
+            break;
+        case Precision::I8:
+            process_data<int8_t, int8_t>();
+            break;
+        case Precision::U8:
+            process_data<uint8_t, int8_t>();
+            break;
+        default:
+            THROW_IE_EXCEPTION << "Gemm node: first input has unsupported precision";
+    }
+}
+
 bool MKLDNNGemmNode::created() const {
     return getType() == Gemm;
 }
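For context on the integer path dispatched above: the u8/s8 process_gemm overload runs the int32 GEMM directly into the float output buffer (reinterpreted as int32_t*) and then converts the results to float in place via parallel_for, relying on sizeof(int32_t) == sizeof(float). Below is a naive, library-free reference of the intended math; an illustrative sketch only (row-major, no transposes, zero-point offsets fixed to 0 as in the diff), not part of the commit.

#include <cstdint>
#include <vector>

// Reference: C(f32) = alpha * A(u8) * B(s8) + beta * C, accumulated in int32 as
// mkldnn_gemm_u8s8s32 does, then written back to the float buffer element by element.
// Note: beta is applied to float values here; the library call applies it to the
// int32 view of C, which only matters when C holds meaningful prior data.
inline void ref_gemm_u8s8f32(int M, int N, int K, float alpha, const uint8_t *A, const int8_t *B,
                             float beta, float *C) {
    std::vector<int32_t> acc(static_cast<size_t>(M) * N, 0);
    for (int m = 0; m < M; ++m)
        for (int n = 0; n < N; ++n)
            for (int k = 0; k < K; ++k)
                acc[m * N + n] += static_cast<int32_t>(A[m * K + k]) * static_cast<int32_t>(B[k * N + n]);
    for (int i = 0; i < M * N; ++i)
        C[i] = alpha * static_cast<float>(acc[i]) + beta * C[i];
}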
@@ -38,6 +38,8 @@ private:
     std::vector<int> aOffsets;
     std::vector<int> bOffsets;
     std::vector<int> cOffsets;
+
+    template<typename T0, typename T1> void process_data();
 };

 }  // namespace MKLDNNPlugin
@@ -0,0 +1,34 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "single_layer_tests/mat_mul.hpp"

using namespace LayerTestsDefinitions;

namespace {

const std::vector<InferenceEngine::Precision> inputPrecisions = {
        InferenceEngine::Precision::FP32
};

const std::vector<std::vector<size_t>> shapesA = {
        {1, 4, 5, 6}
};

const std::vector<std::vector<size_t>> shapesB = {
        {1, 4, 6, 4}
};

INSTANTIATE_TEST_CASE_P(MatMul, MatMulTest,
        ::testing::Combine(
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(shapesA),
                ::testing::ValuesIn(shapesB),
                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        MatMulTest::getTestCaseName);

}  // namespace
@@ -0,0 +1,60 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "subgraph_tests/quantized_mat_mul.hpp"

using namespace LayerTestsDefinitions;
using namespace ngraph::helpers;

namespace {

const std::vector<InferenceEngine::Precision> netPrecisions = {
        InferenceEngine::Precision::FP32
};

const std::vector<std::vector<size_t>> shapesA = {
        {1, 4, 5, 6}
};

const std::vector<std::vector<size_t>> shapesB = {
        {1, 4, 6, 4}
};

const std::vector<size_t> levels = {256};
const std::vector<QuantizationGranularity> granularity = {Pertensor};

const auto quantParams_i8i8 = ::testing::Combine(
        ::testing::ValuesIn(levels),
        ::testing::ValuesIn(granularity),
        ::testing::Values(InferenceEngine::Precision::I8)
);

const auto quantParams_u8i8 = ::testing::Combine(
        ::testing::ValuesIn(levels),
        ::testing::ValuesIn(granularity),
        ::testing::Values(InferenceEngine::Precision::U8)
);

INSTANTIATE_TEST_CASE_P(QuantMatMul_i8i8, QuantMatMulTest,
        ::testing::Combine(
                quantParams_i8i8,
                ::testing::ValuesIn(netPrecisions),
                ::testing::ValuesIn(shapesA),
                ::testing::ValuesIn(shapesB),
                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        QuantMatMulTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(QuantMatMul_u8i8, QuantMatMulTest,
        ::testing::Combine(
                quantParams_u8i8,
                ::testing::ValuesIn(netPrecisions),
                ::testing::ValuesIn(shapesA),
                ::testing::ValuesIn(shapesB),
                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        QuantMatMulTest::getTestCaseName);

}  // namespace
@@ -0,0 +1,31 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "functional_test_utils/layer_test_utils.hpp"

typedef std::tuple<
        InferenceEngine::Precision,
        InferenceEngine::SizeVector,
        InferenceEngine::SizeVector,
        LayerTestsUtils::TargetDevice
> MatMulLayerTestParamsSet;

namespace LayerTestsDefinitions {

class MatMulTest : public testing::WithParamInterface<MatMulLayerTestParamsSet>, public LayerTestsUtils::LayerTestsCommon {
public:
    static std::string getTestCaseName(const testing::TestParamInfo<MatMulLayerTestParamsSet> &obj);

protected:
    void SetUp() override;
};

}  // namespace LayerTestsDefinitions
@@ -0,0 +1,36 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "functional_test_utils/layer_test_utils.hpp"

typedef std::tuple<
        size_t,
        ngraph::helpers::QuantizationGranularity,
        InferenceEngine::Precision> QuantParams;

typedef std::tuple<
        QuantParams,
        InferenceEngine::Precision,
        InferenceEngine::SizeVector,
        InferenceEngine::SizeVector,
        LayerTestsUtils::TargetDevice> QuantMatMulLayerTestParamsSet;

namespace LayerTestsDefinitions {

class QuantMatMulTest : public testing::WithParamInterface<QuantMatMulLayerTestParamsSet>, public LayerTestsUtils::LayerTestsCommon {
public:
    static std::string getTestCaseName(const testing::TestParamInfo<QuantMatMulLayerTestParamsSet> &obj);

protected:
    void SetUp() override;
};

}  // namespace LayerTestsDefinitions
@@ -0,0 +1,50 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <functional>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "single_layer_tests/mat_mul.hpp"
#include "ngraph_functions/builders.hpp"

namespace LayerTestsDefinitions {

std::string MatMulTest::getTestCaseName(const testing::TestParamInfo<MatMulLayerTestParamsSet> &obj) {
    InferenceEngine::Precision netPrecision;
    InferenceEngine::SizeVector inputShape0;
    InferenceEngine::SizeVector inputShape1;
    std::string targetDevice;
    std::tie(netPrecision, inputShape0, inputShape1, targetDevice) = obj.param;

    std::ostringstream result;
    result << "IS0=" << CommonTestUtils::vec2str(inputShape0) << "_";
    result << "IS1=" << CommonTestUtils::vec2str(inputShape1) << "_";
    result << "netPRC=" << netPrecision.name() << "_";
    result << "targetDevice=" << targetDevice;
    return result.str();
}

void MatMulTest::SetUp() {
    InferenceEngine::SizeVector inputShape0;
    InferenceEngine::SizeVector inputShape1;
    auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
    std::tie(netPrecision, inputShape0, inputShape1, targetDevice) = this->GetParam();
    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
    auto params = ngraph::builder::makeParams(ngPrc, {inputShape0, inputShape1});
    auto paramOuts = ngraph::helpers::convert2OutputVector(
            ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
    auto MatMul = std::dynamic_pointer_cast<ngraph::opset3::MatMul>(
            ngraph::builder::makeMatMul(paramOuts[0], paramOuts[1]));
    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(MatMul)};
    function = std::make_shared<ngraph::Function>(results, params, "MatMul");
}

TEST_P(MatMulTest, CompareWithRefs) {
    Run();
};

}  // namespace LayerTestsDefinitions
@@ -0,0 +1,88 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <functional>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "subgraph_tests/quantized_mat_mul.hpp"
#include "ngraph_functions/builders.hpp"

using ngraph::helpers::QuantizationGranularity;

namespace LayerTestsDefinitions {

std::string QuantMatMulTest::getTestCaseName(const testing::TestParamInfo<QuantMatMulLayerTestParamsSet> &obj) {
    QuantParams quantParams;
    InferenceEngine::Precision netPrecision;
    InferenceEngine::SizeVector inputShape0;
    InferenceEngine::SizeVector inputShape1;
    std::string targetDevice;
    std::tie(quantParams, netPrecision, inputShape0, inputShape1, targetDevice) = obj.param;

    size_t quantLevels;
    QuantizationGranularity quantGranularity;
    InferenceEngine::Precision fqPrec0;
    std::tie(quantLevels, quantGranularity, fqPrec0) = quantParams;

    std::ostringstream result;
    result << "IS0=" << CommonTestUtils::vec2str(inputShape0) << "_";
    result << "IS1=" << CommonTestUtils::vec2str(inputShape1) << "_";
    result << "Levels=" << quantLevels << "_";
    result << "QuantGranularity=" << quantGranularity << "_";
    result << "fq0PRC=" << fqPrec0.name() << "_";
    result << "netPRC=" << netPrecision.name() << "_";
    result << "targetDevice=" << targetDevice;
    return result.str();
}

void QuantMatMulTest::SetUp() {
    QuantParams quantParams;
    InferenceEngine::SizeVector inputShape0;
    InferenceEngine::SizeVector inputShape1;
    auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
    std::tie(quantParams, netPrecision, inputShape0, inputShape1, targetDevice) = this->GetParam();

    size_t quantLevels;
    QuantizationGranularity quantGranularity;
    InferenceEngine::Precision fqPrec0;
    std::tie(quantLevels, quantGranularity, fqPrec0) = quantParams;

    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
    auto params = ngraph::builder::makeParams(ngPrc, {inputShape0, inputShape1});
    auto paramOuts = ngraph::helpers::convert2OutputVector(
            ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));

    auto makeFakeQuantizeNode = [ngPrc, quantLevels, quantGranularity](const ngraph::Output<ngraph::Node> &in,
            std::vector<size_t> inputShape, InferenceEngine::Precision prec) -> std::shared_ptr<ngraph::Node> {
        std::vector<size_t> dataFqConstShapes(inputShape.size(), 1);
        if (quantGranularity == ngraph::helpers::Perchannel)
            dataFqConstShapes[1] = inputShape[1];
        size_t constDataSize = ngraph::shape_size(dataFqConstShapes);
        std::vector<float> inputLowData(constDataSize), inputHighData(constDataSize), outputLowData(constDataSize), outputHighData(constDataSize);
        for (int i = 0; i < constDataSize; i++) {
            inputLowData[i] = 0;
            inputHighData[i] = 255;
            outputLowData[i] = prec == InferenceEngine::Precision::I8 ? -128 : 0;
            outputHighData[i] = prec == InferenceEngine::Precision::I8 ? 127 : 255;
        }
        return ngraph::builder::makeFakeQuantize(in, ngPrc, quantLevels, dataFqConstShapes, inputLowData, inputHighData, outputLowData, outputHighData);
    };

    auto dataFq0 = makeFakeQuantizeNode(paramOuts[0], inputShape0, fqPrec0);
    auto dataFq1 = makeFakeQuantizeNode(paramOuts[1], inputShape1, InferenceEngine::Precision::I8);

    auto MatMul = std::dynamic_pointer_cast<ngraph::opset3::MatMul>(
            ngraph::builder::makeMatMul(dataFq0, dataFq1));
    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(MatMul)};
    function = std::make_shared<ngraph::Function>(results, params, "QuantMatMul");
}

TEST_P(QuantMatMulTest, CompareWithRefs) {
    Run();
};

}  // namespace LayerTestsDefinitions
@@ -89,6 +89,8 @@ InferenceEngine::details::LowPrecisionTransformer LayerTransformation::getLowPre
     return transformer;
 }

+IE_SUPPRESS_DEPRECATED_START
+
 void LayerTransformation::checkPrecisions(const InferenceEngine::CNNLayer& layer, const InferenceEngine::Precision& expectedPrecision) {
     for (const InferenceEngine::DataWeakPtr insDataWeak : layer.insData) {
         const InferenceEngine::DataPtr insData = insDataWeak.lock();
@@ -153,6 +155,8 @@ void LayerTransformation::checkPrecisions(
     checkPrecision(layer, expectedOutputPrecisions, 0, false);
 }

+IE_SUPPRESS_DEPRECATED_END
+
 std::pair<float, float> LayerTransformation::getQuantizationInterval(const InferenceEngine::Precision precision) {
     const bool unsignedInterval = precision == InferenceEngine::Precision::U8;
     const float low = unsignedInterval ? 0.f : -128.f;
@@ -239,5 +239,8 @@ std::shared_ptr<Node> makeShuffleChannels(const ngraph::Output<Node> &in,
                                           int axis,
                                           int group);

+std::shared_ptr<Node> makeMatMul(const Output<Node>& A,
+                                 const Output<Node>& B);
+
 }  // namespace builder
 }  // namespace ngraph
inference-engine/tests/ngraph_functions/src/mat_mul.cpp (new file)
@@ -0,0 +1,16 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ngraph_functions/builders.hpp"

namespace ngraph {
namespace builder {

std::shared_ptr<Node> makeMatMul(const Output<Node>& A,
                                 const Output<Node>& B) {
    return std::make_shared<ngraph::opset3::MatMul>(A, B);
}

}  // namespace builder
}  // namespace ngraph