[GNA] Fixed a bug with channel multiplier for int8 weights quantization (#9234)
commit d4fdbba7d8 (parent 1901f33bd3)
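In short: GNA int8 weight quantization computes a per-output-channel multiplier, and for channels with very small weight ranges that multiplier could end up as 0, zeroing out the whole channel. This commit clamps the multiplier to a minimum of 1 in runFakeQuantize(), reworks ConvertMatMulToFC so the weight transpose is applied to the constant underneath a FakeQuantize (keeping it constant-foldable), and adds a functional test (PerchannelQuantTest) that fails if any output comes back zeroed.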
@@ -111,9 +111,9 @@ ngraph::pass::ConvertMatMulToFC::ConvertMatMulToFC() {
     // Check that if the second input is a Constant operation and its shape without dimensions equal to 1 has length <= 2,
     // we replace MatMul with the FullyConnected operation.
     // Otherwise we replace MatMul with Gemm.
-    if ((std::dynamic_pointer_cast<opset1::Constant>(fc_input_b.get_node_shared_ptr()) ||
-         std::dynamic_pointer_cast<opset1::FakeQuantize>(fc_input_b.get_node_shared_ptr())) &&
-        std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) {
+    auto fq_after_const = std::dynamic_pointer_cast<opset1::FakeQuantize>(fc_input_b.get_node_shared_ptr());
+    if ((std::dynamic_pointer_cast<opset1::Constant>(fc_input_b.get_node_shared_ptr()) || fq_after_const) &&
+        std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) {
            return x != 1;
        }) <= 2) {
        Shape shape_a_aligned, shape_b_aligned;
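The hunk above hoists the FakeQuantize check into fq_after_const, so the weights-normalization code below can tell whether the weights constant sits underneath a FakeQuantize node.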
@@ -131,8 +131,18 @@ ngraph::pass::ConvertMatMulToFC::ConvertMatMulToFC() {

        // Weights normalization
        if (!matmul->get_transpose_b()) {
-           fc_input_b = create_transpose(fc_input_b, matmul->get_friendly_name() + "/transpose_b");
+           Output<ov::Node> constant = fc_input_b;
+           // transpose the constant itself, not FQ output, to allow constant folding to apply this transpose
+           if (fq_after_const) {
+               constant = fc_input_b.get_node_shared_ptr()->input_value(0);
+           }
+           fc_input_b = create_transpose(constant, matmul->get_friendly_name() + "/transpose_b");
            new_ops.push_back(fc_input_b.get_node_shared_ptr());
+           if (fq_after_const) {
+               fc_input_b = fq_after_const->clone_with_new_inputs(OutputVector{fc_input_b, fq_after_const->input_value(1),
+                   fq_after_const->input_value(2), fq_after_const->input_value(3), fq_after_const->input_value(4)});
+               new_ops.push_back(fc_input_b.get_node_shared_ptr());
+           }
        }

        if (shape_b.size() != 2) {
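Why the rework matters: a Transpose inserted above the FakeQuantize output cannot be folded into the weights, because the FQ node sits between it and the constant; transposing the constant itself and cloning the FQ on top leaves a graph where constant folding removes the Transpose entirely. A minimal standalone sketch of the same idea (using opset8 as in the new test, with a hypothetical make_transpose_2d helper; the pass itself uses opset1 and its own create_transpose):

#include <memory>

#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset8.hpp>

using namespace ngraph;

// Hypothetical helper (not part of the commit): swap the two axes of a
// 2D tensor with a Transpose node.
static Output<Node> make_transpose_2d(const Output<Node>& input) {
    auto order = opset8::Constant::create(element::i64, Shape{2}, {1, 0});
    return std::make_shared<opset8::Transpose>(input, order);
}

// Given weights of the form FQ(Constant, in_lo, in_hi, out_lo, out_hi),
// rebuild them as FQ(Transpose(Constant), in_lo, in_hi, out_lo, out_hi):
// the Transpose now sits directly on the Constant and can be constant-folded,
// which is the effect the hunk above achieves inside ConvertMatMulToFC.
// The range inputs are reused unchanged, as in the commit.
std::shared_ptr<Node> transpose_under_fq(const std::shared_ptr<opset8::FakeQuantize>& fq) {
    auto transposed_const = make_transpose_2d(fq->input_value(0));
    return fq->clone_with_new_inputs(OutputVector{transposed_const,
                                                  fq->input_value(1), fq->input_value(2),
                                                  fq->input_value(3), fq->input_value(4)});
}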
@@ -258,6 +258,9 @@ void QuantizationCallback<int8_t, gna_compound_bias_t>::runFakeQuantize() const
            channel_multiplier = scaled_row_max / static_cast<float>(MAX_VAL_1B_WEIGHT);
        }

+       // channel multiplier shouldn't be 0
+       channel_multiplier = channel_multiplier == 0 ? 1 : channel_multiplier;
+
        ptr_int_biases[i].multiplier = static_cast<uint8_t> (channel_multiplier + 0.5f);
        if (channel_multiplier > MAX_OUT_MULTIPLIER) {
            THROW_GNA_EXCEPTION << "invalid channel multiplier: " << channel_multiplier;
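The clamp above is the actual bug fix. A minimal sketch of the failure mode, under the assumption that the multiplier is held in an integer before being stored, so a small enough per-channel range truncates it to exactly 0 (values are illustrative; MAX_VAL_1B_WEIGHT is 127 in the plugin sources, and the real computation of scaled_row_max involves the weight scale factor):

#include <cstdint>
#include <cstdio>

int main() {
    // Max magnitude of an int8 weight, as in the hunk above.
    const float MAX_VAL_1B_WEIGHT = 127.0f;

    // A channel whose scaled row maximum is small relative to MAX_VAL_1B_WEIGHT
    // (e.g. test rows generated with the 0.001 multiplier). Illustrative value.
    float scaled_row_max = 12.0f;

    // The ratio is below 1, so storing it in an integer multiplier truncates to 0.
    uint32_t channel_multiplier = scaled_row_max / MAX_VAL_1B_WEIGHT;  // 0

    // The fix: a channel multiplier of 0 would zero out the whole output
    // channel, so it is clamped to 1 before being written to the bias record.
    channel_multiplier = channel_multiplier == 0 ? 1 : channel_multiplier;
    uint8_t stored = static_cast<uint8_t>(channel_multiplier + 0.5f);  // 1, not 0

    printf("multiplier stored for this channel: %u\n", static_cast<unsigned>(stored));
    return 0;
}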
@@ -0,0 +1,140 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>
#include <memory>
#include <tuple>
#include <string>

#include <ie_core.hpp>

#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"

#include "ngraph_functions/pass/convert_prc.hpp"

typedef std::tuple<
    InferenceEngine::Precision,          // Network precision
    std::string,                         // Target device
    std::map<std::string, std::string>,  // Configuration
    std::pair<float, float>,             // Weights values
    std::vector<size_t>                  // Input shapes
> matmulParams;

namespace LayerTestsDefinitions {

class PerchannelQuantTest : public testing::WithParamInterface<matmulParams>,
                            public LayerTestsUtils::LayerTestsCommon {
public:
    static std::string getTestCaseName(testing::TestParamInfo<matmulParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        std::pair<float, float> weightsValues;
        std::vector<size_t> inputShape;
        std::tie(netPrecision, targetDevice, configuration, weightsValues, inputShape) = obj.param;

        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            result << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        result << "_range=(" << weightsValues.first << ", " << weightsValues.second << ")";
        result << "_IS=(" << CommonTestUtils::vec2str(inputShape) << ")";

        return result.str();
    }

protected:
    void SetUp() override {
        InferenceEngine::Precision netPrecision;
        std::pair<float, float> weightsValues;
        std::vector<size_t> inputShape;
        std::tie(netPrecision, targetDevice, configuration, weightsValues, inputShape) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        const ngraph::Shape constShape = {inputShape.back(), inputShape.back()};
        auto params = ngraph::builder::makeParams(ngPrc, {inputShape});

        std::vector<float> weights;
        std::vector<float> weightsMin, weightsMax;
        for (size_t i = 0; i < constShape.front(); ++i) {
            // multiplier that widens the gap between the weight ranges of different channels
            float mul = (i % 2 ? 1.0f : 0.001f);
            float rowMin = weightsValues.first * mul;
            float rowMax = weightsValues.second * mul;
            auto rowWeights = CommonTestUtils::generate_float_numbers(constShape.back(), rowMin, rowMax);
            weights.insert(std::end(weights), std::begin(rowWeights), std::end(rowWeights));
            weightsMin.push_back(rowMin);
            weightsMax.push_back(rowMax);
        }

        auto constant = ngraph::builder::makeConstant<float>(ngPrc, constShape, weights);
        auto wLowNode = ngraph::builder::makeConstant<float>(ngPrc, {constShape.front()}, { weightsMin });
        auto wHighNode = ngraph::builder::makeConstant<float>(ngPrc, {constShape.front()}, { weightsMax });
        auto wFq = std::make_shared<ngraph::opset8::FakeQuantize>(constant, wLowNode, wHighNode, wLowNode, wHighNode,
                                                                  std::numeric_limits<uint8_t>::max() - 1);
        auto matmul = std::make_shared<ngraph::opset8::MatMul>(params[0], wFq, false, true);

        ngraph::ResultVector results{std::make_shared<ngraph::opset8::Result>(matmul)};
        function = std::make_shared<ngraph::Function>(results, params, "PerchannelQuantTest");
    }
};

TEST_P(PerchannelQuantTest, CompareWithRefImpl) {
    LoadNetwork();
    GenerateInputs();
    Infer();
    auto results = GetOutputs();
    size_t size = results.front()->size();
    auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(results.front());
    IE_ASSERT(memory);
    const auto lockedMemory = memory->wmap();
    const auto actualBuffer = lockedMemory.as<const float*>();

    // check that the outputs haven't been zeroed out by a channel multiplier
    for (size_t i = 0; i < size; ++i) {
        if (actualBuffer[i] == 0.0) {
            IE_THROW() << "Unexpected 0 output value";
        }
    }
}

const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

const std::vector<std::map<std::string, std::string>> configs = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
    }
};

const std::vector<std::pair<float, float>> weightsValues = {
    {-0.1, 0.1},
    {-1.0, 1.0},
    {-10.0, 10.0}
};

const std::vector<std::vector<size_t>> inputShapes = {
    {1, 128},
    {1, 38},
    {1, 8}
};

INSTANTIATE_TEST_SUITE_P(smoke_base, PerchannelQuantTest,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(weightsValues),
        ::testing::ValuesIn(inputShapes)),
    PerchannelQuantTest::getTestCaseName);
} // namespace LayerTestsDefinitions
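Note on the test design: the alternating 0.001 factor in SetUp() gives even-numbered weight rows a range three orders of magnitude smaller than odd-numbered ones, so before the fix their channel multiplier collapsed to 0 and the corresponding outputs came back as zeros; the final loop in CompareWithRefImpl fails on exactly that symptom.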