[GNA] Add extra segments to PWL only if it is fused with Conv2D layer (#13144)

* [GNA] Add extra segments to PWL only if it is fused with Conv2D layer
* added a check that extra segments are needed only when the activation
    function is fused with Conv2D, or with Conv2D followed by pooling
* added unit tests verifying the implementation

* Update src/tests/unit/gna/gna_extra_pwl_segments_tests.cpp

fix license
This commit is contained in:
Marcin Kusmierski
2022-10-04 09:59:47 +02:00
committed by GitHub
parent 76c138ce85
commit 43db45a4fb
7 changed files with 338 additions and 83 deletions

View File

@@ -17,7 +17,7 @@
#include "pwl_input_params.hpp"
#include "pwl_segments_creator_factory.hpp"
// This function performes emulatation of HW saturation of PWL segments in SW
// This function performs emulation of HW saturation of PWL segments in SW
// by inserting additional segments when overflow would happen
static void insert_extra_pwl_segments(std::vector<gna_pwl_segment_t>& gna_pwl,
const int16_t y_min,
@@ -80,59 +80,6 @@ static void print_segment(double x, double y, double slope) {
y << std::setw(12) << std::setfill(' ') << slope << std::endl;
}
static std::vector<gna_pwl_segment_t> create_multisegment_gna_pwl(const std::vector<pwl_t>& pwl,
double in_scale,
double out_scale,
double min_x_val,
double max_x_val,
double min_y_val,
double max_y_val,
bool fake_quantize,
bool add_last_seg) {
std::vector<gna_pwl_segment_t> gna_pwl;
int32_t xbase = static_cast<int32_t> (INT32_MIN & XBASEMASK); // zero out the 2 lsb
int16_t ybase = FLOAT_TO_INT16(min_y_val * out_scale);
int16_t slope = 0;
gna_pwl.push_back({xbase, ybase, slope});
print_segment(xbase / in_scale, min_y_val, slope);
if (!fake_quantize && min_x_val > INT32_MIN / in_scale) {
auto s = gna_slope(pwl[0].m, in_scale, out_scale);
slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
xbase = (static_cast<int32_t>(min_x_val * in_scale) & XBASEMASK) | s.slope_scale_index;
ybase = FLOAT_TO_INT16(min_y_val * out_scale);
gna_pwl.push_back({xbase, ybase, slope});
print_segment(min_x_val, min_y_val, pwl[0].m);
}
for (uint32_t i = 0; i < pwl.size(); ++i) {
if (!fake_quantize && (pwl[i].alpha <= min_x_val ||
pwl[i].alpha <= INT32_MIN / in_scale ||
pwl[i].alpha >= max_x_val)) {
continue;
}
auto s = gna_slope(pwl[i].m, in_scale, out_scale);
xbase = ((static_cast<int32_t> (in_scale * pwl[i].alpha)) & XBASEMASK) | s.slope_scale_index;
ybase = FLOAT_TO_INT16(pwl[i].beta * out_scale);
slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl.push_back({xbase, ybase, slope});
print_segment(pwl[i].alpha, pwl[i].beta, pwl[i].m);
}
if (!fake_quantize && add_last_seg) {
// insert extra segment for xvalues > u_bound
xbase = static_cast<int32_t>(max_x_val * in_scale) & XBASEMASK;
ybase = FLOAT_TO_INT16(max_y_val * out_scale);
slope = 0;
gna_pwl.push_back({xbase, ybase, slope});
print_segment(max_x_val, max_y_val, slope);
}
return gna_pwl;
}
void make_gna_pwl(const DnnActivation& fun,
const std::vector<pwl_t>& pwl,
const double l_bound,
@@ -140,12 +87,12 @@ void make_gna_pwl(const DnnActivation& fun,
const double in_scale,
const double out_scale,
const bool low_precision,
const bool is_fused_with_conv2d,
std::vector<gna_pwl_segment_t> &gna_pwl) {
gnalog() << "make_gna_pwl\n";
gnalog() << " in_scale " << in_scale << "\n";
gnalog() << " out_scale " << out_scale << "\n";
print_segments_header(fun);
if (fun.type == kActIdentity) {
auto pwl_creator = ov::intel_gna::backend::PWLSegmentsCreatorFactory::CreateCreator(fun.type);
if (pwl_creator == nullptr) {
@@ -155,18 +102,16 @@ void make_gna_pwl(const DnnActivation& fun,
auto segments_with_borders = pwl_creator->CreateSegmentsWithBorders(input_data);
gna_pwl = segments_with_borders.segments;
auto& y_min_max = segments_with_borders.border_values.y_min_max;
// looks like insert_extra_pwl_segments is not needed for identity because the left most and the right most
// segments meets condition put in the method.
insert_extra_pwl_segments(gna_pwl, y_min_max.y_min, y_min_max.y_max);
// Extra segments are needed only in case activation function was fused with Conv2D layer
if (is_fused_with_conv2d) {
insert_extra_pwl_segments(gna_pwl, y_min_max.y_min, y_min_max.y_max);
}
return;
}
pwl_gna_slope_scale_t s;
const int16_t y_min = low_precision ? INT8_MIN : INT16_MIN;
const int16_t y_max = low_precision ? INT8_MAX : INT16_MAX;
switch (fun) {
case kActRelu:
case kActLeakyRelu: {
@@ -336,7 +281,10 @@ void make_gna_pwl(const DnnActivation& fun,
THROW_GNA_EXCEPTION << "Unexpected function activation!" << fun;
}
insert_extra_pwl_segments(gna_pwl, y_min, y_max);
// Extra segments are needed only in case activation function was fused with Conv2D layer
if (is_fused_with_conv2d) {
insert_extra_pwl_segments(gna_pwl, y_min, y_max);
}
}
template<typename T>

View File

@@ -16,6 +16,7 @@ void make_gna_pwl(const DnnActivation& fun,
const double in_scale,
const double out_scale,
const bool low_precision,
const bool is_fused_with_conv2d,
std::vector<gna_pwl_segment_t>& gna_pwl);
void make_gna_pwl(const std::shared_ptr<ngraph::Node>& node,
const double in_scale,

View File

@@ -44,6 +44,25 @@ using namespace std;
using namespace GNAPluginNS;
using namespace memory;
// Reports whether the element just before the last entry of `components` is a 2D
// convolution. When `verify_with_pooling` is true, the pattern
// "Conv2D -> MaxPool -> <last entry>" is accepted as well.
// Returns false when `components` has fewer than two entries.
static bool CheckIFLastComponentIsPrecededByConv2D(const GNAPluginNS::backend::DnnComponents::storage_type& components,
                                                   bool verify_with_pooling = true) {
    if (components.size() <= 1) {
        return false;
    }

    auto cursor = components.rbegin();
    ++cursor;  // step from the last entry to the one before it
    const auto preceding_operation = cursor->dnnComponent.operation;
    if (preceding_operation == kDnnConvolutional2dOp) {
        return true;
    }

    if (!verify_with_pooling || components.size() <= 2) {
        return false;
    }

    ++cursor;  // one entry further back: candidate Conv2D in front of a pooling step
    return cursor->dnnComponent.operation == kDnnConvolutional2dOp && preceding_operation == kDnnMaxPoolOp;
}
#define CREATE(name) [](GNAGraphCompiler *p, CNNLayerPtr l) {p->name(l);}
void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr) {
@@ -2069,11 +2088,12 @@ case name:\
gnaFlags->input_low_precision);
} else {
PwlDesignOpt(activation_type,
ptr_pwl_segments,
input_pwl_scale_factor,
output_pwl_scale_factor,
gnaFlags->input_low_precision,
layer->getNode());
input_pwl_scale_factor,
output_pwl_scale_factor,
gnaFlags->input_low_precision,
layer->getNode(),
CheckIFLastComponentIsPrecededByConv2D(dnnComponents.components),
ptr_pwl_segments);
}
ptr_pwl_segments_target = reinterpret_cast<gna_pwl_segment_t*>(&ptr_pwl_segments_target);
}

View File

@@ -39,11 +39,12 @@ inline double power(const double x, const std::tuple<double, double, double>& ar
}
void PwlDesignOpt(const DnnActivation& activation_type,
std::vector<gna_pwl_segment_t> &ptr_segment,
const float scale_in,
const float scale_out,
const bool low_precision,
const std::shared_ptr<ngraph::Node>& node) {
const float scale_in,
const float scale_out,
const bool low_precision,
const std::shared_ptr<ngraph::Node>& node,
const bool is_fused_with_conv2d,
std::vector<gna_pwl_segment_t>& ptr_segment) {
std::vector<pwl_t> pwl;
switch (activation_type) {
case kActPwl: {
@@ -51,24 +52,68 @@ void PwlDesignOpt(const DnnActivation& activation_type,
break;
}
case kActRelu:
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, low_precision, ptr_segment);
make_gna_pwl(activation_type,
pwl,
-1.0,
1.0,
scale_in,
scale_out,
low_precision,
is_fused_with_conv2d,
ptr_segment);
break;
case kActLeakyRelu:
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, low_precision, ptr_segment);
make_gna_pwl(activation_type,
pwl,
-1.0,
1.0,
scale_in,
scale_out,
low_precision,
is_fused_with_conv2d,
ptr_segment);
break;
case kActIdentity:
case kActFakeQuantize:
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, low_precision, ptr_segment);
make_gna_pwl(activation_type,
pwl,
-1.0,
1.0,
scale_in,
scale_out,
low_precision,
is_fused_with_conv2d,
ptr_segment);
break;
case kActKaldiLstmClipping:
make_gna_pwl(activation_type, pwl, activation_type.args.clamp.low, activation_type.args.clamp.high,
scale_in, scale_out, low_precision, ptr_segment);
scale_in,
scale_out,
low_precision,
is_fused_with_conv2d,
ptr_segment);
break;
case kActSign:
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, low_precision, ptr_segment);
make_gna_pwl(activation_type,
pwl,
-1.0,
1.0,
scale_in,
scale_out,
low_precision,
is_fused_with_conv2d,
ptr_segment);
break;
case kActAbs:
make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, low_precision, ptr_segment);
make_gna_pwl(activation_type,
pwl,
-1.0,
1.0,
scale_in,
scale_out,
low_precision,
is_fused_with_conv2d,
ptr_segment);
break;
default:
THROW_GNA_EXCEPTION << "Unknown piecewise linear function type: " << activation_type.type;

View File

@@ -85,8 +85,9 @@ void PwlDesign(const DnnActivation& activation_type,
const float scale_out,
const bool low_precision);
void PwlDesignOpt(const DnnActivation& activation_type,
std::vector<gna_pwl_segment_t> &ptr_segment,
const float scale_in,
const float scale_out,
const bool low_precision,
const std::shared_ptr<ngraph::Node>& node);
const float scale_in,
const float scale_out,
const bool low_precision,
const std::shared_ptr<ngraph::Node>& node,
const bool is_fused_with_conv2d,
std::vector<gna_pwl_segment_t>& ptr_segment);

View File

@@ -144,6 +144,7 @@ TEST_P(MakePWLIdentityTestFixture, check_make_pwl) {
input_params.in_scale_,
input_params.out_scale_,
low_precision_,
false,
output_pwl);
if (input_params.should_throw_) {
FAIL() << "Should throw, but didn't";

View File

@@ -0,0 +1,239 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "any_copy.hpp"
#include "gna_infer_request.hpp"
#include "gna_plugin.hpp"
#include "ngraph_functions/builders.hpp"
using GNAPluginNS::GNAPlugin;
namespace {
// Describes the convolution used to build the test model in the fixture's SetUp().
struct ConvolutionParameters {
// Kind of DNN convolution the case represents; in this file it is only used to build the test name.
intel_dnn_operation_t convolution_type;
// Shape of the model's input Parameter; input_shape[1] also becomes the second filter dimension.
ov::Shape input_shape;
// Spatial kernel dimensions appended to the filter shape; also passed as the pooling kernel.
ov::Shape kernel;
// Stride passed to both the convolution and the optional pooling.
ov::Shape stride;
// Dilation passed to the convolution.
ov::Shape dilation;
// First dimension of the filter shape.
size_t numOutChannels;
// Begin padding: passed to pooling as is and, converted to ov::CoordinateDiff, to the convolution.
std::vector<size_t> pad_begin_pool;
// End padding: passed to pooling as is and, converted to ov::CoordinateDiff, to the convolution.
std::vector<size_t> pad_end_pool;
};
// One parameterized test case: a convolution followed by an activation and, optionally, max pooling.
struct PWLExtraSegmentsParamsWithConv {
// Convolution configuration used to build the model.
ConvolutionParameters conv_params;
// Element type used for the input Parameter, the filter constant and the activation.
ov::element::Type precision;
// Activation applied to the convolution output.
ngraph::helpers::ActivationTypes activation_type;
// Number of segments every PWL component of the compiled graph is expected to have.
size_t expected_segments_num;
// When true, a max pooling node is appended after the activation.
bool use_pooling;
};
// Printable names of the activation types used by the test cases in this file
// (consumed by GetNGraphActivationTypeName when building test names).
const std::unordered_map<ngraph::helpers::ActivationTypes, std::string> kNGraphActivationMapForTests{
{ngraph::helpers::ActivationTypes::Relu, "Relu"},
{ngraph::helpers::ActivationTypes::Sigmoid, "Sigmoid"}};
// Printable names of the DNN convolution operations used by the test cases in this file
// (consumed by GetDnnOperationName when building test names).
const std::unordered_map<intel_dnn_operation_t, std::string> kDnnOperationMapForTests{
{kDnnConvolutional1dOp, "kDnnConvolutional1dOp"},
{kDnnConvolutional2dOp, "kDnnConvolutional2dOp"},
};
// Looks up the printable name registered for `enum_value` in `enum_map`.
// Returns `default_value` when the map has no entry for that value.
template <typename T>
std::string GetEnumName(const T& enum_value,
                        const std::unordered_map<T, std::string>& enum_map,
                        const std::string default_value = "") {
    const auto found = enum_map.find(enum_value);
    return found != enum_map.end() ? found->second : default_value;
}
std::string GetDnnOperationName(const intel_dnn_operation_t& enum_value, const std::string default_value = "") {
return GetEnumName(enum_value, kDnnOperationMapForTests, default_value);
}
// Returns the printable name of an activation type from kNGraphActivationMapForTests,
// or `default_value` when the activation is not listed there.
std::string GetNGraphActivationTypeName(const ngraph::helpers::ActivationTypes& enum_value,
                                        const std::string default_value = "") {
    const auto& known_activations = kNGraphActivationMapForTests;
    return GetEnumName<ngraph::helpers::ActivationTypes>(enum_value, known_activations, default_value);
}
// Streams the printable name of an activation type (empty when the type is not in the test map).
std::ostream& operator<<(std::ostream& os, const ngraph::helpers::ActivationTypes& value) {
    return os << GetNGraphActivationTypeName(value);
}
// Streams the printable name of a DNN operation (empty when the operation is not in the test map).
std::ostream& operator<<(std::ostream& os, const intel_dnn_operation_t& value) {
    return os << GetDnnOperationName(value);
}
class GNAPluginForPWLExtraSegmentsTest : public GNAPlugin {
public:
GNAPluginForPWLExtraSegmentsTest(const std::map<std::string, std::string>& config) : GNAPlugin(config) {
gnamem.reset(new GNAPluginNS::gna_memory_float(GNAPluginNS::memory::GNAFloatAllocator{}));
graphCompiler.setGNAMemoryPtr(gnamem);
gnadevice.reset();
}
void Test(const size_t expected_segments) {
for (const auto& component : graphCompiler.dnnComponents.components) {
if (component.dnnComponent.operation == kDnnPiecewiselinearOp) {
EXPECT_EQ(expected_segments, component.dnnComponent.op.pwl.num_segments);
}
}
}
};
// Value-parameterized fixture: SetUp() builds a network from the current
// PWLExtraSegmentsParamsWithConv and stores it in cnn_network_.
class GNAPWLExtraSegmentsTestFixture : public ::testing::TestWithParam<PWLExtraSegmentsParamsWithConv> {
public:
// Builds the test-case name from the convolution type, activation type and pooling flag.
static std::string getTestCaseName(const testing::TestParamInfo<PWLExtraSegmentsParamsWithConv>& obj);
// Creates the model for the current parameter set.
void SetUp() override;
protected:
// Network built by SetUp() and loaded into the plugin by the test body.
InferenceEngine::CNNNetwork cnn_network_;
};
// Produces a name of the form
// "ConvolutionType=<op>_ActivationType=<activation>_PoolingOn=<yes|no>" for a test case.
std::string GNAPWLExtraSegmentsTestFixture::getTestCaseName(
    const testing::TestParamInfo<PWLExtraSegmentsParamsWithConv>& obj) {
    const auto& test_params = obj.param;
    const char* const pooling_flag = test_params.use_pooling ? "yes" : "no";

    std::ostringstream name;
    name << "ConvolutionType=" << test_params.conv_params.convolution_type << "_";
    name << "ActivationType=" << test_params.activation_type << "_";
    name << "PoolingOn=" << pooling_flag;
    return name.str();
}
void GNAPWLExtraSegmentsTestFixture::SetUp() {
auto params = GetParam();
const auto& conv_params = params.conv_params;
const auto& precision = params.precision;
const auto& input_shape = conv_params.input_shape;
const auto& kernel = conv_params.kernel;
const auto& stride = conv_params.stride;
const auto& pad_begin_pool = conv_params.pad_begin_pool;
const auto& pad_end_pool = conv_params.pad_end_pool;
ov::CoordinateDiff pad_begin(pad_begin_pool.begin(), pad_begin_pool.end());
ov::CoordinateDiff pad_end(pad_end_pool.begin(), pad_end_pool.end());
const auto& dilation = conv_params.dilation;
const auto& activation_type = params.activation_type;
const auto& use_pooling = params.use_pooling;
std::vector<size_t> filter_shape;
filter_shape.push_back(conv_params.numOutChannels);
filter_shape.push_back(input_shape[1]);
filter_shape.insert(filter_shape.end(), kernel.begin(), kernel.end());
auto input = std::make_shared<ngraph::opset9::Parameter>(precision, input_shape);
auto filter = ngraph::builder::makeConstant<float>(precision, filter_shape, {1.f}, true);
auto conv = std::make_shared<ngraph::opset9::Convolution>(input, filter, stride, pad_begin, pad_end, dilation);
auto activation = ngraph::builder::makeActivation(conv, precision, activation_type);
std::shared_ptr<ngraph::opset9::Result> result = nullptr;
if (use_pooling) {
auto maxpool = ngraph::builder::makePooling(activation,
stride,
pad_begin_pool,
pad_end_pool,
kernel,
ngraph::op::RoundingType::FLOOR,
ngraph::op::PadType::VALID,
false,
ngraph::helpers::PoolingTypes::MAX);
result = std::make_shared<ngraph::opset9::Result>(maxpool);
} else {
result = std::make_shared<ngraph::opset9::Result>(activation);
}
auto function = std::make_shared<ov::Model>(ov::ResultVector({result}),
ov::ParameterVector({input}),
"convolution_with_activation_exrta_segments");
cnn_network_ = InferenceEngine::CNNNetwork(function);
}
// Element type shared by all test cases.
auto kPrecision32 = ov::element::f32;
// ---- Cases tagged kDnnConvolutional1dOp ----
// Input {1, 1, 1, 8} with a {1, 2} kernel, unit stride/dilation, no padding and 4 output channels.
const ov::Shape kInput1D = {1, 1, 1, 8};
const ov::Shape kKernel1D = {1, 2};
const ov::Shape kStride1D = {1, 1};
const ov::Shape kDilation1D = kStride1D;
const size_t kOutChanneldsNum1D = 4;
const std::vector<size_t> kPadBegin1D = {0, 0};
const std::vector<size_t> kPadEnd1D = {0, 0};
const ConvolutionParameters kConvolutionParams1D =
{kDnnConvolutional1dOp, kInput1D, kKernel1D, kStride1D, kDilation1D, kOutChanneldsNum1D, kPadBegin1D, kPadEnd1D};
// Relu cases expect 2 PWL segments, with and without pooling.
const PWLExtraSegmentsParamsWithConv kConvolution1DReluWithoutPoolParams = {kConvolutionParams1D,
kPrecision32,
ngraph::helpers::ActivationTypes::Relu,
2,
false};
const PWLExtraSegmentsParamsWithConv kConvolution1DReluWithPoolParams = {kConvolutionParams1D,
kPrecision32,
ngraph::helpers::ActivationTypes::Relu,
2,
true};
// Sigmoid cases expect 12 PWL segments, with and without pooling.
const PWLExtraSegmentsParamsWithConv kConvolution1DSigmoidWithoutPoolParams =
{kConvolutionParams1D, kPrecision32, ngraph::helpers::ActivationTypes::Sigmoid, 12, false};
const PWLExtraSegmentsParamsWithConv kConvolution1DSigmoidWithPoolParams = {kConvolutionParams1D,
kPrecision32,
ngraph::helpers::ActivationTypes::Sigmoid,
12,
true};
// ---- Cases tagged kDnnConvolutional2dOp ----
// Input {1, 8, 20, 16} with a {1, 1} kernel, unit stride/dilation, no padding and 8 output channels.
const ov::Shape kInput2D = {1, 8, 20, 16};
const ov::Shape kKernel2D = {1, 1};
const ov::Shape kStride2D = {1, 1};
const ov::Shape kDilation2D = kStride2D;
const size_t kOutChanneldsNum2D = 8;
const std::vector<size_t> kPadBegin2D = {0, 0};
const std::vector<size_t> kPadEnd2D = {0, 0};
const ConvolutionParameters kConvolutionParams2D =
{kDnnConvolutional2dOp, kInput2D, kKernel2D, kStride2D, kDilation2D, kOutChanneldsNum2D, kPadBegin2D, kPadEnd2D};
// Relu cases expect 4 PWL segments here versus 2 in the 1D cases.
// NOTE(review): presumably the two additional segments are the extra saturation segments
// added when the activation is fused with Conv2D - confirm.
const PWLExtraSegmentsParamsWithConv kConvolution2DReluWithoutPoolParams = {kConvolutionParams2D,
kPrecision32,
ngraph::helpers::ActivationTypes::Relu,
4,
false};
const PWLExtraSegmentsParamsWithConv kConvolution2DReluWithPoolParams = {kConvolutionParams2D,
kPrecision32,
ngraph::helpers::ActivationTypes::Relu,
4,
true};
// Sigmoid cases expect 12 PWL segments, the same number as in the 1D cases.
const PWLExtraSegmentsParamsWithConv kConvolution2DSigmoidWithoutPoolParams =
{kConvolutionParams2D, kPrecision32, ngraph::helpers::ActivationTypes::Sigmoid, 12, false};
const PWLExtraSegmentsParamsWithConv kConvolution2DSigmoidWithPoolParams = {kConvolutionParams2D,
kPrecision32,
ngraph::helpers::ActivationTypes::Sigmoid,
12,
true};
// Instantiates the fixture for the four 1D cases (Relu/Sigmoid, with/without pooling).
INSTANTIATE_TEST_CASE_P(GNAPWLExtraSegmentsConv1DTests,
GNAPWLExtraSegmentsTestFixture,
::testing::Values(kConvolution1DReluWithoutPoolParams,
kConvolution1DReluWithPoolParams,
kConvolution1DSigmoidWithoutPoolParams,
kConvolution1DSigmoidWithPoolParams),
GNAPWLExtraSegmentsTestFixture::getTestCaseName);
// Instantiates the fixture for the four 2D cases (Relu/Sigmoid, with/without pooling).
INSTANTIATE_TEST_CASE_P(GNAPWLExtraSegmentsConv2DTests,
GNAPWLExtraSegmentsTestFixture,
::testing::Values(kConvolution2DReluWithoutPoolParams,
kConvolution2DReluWithPoolParams,
kConvolution2DSigmoidWithoutPoolParams,
kConvolution2DSigmoidWithPoolParams),
GNAPWLExtraSegmentsTestFixture::getTestCaseName);
// Loads the network built by SetUp() into the test plugin (SW_EXACT execution mode) and
// checks the segment count of its PWL components against the expected value.
TEST_P(GNAPWLExtraSegmentsTestFixture, check_number_of_segments) {
    const size_t expected_segments_num = GetParam().expected_segments_num;
    const ov::AnyMap gna_config = {ov::intel_gna::execution_mode(ov::intel_gna::ExecutionMode::SW_EXACT)};

    GNAPluginForPWLExtraSegmentsTest plugin(ov::any_copy(gna_config));
    EXPECT_NO_THROW(plugin.LoadNetwork(cnn_network_));
    EXPECT_NO_THROW(plugin.Test(expected_segments_num));
}
} // namespace