diff --git a/inference-engine/src/gna_plugin/backend/make_pwl.cpp b/inference-engine/src/gna_plugin/backend/make_pwl.cpp index e0f71bc7fc7..c4f98e88c2b 100644 --- a/inference-engine/src/gna_plugin/backend/make_pwl.cpp +++ b/inference-engine/src/gna_plugin/backend/make_pwl.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include "runtime/pwl.h" @@ -58,7 +59,72 @@ static void insert_extra_pwl_segments(std::vector& gna_pwl, } } -void make_gna_pwl(const DnnActivation fun, +static void print_segments_header(const DnnActivation& fun) { + gnalog() << "=========================== " << intel_dnn_activation_name[fun] << + " segments ===========================\n"; + gnalog() << std::setw(12) << std::setfill(' ') << "x" << std::setw(12) << std::setfill(' ') << + "y" << std::setw(12) << std::setfill(' ') << "slope" << std::endl; +} + +static void print_segment(double x, double y, double slope) { + gnalog() << std::setw(12) << std::setfill(' ') << x << std::setw(12) << std::setfill(' ') << + y << std::setw(12) << std::setfill(' ') << slope << std::endl; +} + +static std::vector create_multisegment_gna_pwl(const std::vector& pwl, + double in_scale, + double out_scale, + double min_x_val, + double max_x_val, + double min_y_val, + double max_y_val, + bool fake_quantize, + bool add_last_seg) { + std::vector gna_pwl; + + int32_t xbase = static_cast (INT32_MIN & XBASEMASK); // zero out the 2 lsb + int16_t ybase = FLOAT_TO_INT16(min_y_val * out_scale); + int16_t slope = 0; + gna_pwl.push_back({xbase, ybase, slope}); + print_segment(xbase / in_scale, min_y_val, slope); + + if (!fake_quantize && min_x_val > INT32_MIN / in_scale) { + auto s = gna_slope(pwl[0].m, in_scale, out_scale); + slope = FLOAT_TO_INT16(s.slope * s.slope_scale); + xbase = (static_cast(min_x_val * in_scale) & XBASEMASK) | s.slope_scale_index; + ybase = FLOAT_TO_INT16(min_y_val * out_scale); + gna_pwl.push_back({xbase, ybase, slope}); + print_segment(min_x_val, min_y_val, pwl[0].m); + } + + for (uint32_t i = 0; i < pwl.size(); ++i) { + if (!fake_quantize && (pwl[i].alpha <= min_x_val || + pwl[i].alpha <= INT32_MIN / in_scale || + pwl[i].alpha >= max_x_val)) { + continue; + } + + auto s = gna_slope(pwl[i].m, in_scale, out_scale); + xbase = ((static_cast (in_scale * pwl[i].alpha)) & XBASEMASK) | s.slope_scale_index; + ybase = FLOAT_TO_INT16(pwl[i].beta * out_scale); + slope = FLOAT_TO_INT16(s.slope * s.slope_scale); + gna_pwl.push_back({xbase, ybase, slope}); + print_segment(pwl[i].alpha, pwl[i].beta, pwl[i].m); + } + + if (!fake_quantize && add_last_seg) { + // insert extra segment for xvalues > u_bound + xbase = static_cast(max_x_val * in_scale) & XBASEMASK; + ybase = FLOAT_TO_INT16(max_y_val * out_scale); + slope = 0; + gna_pwl.push_back({xbase, ybase, slope}); + print_segment(max_x_val, max_y_val, slope); + } + + return gna_pwl; +} + +void make_gna_pwl(const DnnActivation& fun, const std::vector& pwl, const double l_bound, const double u_bound, @@ -73,199 +139,56 @@ void make_gna_pwl(const DnnActivation fun, gnalog() << "make_gna_pwl\n"; gnalog() << " in_scale " << in_scale << "\n"; gnalog() << " out_scale " << out_scale << "\n"; + print_segments_header(fun); switch (fun) { case kActSigmoid: case kActTanh: case kActSoftSign: { - auto n_segments = static_cast (pwl_size) + 1; - gna_pwl.resize(n_segments); // insert extra segment for x values < l_bound - gna_pwl[0].xBase = static_cast (INT32_MIN & XBASEMASK); // zero out the 2 lsb + double min_x_val; + double min_y_val; if (fun == kActSigmoid) { - gnalog() << "=========================== Sigmoid Segments ===========================\n"; - auto minVal = (fun.fqParams.set && *fun.fqParams.input_low > 0) ? FLOAT_TO_INT16(*fun.fqParams.input_low * out_scale) : 0; - gna_pwl[0].yBase = gna_pwl[1].yBase = minVal; - gna_pwl[1].xBase = (static_cast (in_scale * (-pwl[0].b / pwl[0].m))) & XBASEMASK; + min_y_val = fun.fqParams.set ? pwl[0].beta : 0; + min_x_val = -pwl[0].b / pwl[0].m; } else if (fun == kActTanh) { - gnalog() << "=========================== Tanh Segments ===========================\n"; - auto minVal = (fun.fqParams.set && *fun.fqParams.input_low > -1) ? FLOAT_TO_INT16(*fun.fqParams.input_low * out_scale) : - static_cast(-1.0 * out_scale); - gna_pwl[0].yBase = gna_pwl[1].yBase = minVal; - gna_pwl[1].xBase = (static_cast (in_scale * (-1.0 - pwl[0].b) / pwl[0].m)) & XBASEMASK; + min_y_val = fun.fqParams.set ? pwl[0].beta : -1.0; + min_x_val = (-1.0 - pwl[0].b) / pwl[0].m; } else { - gnalog() << "=========================== SoftSign Segments ===========================\n"; - auto minVal = (fun.fqParams.set && *fun.fqParams.input_low > -1) ? FLOAT_TO_INT16(*fun.fqParams.input_low * out_scale) : - static_cast(-1.0 * out_scale); - gna_pwl[0].yBase = gna_pwl[1].yBase = minVal; - gna_pwl[1].xBase = (static_cast (in_scale * (-1.0 - pwl[0].b) / pwl[0].m)) & XBASEMASK; + min_y_val = fun.fqParams.set ? pwl[0].beta : -1.0; + min_x_val = (-1.0 - pwl[0].b) / pwl[0].m; } - gna_pwl[0].slope = 0; - - gnalog() << (gna_pwl[0].xBase) / in_scale - << " " << (gna_pwl[0].yBase) / out_scale - << " " << 0.0 - << "\n"; - - s = gna_slope(pwl[0].m, in_scale, out_scale); - gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); - gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index; - - gnalog() << (gna_pwl[1].xBase/in_scale) - << " " << (gna_pwl[1].yBase) / out_scale - << " " << pwl[0].m - << "\n"; - - for (uint32_t i = 1; i < pwl_size - 1; ++i) { - s = gna_slope(pwl[i].m, in_scale, out_scale); - gna_pwl[i + 1].xBase = (static_cast (in_scale * pwl[i].alpha)) & XBASEMASK; - gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale); - gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); - gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index; - - gnalog() << (pwl[i].alpha) - << " " << pwl[i].beta - << " " << pwl[i].m - << "\n"; - } - // insert extra segment for xvalues > u_bound - auto maxVal = (fun.fqParams.set && *fun.fqParams.input_high <= 1) ? *fun.fqParams.input_high : 1.0; - gna_pwl[n_segments - 1].xBase = - ((uint32_t) (in_scale * (1.0 - pwl[pwl_size - 2].b) / pwl[pwl_size - 2].m)) & XBASEMASK; - gna_pwl[n_segments - 1].yBase = FLOAT_TO_INT16(maxVal * out_scale); - gna_pwl[n_segments - 1].slope = 0; - - gnalog() << (gna_pwl[n_segments - 1].xBase / in_scale) - << " " << 1.0 - << " " << 0.0 - << "\n"; + double max_y_val = fun.fqParams.set ? pwl.back().beta : 1.0; + double max_x_val = fun.srcFQParams.set ? u_bound : (1.0 - pwl[pwl_size - 2].b) / pwl[pwl_size - 2].m; + gna_pwl = create_multisegment_gna_pwl(pwl, in_scale, out_scale, min_x_val, max_x_val, min_y_val, max_y_val, + fun.fqParams.set, true); break; } case kActExp: { - auto n_segments = static_cast (pwl_size) + 1; - gna_pwl.resize(n_segments); - // insert extra segment for x values < l_bound - gna_pwl[0].xBase = static_cast (INT32_MIN & XBASEMASK); // zero out the 2 lsb - gnalog() << "=========================== Exp Segments ===========================\n"; - gna_pwl[0].yBase = gna_pwl[1].yBase = 0; - gna_pwl[1].xBase = (static_cast (in_scale * (-pwl[0].b / pwl[0].m))) & XBASEMASK; - gna_pwl[0].slope = 0; - - gnalog() << (gna_pwl[0].xBase) / in_scale - << " " << (gna_pwl[0].yBase) / out_scale - << " " << 0.0 - << "\n"; - - s = gna_slope(pwl[0].m, in_scale, out_scale); - gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); - gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index; - - gnalog() << ((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale) - << " " << (gna_pwl[1].yBase) / out_scale - << " " << pwl[0].m - << "\n"; - - for (uint32_t i = 1; i < pwl_size - 1; ++i) { - s = gna_slope(pwl[i].m, in_scale, out_scale); - gna_pwl[i + 1].xBase = (static_cast (in_scale * pwl[i].alpha)) & XBASEMASK; - gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale); - gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); - gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index; - - gnalog() << (pwl[i].alpha) - << " " << pwl[i].beta - << " " << pwl[i].m - << "\n"; - } - // insert extra segment for xvalues > u_bound - gna_pwl[n_segments - 1].xBase = - ((uint32_t)(in_scale * (y_max/out_scale - pwl[pwl_size - 2].b) / pwl[pwl_size - 2].m)) & XBASEMASK; - gna_pwl[n_segments - 1].yBase = y_max; - gna_pwl[n_segments - 1].slope = 0; - - gnalog() << (gna_pwl[n_segments - 1].xBase / in_scale) - << " " << 1.0 - << " " << 0.0 - << "\n"; + double min_x_val = -pwl[0].b / pwl[0].m; + double max_x_val = (y_max/out_scale - pwl[pwl_size - 2].b) / pwl[pwl_size - 2].m; + double min_y_val = fun.fqParams.set ? pwl[0].beta : 0; + double max_y_val = fun.fqParams.set ? pwl.front().beta : y_max / out_scale; + gna_pwl = create_multisegment_gna_pwl(pwl, in_scale, out_scale, min_x_val, max_x_val, min_y_val, max_y_val, + fun.fqParams.set, true); break; } case kActLog: { - auto n_segments = static_cast (pwl_size); - gna_pwl.resize(n_segments); - // insert extra segment for x values < l_bound - gna_pwl[0].xBase = static_cast (INT32_MIN & XBASEMASK); // zero out the 2 lsb - gnalog() << "=========================== Log Segments ===========================\n"; - gna_pwl[0].yBase = gna_pwl[1].yBase = y_min; - gna_pwl[1].xBase = (static_cast (1 + ~XBASEMASK)); // smallest representable value - gna_pwl[0].slope = 0; - - gnalog() << gna_pwl[0].xBase / in_scale - << " " << (gna_pwl[0].yBase) / out_scale - << " " << 0.0 - << "\n"; - - s = gna_slope(pwl[0].m, in_scale, out_scale); - gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); - gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index; - - gnalog() << ((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale) - << " " << (gna_pwl[1].yBase) / out_scale - << " " << pwl[0].m - << "\n"; - - for (uint32_t i = 1; i < pwl_size - 1; ++i) { - s = gna_slope(pwl[i].m, in_scale, out_scale); - gna_pwl[i + 1].xBase = (static_cast (in_scale * pwl[i].alpha)) & XBASEMASK; - gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale); - gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); - gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index; - - gnalog() << (pwl[i].alpha) - << " " << pwl[i].beta - << " " << pwl[i].m - << "\n"; - } + double min_x_val = 1 + ~XBASEMASK; + double max_x_val = INT32_MAX / in_scale; + double min_y_val = y_min / out_scale; + double max_y_val = y_max / out_scale; + gna_pwl = create_multisegment_gna_pwl(pwl, in_scale, out_scale, min_x_val, max_x_val, min_y_val, max_y_val, + fun.fqParams.set, false); break; } case kActNegLog: case kActNegHalfLog: { - auto n_segments = static_cast (pwl_size); - gna_pwl.resize(n_segments); - // insert extra segment for x values < l_bound - gna_pwl[0].xBase = static_cast (INT32_MIN & XBASEMASK); // zero out the 2 lsb - if (fun == kActNegHalfLog) - gnalog() << "=========================== NegHalfLog Segments ===========================\n"; - else - gnalog() << "=========================== NegLog Segments ===========================\n"; - gna_pwl[0].yBase = gna_pwl[1].yBase = y_max; - gna_pwl[1].xBase = (static_cast (1 + ~XBASEMASK)); // smallest representable value - gna_pwl[0].slope = 0; - - gnalog() << gna_pwl[0].xBase / in_scale - << " " << (gna_pwl[0].yBase) / out_scale - << " " << 0.0 - << "\n"; - - s = gna_slope(pwl[0].m, in_scale, out_scale); - gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); - gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index; - - gnalog() << ((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale) - << " " << (gna_pwl[1].yBase) / out_scale - << " " << pwl[0].m - << "\n"; - - for (uint32_t i = 1; i < pwl_size - 1; ++i) { - s = gna_slope(pwl[i].m, in_scale, out_scale); - gna_pwl[i + 1].xBase = (static_cast (in_scale * pwl[i].alpha)) & XBASEMASK; - gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale); - gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); - gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index; - - gnalog() << (pwl[i].alpha) - << " " << pwl[i].beta - << " " << pwl[i].m - << "\n"; - } + double min_x_val = 1 + ~XBASEMASK; + double max_x_val = INT32_MAX / in_scale; + double min_y_val = y_max / out_scale; + double max_y_val = y_min / out_scale; + gna_pwl = create_multisegment_gna_pwl(pwl, in_scale, out_scale, min_x_val, max_x_val, min_y_val, max_y_val, + fun.fqParams.set, false); break; } case kActRelu: @@ -273,10 +196,6 @@ void make_gna_pwl(const DnnActivation fun, auto n_segments = 2; gna_pwl.resize(n_segments); - if (fun == kActRelu) - gnalog() << "=========================== ReLU Segments ===========================\n"; - else - gnalog() << "=========================== LeakyReLU Segments ======================\n"; int32_t x_lower = INT32_MIN; int32_t x_upper = INT32_MAX; int32_t y_lower = y_min; @@ -297,19 +216,16 @@ void make_gna_pwl(const DnnActivation fun, gna_pwl[0].xBase = (x_lower & XBASEMASK) | s.slope_scale_index; // zero out the 2 lsb gna_pwl[0].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); - gnalog() << (int32_t)(gna_pwl[0].xBase & XBASEMASK) / in_scale - << " " << gna_pwl[0].yBase / out_scale - << " " << (gna_pwl[0].slope * in_scale) / (out_scale*s.slope_scale) - << "\n"; + print_segment((int32_t)(gna_pwl[0].xBase & XBASEMASK) / in_scale, + gna_pwl[0].yBase / out_scale, + (gna_pwl[0].slope * in_scale) / (out_scale*s.slope_scale)); + gna_pwl[1].xBase = 0; gna_pwl[1].yBase = 0; s = gna_slope(1.0, in_scale, out_scale); gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index; - gnalog() << 0.0 - << " " << 0.0 - << " " << (gna_pwl[1].slope * in_scale) / (out_scale*s.slope_scale) - << "\n"; + print_segment(0.0, 0.0, (gna_pwl[1].slope * in_scale) / (out_scale*s.slope_scale)); if (fun.fqParams.set) { // need a right segment gna_pwl.push_back({ @@ -317,10 +233,7 @@ void make_gna_pwl(const DnnActivation fun, y_upper, 0 }); - gnalog() << (x_upper & XBASEMASK) / in_scale - << " " << gna_pwl[n_segments].yBase / out_scale - << " " << 0 - << "\n"; + print_segment((x_upper & XBASEMASK) / in_scale, gna_pwl[n_segments].yBase / out_scale, 0.0); } break; } @@ -328,34 +241,28 @@ void make_gna_pwl(const DnnActivation fun, auto n_segments = 3; gna_pwl.resize(n_segments); - gnalog() << "=========================== Sign Segments ===========================\n"; int32_t x_lower = INT32_MIN; int16_t y_lower = static_cast(-1.0 * out_scale); gna_pwl[0].yBase = y_lower; gna_pwl[0].xBase = (x_lower & XBASEMASK); // zero out the 2 lsb gna_pwl[0].slope = 0; - gnalog() << gna_pwl[0].xBase / in_scale - << " " << gna_pwl[0].yBase / out_scale - << " " << (gna_pwl[0].slope * in_scale) / (out_scale*s.slope_scale) - << "\n"; + print_segment(gna_pwl[0].xBase / in_scale, gna_pwl[0].yBase / out_scale, + (gna_pwl[0].slope * in_scale) / (out_scale*s.slope_scale)); gna_pwl[1].xBase = -1; gna_pwl[1].yBase = 0; gna_pwl[1].slope = 0; gna_pwl[1].xBase = gna_pwl[1].xBase & XBASEMASK; - gnalog() << gna_pwl[1].xBase / in_scale - << " " << gna_pwl[1].yBase / out_scale - << " " << (gna_pwl[1].slope * in_scale) / (out_scale*s.slope_scale) - << "\n"; + print_segment(gna_pwl[1].xBase / in_scale, gna_pwl[1].yBase / out_scale, + (gna_pwl[1].slope * in_scale) / (out_scale*s.slope_scale)); + gna_pwl[2].xBase = 1 + ~XBASEMASK; // smallest representable positive number gna_pwl[2].yBase = static_cast(1.0 * out_scale); s = gna_slope(1.0, in_scale, out_scale); gna_pwl[2].slope = 0; gna_pwl[2].xBase = gna_pwl[2].xBase & XBASEMASK; - gnalog() << gna_pwl[2].xBase / in_scale - << " " << gna_pwl[2].yBase / out_scale - << " " << (gna_pwl[2].slope * in_scale) / (out_scale*s.slope_scale) - << "\n"; + print_segment(gna_pwl[2].xBase / in_scale, gna_pwl[2].yBase / out_scale, + (gna_pwl[2].slope * in_scale) / (out_scale*s.slope_scale)); break; } case kActIdentity: @@ -373,7 +280,6 @@ void make_gna_pwl(const DnnActivation fun, } auto n_segments = 2; if (fun == kActKaldiLstmClipping) { - gnalog() << "=========================== Clipping Segments ===========================\n"; if (x_lower < l_bound * in_scale) { if (y_lower < l_bound * out_scale) { x_lower = FLOAT_TO_INT32(l_bound * in_scale); @@ -391,42 +297,32 @@ void make_gna_pwl(const DnnActivation fun, } } } else if (fun == kActIdentity) { - gnalog() << "=========================== Identity Segments ===========================\n"; if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale); if (x_upper > y_upper * in_scale / out_scale) x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale); if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale); if (y_upper > x_upper * out_scale / in_scale) y_upper = FLOAT_TO_INT16(x_upper * out_scale / in_scale); - } else if (fun == kActFakeQuantize) { - gnalog() << "=========================== Fake Quantize Segments ===========================\n"; } + gna_pwl.resize(n_segments); gna_pwl[0].xBase = INT32_MIN & XBASEMASK; // zero out the 2 lsb gna_pwl[0].yBase = y_lower; gna_pwl[0].slope = 0; - gnalog() << gna_pwl[0].xBase / in_scale - << " " << gna_pwl[0].yBase / out_scale - << " " << 0 - << "\n"; + print_segment(gna_pwl[0].xBase / in_scale, gna_pwl[0].yBase / out_scale, 0.0); gna_pwl[1].xBase = x_lower & XBASEMASK; // zero out the 2 lsb gna_pwl[1].yBase = y_lower; s = gna_slope(1.0, in_scale, out_scale); gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index; - gnalog() << (int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale - << " " << gna_pwl[1].yBase / out_scale - << " " << 1.0 - << "\n"; + print_segment((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale, gna_pwl[1].yBase / out_scale, 1.0); + if (INT32_MAX > x_upper) { // need a right segment gna_pwl.push_back({ static_cast(x_upper & XBASEMASK), // zero out the 2 lsb y_upper, 0 }); - gnalog() << (x_upper & XBASEMASK) / in_scale - << " " << gna_pwl[n_segments].yBase / out_scale - << " " << 0 - << "\n"; + print_segment((x_upper & XBASEMASK) / in_scale, gna_pwl[n_segments].yBase / out_scale, 0.0); } break; } @@ -440,7 +336,6 @@ void make_gna_pwl(const DnnActivation fun, if (y_upper > x_upper * out_scale / in_scale) y_upper = FLOAT_TO_INT16(x_upper * out_scale / in_scale); if (x_upper > y_upper * in_scale / out_scale) x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale); - gnalog() << "=========================== Abs Segments ===========================\n"; if (y_upper == y_max) { // saturation at ends - need one more segment n_segments += 1; gna_pwl.resize(n_segments); @@ -457,19 +352,14 @@ void make_gna_pwl(const DnnActivation fun, s = gna_slope(-1.0, in_scale, out_scale); gna_pwl[i].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); gna_pwl[i].xBase = gna_pwl[i].xBase | s.slope_scale_index; - gnalog() << (int32_t)(gna_pwl[i].xBase & XBASEMASK) / in_scale - << " " << gna_pwl[i].yBase / out_scale - << " " << -1.0 - << "\n"; + print_segment((int32_t)(gna_pwl[i].xBase & XBASEMASK) / in_scale, gna_pwl[i].yBase / out_scale, -1.0); + gna_pwl[i + 1].xBase = 0; gna_pwl[i + 1].yBase = 0; s = gna_slope(1.0, in_scale, out_scale); gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index; - gnalog() << (int32_t)(gna_pwl[i + 1].xBase & XBASEMASK) / in_scale - << " " << gna_pwl[i + 1].yBase / out_scale - << " " << 1.0 - << "\n"; + print_segment((int32_t)(gna_pwl[i + 1].xBase & XBASEMASK) / in_scale, gna_pwl[i + 1].yBase / out_scale, 1.0); break; } case kActPow: { @@ -551,11 +441,7 @@ void make_gna_pwl(const DnnActivation fun, gna_pwl[0].xBase = INT32_MIN & XBASEMASK; // zero out the 2 lsb gna_pwl[0].yBase = y_lower; gna_pwl[0].slope = 0; - gnalog() << gna_pwl[0].xBase / in_scale - << " " << gna_pwl[0].yBase / out_scale - << " " << 0 - << "\n"; - + print_segment(gna_pwl[0].xBase / in_scale, gna_pwl[0].yBase / out_scale, 0.0); gna_pwl[1].xBase = x_lower & XBASEMASK; // zero out the 2 lsb gna_pwl[1].yBase = y_lower; @@ -563,73 +449,27 @@ void make_gna_pwl(const DnnActivation fun, s = gna_slope(slope, in_scale, out_scale); gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index; - gnalog() << (int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale - << " " << gna_pwl[1].yBase / out_scale - << " " << 1.0 - << "\n"; + print_segment((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale, gna_pwl[1].yBase / out_scale, 1.0); if (INT32_MAX > x_upper) { // need a right segment gna_pwl.push_back({ static_cast(x_upper & XBASEMASK), // zero out the 2 lsb y_upper, 0 }); - gnalog() << (x_upper & XBASEMASK) / in_scale - << " " << gna_pwl[2].yBase / out_scale - << " " << 0 - << "\n"; + print_segment((x_upper & XBASEMASK) / in_scale, gna_pwl[2].yBase / out_scale, 0.0); } } else { - auto n_segments = static_cast (pwl_size) + 1; - gna_pwl.resize(n_segments); - // insert extra segment for x values < l_bound - gna_pwl[0].xBase = static_cast (INT32_MIN & XBASEMASK); // zero out the 2 lsb - gnalog() << "=========================== Exp Segments ===========================\n"; - gna_pwl[0].yBase = gna_pwl[1].yBase = 0; - gna_pwl[1].xBase = (static_cast (in_scale * (-pwl[0].b / pwl[0].m))) & XBASEMASK; - gna_pwl[0].slope = 0; - - gnalog() << (gna_pwl[0].xBase) / in_scale - << " " << (gna_pwl[0].yBase) / out_scale - << " " << 0.0 - << "\n"; - - s = gna_slope(pwl[0].m, in_scale, out_scale); - gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); - gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index; - - gnalog() << ((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale) - << " " << (gna_pwl[1].yBase) / out_scale - << " " << pwl[0].m - << "\n"; - - for (uint32_t i = 1; i < pwl_size - 1; ++i) { - s = gna_slope(pwl[i].m, in_scale, out_scale); - gna_pwl[i + 1].xBase = (static_cast (in_scale * pwl[i].alpha)) & XBASEMASK; - gna_pwl[i + 1].yBase = FLOAT_TO_INT16(pwl[i].beta * out_scale); - gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); - gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index; - - gnalog() << (pwl[i].alpha) - << " " << pwl[i].beta - << " " << pwl[i].m - << "\n"; - } - // insert extra segment for xvalues > u_bound - gna_pwl[n_segments - 1].xBase = - ((uint32_t)(in_scale * (y_max / out_scale - pwl[pwl_size - 2].b) / pwl[pwl_size - 2].m)) & XBASEMASK; - gna_pwl[n_segments - 1].yBase = y_max; - gna_pwl[n_segments - 1].slope = 0; - - gnalog() << (gna_pwl[n_segments - 1].xBase / in_scale) - << " " << 1.0 - << " " << 0.0 - << "\n"; + double min_x_val = -pwl[0].b / pwl[0].m; + double max_x_val = (y_max/out_scale - pwl[pwl_size - 2].b) / pwl[pwl_size - 2].m; + double min_y_val = fun.fqParams.set ? pwl[0].beta : 0; + double max_y_val = fun.fqParams.set ? pwl.front().beta : y_max / out_scale; + gna_pwl = create_multisegment_gna_pwl(pwl, in_scale, out_scale, min_x_val, max_x_val, min_y_val, max_y_val, + fun.fqParams.set, true); break; } break; } default: - gnalog() << "Unexpected function activation!\n"; THROW_GNA_EXCEPTION << "Unexpected function activation!" << fun; } insert_extra_pwl_segments(gna_pwl, y_min, y_max); diff --git a/inference-engine/src/gna_plugin/backend/make_pwl.hpp b/inference-engine/src/gna_plugin/backend/make_pwl.hpp index 5cc879d75ec..a436cba26bd 100644 --- a/inference-engine/src/gna_plugin/backend/make_pwl.hpp +++ b/inference-engine/src/gna_plugin/backend/make_pwl.hpp @@ -7,7 +7,7 @@ #include #include "runtime/pwl.h" -void make_gna_pwl(const DnnActivation fun, +void make_gna_pwl(const DnnActivation& fun, const std::vector& pwl, const double l_bound, const double u_bound, diff --git a/inference-engine/tests/functional/plugin/gna/scale_factors_tests/eltwise_act_fq.cpp b/inference-engine/tests/functional/plugin/gna/scale_factors_tests/eltwise_act_fq.cpp new file mode 100644 index 00000000000..b9d54f1308b --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/scale_factors_tests/eltwise_act_fq.cpp @@ -0,0 +1,167 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +static std::map activationNames = { + {ngraph::helpers::ActivationTypes::Sigmoid, "Sigmoid"}, + {ngraph::helpers::ActivationTypes::Tanh, "Tanh"}, + {ngraph::helpers::ActivationTypes::Relu, "Relu"}, + {ngraph::helpers::ActivationTypes::Exp, "Exp"}, + {ngraph::helpers::ActivationTypes::Log, "Log"}, + {ngraph::helpers::ActivationTypes::Sign, "Sign"}, + {ngraph::helpers::ActivationTypes::Abs, "Abs"} +}; + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + std::string, // Target Device + std::map, // Configuration + std::pair, // Input values + ngraph::helpers::ActivationTypes // Activation type +> eltwiseActFqParams; + +namespace LayerTestsDefinitions { + +class EltwiseActFqTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + std::string targetDevice; + std::map configuration; + std::pair inputValues; + ngraph::helpers::ActivationTypes act; + std::tie(netPrecision, targetDevice, configuration, inputValues, act) = obj.param; + + std::ostringstream result; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice << "_"; + for (auto const& configItem : configuration) { + result << "_configItem=" << configItem.first << "_" << configItem.second; + } + result << "_range=(" << inputValues.first << ", " << inputValues.second << ")"; + result << "_act=" << activationNames[act]; + + return result.str(); + } + + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override { + InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc()); + blob->allocate(); + + auto* rawBlobDataPtr = blob->buffer().as(); + std::vector values = CommonTestUtils::generate_float_numbers(blob->size(), inputDataMin, inputDataMax); + for (size_t i = 0; i < blob->size(); i++) { + rawBlobDataPtr[i] = values[i]; + } + return blob; + } + +protected: + void SetUp() override { + InferenceEngine::Precision netPrecision; + std::pair inputValues; + ngraph::helpers::ActivationTypes act; + + std::tie(netPrecision, targetDevice, configuration, inputValues, act) = this->GetParam(); + std::tie(inputDataMin, inputDataMax) = inputValues; + if (act == ngraph::helpers::ActivationTypes::Log) { + // clamp not positive values + inputDataMin = 1.0e-3; + } + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + const ngraph::Shape shape = {1, 128}; + auto params = ngraph::builder::makeParams(ngPrc, {shape}); + + auto lowNodeIn = ngraph::builder::makeConstant(ngPrc, {1}, { 100 * inputDataMin }); + auto highNodeIn = ngraph::builder::makeConstant(ngPrc, {1}, { 100 * inputDataMax }); + auto fqIn = std::make_shared(params[0], lowNodeIn, highNodeIn, + lowNodeIn, highNodeIn, levels16); + + auto constant = ngraph::builder::makeConstant(ngPrc, shape, + CommonTestUtils::generate_float_numbers(shape[1], inputDataMin, inputDataMax)); + auto add = std::make_shared(fqIn, constant); + + auto lowNode = ngraph::builder::makeConstant(ngPrc, {1}, { 2 * inputDataMin }); + auto highNode = ngraph::builder::makeConstant(ngPrc, {1}, { 2 * inputDataMax }); + auto fq = std::make_shared(add, lowNode, highNode, + lowNode, highNode, levels32); + + auto tanh = ngraph::builder::makeActivation(fq, ngPrc, act); + + auto lowNodeOut = ngraph::builder::makeConstant(ngPrc, {1}, { std::tanh(2 * inputDataMin) }); + auto highNodeOut = ngraph::builder::makeConstant(ngPrc, {1}, { std::tanh(2 * inputDataMax) }); + auto fqOut = std::make_shared(tanh, lowNodeOut, highNodeOut, + lowNodeOut, highNodeOut, levels16); + + ngraph::ResultVector results{std::make_shared(fqOut)}; + function = std::make_shared(results, params, "TanhFq"); + } + + float inputDataMax = 1.0; + float inputDataMin = -1.0; + const size_t levels16 = std::numeric_limits::max(); + const size_t levels32 = std::numeric_limits::max(); + // to reproduce the problem with quite big distance between min int and min value from stats + const size_t sf_reducer = 100; +}; + +TEST_P(EltwiseActFqTest, CompareWithRefImpl) { + Run(); +}; + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector> configs = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + } +}; + +const std::vector> inputValues = { + {-10.0, 10.0}, + {-5.0, 5.0}, + {-1.0, 1.0}, + {-0.04, 0.04} +}; + +const std::vector activationTypes = { + ngraph::helpers::ActivationTypes::Sigmoid, + ngraph::helpers::ActivationTypes::Tanh, + ngraph::helpers::ActivationTypes::Relu, + ngraph::helpers::ActivationTypes::Exp, + ngraph::helpers::ActivationTypes::Log, + ngraph::helpers::ActivationTypes::Sign, + ngraph::helpers::ActivationTypes::Abs +}; + +INSTANTIATE_TEST_SUITE_P(smoke_base, EltwiseActFqTest, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(configs), + ::testing::ValuesIn(inputValues), + ::testing::ValuesIn(activationTypes)), + EltwiseActFqTest::getTestCaseName); +} // namespace LayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp index dc21ee929e0..35da141d619 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp @@ -19,6 +19,8 @@ std::vector disabledTestPatterns() { // TODO: FIX BUG 32210 R"(.*ActivationLayerTest.CompareWithRefs/(Sigmoid|Tanh|Exp|Log).*)", R"(.*ActivationFQSubgraph.*activation=(Exp|Log).*)", + // TODO: Issue 68586 + R"(.*EltwiseActFqTest.*act=Log.*)", // TODO: Issue 32542 R"(.*(EltwiseLayerTest).*eltwiseOpType=(Sum|Sub).*opType=SCALAR.*)", R"(.*(EltwiseLayerTest).*eltwiseOpType=Prod.*secondaryInputType=PARAMETER.*opType=SCALAR.*)", diff --git a/ngraph/core/include/openvino/runtime/tensor.hpp b/ngraph/core/include/openvino/runtime/tensor.hpp index 59913671067..512cbaba1b5 100644 --- a/ngraph/core/include/openvino/runtime/tensor.hpp +++ b/ngraph/core/include/openvino/runtime/tensor.hpp @@ -79,8 +79,8 @@ public: * @param type Tensor element type * @param shape Tensor shape * @param host_ptr Pointer to pre-allocated host memory - * @param strides Optional strides parameters in elements. Strides are supposed to be equal to shape if they are not - * set + * @param strides Optional strides parameters in bytes. Strides are supposed to be computed automatically based + * on shape and element size */ Tensor(const element::Type type, const Shape& shape, void* host_ptr, const Strides& strides = {}); @@ -124,7 +124,7 @@ public: size_t get_byte_size() const; /** - * @return Tensor's strides in elements + * @return Tensor's strides in bytes */ Strides get_strides() const; diff --git a/ngraph/core/src/runtime/ov_tensor.cpp b/ngraph/core/src/runtime/ov_tensor.cpp index b7ccf5aa9fc..85333cb2eca 100644 --- a/ngraph/core/src/runtime/ov_tensor.cpp +++ b/ngraph/core/src/runtime/ov_tensor.cpp @@ -40,15 +40,26 @@ Tensor::Tensor(const element::Type element_type, const Shape& shape, const Alloc _impl->allocate(); } -Tensor::Tensor(const element::Type element_type, const Shape& shape, void* host_ptr, const Strides& strides) { +Tensor::Tensor(const element::Type element_type, const Shape& shape, void* host_ptr, const Strides& byte_strides) { ie::SizeVector blk_order(shape.size()); std::iota(blk_order.begin(), blk_order.end(), 0); ie::SizeVector dim_offset(shape.size(), 0); ie::SizeVector blk_strides; - if (strides.empty()) { + if (byte_strides.empty()) { blk_strides = ov::row_major_strides(shape); } else { - blk_strides.assign(strides.begin(), strides.end()); + blk_strides.resize(byte_strides.size()); + std::transform(byte_strides.begin(), + byte_strides.end(), + blk_strides.begin(), + [&element_type](size_t byte_stride) { + OPENVINO_ASSERT(byte_stride % element_type.size() == 0, + "Limitation: Stride in bytes ", + byte_stride, + " should be divisible by size of element ", + element_type.size()); + return byte_stride / element_type.size(); + }); } try { @@ -93,7 +104,19 @@ Strides Tensor::get_strides() const { OPENVINO_ASSERT(get_element_type().bitwidth() >= 8, "Could not get strides for types with bitwidths less then 8 bit. Tensor type: ", get_element_type()); - OV_TENSOR_STATEMENT(return _impl->getTensorDesc().getBlockingDesc().getStrides()); + OV_TENSOR_STATEMENT({ + const auto& element_strides = _impl->getTensorDesc().getBlockingDesc().getStrides(); + const size_t elem_size = get_element_type().size(); + Strides byte_strides; + byte_strides.resize(element_strides.size()); + std::transform(element_strides.begin(), + element_strides.end(), + byte_strides.begin(), + [&elem_size](size_t stride) { + return stride * elem_size; + }); + return byte_strides; + }); } size_t Tensor::get_size() const { @@ -120,6 +143,7 @@ void* Tensor::data(const element::Type element_type) const { ", is not representable as pointer to ", element_type); } + // since we don't use byte offsets, we need to explicitly multiply by element_size auto byte_offset = _impl->getTensorDesc().getBlockingDesc().getOffsetPadding() * get_element_type().size(); OPENVINO_ASSERT((get_element_type().bitwidth() >= 8) || (byte_offset == 0), "ROI access for types with bitwidths less then 8 bit is not implemented. Tensor type: ", diff --git a/ngraph/test/ov_tensor_test.cpp b/ngraph/test/ov_tensor_test.cpp index a88b7b22db0..900bb6c1496 100644 --- a/ngraph/test/ov_tensor_test.cpp +++ b/ngraph/test/ov_tensor_test.cpp @@ -18,6 +18,13 @@ using OVTensorTest = ::testing::Test; +inline ov::Strides byteStrides(const ov::Strides& strides, const ov::element::Type& type) { + ov::Strides byte_strides(strides.size()); + for (size_t i = 0; i < strides.size(); ++i) + byte_strides[i] = strides[i] * type.size(); + return byte_strides; +} + TEST_F(OVTensorTest, canCreateTensor) { ov::Shape shape = {4, 3, 2}; ov::runtime::Tensor t{ov::element::f32, shape}; @@ -27,7 +34,7 @@ TEST_F(OVTensorTest, canCreateTensor) { ASSERT_EQ(ov::element::f32, t.get_element_type()); ASSERT_EQ(shape, t.get_shape()); ASSERT_NE(shape, t.get_strides()); - ASSERT_EQ(ov::Strides({6, 2, 1}), t.get_strides()); + ASSERT_EQ(byteStrides(ov::Strides({6, 2, 1}), t.get_element_type()), t.get_strides()); ASSERT_EQ(ov::element::f32.size() * totalSize, t.get_byte_size()); ASSERT_THROW(t.data(ov::element::i64), ov::Exception); ASSERT_THROW(t.data(), ov::Exception); @@ -72,7 +79,7 @@ TEST_F(OVTensorTest, canAccessExternalData) { ASSERT_EQ(data, t.data(ov::element::f32)); ASSERT_EQ(data, ptr); ASSERT_THROW(t.data(), ov::Exception); - ASSERT_EQ(ov::row_major_strides(shape), t.get_strides()); + ASSERT_EQ(byteStrides(ov::row_major_strides(shape), t.get_element_type()), t.get_strides()); ASSERT_EQ(ov::shape_size(shape), t.get_size()); ASSERT_EQ(ov::shape_size(shape) * ov::element::f32.size(), t.get_byte_size()); } @@ -81,11 +88,11 @@ TEST_F(OVTensorTest, canAccessExternalData) { TEST_F(OVTensorTest, canAccessExternalDataWithStrides) { ov::Shape shape = {2, 3}; float data[] = {5.f, 6.f, 7.f, 0.f, 1.f, 42.f, 3.f, 0.f}; - ov::runtime::Tensor t{ov::element::f32, shape, data, {4, 1}}; - ASSERT_EQ(ov::Strides({4, 1}), t.get_strides()); + ov::runtime::Tensor t{ov::element::f32, shape, data, {16, 4}}; + ASSERT_EQ(ov::Strides({16, 4}), t.get_strides()); { ASSERT_EQ((ov::Shape{2, 3}), t.get_shape()); - float* ptr = t.data(); + const float* ptr = t.data(); ASSERT_EQ(ptr[5], 42); } } @@ -98,16 +105,23 @@ TEST_F(OVTensorTest, cannotCreateTensorWithExternalNullptr) { TEST_F(OVTensorTest, cannotCreateTensorWithWrongStrides) { ov::Shape shape = {2, 3}; float data[] = {5.f, 6.f, 7.f, 0.f, 1.f, 42.f, 3.f, 0.f}; + const auto el = ov::element::f32; { // strides.size() != shape.size() - EXPECT_THROW(ov::runtime::Tensor(ov::element::f32, shape, data, {6, 3, 1}), ov::Exception); + EXPECT_THROW(ov::runtime::Tensor(el, shape, data, byteStrides({6, 3, 1}, el)), ov::Exception); } { // strides values are element-wise >= ov::row_major_strides(shape) values - EXPECT_THROW(ov::runtime::Tensor(ov::element::f32, shape, data, {2, 1}), ov::Exception); - EXPECT_THROW(ov::runtime::Tensor(ov::element::f32, shape, data, {3, 0}), ov::Exception); - EXPECT_THROW(ov::runtime::Tensor(ov::element::f32, shape, data, {3, 2}), ov::Exception); - EXPECT_NO_THROW(ov::runtime::Tensor(ov::element::f32, shape, data, {6, 2})); + EXPECT_THROW(ov::runtime::Tensor(el, shape, data, byteStrides({2, 1}, el)), ov::Exception); + EXPECT_THROW(ov::runtime::Tensor(el, shape, data, byteStrides({3, 0}, el)), ov::Exception); + EXPECT_THROW(ov::runtime::Tensor(el, shape, data, byteStrides({3, 2}, el)), ov::Exception); + EXPECT_NO_THROW(ov::runtime::Tensor(el, shape, data, byteStrides({6, 2}, el))); + } + { + // strides are not divisible by elem_size + EXPECT_THROW(ov::runtime::Tensor(el, shape, data, {7, el.size()}), ov::Exception); + EXPECT_THROW(ov::runtime::Tensor(el, shape, data, {3, 0}), ov::Exception); + EXPECT_THROW(ov::runtime::Tensor(el, shape, data, {el.size(), 3}), ov::Exception); } } @@ -119,7 +133,7 @@ TEST_F(OVTensorTest, saveDimsAndSizeAfterMove) { ASSERT_EQ(shape, new_tensor.get_shape()); ASSERT_EQ(ov::element::f32, new_tensor.get_element_type()); - ASSERT_EQ(ov::row_major_strides(shape), new_tensor.get_strides()); + ASSERT_EQ(byteStrides(ov::row_major_strides(shape), new_tensor.get_element_type()), new_tensor.get_strides()); ASSERT_THROW(t.get_size(), ov::Exception); ASSERT_THROW(t.get_element_type(), ov::Exception); @@ -141,7 +155,7 @@ TEST_F(OVTensorTest, canSetShape) { ASSERT_EQ(t.get_shape(), origShape); ASSERT_NO_THROW(t.set_shape({4, 5, 6})); ASSERT_EQ(newShape, t.get_shape()); - ASSERT_EQ(ov::row_major_strides(newShape), t.get_strides()); + ASSERT_EQ(byteStrides(ov::row_major_strides(newShape), t.get_element_type()), t.get_strides()); ASSERT_NE(orig_data, t.data()); // check that setShape for copy changes original Tensor @@ -180,7 +194,7 @@ TEST_F(OVTensorTest, makeRangeRoiTensor) { ASSERT_EQ(roi_tensor.data() - t.data(), ref_offset_elems); ASSERT_EQ(reinterpret_cast(roi_tensor.data()) - reinterpret_cast(t.data()), ref_offset_bytes); ASSERT_EQ(roi_tensor.get_strides(), t.get_strides()); - ASSERT_EQ(ref_strides, roi_tensor.get_strides()); + ASSERT_EQ(byteStrides(ref_strides, roi_tensor.get_element_type()), roi_tensor.get_strides()); ASSERT_EQ(roi_tensor.get_element_type(), t.get_element_type()); } @@ -218,14 +232,15 @@ TEST_F(OVTensorTest, readRangeRoiBlob) { ov::runtime::Tensor roi_tensor{t, {0, 0, 2, 4}, {1, 3, 4, 8}}; ASSERT_NE(false, static_cast(roi_tensor)); { - auto roi = roi_tensor.data(); + const std::uint8_t* roi = reinterpret_cast(roi_tensor.data()); ASSERT_NE(nullptr, roi); auto strides = roi_tensor.get_strides(); for (auto&& c : ngraph::CoordinateTransformBasic{roi_tensor.get_shape()}) { - auto actual = roi[c[3] * strides[3] + c[2] * strides[2] + c[1] * strides[1] + c[0] * strides[0]]; - auto expected = t.data()[(c[3] + 4) * strides[3] + (c[2] + 2) * strides[2] + - (c[1] + 0) * strides[1] + (c[0] + 0) * strides[0]]; - ASSERT_EQ(expected, actual) << c; + auto actual_addr = roi + c[3] * strides[3] + c[2] * strides[2] + c[1] * strides[1] + c[0] * strides[0]; + auto expected_addr = t.data() + ((c[3] + 4) * strides[3] + (c[2] + 2) * strides[2] + + (c[1] + 0) * strides[1] + (c[0] + 0) * strides[0]) / + t.get_element_type().size(); + ASSERT_EQ(actual_addr, reinterpret_cast(expected_addr)); } } } diff --git a/runtime/bindings/python/src/pyopenvino/core/common.cpp b/runtime/bindings/python/src/pyopenvino/core/common.cpp index 9c1c89e1586..e68721d929e 100644 --- a/runtime/bindings/python/src/pyopenvino/core/common.cpp +++ b/runtime/bindings/python/src/pyopenvino/core/common.cpp @@ -73,14 +73,6 @@ const std::map dtype_to_ov_type = { {"bool", ov::element::boolean}, }; -ov::Strides to_numpy_strides(const ov::Strides& strides, const ov::element::Type& ov_type) { - ov::Strides numpy_strides(strides.size()); - std::transform(strides.begin(), strides.end(), numpy_strides.begin(), [&ov_type](size_t stride) { - return stride * ov_type.size(); - }); - return numpy_strides; -} - InferenceEngine::Layout get_layout_from_string(const std::string& layout) { return layout_str_to_enum.at(layout); } diff --git a/runtime/bindings/python/src/pyopenvino/core/common.hpp b/runtime/bindings/python/src/pyopenvino/core/common.hpp index 9eafbfe36b9..e1ddd1e7abc 100644 --- a/runtime/bindings/python/src/pyopenvino/core/common.hpp +++ b/runtime/bindings/python/src/pyopenvino/core/common.hpp @@ -36,8 +36,6 @@ namespace Common extern const std::map ov_type_to_dtype; extern const std::map dtype_to_ov_type; - ov::Strides to_numpy_strides(const ov::Strides& strides, const ov::element::Type& ov_type); - InferenceEngine::Layout get_layout_from_string(const std::string& layout); const std::string& get_layout_from_enum(const InferenceEngine::Layout& layout); diff --git a/runtime/bindings/python/src/pyopenvino/core/tensor.cpp b/runtime/bindings/python/src/pyopenvino/core/tensor.cpp index 90c01d7d18b..836fa2a6fb7 100644 --- a/runtime/bindings/python/src/pyopenvino/core/tensor.cpp +++ b/runtime/bindings/python/src/pyopenvino/core/tensor.cpp @@ -71,7 +71,7 @@ void regclass_Tensor(py::module m) { cls.def_property_readonly("data", [](ov::runtime::Tensor& self) { return py::array(Common::ov_type_to_dtype.at(self.get_element_type()), self.get_shape(), - Common::to_numpy_strides(self.get_strides(), self.get_element_type()), + self.get_strides(), self.data(), py::cast(self)); });