FakeQuantize - reference implementation refactor (#6478)
* new implementation of FakeQuantize
* move FakeQuantize backend test to fake_quantize.in.cpp
* enable some test for FakeQuantize
* Add support for PDPD broadcasting and some backend tests
* arm plugin expect less attributes in function call
* try to fix arm plugin build
* try to build arm plugin
* start changing backend test
* add check for attributes number in visitor test
* fix backend test after refactoring
* add default parameter value to runtime::reference::fake_quantize
* Revert "add default parameter value to runtime::reference::fake_quantize"
This reverts commit 9d2c00d967.
* add SLT for FakeQuantize
* remove fesetround
* change `v1` namesapce to `v0` in ref impl
* add FakeQuantize-1 to VERIFIED_OP_REFERENCES
* pass immutable reference to test functions
This commit is contained in:
committed by
GitHub
parent
6e14890972
commit
0d9212683f
@@ -11,11 +11,19 @@ using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||
InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::Precision::FP16
|
||||
const ngraph::op::AutoBroadcastSpec numpyBroadcast = ngraph::op::AutoBroadcastType::NUMPY;
|
||||
|
||||
const ngraph::op::AutoBroadcastSpec noneBroadcast = ngraph::op::AutoBroadcastType::NONE;
|
||||
|
||||
const std::vector<ngraph::op::AutoBroadcastSpec> broadcasts = {
|
||||
{ngraph::op::AutoBroadcastType::NUMPY},
|
||||
{ngraph::op::AutoBroadcastType::PDPD, -1},
|
||||
};
|
||||
|
||||
const std::vector<InferenceEngine::Precision>
|
||||
netPrecisions = {InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::Precision::FP16};
|
||||
|
||||
const std::vector<std::vector<size_t>> inputShapes = {{1, 1}, {2, 6}, {1, 1, 1}, {2, 6, 13},
|
||||
{1, 1, 1, 1}, {3, 10, 5, 6}, {2, 8, 5, 18}, {2, 16, 3, 18}, {3, 49, 5, 6},
|
||||
{1, 1, 1, 1, 1}, {3, 10, 2, 5, 6}, {2, 8, 1, 5, 18}, {2, 16, 4, 3, 18}, {3, 49, 7, 5, 6}};
|
||||
@@ -30,10 +38,11 @@ const auto fqParams = ::testing::Combine(
|
||||
::testing::ValuesIn(levels),
|
||||
::testing::ValuesIn(constShapes),
|
||||
::testing::Values(fqArgs),
|
||||
::testing::Values(inputParams)
|
||||
::testing::Values(inputParams),
|
||||
::testing::ValuesIn(broadcasts)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantize, FakeQuantizeLayerTest,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantize, FakeQuantizeLayerTestRevise,
|
||||
::testing::Combine(
|
||||
fqParams,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
@@ -44,27 +53,52 @@ INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantize, FakeQuantizeLayerTest,
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::Values(config)),
|
||||
FakeQuantizeLayerTest::getTestCaseName);
|
||||
FakeQuantizeLayerTestRevise::getTestCaseName);
|
||||
|
||||
|
||||
const std::vector<size_t> singleShape = {3, 4, 2, 5};
|
||||
const auto noneBroadcastFqParams = ::testing::Combine(
|
||||
::testing::ValuesIn(levels),
|
||||
::testing::Values(singleShape),
|
||||
::testing::Values(fqArgs),
|
||||
::testing::Values(inputParams),
|
||||
::testing::Values(noneBroadcast)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizeNoneBroadcast, FakeQuantizeLayerTestRevise,
|
||||
::testing::Combine(
|
||||
noneBroadcastFqParams,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(singleShape),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::Values(config)),
|
||||
FakeQuantizeLayerTestRevise::getTestCaseName);
|
||||
|
||||
const std::vector<std::vector<size_t>> inputShapesPerChannel = {{11, 10, 22, 19}, {11, 10, 5, 6}};
|
||||
const std::vector<std::vector<size_t>> constShapesPerChannelAxis0 = {{11, 1, 1, 1}};
|
||||
const std::vector<std::vector<size_t>> constShapesPerChannelAxis1 = {{1, 10, 1, 1}};
|
||||
const std::vector<std::vector<size_t>> constShapesPerChannelAxis1 = {{1, 10, 1, 1}, {10, 1, 1}};
|
||||
|
||||
const auto fqParamsPerChannelAxis0 = ::testing::Combine(
|
||||
::testing::ValuesIn(levels),
|
||||
::testing::ValuesIn(constShapesPerChannelAxis0),
|
||||
::testing::Values(fqArgs),
|
||||
::testing::Values(inputParams)
|
||||
::testing::Values(inputParams),
|
||||
::testing::Values(numpyBroadcast)
|
||||
);
|
||||
|
||||
const auto fqParamsPerChannelAxis1 = ::testing::Combine(
|
||||
::testing::ValuesIn(levels),
|
||||
::testing::ValuesIn(constShapesPerChannelAxis1),
|
||||
::testing::Values(fqArgs),
|
||||
::testing::Values(inputParams)
|
||||
::testing::Values(inputParams),
|
||||
::testing::Values(numpyBroadcast)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizePerChannelAxis0, FakeQuantizeLayerTest,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizePerChannelAxis0, FakeQuantizeLayerTestRevise,
|
||||
::testing::Combine(
|
||||
fqParamsPerChannelAxis0,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
@@ -75,9 +109,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizePerChannelAxis0, FakeQuantizeLayerTes
|
||||
::testing::ValuesIn(inputShapesPerChannel),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::Values(config)),
|
||||
FakeQuantizeLayerTest::getTestCaseName);
|
||||
FakeQuantizeLayerTestRevise::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizePerChannelAxis1, FakeQuantizeLayerTest,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizePerChannelAxis1, FakeQuantizeLayerTestRevise,
|
||||
::testing::Combine(
|
||||
fqParamsPerChannelAxis1,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
@@ -88,7 +122,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizePerChannelAxis1, FakeQuantizeLayerTes
|
||||
::testing::ValuesIn(inputShapesPerChannel),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::Values(config)),
|
||||
FakeQuantizeLayerTest::getTestCaseName);
|
||||
FakeQuantizeLayerTestRevise::getTestCaseName);
|
||||
|
||||
const std::vector<std::vector<size_t>> inputShapesPerChannel2D = {{1, 10}};
|
||||
const std::vector<std::vector<size_t>> constShapesPerChannel2D = { {10}, {1, 10}, {1} };
|
||||
@@ -96,10 +130,11 @@ const auto fqParamsPerChannel2D = ::testing::Combine(
|
||||
::testing::ValuesIn(levels),
|
||||
::testing::ValuesIn(constShapesPerChannel2D),
|
||||
::testing::Values(fqArgs),
|
||||
::testing::Values(inputParams)
|
||||
::testing::Values(inputParams),
|
||||
::testing::Values(numpyBroadcast)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizePerChannel2D, FakeQuantizeLayerTest,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizePerChannel2D, FakeQuantizeLayerTestRevise,
|
||||
::testing::Combine(
|
||||
fqParamsPerChannel2D,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
@@ -110,6 +145,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizePerChannel2D, FakeQuantizeLayerTest,
|
||||
::testing::ValuesIn(inputShapesPerChannel2D),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::Values(config)),
|
||||
FakeQuantizeLayerTest::getTestCaseName);
|
||||
FakeQuantizeLayerTestRevise::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -81,10 +81,11 @@ const auto fqParams = ::testing::Combine(
|
||||
::testing::ValuesIn(levels),
|
||||
::testing::ValuesIn(constShapes),
|
||||
::testing::ValuesIn(fqArgs),
|
||||
::testing::ValuesIn(inputParams)
|
||||
::testing::ValuesIn(inputParams),
|
||||
::testing::Values(ngraph::op::AutoBroadcastType::NUMPY)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantize, FakeQuantizeLayerTest,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantize, FakeQuantizeLayerTestRevise,
|
||||
::testing::Combine(
|
||||
fqParams,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
@@ -95,6 +96,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantize, FakeQuantizeLayerTest,
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::ValuesIn(gnaQuantModes)),
|
||||
FakeQuantizeLayerTest::getTestCaseName);
|
||||
FakeQuantizeLayerTestRevise::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -29,10 +29,11 @@ const auto fqParams = ::testing::Combine(
|
||||
::testing::ValuesIn(levels),
|
||||
::testing::ValuesIn(constShapes),
|
||||
::testing::Values(fqArgs),
|
||||
::testing::Values(inputParams)
|
||||
::testing::Values(inputParams),
|
||||
::testing::Values(ngraph::op::AutoBroadcastType::NUMPY)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantize, FakeQuantizeLayerTest,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantize, FakeQuantizeLayerTestRevise,
|
||||
::testing::Combine(
|
||||
fqParams,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
@@ -43,6 +44,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantize, FakeQuantizeLayerTest,
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
::testing::Values(config)),
|
||||
FakeQuantizeLayerTest::getTestCaseName);
|
||||
FakeQuantizeLayerTestRevise::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
TEST_P(FakeQuantizeLayerTest, CompareWithRefs) {
|
||||
TEST_P(FakeQuantizeLayerTestRevise, CompareWithRefs) {
|
||||
Run();
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED();
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
|
||||
typedef std::tuple<
|
||||
size_t, // levels
|
||||
std::vector<size_t>, // const inputs shape
|
||||
@@ -48,7 +49,45 @@ typedef std::tuple<
|
||||
class FakeQuantizeLayerTest : public testing::WithParamInterface<fqLayerTestParamsSet>,
|
||||
virtual public LayerTestsUtils::LayerTestsCommon {
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj);
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<fqLayerTestParamsSet>& obj);
|
||||
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
|
||||
protected:
|
||||
void SetUp() override;
|
||||
void UpdateSeed();
|
||||
|
||||
protected:
|
||||
float inputDataMin = 0.0;
|
||||
float inputDataMax = 10.0;
|
||||
float inputDataResolution = 1.0;
|
||||
int32_t seed = 1;
|
||||
};
|
||||
|
||||
|
||||
//TODO after update all plugins remove *Revise types
|
||||
typedef std::tuple<
|
||||
size_t, // fake quantize levels
|
||||
std::vector<size_t>, // fake quantize inputs shape
|
||||
std::vector<float>, // fake quantize (inputLow, inputHigh, outputLow, outputHigh) or empty for random
|
||||
std::vector<float>, // input generator data (low, high, resolution) or empty for default
|
||||
ngraph::op::AutoBroadcastSpec // fake quantize broadcast mode
|
||||
> fqSpecificParamsRevise;
|
||||
typedef std::tuple<
|
||||
fqSpecificParamsRevise,
|
||||
InferenceEngine::Precision, // Net precision
|
||||
InferenceEngine::Precision, // Input precision
|
||||
InferenceEngine::Precision, // Output precision
|
||||
InferenceEngine::Layout, // Input layout
|
||||
InferenceEngine::Layout, // Output layout
|
||||
InferenceEngine::SizeVector, // Input shapes
|
||||
LayerTestsUtils::TargetDevice, // Device name
|
||||
|
||||
std::pair<std::string, std::map<std::string, std::string>> // Additional backend configuration and alis name to it
|
||||
> fqLayerTestParamsSetRevise;
|
||||
|
||||
class FakeQuantizeLayerTestRevise : public testing::WithParamInterface<fqLayerTestParamsSetRevise>,
|
||||
virtual public LayerTestsUtils::LayerTestsCommon {
|
||||
public:
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<fqLayerTestParamsSetRevise>& obj);
|
||||
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
|
||||
protected:
|
||||
void SetUp() override;
|
||||
|
||||
@@ -6,7 +6,8 @@
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj) {
|
||||
|
||||
std::string FakeQuantizeLayerTest::getTestCaseName(const testing::TestParamInfo<fqLayerTestParamsSet>& obj) {
|
||||
fqSpecificParams fqParams;
|
||||
InferenceEngine::Precision netPrecision;
|
||||
InferenceEngine::Precision inPrc, outPrc;
|
||||
@@ -113,4 +114,117 @@ void FakeQuantizeLayerTest::UpdateSeed() {
|
||||
std::cout << "\033[0;32m" << "[ ] " << "\033[0;0m"
|
||||
<< "seed = " << seed << std::endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::string FakeQuantizeLayerTestRevise::getTestCaseName(const testing::TestParamInfo<fqLayerTestParamsSetRevise>& obj) {
|
||||
fqSpecificParamsRevise fqParams;
|
||||
InferenceEngine::Precision netPrecision;
|
||||
InferenceEngine::Precision inPrc, outPrc;
|
||||
InferenceEngine::Layout inLayout, outLayout;
|
||||
InferenceEngine::SizeVector inputShapes;
|
||||
std::string targetDevice;
|
||||
std::pair<std::string, std::map<std::string, std::string>> config;
|
||||
std::tie(fqParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, targetDevice, config) = obj.param;
|
||||
size_t levels;
|
||||
std::vector<size_t> constShape;
|
||||
std::vector<float> fqDirectArgs;
|
||||
std::vector<float> inputArg;
|
||||
ngraph::op::AutoBroadcastSpec broadcast;
|
||||
std::tie(levels, constShape, fqDirectArgs, inputArg, broadcast) = fqParams;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
|
||||
result << "CS=" << CommonTestUtils::vec2str(constShape) << "_";
|
||||
result << "LEVELS=" << levels << "_";
|
||||
result << "netPRC=" << netPrecision.name() << "_";
|
||||
result << "inPRC=" << inPrc.name() << "_";
|
||||
result << "outPRC=" << outPrc.name() << "_";
|
||||
result << "inL=" << inLayout << "_";
|
||||
result << "outL=" << outLayout << "_";
|
||||
result << "trgDev=" << targetDevice;
|
||||
if (!config.first.empty()) {
|
||||
result << "_targetConfig=" << config.first;
|
||||
}
|
||||
if (!fqDirectArgs.empty()) {
|
||||
result << "_fqArgs=" << fqDirectArgs[0] << "_" << fqDirectArgs[1] << "_" << fqDirectArgs[2] << "_" << fqDirectArgs[3];
|
||||
}
|
||||
if (inputArg.size() == 3) {
|
||||
result << "_inputArg=" << inputArg[0] << "_" << inputArg[1] << "_" << inputArg[2];
|
||||
}
|
||||
result << "_" << broadcast.m_type;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
void FakeQuantizeLayerTestRevise::SetUp() {
|
||||
fqSpecificParamsRevise fqParams;
|
||||
std::vector<size_t> inputShape;
|
||||
std::pair<std::string, std::map<std::string, std::string>> config;
|
||||
auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
|
||||
std::tie(fqParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice, config) = this->GetParam();
|
||||
InferenceEngine::SizeVector kernel, stride, dilation;
|
||||
size_t levels;
|
||||
std::vector<size_t> constShape;
|
||||
std::vector<float> fqDirectArg;
|
||||
std::vector<float> inputArg;
|
||||
ngraph::op::AutoBroadcastSpec broadcast;
|
||||
std::tie(levels, constShape, fqDirectArg, inputArg, broadcast) = fqParams;
|
||||
if (inputArg.size() == 3) {
|
||||
inputDataMin = inputArg[0];
|
||||
inputDataMax = inputArg[1];
|
||||
inputDataResolution = inputArg[2];
|
||||
}
|
||||
if (fqDirectArg.size() != 0) {
|
||||
threshold = (fqDirectArg[3] - fqDirectArg[2]) / levels;
|
||||
}
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
|
||||
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
|
||||
|
||||
UpdateSeed();
|
||||
|
||||
std::shared_ptr<ngraph::Node> fakeQNode;
|
||||
if (fqDirectArg.empty()) {
|
||||
int32_t ngraphSeed = seed;
|
||||
if (NGRAPH_SEED != USE_CLOCK_TIME) {
|
||||
ngraphSeed = NGRAPH_SEED;
|
||||
}
|
||||
std::cout << "\033[0;32m" << "[ ] " << "\033[0;0m"
|
||||
<< "ngraphSeed = " << ngraphSeed << std::endl;
|
||||
fakeQNode = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, levels, constShape, ngraphSeed);
|
||||
} else {
|
||||
fakeQNode = ngraph::builder::makeFakeQuantize(
|
||||
paramOuts[0],
|
||||
ngPrc,
|
||||
levels,
|
||||
constShape,
|
||||
{fqDirectArg[0]},
|
||||
{fqDirectArg[1]},
|
||||
{fqDirectArg[2]},
|
||||
{fqDirectArg[3]});
|
||||
}
|
||||
auto fq = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(fakeQNode);
|
||||
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(fq)};
|
||||
function = std::make_shared<ngraph::Function>(results, params, "fakeQuantize");
|
||||
|
||||
configuration = config.second;
|
||||
}
|
||||
|
||||
InferenceEngine::Blob::Ptr FakeQuantizeLayerTestRevise::GenerateInput(const InferenceEngine::InputInfo &info) const {
|
||||
return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution,
|
||||
seed);
|
||||
}
|
||||
|
||||
void FakeQuantizeLayerTestRevise::UpdateSeed() {
|
||||
if (BASE_SEED == USE_CLOCK_TIME) {
|
||||
seed = std::chrono::system_clock::now().time_since_epoch().count();
|
||||
} else if (BASE_SEED == USE_INCREMENTAL_SEED) {
|
||||
seed += 9999;
|
||||
} else {
|
||||
seed = BASE_SEED;
|
||||
}
|
||||
std::cout << "\033[0;32m" << "[ ] " << "\033[0;0m"
|
||||
<< "seed = " << seed << std::endl;
|
||||
}
|
||||
} // namespace LayerTestsDefinitions
|
||||
|
||||
@@ -35,6 +35,7 @@ VERIFIED_OP_REFERENCES = [
|
||||
'ExperimentalDetectronPriorGridGenerator-6',
|
||||
'ExperimentalDetectronROIFeatureExtractor-6',
|
||||
'ExperimentalDetectronTopKROIs-6',
|
||||
'FakeQuantize-1',
|
||||
'Floor-1'
|
||||
'FloorMod-1'
|
||||
'GRUSequence-5',
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
#pragma once
|
||||
|
||||
#include "ngraph/node.hpp"
|
||||
#include "ngraph/op/op.hpp"
|
||||
#include "ngraph/op/util/attr_types.hpp"
|
||||
#include "ngraph/op/util/fused_op.hpp"
|
||||
|
||||
namespace ngraph
|
||||
{
|
||||
|
||||
@@ -4,12 +4,15 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <numeric>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ngraph/check.hpp"
|
||||
#include "ngraph/op/util/attr_types.hpp"
|
||||
#include "ngraph/shape.hpp"
|
||||
|
||||
namespace ngraph
|
||||
@@ -18,9 +21,9 @@ namespace ngraph
|
||||
{
|
||||
namespace reference
|
||||
{
|
||||
namespace
|
||||
namespace fake_quantize_details
|
||||
{
|
||||
std::vector<size_t>
|
||||
inline std::vector<size_t>
|
||||
calc_broadcast_index_offset(const std::vector<size_t>& memory_offsets,
|
||||
const std::vector<size_t>& broadcast_shape)
|
||||
{
|
||||
@@ -32,9 +35,8 @@ namespace ngraph
|
||||
broadcast_offsets[i] = memory_offsets[i];
|
||||
}
|
||||
}
|
||||
if (!std::all_of(broadcast_shape.begin(),
|
||||
broadcast_shape.end(),
|
||||
[](size_t i) { return i == 1; }) &&
|
||||
const auto not_one = [](size_t i) { return i != 1; };
|
||||
if (std::any_of(broadcast_shape.begin(), broadcast_shape.end(), not_one) &&
|
||||
broadcast_shape.back() == 1)
|
||||
{
|
||||
broadcast_offsets[broadcast_offsets.size() - 1] = 1;
|
||||
@@ -53,182 +55,243 @@ namespace ngraph
|
||||
return broadcast_offsets;
|
||||
}
|
||||
|
||||
size_t calc_full_broadcast_offset(const std::vector<size_t>& current_dims,
|
||||
const std::vector<size_t>& offsets)
|
||||
inline size_t calc_full_broadcast_offset(const std::vector<size_t>& current_dims,
|
||||
const std::vector<size_t>& offsets)
|
||||
{
|
||||
size_t full_index_offset = 0;
|
||||
for (size_t i = 0; i < current_dims.size(); ++i)
|
||||
{
|
||||
full_index_offset += offsets[i] * current_dims[i];
|
||||
}
|
||||
return full_index_offset;
|
||||
return std::inner_product(
|
||||
begin(current_dims), end(current_dims), begin(offsets), 0);
|
||||
}
|
||||
|
||||
void align_shape_sizes(Shape& shape, size_t target_size)
|
||||
inline Shape align_shape_sizes(const Shape& shape,
|
||||
const Shape& target_shape,
|
||||
const op::AutoBroadcastSpec& broadcast)
|
||||
{
|
||||
for (size_t i = 0; i < shape.size() - target_size; ++i)
|
||||
Shape s;
|
||||
switch (broadcast.m_type)
|
||||
{
|
||||
shape.insert(shape.begin(), 1);
|
||||
case op::AutoBroadcastType::NONE:
|
||||
{
|
||||
s = shape;
|
||||
break;
|
||||
}
|
||||
case op::AutoBroadcastType::NUMPY:
|
||||
{
|
||||
s = Shape(target_shape.size(), 1);
|
||||
std::copy(begin(shape), end(shape), prev(end(s), shape.size()));
|
||||
break;
|
||||
}
|
||||
case op::AutoBroadcastType::PDPD:
|
||||
{
|
||||
const size_t axis = broadcast.m_axis == -1
|
||||
? target_shape.size() - shape.size()
|
||||
: static_cast<size_t>(broadcast.m_axis);
|
||||
|
||||
s = Shape(target_shape.size(), 1);
|
||||
const auto axis_to_copy = target_shape.size() - axis;
|
||||
const auto b = begin(shape);
|
||||
const auto e = b + axis_to_copy; // from e to end(shape) should be only ones
|
||||
std::copy(b, e, next(begin(s), axis));
|
||||
break;
|
||||
}
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
void increment_current_dim(std::vector<size_t>& current_dims,
|
||||
const std::vector<size_t>& shape,
|
||||
size_t incremented_dim_number)
|
||||
inline void increment_current_dim(std::vector<size_t>& current_dims,
|
||||
const std::vector<size_t>& shape)
|
||||
{
|
||||
current_dims[incremented_dim_number] += 1;
|
||||
if (current_dims[incremented_dim_number] == shape[incremented_dim_number] &&
|
||||
incremented_dim_number != 0)
|
||||
size_t incremented_dim_number = current_dims.size();
|
||||
while (incremented_dim_number-- > 0)
|
||||
{
|
||||
for (size_t i = incremented_dim_number; i < shape.size(); ++i)
|
||||
current_dims[incremented_dim_number] += 1;
|
||||
if (current_dims[incremented_dim_number] < shape[incremented_dim_number])
|
||||
{
|
||||
current_dims[i] = 0;
|
||||
break;
|
||||
}
|
||||
increment_current_dim(current_dims, shape, incremented_dim_number - 1);
|
||||
current_dims[incremented_dim_number] = 0;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
void fake_quantize(const T* arg,
|
||||
const T* in_low,
|
||||
const T* in_high,
|
||||
const T* out_low,
|
||||
const T* out_high,
|
||||
T* out,
|
||||
const Shape& arg_shape,
|
||||
const Shape& _in_low_shape,
|
||||
const Shape& _in_high_shape,
|
||||
const Shape& _out_low_shape,
|
||||
const Shape& _out_high_shape,
|
||||
size_t levels)
|
||||
{
|
||||
auto initial_round_mode = std::fegetround();
|
||||
std::fesetround(FE_TONEAREST);
|
||||
Shape in_low_shape(_in_low_shape);
|
||||
Shape in_high_shape(_in_high_shape);
|
||||
Shape out_low_shape(_out_low_shape);
|
||||
Shape out_high_shape(_out_high_shape);
|
||||
|
||||
if (in_low_shape.size() > arg_shape.size() ||
|
||||
in_high_shape.size() > arg_shape.size() ||
|
||||
out_low_shape.size() > arg_shape.size() ||
|
||||
out_high_shape.size() > arg_shape.size())
|
||||
template <typename T>
|
||||
class QuantizationBound
|
||||
{
|
||||
throw std::runtime_error(
|
||||
std::string("Tensors with inout\\output ranges should have rank less or "
|
||||
"equal to data tensor rank equal to ") +
|
||||
std::to_string(arg_shape.size()));
|
||||
}
|
||||
|
||||
std::vector<size_t> arg_memory_offsets(arg_shape.size(), 0);
|
||||
for (int i = arg_shape.size() - 2; i >= 0; i--)
|
||||
{
|
||||
arg_memory_offsets[i] = std::accumulate(
|
||||
arg_shape.begin() + i + 1, arg_shape.end(), 1, std::multiplies<size_t>());
|
||||
}
|
||||
align_shape_sizes(in_low_shape, arg_shape.size());
|
||||
align_shape_sizes(in_high_shape, arg_shape.size());
|
||||
align_shape_sizes(out_low_shape, arg_shape.size());
|
||||
align_shape_sizes(out_high_shape, arg_shape.size());
|
||||
|
||||
std::vector<size_t> in_low_offsets, in_high_offsets, out_low_offsets,
|
||||
out_high_offsets;
|
||||
bool in_low_trivial_broadcast = false;
|
||||
bool in_high_trivial_broadcast = false;
|
||||
bool out_low_trivial_broadcast = false;
|
||||
bool out_high_trivial_broadcast = false;
|
||||
bool in_low_aligned = false;
|
||||
bool in_high_aligned = false;
|
||||
bool out_low_aligned = false;
|
||||
bool out_high_aligned = false;
|
||||
|
||||
auto check_trivial_broadcast =
|
||||
[&arg_shape, &arg_memory_offsets](Shape& shape_to_check,
|
||||
std::vector<size_t>& target_offsets,
|
||||
bool& trivial_broadcast,
|
||||
bool& aligned) {
|
||||
if (shape_size(shape_to_check) == 1 || shape_size(shape_to_check) == 0)
|
||||
public:
|
||||
enum class Bound
|
||||
{
|
||||
trivial,
|
||||
aligned,
|
||||
broadcast,
|
||||
};
|
||||
QuantizationBound(const T* const bound_data,
|
||||
const Shape& bound_shape,
|
||||
const Shape& arg_shape,
|
||||
const op::AutoBroadcastSpec& broadcast_spec)
|
||||
: bounds(bound_data)
|
||||
{
|
||||
if (shape_size(bound_shape) == 1)
|
||||
{
|
||||
trivial_broadcast = true;
|
||||
bound = Bound::trivial;
|
||||
}
|
||||
else if (shape_to_check == arg_shape)
|
||||
else if (bound_shape == arg_shape)
|
||||
{
|
||||
aligned = true;
|
||||
bound = Bound::aligned;
|
||||
}
|
||||
else
|
||||
{
|
||||
target_offsets =
|
||||
calc_broadcast_index_offset(arg_memory_offsets, shape_to_check);
|
||||
bound = Bound::broadcast;
|
||||
const auto arg_memory_offsets = row_major_strides(arg_shape);
|
||||
const auto unsqueezed_bound_shape =
|
||||
align_shape_sizes(bound_shape, arg_shape, broadcast_spec);
|
||||
row_strides = calc_broadcast_index_offset(arg_memory_offsets,
|
||||
unsqueezed_bound_shape);
|
||||
}
|
||||
};
|
||||
check_trivial_broadcast(
|
||||
in_low_shape, in_low_offsets, in_low_trivial_broadcast, in_low_aligned);
|
||||
check_trivial_broadcast(
|
||||
in_high_shape, in_high_offsets, in_high_trivial_broadcast, in_high_aligned);
|
||||
check_trivial_broadcast(
|
||||
out_low_shape, out_low_offsets, out_low_trivial_broadcast, out_low_aligned);
|
||||
check_trivial_broadcast(
|
||||
out_high_shape, out_high_offsets, out_high_trivial_broadcast, out_high_aligned);
|
||||
|
||||
std::vector<size_t> current_dim(arg_shape.size(), 0);
|
||||
|
||||
auto get_value = [¤t_dim](bool is_trivial_broadcast,
|
||||
bool is_aligned,
|
||||
const T* data,
|
||||
size_t idx,
|
||||
const std::vector<size_t>& offsets) {
|
||||
T val;
|
||||
if (is_aligned)
|
||||
{
|
||||
val = data[idx];
|
||||
}
|
||||
else if (is_trivial_broadcast)
|
||||
T get_value(const std::vector<size_t>& current_dim, size_t idx) const
|
||||
{
|
||||
val = data[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t index_offset = calc_full_broadcast_offset(current_dim, offsets);
|
||||
if (index_offset != 0)
|
||||
T val{};
|
||||
switch (bound)
|
||||
{
|
||||
NGRAPH_CHECK(idx >= index_offset, "Incorrect index offset value!");
|
||||
case Bound::trivial: val = *bounds; break;
|
||||
case Bound::aligned: val = bounds[idx]; break;
|
||||
case Bound::broadcast:
|
||||
{
|
||||
const size_t index_offset =
|
||||
calc_full_broadcast_offset(current_dim, row_strides);
|
||||
NGRAPH_CHECK(0 <= index_offset && index_offset <= idx,
|
||||
"Incorrect index offset value!");
|
||||
val = bounds[idx - index_offset];
|
||||
break;
|
||||
}
|
||||
val = data[idx - index_offset];
|
||||
}
|
||||
return val;
|
||||
}
|
||||
return val;
|
||||
|
||||
private:
|
||||
Bound bound;
|
||||
std::vector<size_t> row_strides;
|
||||
const T* const bounds;
|
||||
};
|
||||
for (size_t i = 0; i < shape_size(arg_shape); ++i)
|
||||
|
||||
template <typename T>
|
||||
inline T quantize(const T& arg,
|
||||
const T& in_low,
|
||||
const T& in_high,
|
||||
const T& out_low,
|
||||
const T& out_high,
|
||||
const size_t& levels)
|
||||
{
|
||||
T in_low_val = get_value(
|
||||
in_low_trivial_broadcast, in_low_aligned, in_low, i, in_low_offsets);
|
||||
T in_high_val = get_value(
|
||||
in_high_trivial_broadcast, in_high_aligned, in_high, i, in_high_offsets);
|
||||
T out_low_val = get_value(
|
||||
out_low_trivial_broadcast, out_low_aligned, out_low, i, out_low_offsets);
|
||||
T out_high_val = get_value(out_high_trivial_broadcast,
|
||||
out_high_aligned,
|
||||
out_high,
|
||||
i,
|
||||
out_high_offsets);
|
||||
if (arg[i] <= std::min(in_low_val, in_high_val))
|
||||
if (arg <= std::min(in_low, in_high))
|
||||
{
|
||||
out[i] = out_low_val;
|
||||
return out_low;
|
||||
}
|
||||
else if (arg[i] > std::max(in_low_val, in_high_val))
|
||||
else if (arg > std::max(in_low, in_high))
|
||||
{
|
||||
out[i] = out_high_val;
|
||||
return out_high;
|
||||
}
|
||||
return std::nearbyint((arg - in_low) / (in_high - in_low) * (levels - 1)) /
|
||||
(levels - 1) * (out_high - out_low) +
|
||||
out_low;
|
||||
}
|
||||
|
||||
} // namespace fake_quantize_details
|
||||
namespace v0
|
||||
{
|
||||
template <typename T>
|
||||
void fake_quantize(const T* const arg,
|
||||
const T* const in_low,
|
||||
const T* const in_high,
|
||||
const T* const out_low,
|
||||
const T* const out_high,
|
||||
T* const out,
|
||||
const Shape& arg_shape,
|
||||
const Shape& in_low_shape,
|
||||
const Shape& in_high_shape,
|
||||
const Shape& out_low_shape,
|
||||
const Shape& out_high_shape,
|
||||
size_t levels,
|
||||
const op::AutoBroadcastSpec& broadcast)
|
||||
{
|
||||
using namespace fake_quantize_details;
|
||||
|
||||
if (shape_size(in_low_shape) == 1 && shape_size(in_high_shape) == 1 &&
|
||||
shape_size(out_low_shape) == 1 && shape_size(out_high_shape) == 1)
|
||||
{
|
||||
const size_t arg_size = shape_size(arg_shape);
|
||||
const auto q = [=](const T& a) {
|
||||
return quantize(a, *in_low, *in_high, *out_low, *out_high, levels);
|
||||
};
|
||||
for (size_t i = 0; i < arg_size; ++i)
|
||||
{
|
||||
out[i] = q(arg[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
out[i] = nearbyint((arg[i] - in_low_val) / (in_high_val - in_low_val) *
|
||||
(levels - 1)) /
|
||||
(levels - 1) * (out_high_val - out_low_val) +
|
||||
out_low_val;
|
||||
NGRAPH_CHECK(in_low_shape.size() <= arg_shape.size() &&
|
||||
in_high_shape.size() <= arg_shape.size() &&
|
||||
out_low_shape.size() <= arg_shape.size() &&
|
||||
out_high_shape.size() <= arg_shape.size(),
|
||||
"Tensors with inout\\output ranges should have rank less or "
|
||||
"equal to data tensor rank equal to ",
|
||||
arg_shape.size());
|
||||
|
||||
const QuantizationBound<T> in_low_bound(
|
||||
in_low, in_low_shape, arg_shape, broadcast);
|
||||
const QuantizationBound<T> in_high_bound(
|
||||
in_high, in_high_shape, arg_shape, broadcast);
|
||||
const QuantizationBound<T> out_low_bound(
|
||||
out_low, out_low_shape, arg_shape, broadcast);
|
||||
const QuantizationBound<T> out_high_bound(
|
||||
out_high, out_high_shape, arg_shape, broadcast);
|
||||
|
||||
std::vector<size_t> current_dim(arg_shape.size(), 0);
|
||||
const auto arg_shape_size = shape_size(arg_shape);
|
||||
for (size_t index = 0; index < arg_shape_size; ++index)
|
||||
{
|
||||
const T in_low_val = in_low_bound.get_value(current_dim, index);
|
||||
const T in_high_val = in_high_bound.get_value(current_dim, index);
|
||||
const T out_low_val = out_low_bound.get_value(current_dim, index);
|
||||
const T out_high_val = out_high_bound.get_value(current_dim, index);
|
||||
|
||||
out[index] = quantize(arg[index],
|
||||
in_low_val,
|
||||
in_high_val,
|
||||
out_low_val,
|
||||
out_high_val,
|
||||
levels);
|
||||
increment_current_dim(current_dim, arg_shape);
|
||||
}
|
||||
}
|
||||
increment_current_dim(current_dim, arg_shape, arg_shape.size() - 1);
|
||||
}
|
||||
std::fesetround(initial_round_mode);
|
||||
} // namespace v0
|
||||
|
||||
template <typename T>
|
||||
void fake_quantize(const T* const arg,
|
||||
const T* const in_low,
|
||||
const T* const in_high,
|
||||
const T* const out_low,
|
||||
const T* const out_high,
|
||||
T* const out,
|
||||
const Shape& arg_shape,
|
||||
const Shape& in_low_shape,
|
||||
const Shape& in_high_shape,
|
||||
const Shape& out_low_shape,
|
||||
const Shape& out_high_shape,
|
||||
size_t levels)
|
||||
{
|
||||
v0::fake_quantize(arg,
|
||||
in_low,
|
||||
in_high,
|
||||
out_low,
|
||||
out_high,
|
||||
out,
|
||||
arg_shape,
|
||||
in_low_shape,
|
||||
in_high_shape,
|
||||
out_low_shape,
|
||||
out_high_shape,
|
||||
levels,
|
||||
op::AutoBroadcastType::NUMPY);
|
||||
}
|
||||
} // namespace reference
|
||||
} // namespace runtime
|
||||
|
||||
@@ -416,6 +416,7 @@ set(MULTI_TEST_SRC
|
||||
backend/exp.in.cpp
|
||||
backend/experimental_detectron_detection_output.in.cpp
|
||||
backend/experimental_detectron_prior_grid.in.cpp
|
||||
backend/fake_quantize.in.cpp
|
||||
backend/floor.in.cpp
|
||||
backend/floor_mod.in.cpp
|
||||
backend/function_name.in.cpp
|
||||
|
||||
187 lines added: ngraph/test/backend/fake_quantize.in.cpp (new file)
@@ -0,0 +1,187 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ngraph/op/parameter.hpp"
|
||||
#include "ngraph/output_vector.hpp"
|
||||
#include "ngraph/shape.hpp"
|
||||
|
||||
// clang-format off
|
||||
#ifdef ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS
|
||||
#define DEFAULT_FLOAT_TOLERANCE_BITS ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS
|
||||
#endif
|
||||
|
||||
#ifdef ${BACKEND_NAME}_DOUBLE_TOLERANCE_BITS
|
||||
#define DEFAULT_DOUBLE_TOLERANCE_BITS ${BACKEND_NAME}_DOUBLE_TOLERANCE_BITS
|
||||
#endif
|
||||
// clang-format on
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ngraph/ngraph.hpp"
|
||||
#include "util/engine/test_engines.hpp"
|
||||
#include "util/test_case.hpp"
|
||||
#include "util/test_control.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
|
||||
static std::string s_manifest = "${MANIFEST}";
|
||||
using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME});
|
||||
|
||||
namespace
|
||||
{
|
||||
// Builds a vector of `size` consecutive values starting at `first_value`
// (value-initialized, i.e. zero, by default): {f, f+1, f+2, ...}.
// Used to generate deterministic input data for the FakeQuantize tests.
template <typename T>
std::vector<T> iota_vector(size_t size, T first_value = {})
{
    std::vector<T> result;
    result.reserve(size);
    T next = first_value;
    for (size_t i = 0; i < size; ++i)
    {
        result.push_back(next);
        ++next;
    }
    return result;
}
|
||||
} // namespace
|
||||
|
||||
// Scalar input/output ranges are broadcast over the whole data tensor
// (default auto-broadcast). levels = 4 quantizes data 0..23 onto four
// evenly spaced output values between 2 and 16.
NGRAPH_TEST(${BACKEND_NAME}, fake_quantize)
{
    const Shape data_shape{1, 2, 3, 4};
    const size_t levels = 4;
    const auto data = std::make_shared<op::Parameter>(element::f32, data_shape);
    // Input range [0, 23] maps onto output range [2, 16]; all ranges are scalars.
    const auto input_low = op::Constant::create(element::f32, Shape{}, {0.f});
    const auto input_high = op::Constant::create(element::f32, Shape{}, {23.f});
    const auto output_low = op::Constant::create(element::f32, Shape{}, {2.f});
    const auto output_high = op::Constant::create(element::f32, Shape{}, {16.f});

    const auto quantize = std::make_shared<op::FakeQuantize>(
        data, input_low, input_high, output_low, output_high, levels);
    const auto function = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{data});
    auto test_case = test::TestCase<TestEngine>(function);

    // Deterministic input: 0, 1, 2, ... for every element of data_shape.
    test_case.add_input<float>(iota_vector<float>(shape_size(data_shape)));

    // expected result: only the four level values 2, ~6.67, ~11.33, 16 appear
    test_case.add_expected_output<float>(
        data_shape,
        std::vector<float>{2.f, 2.f, 2.f, 2.f, 6.6666669f,
                           6.6666669f, 6.6666669f, 6.6666669f, 6.6666669f, 6.6666669f,
                           6.6666669f, 6.6666669f, 11.33333301f, 11.33333301f, 11.33333301f,
                           11.33333301f, 11.33333301f, 11.33333301f, 11.33333301f, 11.33333301f,
                           16.f, 16.f, 16.f, 16.f});

    test_case.run();
}
|
||||
|
||||
// Same as the basic test but the input range [3, 17] is narrower than the
// data values 0..23, so values outside the range are clipped to the output
// range ends (2 below, 16 above). levels = 5 gives five output values.
NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip)
{
    const Shape data_shape{1, 2, 3, 4};
    const size_t levels = 5;
    const auto data = std::make_shared<op::Parameter>(element::f32, data_shape);
    const auto input_low = op::Constant::create(element::f32, Shape{}, {3.f});
    const auto input_high = op::Constant::create(element::f32, Shape{}, {17.f});
    const auto output_low = op::Constant::create(element::f32, Shape{}, {2.f});
    const auto output_high = op::Constant::create(element::f32, Shape{}, {16.f});

    const auto quantize = std::make_shared<op::FakeQuantize>(
        data, input_low, input_high, output_low, output_high, levels);
    const auto function = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{data});
    auto test_case = test::TestCase<TestEngine>(function);

    // Deterministic input: 0, 1, 2, ... for every element of data_shape.
    test_case.add_input<float>(iota_vector<float>(shape_size(data_shape)));

    // expected result: level values 2, 5.5, 9, 12.5, 16 with clipping at both ends
    test_case.add_expected_output<float>(
        data_shape, std::vector<float>{2.f, 2.f, 2.f, 2.f, 2.f, 5.5f, 5.5f, 5.5f,
                                       5.5f, 9.f, 9.f, 9.f, 12.5f, 12.5f, 12.5f, 12.5f,
                                       16.f, 16.f, 16.f, 16.f, 16.f, 16.f, 16.f, 16.f});

    test_case.run();
}
|
||||
|
||||
// Per-channel ranges: the range tensors have shape {2, 1, 1} and are
// NUMPY-broadcast against the {1, 2, 5, 5} data, so each of the two channels
// is quantized with its own input/output range.
NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_with_clip_across_channels)
{
    Shape data_shape{1, 2, 5, 5};
    size_t levels = 5;
    auto data = std::make_shared<op::Parameter>(element::f32, data_shape);
    // First value applies to channel 0, second to channel 1.
    auto input_low = op::Constant::create(element::f32, Shape{2, 1, 1}, {5.f, 30.f});
    auto input_high = op::Constant::create(element::f32, Shape{2, 1, 1}, {10.f, 40.f});
    auto output_low = op::Constant::create(element::f32, Shape{2, 1, 1}, {0.f, 50.f});
    auto output_high = op::Constant::create(element::f32, Shape{2, 1, 1}, {20.f, 70.f});

    auto quantize = std::make_shared<op::FakeQuantize>(
        data, input_low, input_high, output_low, output_high, levels);
    auto function = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{data});
    auto test_case = test::TestCase<TestEngine>(function);

    // Deterministic input: 0, 1, 2, ... for every element of data_shape.
    test_case.add_input<float>(iota_vector<float>(shape_size(data_shape)));

    // expected result: channel 0 quantized to [0, 20], channel 1 to [50, 70]
    test_case.add_expected_output<float>(
        data_shape,
        std::vector<float>{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 5.0f, 10.0f, 10.0f, 15.0f,
                           20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f,
                           20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 50.0f, 50.0f, 50.0f, 50.0f, 50.0f,
                           50.0f, 50.0f, 55.0f, 55.0f, 60.0f, 60.0f, 60.0f, 65.0f, 65.0f, 70.0f,
                           70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f});

    test_case.run();
}
|
||||
|
||||
// PDPD broadcasting with an explicit axis (1): the rank-4 range tensors of
// shape {2, 1, 1, 1} are aligned starting at axis 1 of the {1, 2, 5, 5}
// data, giving per-channel quantization. Expected values match the
// NUMPY across-channels test above by construction.
NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_pdpd)
{
    Shape data_shape{1, 2, 5, 5};
    size_t levels = 5;
    const auto broadcast = op::AutoBroadcastSpec(op::AutoBroadcastType::PDPD, 1);
    auto data = std::make_shared<op::Parameter>(element::f32, data_shape);
    // First value applies to channel 0, second to channel 1.
    auto input_low = op::Constant::create(element::f32, Shape{2, 1, 1, 1}, {5.f, 30.f});
    auto input_high = op::Constant::create(element::f32, Shape{2, 1, 1, 1}, {10.f, 40.f});
    auto output_low = op::Constant::create(element::f32, Shape{2, 1, 1, 1}, {0.f, 50.f});
    auto output_high = op::Constant::create(element::f32, Shape{2, 1, 1, 1}, {20.f, 70.f});

    auto quantize = std::make_shared<op::FakeQuantize>(
        data, input_low, input_high, output_low, output_high, levels, broadcast);
    auto function = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{data});
    auto test_case = test::TestCase<TestEngine>(function);

    // Deterministic input: 0, 1, 2, ... for every element of data_shape.
    test_case.add_input<float>(iota_vector<float>(shape_size(data_shape)));

    // expected result: channel 0 quantized to [0, 20], channel 1 to [50, 70]
    test_case.add_expected_output<float>(
        data_shape,
        std::vector<float>{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 5.0f, 10.0f, 10.0f, 15.0f,
                           20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f,
                           20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 50.0f, 50.0f, 50.0f, 50.0f, 50.0f,
                           50.0f, 50.0f, 55.0f, 55.0f, 60.0f, 60.0f, 60.0f, 65.0f, 65.0f, 70.0f,
                           70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f});

    test_case.run();
}
|
||||
|
||||
// PDPD broadcasting with axis = -1 (the "default axis" case): the {2, 1, 1}
// range tensors are aligned with the trailing dimensions of the
// {1, 2, 5, 5} data, again selecting a range per channel. Expected values
// match the explicit-axis PDPD test above.
NGRAPH_TEST(${BACKEND_NAME}, fake_quantize_pdpd_default_axis)
{
    Shape data_shape{1, 2, 5, 5};
    size_t levels = 5;
    const auto broadcast = op::AutoBroadcastSpec(op::AutoBroadcastType::PDPD, -1);
    auto data = std::make_shared<op::Parameter>(element::f32, data_shape);
    // First value applies to channel 0, second to channel 1.
    auto input_low = op::Constant::create(element::f32, Shape{2, 1, 1}, {5.f, 30.f});
    auto input_high = op::Constant::create(element::f32, Shape{2, 1, 1}, {10.f, 40.f});
    auto output_low = op::Constant::create(element::f32, Shape{2, 1, 1}, {0.f, 50.f});
    auto output_high = op::Constant::create(element::f32, Shape{2, 1, 1}, {20.f, 70.f});

    auto quantize = std::make_shared<op::FakeQuantize>(
        data, input_low, input_high, output_low, output_high, levels, broadcast);
    auto function = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{data});
    auto test_case = test::TestCase<TestEngine>(function);

    // Deterministic input: 0, 1, 2, ... for every element of data_shape.
    test_case.add_input<float>(iota_vector<float>(shape_size(data_shape)));

    // expected result: channel 0 quantized to [0, 20], channel 1 to [50, 70]
    test_case.add_expected_output<float>(
        data_shape,
        std::vector<float>{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 5.0f, 10.0f, 10.0f, 15.0f,
                           20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f,
                           20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 50.0f, 50.0f, 50.0f, 50.0f, 50.0f,
                           50.0f, 50.0f, 55.0f, 55.0f, 60.0f, 60.0f, 60.0f, 65.0f, 65.0f, 70.0f,
                           70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f});

    test_case.run();
}
|
||||
@@ -254,182 +254,6 @@ NGRAPH_TEST(${BACKEND_NAME}, DISABLED_squared_difference_broadcast)
|
||||
test_case.run();
|
||||
}
|
||||
|
||||
// TODO: Issue: 37511
|
||||
// Legacy version of the basic FakeQuantize test (disabled; the commit this
// diff belongs to moved it to fake_quantize.in.cpp). The ranges are passed
// as runtime Parameters rather than Constants; expected values are the same
// as in the replacement test.
NGRAPH_TEST(${BACKEND_NAME}, DISABLED_fake_quantize)
{
    const Shape data_shape{1, 2, 3, 4};
    const size_t levels = 4;
    const auto data = make_shared<op::Parameter>(element::f32, data_shape);
    // Scalar range inputs, fed at run time below.
    const auto input_low = make_shared<op::Parameter>(element::f32, Shape{});
    const auto input_high = make_shared<op::Parameter>(element::f32, Shape{});
    const auto output_low = make_shared<op::Parameter>(element::f32, Shape{});
    const auto output_high = make_shared<op::Parameter>(element::f32, Shape{});

    const auto quantize =
        make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels);
    const auto function = make_shared<Function>(
        NodeVector{quantize},
        ParameterVector{data, input_low, input_high, output_low, output_high});
    auto test_case = test::TestCase<TestEngine>(function);

    // Deterministic input: 0, 1, 2, ... for every element of data_shape.
    const size_t n_elements = shape_size(data_shape);
    vector<float> input_data(n_elements);
    iota(begin(input_data), end(input_data), 0);

    test_case.add_input<float>(input_data);
    // input_low
    test_case.add_input<float>({0.0f});
    // input_high
    test_case.add_input<float>({23.f});
    // output_low
    test_case.add_input<float>({2.f});
    // output_high
    test_case.add_input<float>({16.f});

    // expected result
    test_case.add_expected_output<float>(
        data_shape,
        vector<float>{2.f, 2.f, 2.f, 2.f, 6.6666669f,
                      6.6666669f, 6.6666669f, 6.6666669f, 6.6666669f, 6.6666669f,
                      6.6666669f, 6.6666669f, 11.33333301f, 11.33333301f, 11.33333301f,
                      11.33333301f, 11.33333301f, 11.33333301f, 11.33333301f, 11.33333301f,
                      16.f, 16.f, 16.f, 16.f});

    test_case.run();
}
|
||||
|
||||
// Legacy version of the clipping test (disabled; superseded by the version
// in fake_quantize.in.cpp). Input range [3, 17] is narrower than the data
// 0..23, exercising clipping at both ends of the output range [2, 16].
NGRAPH_TEST(${BACKEND_NAME}, DISABLED_fake_quantize_with_clip)
{
    const Shape data_shape{1, 2, 3, 4};
    const size_t levels = 5;
    const auto data = make_shared<op::Parameter>(element::f32, data_shape);
    // Scalar range inputs, fed at run time below.
    const auto input_low = make_shared<op::Parameter>(element::f32, Shape{});
    const auto input_high = make_shared<op::Parameter>(element::f32, Shape{});
    const auto output_low = make_shared<op::Parameter>(element::f32, Shape{});
    const auto output_high = make_shared<op::Parameter>(element::f32, Shape{});

    const auto quantize =
        make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels);
    const auto function = make_shared<Function>(
        NodeVector{quantize},
        ParameterVector{data, input_low, input_high, output_low, output_high});
    auto test_case = test::TestCase<TestEngine>(function);

    // Deterministic input: 0, 1, 2, ... for every element of data_shape.
    const size_t n_elements = shape_size(data_shape);
    vector<float> input_data(n_elements);
    iota(begin(input_data), end(input_data), 0);

    test_case.add_input<float>(input_data);
    // input_low
    test_case.add_input<float>({3.f});
    // input_high
    test_case.add_input<float>({17.f});
    // output_low
    test_case.add_input<float>({2.f});
    // output_high
    test_case.add_input<float>({16.f});

    // expected result
    test_case.add_expected_output<float>(
        data_shape,
        vector<float>{2.f, 2.f, 2.f, 2.f, 2.f, 5.5f, 5.5f, 5.5f, 5.5f, 9.f, 9.f, 9.f,
                      12.5f, 12.5f, 12.5f, 12.5f, 16.f, 16.f, 16.f, 16.f, 16.f, 16.f, 16.f, 16.f});

    test_case.run();
}
|
||||
|
||||
// Legacy per-channel test (disabled; superseded by the Constant-based
// version in fake_quantize.in.cpp). Range tensors of shape {2, 1, 1} give
// each channel of the {1, 2, 5, 5} data its own quantization range.
NGRAPH_TEST(${BACKEND_NAME}, DISABLED_fake_quantize_with_clip_across_channels)
{
    Shape data_shape{1, 2, 5, 5};
    size_t levels = 5;
    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    // Per-channel range inputs, fed at run time below.
    auto input_low = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
    auto input_high = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
    auto output_low = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});
    auto output_high = make_shared<op::Parameter>(element::f32, Shape{2, 1, 1});

    auto quantize =
        make_shared<op::FakeQuantize>(data, input_low, input_high, output_low, output_high, levels);
    auto function = make_shared<Function>(
        NodeVector{quantize},
        ParameterVector{data, input_low, input_high, output_low, output_high});
    auto test_case = test::TestCase<TestEngine>(function);

    // Deterministic input: 0, 1, 2, ... for every element of data_shape.
    size_t n_elements = shape_size(data_shape);
    vector<float> input_data(n_elements);
    iota(begin(input_data), end(input_data), 0);

    test_case.add_input<float>(input_data);
    // input_low
    test_case.add_input<float>(vector<float>{5.f, 30.f});
    // input_high
    test_case.add_input<float>(vector<float>{10.f, 40.f});
    // output_low
    test_case.add_input<float>(vector<float>{0.f, 50.f});
    // output_high
    test_case.add_input<float>(vector<float>{20.f, 70.f});

    // expected result
    test_case.add_expected_output<float>(
        data_shape,
        vector<float>{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 5.0f, 10.0f, 10.0f, 15.0f,
                      20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f,
                      20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 50.0f, 50.0f, 50.0f, 50.0f, 50.0f,
                      50.0f, 50.0f, 55.0f, 55.0f, 60.0f, 60.0f, 60.0f, 65.0f, 65.0f, 70.0f,
                      70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f});

    test_case.run();
}
|
||||
|
||||
// Legacy PDPD-broadcast test (disabled; superseded by the version in
// fake_quantize.in.cpp). Rank-1 range tensors of shape {2} are aligned at
// axis 1 of the {1, 2, 5, 5} data via AutoBroadcastType::PDPD, selecting a
// quantization range per channel.
NGRAPH_TEST(${BACKEND_NAME}, DISABLED_fake_quantize_pdpd)
{
    Shape data_shape{1, 2, 5, 5};
    size_t levels = 5;
    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    // Per-channel range inputs, fed at run time below.
    auto input_low = make_shared<op::Parameter>(element::f32, Shape{2});
    auto input_high = make_shared<op::Parameter>(element::f32, Shape{2});
    auto output_low = make_shared<op::Parameter>(element::f32, Shape{2});
    auto output_high = make_shared<op::Parameter>(element::f32, Shape{2});

    auto quantize =
        make_shared<op::FakeQuantize>(data,
                                      input_low,
                                      input_high,
                                      output_low,
                                      output_high,
                                      levels,
                                      op::AutoBroadcastSpec(op::AutoBroadcastType::PDPD, 1));
    auto function = make_shared<Function>(
        NodeVector{quantize},
        ParameterVector{data, input_low, input_high, output_low, output_high});
    auto test_case = test::TestCase<TestEngine>(function);

    // Deterministic input: 0, 1, 2, ... for every element of data_shape.
    size_t n_elements = shape_size(data_shape);
    vector<float> input_data(n_elements);
    iota(begin(input_data), end(input_data), 0);

    test_case.add_input<float>(input_data);
    // input_low
    test_case.add_input<float>(vector<float>{5.f, 30.f});
    // input_high
    test_case.add_input<float>(vector<float>{10.f, 40.f});
    // output_low
    test_case.add_input<float>(vector<float>{0.f, 50.f});
    // output_high
    test_case.add_input<float>(vector<float>{20.f, 70.f});

    // expected result
    test_case.add_expected_output<float>(
        data_shape,
        vector<float>{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 5.0f, 10.0f, 10.0f, 15.0f,
                      20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 20.0f,
                      20.0f, 20.0f, 20.0f, 20.0f, 20.0f, 50.0f, 50.0f, 50.0f, 50.0f, 50.0f,
                      50.0f, 50.0f, 55.0f, 55.0f, 60.0f, 60.0f, 60.0f, 65.0f, 65.0f, 70.0f,
                      70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f, 70.0f});

    test_case.run();
}
|
||||
|
||||
NGRAPH_TEST(${BACKEND_NAME}, depth_to_space_space_to_depth_block_first)
|
||||
{
|
||||
auto backend = runtime::Backend::create("${BACKEND_NAME}");
|
||||
|
||||
@@ -255,10 +255,11 @@ numeric_float_nan
|
||||
numeric_float_inf
|
||||
numeric_double_nan
|
||||
numeric_double_inf
|
||||
fake_quantize
|
||||
fake_quantize_with_clip
|
||||
fake_quantize_with_clip_across_channels
|
||||
|
||||
fake_quantize_pdpd
|
||||
IE_GPU.fake_quantize
|
||||
IE_GPU.fake_quantize_with_clip
|
||||
IE_GPU.fake_quantize_with_clip_across_channels
|
||||
|
||||
# <op name> has zero dimension that is not allowable
|
||||
zero_sized_abs
|
||||
|
||||
@@ -1227,7 +1227,7 @@ namespace
|
||||
info.selected_outputs_shape,
|
||||
selected_indices.data(),
|
||||
info.selected_indices_shape,
|
||||
valid_outputs.data());
|
||||
valid_outputs.data());
|
||||
|
||||
void* pscores = nullptr;
|
||||
void* pselected_num = nullptr;
|
||||
@@ -2437,18 +2437,19 @@ namespace
|
||||
const HostTensorVector& inputs)
|
||||
{
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
runtime::reference::fake_quantize<T>(inputs[0]->get_data_ptr<const T>(),
|
||||
inputs[1]->get_data_ptr<const T>(),
|
||||
inputs[2]->get_data_ptr<const T>(),
|
||||
inputs[3]->get_data_ptr<const T>(),
|
||||
inputs[4]->get_data_ptr<const T>(),
|
||||
outputs[0]->get_data_ptr<T>(),
|
||||
op->get_input_shape(0),
|
||||
op->get_input_shape(1),
|
||||
op->get_input_shape(2),
|
||||
op->get_input_shape(3),
|
||||
op->get_input_shape(4),
|
||||
op->get_levels());
|
||||
runtime::reference::v0::fake_quantize<T>(inputs[0]->get_data_ptr<const T>(),
|
||||
inputs[1]->get_data_ptr<const T>(),
|
||||
inputs[2]->get_data_ptr<const T>(),
|
||||
inputs[3]->get_data_ptr<const T>(),
|
||||
inputs[4]->get_data_ptr<const T>(),
|
||||
outputs[0]->get_data_ptr<T>(),
|
||||
op->get_input_shape(0),
|
||||
op->get_input_shape(1),
|
||||
op->get_input_shape(2),
|
||||
op->get_input_shape(3),
|
||||
op->get_input_shape(4),
|
||||
op->get_levels(),
|
||||
op->get_auto_broadcast());
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -2523,7 +2524,7 @@ namespace
|
||||
op->get_merge_repeated());
|
||||
}
|
||||
}
|
||||
}
|
||||
} // ctc_greedy_decoder_v6
|
||||
template <element::Type_t ET>
|
||||
bool evaluate(const shared_ptr<op::v6::CTCGreedyDecoderSeqLen>& op,
|
||||
const HostTensorVector& outputs,
|
||||
@@ -2781,7 +2782,7 @@ namespace
|
||||
{
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
NGRAPH_CHECK(inputs.size() > 1 && inputs[1]->get_shape().size() == 2,
|
||||
"2D tensor must be provided as second input. ");
|
||||
"2D tensor must be provided as second input. ");
|
||||
outputs[0]->set_shape({inputs[1]->get_shape()[0],
|
||||
static_cast<size_t>(op->get_output_dim()),
|
||||
static_cast<size_t>(op->get_group_size()),
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
fake_quantize_pdpd
|
||||
|
||||
INTERPRETER.onnx_model_quant_conv_linear
|
||||
INTERPRETER.onnx_top_k_opset_10
|
||||
|
||||
|
||||
@@ -35,6 +35,10 @@ TEST(attributes, fake_quantize_op)
|
||||
NodeBuilder builder(fake_quantize);
|
||||
auto g_fake_quantize = as_type_ptr<opset1::FakeQuantize>(builder.create());
|
||||
|
||||
// attribute count
|
||||
const auto expected_attr_count = 2;
|
||||
EXPECT_EQ(builder.get_value_map_size(), expected_attr_count);
|
||||
|
||||
EXPECT_EQ(g_fake_quantize->get_levels(), fake_quantize->get_levels());
|
||||
EXPECT_EQ(g_fake_quantize->get_auto_broadcast(), fake_quantize->get_auto_broadcast());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user