[CPU] Fix for subnormal numbers nullifying routine (#10622)
This commit is contained in:
parent
6062e3d4b7
commit
9e3610c028
@ -249,7 +249,7 @@ MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr<ngraph::Node>& op, const
|
||||
void MKLDNNInputNode::cloneBlobIfRequired() {
|
||||
Shape shape(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape());
|
||||
const auto prec = convertPrecision(constOp->get_element_type());
|
||||
const size_t size = shape.getRank();
|
||||
const size_t size = shape.getElementsCount();
|
||||
DnnlBlockedMemoryDesc memDesc(prec, shape);
|
||||
|
||||
auto cloneBlob = [&, this] () {
|
||||
|
@ -0,0 +1,110 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"

#include <cstdint>
#include <cstring>
#include <memory>
#include <unordered_set>
#include <utility>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace ov::test;
|
||||
namespace SubgraphTestsDefinitions {
|
||||
|
||||
template<typename T>
|
||||
class AlignedBufferWrapper {
|
||||
public:
|
||||
AlignedBufferWrapper(size_t size, size_t alignment) {
|
||||
_buffer.reset(new ngraph::runtime::AlignedBuffer(size * sizeof(T), alignment));
|
||||
}
|
||||
AlignedBufferWrapper(const AlignedBufferWrapper&) = delete;
|
||||
AlignedBufferWrapper& operator=(const AlignedBufferWrapper&) = delete;
|
||||
AlignedBufferWrapper(AlignedBufferWrapper&&) = default;
|
||||
AlignedBufferWrapper& operator=(AlignedBufferWrapper&&) = default;
|
||||
|
||||
T* get_ptr() {
|
||||
return _buffer->get_ptr<T>();
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return _buffer->size() / sizeof(T);
|
||||
}
|
||||
private:
|
||||
std::unique_ptr<ngraph::runtime::AlignedBuffer> _buffer = nullptr;
|
||||
};
|
||||
|
||||
class DenormalNullifyCheck : public SubgraphBaseTest {
|
||||
protected:
|
||||
std::unique_ptr<AlignedBufferWrapper<float>> pConstStorage;
|
||||
|
||||
void validate() override {
|
||||
const auto& actualOutputs = get_plugin_outputs();
|
||||
ASSERT_FALSE(actualOutputs.empty());
|
||||
auto& outTensor = actualOutputs.front();
|
||||
ASSERT_EQ(ov::element::f32, outTensor.get_element_type()) << "Unexpected element type";
|
||||
const uint32_t* data = reinterpret_cast<const uint32_t*>(outTensor.data());
|
||||
bool hasDenormals = false;
|
||||
for (size_t i = 0; i < outTensor.get_size(); ++i) {
|
||||
if (data[i] && (data[i] & (0xff << 23)) == 0) {
|
||||
hasDenormals = true;
|
||||
}
|
||||
}
|
||||
ASSERT_FALSE(hasDenormals);
|
||||
}
|
||||
|
||||
|
||||
void SetUp() override {
|
||||
constexpr size_t alignment = 64; // bytes cache line size, to avoid denormals zeroing due to memory reallocation in the input node implementation
|
||||
const ov::Shape inpShape = {1, 24, 3, 3};
|
||||
targetStaticShapes.push_back({inpShape});
|
||||
targetDevice = CommonTestUtils::DEVICE_CPU;
|
||||
|
||||
const auto elemsCount = shape_size(inpShape);
|
||||
const auto rtPrc = ov::element::f32;
|
||||
auto params = ngraph::builder::makeParams(rtPrc, {inpShape});
|
||||
pConstStorage.reset(new AlignedBufferWrapper<float>(elemsCount, alignment));
|
||||
|
||||
auto constTensor = std::make_shared<ov::HostTensor>(rtPrc, inpShape, pConstStorage->get_ptr());
|
||||
auto constNode = std::make_shared<ngraph::opset1::Constant>(constTensor);
|
||||
ov::NodeVector input = {params[0], constNode};
|
||||
auto concat = std::make_shared<ngraph::opset1::Concat>(input, 1);
|
||||
|
||||
ov::ResultVector results{std::make_shared<ngraph::opset1::Result>(concat->output(0))};
|
||||
|
||||
function = std::make_shared<ngraph::Function>(results, params, "denormal_check");
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(DenormalNullifyCheck, smoke_CPU_Denormal_Check) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    using indexInterval = std::pair<size_t, size_t>;
    size_t elemsCount = pConstStorage->size();
    // Seed denormals into the lower half, the upper half, and the whole
    // buffer so the nullifying routine is exercised over every region of the
    // constant.
    const indexInterval intervals[] = {
        {0, elemsCount/2},
        {elemsCount/2, elemsCount},
        {0, elemsCount}
    };

    constexpr unsigned seed = 1u;
    constexpr unsigned denormalsCount = 15u;
    // 0xffffffff >> 9 keeps only the 23 mantissa bits (sign and exponent
    // zero); "+ 1" on the generated value below excludes the all-zero word,
    // so every injected pattern is a genuine denormal.
    constexpr uint32_t denormalsRange = (0xffffffffu >> 9u) - 1;
    testing::internal::Random random(seed);
    auto randomRange = NGraphFunctions::Utils::generateVector<ov::element::f32>(elemsCount, 10, -10);

    for (auto& interval : intervals) {
        // Pick denormalsCount random slots inside the interval; duplicates
        // collapse in the set, so the effective count may be smaller.
        auto randomIndices = NGraphFunctions::Utils::generateVector<ov::element::u32>(denormalsCount, interval.second, interval.first);
        std::unordered_set<decltype(randomIndices)::value_type> randomIndexSet(randomIndices.begin(), randomIndices.end());
        for (size_t i = 0; i < elemsCount; ++i) {
            if (randomIndexSet.count(i)) {
                const uint32_t denormal = random.Generate(denormalsRange) + 1;
                // std::memcpy instead of *(reinterpret_cast<float*>(&denormal)):
                // type punning through a pointer cast violates strict aliasing
                // and is undefined behavior.
                float value;
                std::memcpy(&value, &denormal, sizeof(value));
                pConstStorage->get_ptr()[i] = value;
            } else {
                pConstStorage->get_ptr()[i] = randomRange[i];
            }
        }

        run();
    }
}
|
||||
}// namespace SubgraphTestsDefinitions
|
@ -46,18 +46,18 @@
|
||||
<model path="public/mtcnn/mtcnn-r/FP16/mtcnn-r.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="908700" vmpeak="908700" vmrss="43227" vmhwm="43227" />
|
||||
<model path="public/mtcnn/mtcnn-r/FP16/mtcnn-r.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="2107846" vmpeak="2145161" vmrss="235248" vmhwm="305996" />
|
||||
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="1120184" vmpeak="1120184" vmrss="359200" vmhwm="359200" />
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="1367288" vmpeak="1367288" vmrss="468748" vmhwm="468748" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="GPU" vmsize="1630569" vmpeak="1752530" vmrss="546364" vmhwm="874426" />
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="inference_with_streams" device="CPU" vmsize="1457409" vmpeak="1458782" vmrss="572577" vmhwm="572577" />
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="inference_with_streams" device="GPU" vmsize="1856405" vmpeak="1941602" vmrss="578843" vmhwm="872071" />
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="1309838" vmpeak="1386434" vmrss="421626" vmhwm="421626" />
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="1711881" vmpeak="1797078" vmrss="544310" vmhwm="875368" />
|
||||
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="2405114" vmpeak="2405114" vmrss="1683084" vmhwm="1683084" />
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="3264497" vmpeak="3264497" vmrss="2393794" vmhwm="2393794" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="GPU" vmsize="2206724" vmpeak="2551770" vmrss="1023926" vmhwm="1487049" />
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="inference_with_streams" device="CPU" vmsize="3430388" vmpeak="3600781" vmrss="2424016" vmhwm="2424016" />
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="inference_with_streams" device="GPU" vmsize="2408536" vmpeak="2649150" vmrss="1052251" vmhwm="1493044" />
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="2406284" vmpeak="2490311" vmrss="1716967" vmhwm="1716967" />
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="3355554" vmpeak="3440221" vmrss="2426278" vmhwm="2426278" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="2292227" vmpeak="2549414" vmrss="1002950" vmhwm="1461891" />
|
||||
|
||||
<!--Models with FP16-INT8 precision-->
|
||||
|
Loading…
Reference in New Issue
Block a user