[CPU] Fix for subnormal numbers nullifying routine (#10622)

Maksim Kutakov 2022-02-25 10:55:59 +03:00 committed by GitHub
parent 6062e3d4b7
commit 9e3610c028
3 changed files with 114 additions and 4 deletions
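
Background (not part of the diff): the CPU plugin flushes IEEE-754 subnormal ("denormal") float values found in constant blobs to zero, because arithmetic on subnormals is dramatically slower on x86. A single-precision subnormal has all exponent bits (30..23) equal to zero and a non-zero mantissa. Below is a minimal standalone sketch of such a flush pass; the function name nullifyDenormals and the raw float* interface are illustrative, not the plugin's actual API:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Flush subnormal floats to zero in place. A float32 is subnormal when its
// exponent bits are all zero while its mantissa bits are not.
void nullifyDenormals(float* data, size_t elementsCount) {
    for (size_t i = 0; i < elementsCount; ++i) {
        uint32_t bits;
        std::memcpy(&bits, &data[i], sizeof(bits));            // well-defined type pun
        const bool zeroExponent   = (bits & (0xffu << 23)) == 0;
        const bool nonZeroMantissa = (bits & 0x007fffffu) != 0;
        if (zeroExponent && nonZeroMantissa)
            data[i] = 0.0f;
    }
}

The bug fixed in the hunk below is in how many elements the real routine was told to scan.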


@@ -249,7 +249,7 @@ MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr<ngraph::Node>& op, const
 void MKLDNNInputNode::cloneBlobIfRequired() {
     Shape shape(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape());
     const auto prec = convertPrecision(constOp->get_element_type());
-    const size_t size = shape.getRank();
+    const size_t size = shape.getElementsCount();
     DnnlBlockedMemoryDesc memDesc(prec, shape);
     auto cloneBlob = [&, this] () {
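
Shape::getRank() is the number of dimensions, whereas Shape::getElementsCount() is the total number of elements, so the old code scanned only the first rank floats of the blob and left any denormals beyond them intact. A hedged illustration of the difference, using a plain std::vector in place of the plugin's Shape class:

#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
    const std::vector<size_t> shape = {1, 24, 3, 3};   // same shape the new test uses
    const size_t rank = shape.size();                  // what getRank() gave: 4
    const size_t elements = std::accumulate(shape.begin(), shape.end(),
                                            size_t{1}, std::multiplies<size_t>());  // 216
    // Before the fix, only the first `rank` (4) of `elements` (216) floats were checked.
    std::cout << rank << " vs " << elements << '\n';
}

The new test below plants denormals throughout a constant of exactly this shape, so the old rank-limited scan would miss most of them.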


@@ -0,0 +1,110 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "shared_test_classes/base/layer_test_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"

using namespace InferenceEngine;
using namespace ov::test;

namespace SubgraphTestsDefinitions {

// Movable, non-copyable wrapper that exposes an AlignedBuffer as a typed array.
template<typename T>
class AlignedBufferWrapper {
public:
    AlignedBufferWrapper(size_t size, size_t alignment) {
        _buffer.reset(new ngraph::runtime::AlignedBuffer(size * sizeof(T), alignment));
    }
    AlignedBufferWrapper(const AlignedBufferWrapper&) = delete;
    AlignedBufferWrapper& operator=(const AlignedBufferWrapper&) = delete;
    AlignedBufferWrapper(AlignedBufferWrapper&&) = default;
    AlignedBufferWrapper& operator=(AlignedBufferWrapper&&) = default;

    T* get_ptr() {
        return _buffer->get_ptr<T>();
    }
    size_t size() const {
        return _buffer->size() / sizeof(T);
    }
private:
    std::unique_ptr<ngraph::runtime::AlignedBuffer> _buffer = nullptr;
};
class DenormalNullifyCheck : public SubgraphBaseTest {
protected:
    std::unique_ptr<AlignedBufferWrapper<float>> pConstStorage;

    void validate() override {
        const auto& actualOutputs = get_plugin_outputs();
        ASSERT_FALSE(actualOutputs.empty());
        auto& outTensor = actualOutputs.front();
        ASSERT_EQ(ov::element::f32, outTensor.get_element_type()) << "Unexpected element type";

        const uint32_t* data = reinterpret_cast<const uint32_t*>(outTensor.data());
        bool hasDenormals = false;
        for (size_t i = 0; i < outTensor.get_size(); ++i) {
            // non-zero bit pattern with all-zero exponent bits => subnormal value
            if (data[i] && (data[i] & (0xff << 23)) == 0) {
                hasDenormals = true;
            }
        }
        ASSERT_FALSE(hasDenormals);
    }

    void SetUp() override {
        constexpr size_t alignment = 64; // cache line size in bytes; keeps the constant buffer aligned so the input node does not reallocate it, which would nullify the planted denormals
        const ov::Shape inpShape = {1, 24, 3, 3};
        targetStaticShapes.push_back({inpShape});
        targetDevice = CommonTestUtils::DEVICE_CPU;

        const auto elemsCount = shape_size(inpShape);
        const auto rtPrc = ov::element::f32;
        auto params = ngraph::builder::makeParams(rtPrc, {inpShape});
        pConstStorage.reset(new AlignedBufferWrapper<float>(elemsCount, alignment));

        auto constTensor = std::make_shared<ov::HostTensor>(rtPrc, inpShape, pConstStorage->get_ptr());
        auto constNode = std::make_shared<ngraph::opset1::Constant>(constTensor);
        ov::NodeVector input = {params[0], constNode};
        auto concat = std::make_shared<ngraph::opset1::Concat>(input, 1);

        ov::ResultVector results{std::make_shared<ngraph::opset1::Result>(concat->output(0))};
        function = std::make_shared<ngraph::Function>(results, params, "denormal_check");
    }
};
TEST_F(DenormalNullifyCheck, smoke_CPU_Denormal_Check) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    using indexInterval = std::pair<size_t, size_t>;
    size_t elemsCount = pConstStorage->size();
    const indexInterval intervals[] = {
        {0, elemsCount/2},
        {elemsCount/2, elemsCount},
        {0, elemsCount}
    };

    constexpr unsigned seed = 1u;
    constexpr unsigned denormalsCount = 15u;
    // Generate(denormalsRange) + 1 yields bit patterns in [1, 0x7fffff]:
    // zero sign and exponent bits, non-zero mantissa, i.e. a positive subnormal
    constexpr uint32_t denormalsRange = (0xffffffffu >> 9u) - 1;
    testing::internal::Random random(seed);
    auto randomRange = NGraphFunctions::Utils::generateVector<ov::element::f32>(elemsCount, 10, -10);

    for (auto& interval : intervals) {
        // plant denormals at random positions inside the current interval
        auto randomIndices = NGraphFunctions::Utils::generateVector<ov::element::u32>(denormalsCount, interval.second, interval.first);
        std::unordered_set<decltype(randomIndices)::value_type> randomIndexSet(randomIndices.begin(), randomIndices.end());
        for (size_t i = 0; i < elemsCount; ++i) {
            if (randomIndexSet.count(i)) {
                auto denormal = random.Generate(denormalsRange) + 1;
                pConstStorage->get_ptr()[i] = *(reinterpret_cast<float*>(&denormal));
            } else {
                pConstStorage->get_ptr()[i] = randomRange[i];
            }
        }
        run();
    }
}
} // namespace SubgraphTestsDefinitions
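
A side note on the test's construction of denormals (not part of the commit): the generated bit pattern has zero sign and exponent bits with a non-zero mantissa, which reinterprets as a positive subnormal float. A small standalone check of that property, using std::memcpy in place of the test's reinterpret_cast:

#include <cassert>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstring>

int main() {
    const uint32_t denormalsRange = (0xffffffffu >> 9u) - 1;  // 0x7ffffe
    const uint32_t bits = denormalsRange / 2 + 1;             // any value in [1, 0x7fffff] works
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    assert(std::fpclassify(f) == FP_SUBNORMAL);  // zero exponent, non-zero mantissa
    assert(f > 0.0f && f < FLT_MIN);             // strictly below the smallest normal float
    return 0;
}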


@@ -46,18 +46,18 @@
 <model path="public/mtcnn/mtcnn-r/FP16/mtcnn-r.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="908700" vmpeak="908700" vmrss="43227" vmhwm="43227" />
 <model path="public/mtcnn/mtcnn-r/FP16/mtcnn-r.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="2107846" vmpeak="2145161" vmrss="235248" vmhwm="305996" />
-<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="1120184" vmpeak="1120184" vmrss="359200" vmhwm="359200" />
+<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="1367288" vmpeak="1367288" vmrss="468748" vmhwm="468748" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
 <model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="GPU" vmsize="1630569" vmpeak="1752530" vmrss="546364" vmhwm="874426" />
 <model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="inference_with_streams" device="CPU" vmsize="1457409" vmpeak="1458782" vmrss="572577" vmhwm="572577" />
 <model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="inference_with_streams" device="GPU" vmsize="1856405" vmpeak="1941602" vmrss="578843" vmhwm="872071" />
 <model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="1309838" vmpeak="1386434" vmrss="421626" vmhwm="421626" />
 <model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="1711881" vmpeak="1797078" vmrss="544310" vmhwm="875368" />
-<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="2405114" vmpeak="2405114" vmrss="1683084" vmhwm="1683084" />
+<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="3264497" vmpeak="3264497" vmrss="2393794" vmhwm="2393794" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
 <model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="GPU" vmsize="2206724" vmpeak="2551770" vmrss="1023926" vmhwm="1487049" />
 <model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="inference_with_streams" device="CPU" vmsize="3430388" vmpeak="3600781" vmrss="2424016" vmhwm="2424016" />
 <model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="inference_with_streams" device="GPU" vmsize="2408536" vmpeak="2649150" vmrss="1052251" vmhwm="1493044" />
-<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="2406284" vmpeak="2490311" vmrss="1716967" vmhwm="1716967" />
+<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="3355554" vmpeak="3440221" vmrss="2426278" vmhwm="2426278" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
 <model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="2292227" vmpeak="2549414" vmrss="1002950" vmhwm="1461891" />
 <!--Models with FP16-INT8 precision-->