[CPU] Fix for subnormal numbers nullifying routine (#10622)

Maksim Kutakov 2022-02-25 10:55:59 +03:00 committed by GitHub
parent 6062e3d4b7
commit 9e3610c028
3 changed files with 114 additions and 4 deletions
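
Background (not part of the diff): the CPU plugin flushes IEEE-754 subnormal ("denormal") float values found in constant blobs to zero, because arithmetic on subnormals is dramatically slower on x86. A single-precision subnormal has all exponent bits (30..23) equal to zero and a non-zero mantissa. Below is a minimal standalone sketch of such a flush pass; the function name nullifyDenormals and the raw float* interface are illustrative, not the plugin's actual API:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Flush subnormal floats to zero in place. A float32 is subnormal when its
// exponent bits are all zero while its mantissa bits are not.
void nullifyDenormals(float* data, size_t elementsCount) {
    for (size_t i = 0; i < elementsCount; ++i) {
        uint32_t bits;
        std::memcpy(&bits, &data[i], sizeof(bits));            // well-defined type pun
        const bool zeroExponent   = (bits & (0xffu << 23)) == 0;
        const bool nonZeroMantissa = (bits & 0x007fffffu) != 0;
        if (zeroExponent && nonZeroMantissa)
            data[i] = 0.0f;
    }
}

The bug fixed in the hunk below is in how many elements the real routine was told to scan.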


@@ -249,7 +249,7 @@ MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr<ngraph::Node>& op, const
 void MKLDNNInputNode::cloneBlobIfRequired() {
     Shape shape(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape());
     const auto prec = convertPrecision(constOp->get_element_type());
-    const size_t size = shape.getRank();
+    const size_t size = shape.getElementsCount();
     DnnlBlockedMemoryDesc memDesc(prec, shape);
     auto cloneBlob = [&, this] () {
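
Shape::getRank() is the number of dimensions, whereas Shape::getElementsCount() is the total number of elements, so the old code scanned only the first rank floats of the blob and left any denormals beyond them intact. A hedged illustration of the difference, using a plain std::vector in place of the plugin's Shape class:

#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
    const std::vector<size_t> shape = {1, 24, 3, 3};   // same shape the new test uses
    const size_t rank = shape.size();                  // what getRank() gave: 4
    const size_t elements = std::accumulate(shape.begin(), shape.end(),
                                            size_t{1}, std::multiplies<size_t>());  // 216
    // Before the fix, only the first `rank` (4) of `elements` (216) floats were checked.
    std::cout << rank << " vs " << elements << '\n';
}

The new test below plants denormals throughout a constant of exactly this shape, so the old rank-limited scan would miss most of them.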


@@ -0,0 +1,110 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "shared_test_classes/base/layer_test_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"

using namespace InferenceEngine;
using namespace ov::test;

namespace SubgraphTestsDefinitions {

// Movable, non-copyable wrapper that exposes an AlignedBuffer as a typed array.
template<typename T>
class AlignedBufferWrapper {
public:
    AlignedBufferWrapper(size_t size, size_t alignment) {
        _buffer.reset(new ngraph::runtime::AlignedBuffer(size * sizeof(T), alignment));
    }
    AlignedBufferWrapper(const AlignedBufferWrapper&) = delete;
    AlignedBufferWrapper& operator=(const AlignedBufferWrapper&) = delete;
    AlignedBufferWrapper(AlignedBufferWrapper&&) = default;
    AlignedBufferWrapper& operator=(AlignedBufferWrapper&&) = default;

    T* get_ptr() {
        return _buffer->get_ptr<T>();
    }
    size_t size() const {
        return _buffer->size() / sizeof(T);
    }
private:
    std::unique_ptr<ngraph::runtime::AlignedBuffer> _buffer = nullptr;
};
class DenormalNullifyCheck : public SubgraphBaseTest {
protected:
    std::unique_ptr<AlignedBufferWrapper<float>> pConstStorage;

    void validate() override {
        const auto& actualOutputs = get_plugin_outputs();
        ASSERT_FALSE(actualOutputs.empty());
        auto& outTensor = actualOutputs.front();
        ASSERT_EQ(ov::element::f32, outTensor.get_element_type()) << "Unexpected element type";

        const uint32_t* data = reinterpret_cast<const uint32_t*>(outTensor.data());
        bool hasDenormals = false;
        for (size_t i = 0; i < outTensor.get_size(); ++i) {
            // non-zero bit pattern with all-zero exponent bits => subnormal value
            if (data[i] && (data[i] & (0xff << 23)) == 0) {
                hasDenormals = true;
            }
        }
        ASSERT_FALSE(hasDenormals);
    }

    void SetUp() override {
        constexpr size_t alignment = 64; // cache line size in bytes; keeps the constant buffer aligned so the input node does not reallocate it, which would nullify the planted denormals
        const ov::Shape inpShape = {1, 24, 3, 3};
        targetStaticShapes.push_back({inpShape});
        targetDevice = CommonTestUtils::DEVICE_CPU;

        const auto elemsCount = shape_size(inpShape);
        const auto rtPrc = ov::element::f32;
        auto params = ngraph::builder::makeParams(rtPrc, {inpShape});
        pConstStorage.reset(new AlignedBufferWrapper<float>(elemsCount, alignment));

        auto constTensor = std::make_shared<ov::HostTensor>(rtPrc, inpShape, pConstStorage->get_ptr());
        auto constNode = std::make_shared<ngraph::opset1::Constant>(constTensor);
        ov::NodeVector input = {params[0], constNode};
        auto concat = std::make_shared<ngraph::opset1::Concat>(input, 1);

        ov::ResultVector results{std::make_shared<ngraph::opset1::Result>(concat->output(0))};
        function = std::make_shared<ngraph::Function>(results, params, "denormal_check");
    }
};
TEST_F(DenormalNullifyCheck, smoke_CPU_Denormal_Check) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    using indexInterval = std::pair<size_t, size_t>;
    size_t elemsCount = pConstStorage->size();
    const indexInterval intervals[] = {
        {0, elemsCount/2},
        {elemsCount/2, elemsCount},
        {0, elemsCount}
    };

    constexpr unsigned seed = 1u;
    constexpr unsigned denormalsCount = 15u;
    // Generate(denormalsRange) + 1 yields bit patterns in [1, 0x7fffff]:
    // zero sign and exponent bits, non-zero mantissa, i.e. a positive subnormal
    constexpr uint32_t denormalsRange = (0xffffffffu >> 9u) - 1;
    testing::internal::Random random(seed);
    auto randomRange = NGraphFunctions::Utils::generateVector<ov::element::f32>(elemsCount, 10, -10);

    for (auto& interval : intervals) {
        // plant denormals at random positions inside the current interval
        auto randomIndices = NGraphFunctions::Utils::generateVector<ov::element::u32>(denormalsCount, interval.second, interval.first);
        std::unordered_set<decltype(randomIndices)::value_type> randomIndexSet(randomIndices.begin(), randomIndices.end());
        for (size_t i = 0; i < elemsCount; ++i) {
            if (randomIndexSet.count(i)) {
                auto denormal = random.Generate(denormalsRange) + 1;
                pConstStorage->get_ptr()[i] = *(reinterpret_cast<float*>(&denormal));
            } else {
                pConstStorage->get_ptr()[i] = randomRange[i];
            }
        }
        run();
    }
}
} // namespace SubgraphTestsDefinitions
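
A side note on the test's construction of denormals (not part of the commit): the generated bit pattern has zero sign and exponent bits with a non-zero mantissa, which reinterprets as a positive subnormal float. A small standalone check of that property, using std::memcpy in place of the test's reinterpret_cast:

#include <cassert>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstring>

int main() {
    const uint32_t denormalsRange = (0xffffffffu >> 9u) - 1;  // 0x7ffffe
    const uint32_t bits = denormalsRange / 2 + 1;             // any value in [1, 0x7fffff] works
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    assert(std::fpclassify(f) == FP_SUBNORMAL);  // zero exponent, non-zero mantissa
    assert(f > 0.0f && f < FLT_MIN);             // strictly below the smallest normal float
    return 0;
}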


@@ -46,18 +46,18 @@
 <model path="public/mtcnn/mtcnn-r/FP16/mtcnn-r.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="908700" vmpeak="908700" vmrss="43227" vmhwm="43227" />
 <model path="public/mtcnn/mtcnn-r/FP16/mtcnn-r.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="2107846" vmpeak="2145161" vmrss="235248" vmhwm="305996" />
-<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="1120184" vmpeak="1120184" vmrss="359200" vmhwm="359200" />
+<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="1367288" vmpeak="1367288" vmrss="468748" vmhwm="468748" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
 <model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="GPU" vmsize="1630569" vmpeak="1752530" vmrss="546364" vmhwm="874426" />
 <model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="inference_with_streams" device="CPU" vmsize="1457409" vmpeak="1458782" vmrss="572577" vmhwm="572577" />
 <model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="inference_with_streams" device="GPU" vmsize="1856405" vmpeak="1941602" vmrss="578843" vmhwm="872071" />
 <model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="1309838" vmpeak="1386434" vmrss="421626" vmhwm="421626" />
 <model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="1711881" vmpeak="1797078" vmrss="544310" vmhwm="875368" />
-<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="2405114" vmpeak="2405114" vmrss="1683084" vmhwm="1683084" />
+<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="3264497" vmpeak="3264497" vmrss="2393794" vmhwm="2393794" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
 <model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="GPU" vmsize="2206724" vmpeak="2551770" vmrss="1023926" vmhwm="1487049" />
 <model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="inference_with_streams" device="CPU" vmsize="3430388" vmpeak="3600781" vmrss="2424016" vmhwm="2424016" />
 <model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="inference_with_streams" device="GPU" vmsize="2408536" vmpeak="2649150" vmrss="1052251" vmhwm="1493044" />
-<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="2406284" vmpeak="2490311" vmrss="1716967" vmhwm="1716967" />
+<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="3355554" vmpeak="3440221" vmrss="2426278" vmhwm="2426278" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
 <model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="2292227" vmpeak="2549414" vmrss="1002950" vmhwm="1461891" />
 <!--Models with FP16-INT8 precision-->