[CPU] Fix for subnormal numbers nullifying routine (#10622)
This commit is contained in:
parent
6062e3d4b7
commit
9e3610c028
@ -249,7 +249,7 @@ MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr<ngraph::Node>& op, const
|
||||
void MKLDNNInputNode::cloneBlobIfRequired() {
|
||||
Shape shape(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape());
|
||||
const auto prec = convertPrecision(constOp->get_element_type());
|
||||
const size_t size = shape.getRank();
|
||||
const size_t size = shape.getElementsCount();
|
||||
DnnlBlockedMemoryDesc memDesc(prec, shape);
|
||||
|
||||
auto cloneBlob = [&, this] () {
|
||||
|
@ -0,0 +1,110 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"

#include <cstdint>
#include <cstring>
#include <memory>
#include <unordered_set>
#include <utility>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace ov::test;
|
||||
namespace SubgraphTestsDefinitions {
|
||||
|
||||
template<typename T>
|
||||
class AlignedBufferWrapper {
|
||||
public:
|
||||
AlignedBufferWrapper(size_t size, size_t alignment) {
|
||||
_buffer.reset(new ngraph::runtime::AlignedBuffer(size * sizeof(T), alignment));
|
||||
}
|
||||
AlignedBufferWrapper(const AlignedBufferWrapper&) = delete;
|
||||
AlignedBufferWrapper& operator=(const AlignedBufferWrapper&) = delete;
|
||||
AlignedBufferWrapper(AlignedBufferWrapper&&) = default;
|
||||
AlignedBufferWrapper& operator=(AlignedBufferWrapper&&) = default;
|
||||
|
||||
T* get_ptr() {
|
||||
return _buffer->get_ptr<T>();
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return _buffer->size() / sizeof(T);
|
||||
}
|
||||
private:
|
||||
std::unique_ptr<ngraph::runtime::AlignedBuffer> _buffer = nullptr;
|
||||
};
|
||||
|
||||
class DenormalNullifyCheck : public SubgraphBaseTest {
|
||||
protected:
|
||||
std::unique_ptr<AlignedBufferWrapper<float>> pConstStorage;
|
||||
|
||||
void validate() override {
|
||||
const auto& actualOutputs = get_plugin_outputs();
|
||||
ASSERT_FALSE(actualOutputs.empty());
|
||||
auto& outTensor = actualOutputs.front();
|
||||
ASSERT_EQ(ov::element::f32, outTensor.get_element_type()) << "Unexpected element type";
|
||||
const uint32_t* data = reinterpret_cast<const uint32_t*>(outTensor.data());
|
||||
bool hasDenormals = false;
|
||||
for (size_t i = 0; i < outTensor.get_size(); ++i) {
|
||||
if (data[i] && (data[i] & (0xff << 23)) == 0) {
|
||||
hasDenormals = true;
|
||||
}
|
||||
}
|
||||
ASSERT_FALSE(hasDenormals);
|
||||
}
|
||||
|
||||
|
||||
void SetUp() override {
|
||||
constexpr size_t alignment = 64; // bytes cache line size, to avoid denormals zeroing due to memory reallocation in the input node implementation
|
||||
const ov::Shape inpShape = {1, 24, 3, 3};
|
||||
targetStaticShapes.push_back({inpShape});
|
||||
targetDevice = CommonTestUtils::DEVICE_CPU;
|
||||
|
||||
const auto elemsCount = shape_size(inpShape);
|
||||
const auto rtPrc = ov::element::f32;
|
||||
auto params = ngraph::builder::makeParams(rtPrc, {inpShape});
|
||||
pConstStorage.reset(new AlignedBufferWrapper<float>(elemsCount, alignment));
|
||||
|
||||
auto constTensor = std::make_shared<ov::HostTensor>(rtPrc, inpShape, pConstStorage->get_ptr());
|
||||
auto constNode = std::make_shared<ngraph::opset1::Constant>(constTensor);
|
||||
ov::NodeVector input = {params[0], constNode};
|
||||
auto concat = std::make_shared<ngraph::opset1::Concat>(input, 1);
|
||||
|
||||
ov::ResultVector results{std::make_shared<ngraph::opset1::Result>(concat->output(0))};
|
||||
|
||||
function = std::make_shared<ngraph::Function>(results, params, "denormal_check");
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(DenormalNullifyCheck, smoke_CPU_Denormal_Check) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    using indexInterval = std::pair<size_t, size_t>;
    size_t elemsCount = pConstStorage->size();
    // Seed denormals into the lower half, the upper half, and the whole
    // buffer so the nullifying routine is exercised over every region of the
    // constant.
    const indexInterval intervals[] = {
        {0, elemsCount/2},
        {elemsCount/2, elemsCount},
        {0, elemsCount}
    };

    constexpr unsigned seed = 1u;
    constexpr unsigned denormalsCount = 15u;
    // 0xffffffff >> 9 keeps only the 23 mantissa bits (sign and exponent
    // zero); "+ 1" on the generated value below excludes the all-zero word,
    // so every injected pattern is a genuine denormal.
    constexpr uint32_t denormalsRange = (0xffffffffu >> 9u) - 1;
    testing::internal::Random random(seed);
    auto randomRange = NGraphFunctions::Utils::generateVector<ov::element::f32>(elemsCount, 10, -10);

    for (auto& interval : intervals) {
        // Pick denormalsCount random slots inside the interval; duplicates
        // collapse in the set, so the effective count may be smaller.
        auto randomIndices = NGraphFunctions::Utils::generateVector<ov::element::u32>(denormalsCount, interval.second, interval.first);
        std::unordered_set<decltype(randomIndices)::value_type> randomIndexSet(randomIndices.begin(), randomIndices.end());
        for (size_t i = 0; i < elemsCount; ++i) {
            if (randomIndexSet.count(i)) {
                const uint32_t denormal = random.Generate(denormalsRange) + 1;
                // std::memcpy instead of *(reinterpret_cast<float*>(&denormal)):
                // type punning through a pointer cast violates strict aliasing
                // and is undefined behavior.
                float value;
                std::memcpy(&value, &denormal, sizeof(value));
                pConstStorage->get_ptr()[i] = value;
            } else {
                pConstStorage->get_ptr()[i] = randomRange[i];
            }
        }

        run();
    }
}
|
||||
}// namespace SubgraphTestsDefinitions
|
@ -46,18 +46,18 @@
|
||||
<model path="public/mtcnn/mtcnn-r/FP16/mtcnn-r.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="908700" vmpeak="908700" vmrss="43227" vmhwm="43227" />
|
||||
<model path="public/mtcnn/mtcnn-r/FP16/mtcnn-r.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="2107846" vmpeak="2145161" vmrss="235248" vmhwm="305996" />
|
||||
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="1120184" vmpeak="1120184" vmrss="359200" vmhwm="359200" />
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="1367288" vmpeak="1367288" vmrss="468748" vmhwm="468748" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="create_exenetwork" device="GPU" vmsize="1630569" vmpeak="1752530" vmrss="546364" vmhwm="874426" />
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="inference_with_streams" device="CPU" vmsize="1457409" vmpeak="1458782" vmrss="572577" vmhwm="572577" />
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="inference_with_streams" device="GPU" vmsize="1856405" vmpeak="1941602" vmrss="578843" vmhwm="872071" />
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="1309838" vmpeak="1386434" vmrss="421626" vmhwm="421626" />
|
||||
<model path="public/ssd300/FP16/ssd300.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="1711881" vmpeak="1797078" vmrss="544310" vmhwm="875368" />
|
||||
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="2405114" vmpeak="2405114" vmrss="1683084" vmhwm="1683084" />
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="CPU" vmsize="3264497" vmpeak="3264497" vmrss="2393794" vmhwm="2393794" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="create_exenetwork" device="GPU" vmsize="2206724" vmpeak="2551770" vmrss="1023926" vmhwm="1487049" />
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="inference_with_streams" device="CPU" vmsize="3430388" vmpeak="3600781" vmrss="2424016" vmhwm="2424016" />
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="inference_with_streams" device="GPU" vmsize="2408536" vmpeak="2649150" vmrss="1052251" vmhwm="1493044" />
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="2406284" vmpeak="2490311" vmrss="1716967" vmhwm="1716967" />
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="CPU" vmsize="3355554" vmpeak="3440221" vmrss="2426278" vmhwm="2426278" /> # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"}
|
||||
<model path="public/vgg16/FP16/vgg16.xml" precision="FP16" test="infer_request_inference" device="GPU" vmsize="2292227" vmpeak="2549414" vmrss="1002950" vmhwm="1461891" />
|
||||
|
||||
<!--Models with FP16-INT8 precision-->
|
||||
|
Loading…
Reference in New Issue
Block a user