[First Inference] Read time improvements by using 'mmap/munmap' (#10907)

* Performance improvement for constant creation

The issue: 'are_all_data_elements_bitwise_identical()' is called every time in the Constant constructor, and it potentially scans the whole buffer, which is O(N).
However, the result is only needed if a client calls 'get_all_data_elements_bitwise_identical'.

Solution:
- Defer the calculation until the first call of 'get_all_data_elements_bitwise_identical'
- Store the calculated value in a mutable class member and reuse it on subsequent calls of 'get_all_data_elements_bitwise_identical'

The test verifies both cases:
a) constant creation from shared memory data (now O(1)) is significantly faster than creation plus the O(N) bitwise check
b) once calculated, the value is taken from the cache, which is significantly faster than recalculation
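
For illustration, a minimal self-contained sketch of this lazy, cached check (class, member, and helper names here are hypothetical, not necessarily those used in ov::op::v0::Constant; the real check compares whole elements rather than raw bytes):

#include <cstddef>
#include <vector>

class ConstantSketch {
public:
    explicit ConstantSketch(std::vector<char> data) : m_data(std::move(data)) {}

    // The constructor no longer scans the buffer; the O(N) work is deferred to here.
    bool get_all_data_elements_bitwise_identical() const {
        if (!m_cache_valid) {
            m_cache = are_all_data_elements_bitwise_identical();  // O(N), first call only
            m_cache_valid = true;
        }
        return m_cache;  // O(1) on subsequent calls
    }

private:
    bool are_all_data_elements_bitwise_identical() const {
        // Simplified byte-wise comparison for the sketch.
        for (size_t i = 1; i < m_data.size(); ++i) {
            if (m_data[i] != m_data[0]) {
                return false;
            }
        }
        return true;
    }

    std::vector<char> m_data;
    // 'mutable' lets the const getter update the cached result.
    mutable bool m_cache = false;
    mutable bool m_cache_valid = false;
};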

* Fix clang-format

* Linux mmap implementation

* Windows mmap implementation + Unicode path support

* Clang fixes for Windows

* Remove debug print

* Add handling of empty bin file

* Fix Windows includes

* Fix Python test

* Unit tests
Fix for Constant with size > 4GB

* Fix review comments
Mikhail Nosov 2022-03-17 17:16:06 +03:00 committed by GitHub
parent 3a8fd7135e
commit 4b3dd808df
12 changed files with 429 additions and 38 deletions


@@ -106,6 +106,17 @@ macro(ov_add_frontend)
     set(FRONTEND_NAMES "${FRONTEND_NAMES}" CACHE INTERNAL "" FORCE)
     file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp)
+    if (WIN32)
+        # Remove linux specific files
+        file(GLOB_RECURSE LIN_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/os/lin/*.cpp
+                                    ${CMAKE_CURRENT_SOURCE_DIR}/src/os/lin/*.hpp)
+        list(REMOVE_ITEM LIBRARY_SRC "${LIN_FILES}")
+    else()
+        # Remove windows specific files
+        file(GLOB_RECURSE WIN_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/os/win/*.cpp
+                                    ${CMAKE_CURRENT_SOURCE_DIR}/src/os/win/*.hpp)
+        list(REMOVE_ITEM LIBRARY_SRC "${WIN_FILES}")
+    endif()
     file(GLOB_RECURSE LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/src/*.hpp)
     file(GLOB_RECURSE LIBRARY_PUBLIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp)


@@ -135,17 +135,21 @@ def test_batch_size_after_reshape():
 def test_serialize():
-    ie = IECore()
-    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
-    net.serialize("./serialized_net.xml", "./serialized_net.bin")
-    serialized_net = ie.read_network(model="./serialized_net.xml", weights="./serialized_net.bin")
-    func_net = ng.function_from_cnn(net)
-    ops_net = func_net.get_ordered_ops()
-    ops_net_names = [op.friendly_name for op in ops_net]
-    func_serialized_net = ng.function_from_cnn(serialized_net)
-    ops_serialized_net = func_serialized_net.get_ordered_ops()
-    ops_serialized_net_names = [op.friendly_name for op in ops_serialized_net]
-    assert ops_serialized_net_names == ops_net_names
+    def run():
+        ie = IECore()
+        net = ie.read_network(model=test_net_xml, weights=test_net_bin)
+        net.serialize("./serialized_net.xml", "./serialized_net.bin")
+        serialized_net = ie.read_network(model="./serialized_net.xml", weights="./serialized_net.bin")
+        func_net = ng.function_from_cnn(net)
+        ops_net = func_net.get_ordered_ops()
+        ops_net_names = [op.friendly_name for op in ops_net]
+        func_serialized_net = ng.function_from_cnn(serialized_net)
+        ops_serialized_net = func_serialized_net.get_ordered_ops()
+        ops_serialized_net_names = [op.friendly_name for op in ops_serialized_net]
+        assert ops_serialized_net_names == ops_net_names
+    run()
+    # xml/bin files shall not be acquired after by 'net' here, can be removed
     os.remove("./serialized_net.xml")
     os.remove("./serialized_net.bin")


@@ -204,7 +204,7 @@ void ov::op::v0::Constant::allocate_buffer(bool memset_allocation) {
 ov::op::v0::Constant::Constant(const element::Type& type, const ov::Shape& shape, const void* data)
     : Constant(false, type, shape) {
-    size_t size = ceil(shape_size(m_shape) * m_element_type.bitwidth() / 8.f);
+    size_t size = (shape_size(m_shape) * m_element_type.bitwidth() + 7) >> 3;
     std::memcpy(get_data_ptr_nc(), data, size);
 }


@@ -124,7 +124,7 @@ const std::string& ov::element::Type::c_type_string() const {
 }
 size_t ov::element::Type::size() const {
-    return std::ceil(static_cast<float>(bitwidth()) / 8.0f);
+    return (bitwidth() + 7) >> 3;
 }
 size_t ov::element::Type::hash() const {
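
The integer expression above avoids float rounding for very large sizes: a single-precision float holds only 24 significand bits, so for bit counts beyond that the 'ceil(bits / 8.f)' form can silently return a wrong byte count, which matters for the >4 GB constants this change adds tests for. A small standalone check (not part of the PR, names are illustrative) showing the two forms agree for small sizes and diverge for huge ones:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Integer byte count: round a bit count up to whole bytes without floating point.
static size_t bytes_int(size_t bits) {
    return (bits + 7) >> 3;  // same as ceil(bits / 8) for non-negative integers
}

static size_t bytes_float(size_t bits) {
    return static_cast<size_t>(std::ceil(bits / 8.0f));  // loses precision for large 'bits'
}

int main() {
    // Small sizes: both forms agree (13 bits -> 2 bytes).
    std::printf("13 bits -> %zu vs %zu bytes\n", bytes_int(13), bytes_float(13));

    // Huge constant (> 4 GB of u8 data): the float form can round to a wrong value,
    // because (1ull << 35) + 40 is not representable in a 24-bit float significand.
    size_t bits = (1ull << 35) + 40;
    std::printf("huge -> int: %zu, float: %zu\n", bytes_int(bits), bytes_float(bits));
    return 0;
}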


@@ -1776,3 +1776,20 @@ TEST(constant, lazy_bitwise_identical) {
     // '10' times is guaranteed to be faster here (typical value is ~200'000)
     EXPECT_GT(bitwise_check_count_only, bitwise_check_count * 10);
 }
+
+// Disabled just because of long execution time. Enable for nightly builds in future
+TEST(constant, DISABLED_nightly_huge_size_4GB) {
+    size_t start = 1llu << 32;
+    size_t s = start + 5;
+    std::vector<uint8_t> data(s);
+    for (size_t i = start; i < s; i++) {
+        data[i] = i - start + 42;
+    }
+    Shape shape{s};
+    op::Constant c(element::u8, shape, data.data());
+    auto v = c.get_vector<uint8_t>();
+    ASSERT_EQ(v.size(), shape_size(shape));
+    for (size_t i = start; i < s; i++) {
+        EXPECT_EQ(v[i], i - start + 42) << i << " failed";
+    }
+}


@@ -93,8 +93,112 @@ std::shared_ptr<ov::Model> create_model_if_mixed_inputs() {
     return std::make_shared<Model>(OutputVector{res}, ParameterVector{X, Y, Z});
 }
+
+std::vector<SerializationFromModelParams> get_models() {
+    auto result = std::vector<SerializationFromModelParams>{};
+    result.emplace_back(std::make_tuple(create_model_if_mixed_inputs, "Model_with_if_mixed_inputs"));
+    // Zero size
+    {
+        auto builder = []() {
+            using namespace ov;
+            auto p1 = std::make_shared<op::v0::Parameter>(element::f32, Shape{2});
+            p1->output(0).set_names({"X"});
+            auto p2 = std::make_shared<op::v0::Parameter>(element::f32, Shape{2});
+            p2->output(0).set_names({"Y"});
+            auto op = std::make_shared<opset8::Add>(p1, p2);
+            auto res = std::make_shared<op::v0::Result>(op);
+            return std::make_shared<Model>(OutputVector{res}, ParameterVector{p1, p2});
+        };
+        result.emplace_back(std::make_tuple(builder, "Model_with_no_weights"));
+    }
+    // Various constant size 2^shift
+    std::vector<size_t> shifts = {0, 1, 2, 4, 8, 16, 20};
+    for (const auto& shift : shifts) {
+        for (size_t offset = 0; offset < 2; offset++) {
+            auto s = (1llu << shift) + offset;
+            auto builder = [s]() {
+                using namespace ov;
+                auto shape = Shape{s};
+                auto data = std::vector<uint8_t>(shape_size(shape));
+                std::iota(data.begin(), data.end(), 42);
+                auto p1 = std::make_shared<op::v0::Parameter>(element::u8, shape);
+                p1->output(0).set_names({"X"});
+                auto c1 = std::make_shared<op::v0::Constant>(element::u8, shape, data.data());
+                c1->output(0).set_names({"C"});
+                auto op = std::make_shared<opset8::Add>(p1, c1);
+                auto res = std::make_shared<op::v0::Result>(op);
+                return std::make_shared<Model>(OutputVector{res}, ParameterVector{p1});
+            };
+            result.emplace_back(
+                std::make_tuple(builder,
+                                std::string("Model_size_") + std::to_string(s) + "_" + std::to_string(offset)));
+        }
+    }
+    return result;
+}
+
 INSTANTIATE_TEST_SUITE_P(IRSerializationFromModel,
                          SerializationFromModelTest,
-                         testing::Values(std::make_tuple(create_model_if_mixed_inputs, "Model_with_if_mixed_inputs")),
+                         testing::ValuesIn(get_models()),
                          SerializationFromModelTest::getTestCaseName);
 }  // namespace
+
+class SerializationFromModelTest_large : public ov::test::TestsCommon, public testing::WithParamInterface<size_t> {
+public:
+    std::string m_out_xml_path;
+    std::string m_out_bin_path;
+
+    static std::string getTestCaseName(const testing::TestParamInfo<size_t>& obj) {
+        std::string res = std::to_string(obj.param);
+        return res;
+    }
+
+    void SetUp() override {
+        std::string test_name = std::to_string(GetParam()) + "_" + GetTimestamp();
+        m_out_xml_path = test_name + ".xml";
+        m_out_bin_path = test_name + ".bin";
+    }
+
+    void TearDown() override {
+        std::remove(m_out_xml_path.c_str());
+        std::remove(m_out_bin_path.c_str());
+    }
+};
+
+// Disabled just because of long execution time. Enable for nightly builds in future
+TEST_P(SerializationFromModelTest_large, DISABLED_Model_very_large) {
+    using namespace ov;
+    std::string test_name = GetTimestamp();
+    size_t s = (1llu << GetParam()) + 5;
+    {
+        auto shape = Shape{s};
+        auto data = std::vector<uint8_t>(shape_size(shape), 42);
+        std::iota(data.begin(), data.end(), 42);
+        auto p1 = std::make_shared<op::v0::Parameter>(element::u8, shape);
+        p1->output(0).set_names({"X"});
+        auto c1 = std::make_shared<op::v0::Constant>(element::u8, shape, data.data());
+        c1->output(0).set_names({"C"});
+        auto op = std::make_shared<opset8::Add>(p1, c1);
+        auto res = std::make_shared<op::v0::Result>(op);
+        auto model = std::make_shared<Model>(OutputVector{res}, ParameterVector{p1});
+        ov::pass::Serialize(m_out_xml_path, m_out_bin_path).run_on_model(model);
+    }
+    auto actual = ov::test::readModel(m_out_xml_path, m_out_bin_path);
+    bool found = false;
+    for (const auto& op : actual->get_ordered_ops()) {
+        if (auto const1 = std::dynamic_pointer_cast<op::v0::Constant>(op)) {
+            auto ptr = const1->get_data_ptr<uint8_t>();
+            for (size_t i = 0; i < s; i++) {
+                EXPECT_EQ(ptr[i], uint8_t(i + 42)) << "Index " << i << " has value " << static_cast<int>(ptr[i]);
+            }
+            found = true;
+        }
+    }
+    EXPECT_TRUE(found);
+}
+
+namespace {
+INSTANTIATE_TEST_SUITE_P(nightly_IRSerializationFromModel_large,
+                         SerializationFromModelTest_large,
+                         testing::ValuesIn(std::vector<size_t>{32}),
+                         SerializationFromModelTest_large::getTestCaseName);
+}  // namespace


@@ -8,6 +8,7 @@
 #include <vector>
 #include "input_model.hpp"
+#include "mmap_object.hpp"
 #include "ngraph/runtime/aligned_buffer.hpp"
 #include "ngraph/runtime/shared_buffer.hpp"
 #include "openvino/core/any.hpp"
@@ -197,29 +198,8 @@ InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& variants) const
             weights_path.clear();
         }
     }
     if (!weights_path.empty()) {
-        std::ifstream bin_stream;
-        bin_stream.open(weights_path, std::ios::binary);
-        if (!bin_stream.is_open())
-#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
-            IE_THROW() << "Weights file " + ov::util::wstring_to_string(weights_path) + " cannot be opened!";
-#else
-            IE_THROW() << "Weights file " + weights_path + " cannot be opened!";
-#endif
-        bin_stream.seekg(0, std::ios::end);
-        size_t file_size = bin_stream.tellg();
-        bin_stream.seekg(0, std::ios::beg);
-        auto aligned_weights_buffer = std::make_shared<ngraph::runtime::AlignedBuffer>(file_size);
-        bin_stream.read(aligned_weights_buffer->get_ptr<char>(), aligned_weights_buffer->size());
-        bin_stream.close();
-        weights = std::make_shared<ngraph::runtime::SharedBuffer<std::shared_ptr<ngraph::runtime::AlignedBuffer>>>(
-            aligned_weights_buffer->get_ptr<char>(),
-            aligned_weights_buffer->size(),
-            aligned_weights_buffer);
+        weights = ov::load_mmap_object(weights_path);
     }
     return create_input_model();


@@ -342,7 +342,7 @@ void XmlDeserializer::on_adapter(const std::string& name, ngraph::ValueAccessor<
             IE_THROW() << "Empty weights data in bin file or bin file cannot be found!";
         if (m_weights->size() < offset + size)
             IE_THROW() << "Incorrect weights in bin file!";
-        if (size < std::ceil(ngraph::shape_size(shape) * el_type.bitwidth() / 8.f))
+        if (size < ((ngraph::shape_size(shape) * el_type.bitwidth() + 7) >> 3))
             IE_THROW() << "Attribute and shape size are inconsistent for " << type << " op!";
         char* data = m_weights->get_ptr<char>() + offset;


@@ -0,0 +1,26 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

/**
 * @brief A header file for definition of abstraction over platform specific shared memory map objects
 * @file mmap_object.hpp
 */

#pragma once

#include <memory>

#include "ngraph/runtime/aligned_buffer.hpp"

namespace ov {

std::shared_ptr<ngraph::runtime::AlignedBuffer> load_mmap_object(const std::string& path);

#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT

std::shared_ptr<ngraph::runtime::AlignedBuffer> load_mmap_object(const std::wstring& path);

#endif  // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT

}  // namespace ov
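
For orientation, a hedged sketch of how a caller might use this API (the include path and "model.bin" are placeholders; the frontend change earlier in this diff is the real call site). The key design point is that the returned SharedBuffer owns the platform-specific map holder, so the mapping stays valid for as long as any consumer keeps the buffer alive, and pages are faulted in lazily by the OS instead of being read up front:

#include <iostream>
#include <memory>

#include "mmap_object.hpp"  // declares ov::load_mmap_object

int main() {
    // Map the weights file instead of reading it into a freshly allocated buffer.
    std::shared_ptr<ngraph::runtime::AlignedBuffer> weights = ov::load_mmap_object("model.bin");

    // The buffer exposes the mapped region through the usual AlignedBuffer interface,
    // so downstream weight-consuming code does not need to change.
    std::cout << "mapped " << weights->size() << " bytes" << std::endl;
    return 0;
}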


@@ -0,0 +1,108 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <dlfcn.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <iostream>
#include <sstream>

#include "mmap_object.hpp"
#include "ngraph/runtime/shared_buffer.hpp"
#include "openvino/util/file_util.hpp"

namespace ov {

class HandleHolder {
    int m_handle = -1;
    void reset() noexcept {
        if (m_handle != -1) {
            close(m_handle);
            m_handle = -1;
        }
    }

public:
    explicit HandleHolder(int handle = -1) : m_handle(handle) {}
    HandleHolder(const HandleHolder&) = delete;
    HandleHolder& operator=(const HandleHolder&) = delete;
    HandleHolder(HandleHolder&& other) noexcept : m_handle(other.m_handle) {
        other.m_handle = -1;
    }
    HandleHolder& operator=(HandleHolder&& other) noexcept {
        if (this == &other) {
            return *this;
        }
        reset();
        m_handle = other.m_handle;
        other.m_handle = -1;
        return *this;
    }
    ~HandleHolder() {
        reset();
    }
    int get() const noexcept {
        return m_handle;
    }
};

class MapHolder {
    void* m_data = MAP_FAILED;
    size_t m_size = 0;
    HandleHolder m_handle;

public:
    MapHolder() = default;

    void set(const std::string& path) {
        int prot = PROT_READ;
        int mode = O_RDONLY;
        struct stat sb = {};
        m_handle = HandleHolder(open(path.c_str(), mode));
        OPENVINO_ASSERT(m_handle.get() != -1,
                        "Can not open file ",
                        path,
                        " for mapping. Ensure that file exists and has appropriate permissions");
        OPENVINO_ASSERT(fstat(m_handle.get(), &sb) != -1, "Can not get file size for ", path);
        m_size = sb.st_size;
        if (m_size > 0) {
            m_data = mmap(nullptr, m_size, prot, MAP_PRIVATE, m_handle.get(), 0);
            OPENVINO_ASSERT(m_data != MAP_FAILED, "Can not create file mapping for ", path, ", err=", strerror(errno));
        } else {
            m_data = MAP_FAILED;
        }
    }

    ~MapHolder() {
        if (m_data != MAP_FAILED) {
            munmap(m_data, m_size);
        }
    }

    char* data() noexcept {
        return static_cast<char*>(m_data);
    }
    size_t size() const noexcept {
        return m_size;
    }
};

std::shared_ptr<ngraph::runtime::AlignedBuffer> load_mmap_object(const std::string& path) {
    auto holder = std::make_shared<MapHolder>();
    holder->set(path);
    return std::make_shared<ngraph::runtime::SharedBuffer<std::shared_ptr<MapHolder>>>(holder->data(),
                                                                                       holder->size(),
                                                                                       holder);
}

}  // namespace ov


@@ -0,0 +1,141 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "mmap_object.hpp"
#include "ngraph/runtime/shared_buffer.hpp"
#include "openvino/util/file_util.hpp"

// clang-format off
#include <windows.h>
// clang-format on

namespace ov {

class HandleHolder {
    HANDLE m_handle = INVALID_HANDLE_VALUE;
    void reset() {
        if (m_handle != INVALID_HANDLE_VALUE) {
            ::CloseHandle(m_handle);
            m_handle = INVALID_HANDLE_VALUE;
        }
    }

public:
    explicit HandleHolder(HANDLE handle = INVALID_HANDLE_VALUE) : m_handle(handle) {}
    HandleHolder(const HandleHolder&) = delete;
    HandleHolder(HandleHolder&& other) noexcept : m_handle(other.m_handle) {
        other.m_handle = INVALID_HANDLE_VALUE;
    }
    HandleHolder& operator=(const HandleHolder&) = delete;
    HandleHolder& operator=(HandleHolder&& other) noexcept {
        if (this == &other) {
            return *this;
        }
        reset();
        m_handle = other.m_handle;
        other.m_handle = INVALID_HANDLE_VALUE;
        return *this;
    }
    ~HandleHolder() {
        reset();
    }
    HANDLE get() const noexcept {
        return m_handle;
    }
};

class MapHolder {
public:
    MapHolder() = default;

    ~MapHolder() {
        if (m_data) {
            ::UnmapViewOfFile(m_data);
        }
    }

    void set(const std::string& path) {
        auto h = ::CreateFileA(path.c_str(), GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
        map(path, h);
    }

#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
    void set(const std::wstring& path) {
        auto h = ::CreateFileW(path.c_str(), GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
        map(ov::util::wstring_to_string(path), h);
    }
#endif

    char* data() noexcept {
        return static_cast<char*>(m_data);
    }
    size_t size() const noexcept {
        return m_size;
    }

private:
    void map(const std::string& path, HANDLE h) {
        OPENVINO_ASSERT(h != INVALID_HANDLE_VALUE,
                        "Can not open file ",
                        path,
                        " for mapping. Ensure that file exists and has appropriate permissions");
        m_handle = HandleHolder(h);
        SYSTEM_INFO SystemInfo;
        GetSystemInfo(&SystemInfo);
        const int64_t page_size = SystemInfo.dwAllocationGranularity;
        DWORD file_mode = GENERIC_READ;
        DWORD map_mode = FILE_MAP_READ;
        DWORD access = PAGE_READONLY;

        LARGE_INTEGER file_size_large;
        OPENVINO_ASSERT(::GetFileSizeEx(m_handle.get(), &file_size_large) != 0, "Can not get file size for ", path);

        m_size = static_cast<uint64_t>(file_size_large.QuadPart);
        if (m_size > 0) {
            m_mapping =
                HandleHolder(::CreateFileMapping(m_handle.get(), 0, access, m_size >> 32, m_size & 0xffffffff, 0));
            OPENVINO_ASSERT(m_mapping.get() != INVALID_HANDLE_VALUE, "Can not create file mapping for ", path);

            m_data = ::MapViewOfFile(m_mapping.get(),
                                     map_mode,
                                     0,  // offset_align >> 32,
                                     0,  // offset_align & 0xffffffff,
                                     m_size);
            OPENVINO_ASSERT(m_data, "Can not create map view for ", path);
        } else {
            m_data = NULL;
        }
    }

private:
    void* m_data = NULL;
    size_t m_size = 0;
    HandleHolder m_handle;
    HandleHolder m_mapping;
};

std::shared_ptr<ngraph::runtime::AlignedBuffer> load_mmap_object(const std::string& path) {
    auto holder = std::make_shared<MapHolder>();
    holder->set(path);
    return std::make_shared<ngraph::runtime::SharedBuffer<std::shared_ptr<MapHolder>>>(holder->data(),
                                                                                       holder->size(),
                                                                                       holder);
}

#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT

std::shared_ptr<ngraph::runtime::AlignedBuffer> load_mmap_object(const std::wstring& path) {
    auto holder = std::make_shared<MapHolder>();
    holder->set(path);
    return std::make_shared<ngraph::runtime::SharedBuffer<std::shared_ptr<MapHolder>>>(holder->data(),
                                                                                       holder->size(),
                                                                                       holder);
}

#endif

}  // namespace ov


@@ -137,7 +137,7 @@ std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>>
         auto& output = outputs[resultIndex];
         output.first = results[resultIndex]->get_element_type();
         const auto& outputTensor = outputTensors[resultIndex];
-        output.second.resize(ceil(shape_size(outputTensor->get_shape()) * outputTensor->get_element_type().bitwidth() / 8.f));
+        output.second.resize((shape_size(outputTensor->get_shape()) * outputTensor->get_element_type().bitwidth() + 7) >> 3);
         outputTensors[resultIndex]->read(output.second.data(), output.second.size());
     }