resolve TODO in efficient compression: return hashing compression back (#21141)
* turn on compression by searching duplicate blobs (by hash) for fp16 * style-fix * apply comments * added multimap * style fix
This commit is contained in:
parent
8231d57c38
commit
493a338ad2
@ -496,7 +496,7 @@ void convert_from_f32_to_f16_with_clamp(const float* arg, float16* out, size_t c
|
||||
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
|
||||
convert_impl<float, float16, true>(arg, out, count);
|
||||
#else
|
||||
// FIXME: duplicate and stub for ARM, provide more optimized solution
|
||||
// FIXME CVS-125496: duplicate and stub for ARM, provide optimized solution
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
if (arg[i] > std::numeric_limits<ov::float16>::max()) {
|
||||
out[i] = std::numeric_limits<ov::float16>::max();
|
||||
|
@ -89,7 +89,7 @@ class ConstantWriter {
|
||||
public:
|
||||
using FilePosition = int64_t;
|
||||
using HashValue = size_t;
|
||||
using ConstWritePositions = std::unordered_map<HashValue, std::pair<FilePosition, void const*>>;
|
||||
using ConstWritePositions = std::multimap<HashValue, std::pair<FilePosition, void const*>>;
|
||||
|
||||
ConstantWriter(std::ostream& bin_data, bool enable_compression = true)
|
||||
: m_binary_output(bin_data),
|
||||
@ -105,56 +105,53 @@ public:
|
||||
const auto offset = write_pos - m_blob_offset;
|
||||
*new_size = size;
|
||||
|
||||
if (!m_enable_compression || compress_to_fp16) {
|
||||
write_with_optional_fp16_compression(ptr, size, new_size, compress_to_fp16, src_type);
|
||||
if (!m_enable_compression) {
|
||||
if (!compress_to_fp16) {
|
||||
m_binary_output.write(ptr, size);
|
||||
} else {
|
||||
OPENVINO_ASSERT(size % src_type.size() == 0);
|
||||
auto fp16_buffer = compress_data_to_fp16(ptr, size, src_type, new_size);
|
||||
m_binary_output.write(fp16_buffer.get(), *new_size);
|
||||
}
|
||||
return offset;
|
||||
} else {
|
||||
std::unique_ptr<char[]> fp16_buffer = nullptr;
|
||||
if (compress_to_fp16) {
|
||||
OPENVINO_ASSERT(size % src_type.size() == 0);
|
||||
fp16_buffer = compress_data_to_fp16(ptr, size, src_type, new_size);
|
||||
}
|
||||
const char* ptr_to_write;
|
||||
if (fp16_buffer) {
|
||||
ptr_to_write = fp16_buffer.get();
|
||||
} else {
|
||||
ptr_to_write = ptr;
|
||||
}
|
||||
|
||||
// This hash is weak (but efficient). For example, the current hash algorithm gives
|
||||
// the same hash for {2, 2} and {0, 128} arrays.
|
||||
// But even strong hashing algorithms sometimes give collisions.
|
||||
// Therefore we always have to compare values when finding a match in the hash multimap.
|
||||
const HashValue hash = hash_combine(ptr_to_write, *new_size);
|
||||
auto found = m_hash_to_file_positions.find(hash);
|
||||
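// Walk candidate entries starting from the one returned by find(hash).
// NOTE(review): this loop runs until end(), i.e. past the entries sharing this hash; equal_range(hash) would bound it to the matches — confirm.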
|
||||
while (found != m_hash_to_file_positions.end()) {
|
||||
if (memcmp(ptr, found->second.second, size) == 0)
|
||||
return found->second.first;
|
||||
found++;
|
||||
}
|
||||
// Since fp16_compressed data will be disposed at exit point and since we cannot reread it from the ostream,
|
||||
// we store pointer to the original uncompressed blob.
|
||||
m_hash_to_file_positions.insert({hash, {offset, static_cast<void const*>(ptr)}});
|
||||
m_binary_output.write(ptr_to_write, *new_size);
|
||||
}
|
||||
// TODO: Find a way to keep both types of compression (m_enable_compression and compress_to_fp16)
|
||||
// simultaneously. Disabled usual compression by m_enable_compression for those constants that are requested to
|
||||
// be compressed by compress_to_fp16 for now. To implement both compression types applied simultaneously
|
||||
// we need to save element_type for each constant in the cache together with the compression status
|
||||
// that implies a wider impact and requires a more accurate implementation of cache handling.
|
||||
// When FP16 compression is turned on together with the usual compression enabled by m_enable_compression, we
|
||||
// can avoid comparing FP32 weights, but it would require comparing with data from a file, because on-the-fly
|
||||
// converted FP16 constants are not kept in memory.
|
||||
|
||||
// This hash is weak (but efficient) and must be replaced with some other
|
||||
// more stable hash algorithm. For example, the current hash algorithm gives
|
||||
// the same hash for {2, 2} and {0, 128} arrays. So we have to compare
|
||||
// values when finding a match in hash map.
|
||||
const HashValue hash = hash_combine(ptr, size);
|
||||
const auto found = m_hash_to_file_positions.find(hash);
|
||||
if (found != end(m_hash_to_file_positions) &&
|
||||
memcmp(static_cast<void const*>(ptr), found->second.second, size) == 0) {
|
||||
return found->second.first;
|
||||
}
|
||||
|
||||
write_with_optional_fp16_compression(ptr, size, new_size, compress_to_fp16, src_type);
|
||||
m_hash_to_file_positions.insert({hash, {offset, static_cast<void const*>(ptr)}});
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
private:
|
||||
void write_with_optional_fp16_compression(const char* ptr,
|
||||
size_t size,
|
||||
size_t* new_size,
|
||||
bool compress_to_fp16 = false,
|
||||
ov::element::Type src_type = ov::element::dynamic) {
|
||||
if (!compress_to_fp16) {
|
||||
m_binary_output.write(ptr, size);
|
||||
} else {
|
||||
OPENVINO_ASSERT(size % src_type.size() == 0);
|
||||
auto fp16_buffer = compress_data_to_fp16(ptr, size, src_type, new_size);
|
||||
m_binary_output.write(fp16_buffer.get(), *new_size);
|
||||
// Compressed data is disposed
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<char[]> compress_data_to_fp16(const char* ptr,
|
||||
size_t size,
|
||||
ov::element::Type src_type,
|
||||
size_t* compressed_size) {
|
||||
static std::unique_ptr<char[]> compress_data_to_fp16(const char* ptr,
|
||||
size_t size,
|
||||
ov::element::Type src_type,
|
||||
size_t* compressed_size) {
|
||||
auto num_src_elements = size / src_type.size();
|
||||
*compressed_size = num_src_elements * ov::element::f16.size();
|
||||
if (src_type == ov::element::f32) {
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "common_test_utils/test_common.hpp"
|
||||
#include "openvino/opsets/opset8.hpp"
|
||||
#include "openvino/pass/serialize.hpp"
|
||||
#include "transformations/common_optimizations/compress_float_constants.hpp"
|
||||
|
||||
class SerializationConstantCompressionTest : public ov::test::TestsCommon {
|
||||
protected:
|
||||
@ -51,7 +52,7 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsI32) {
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
}
|
||||
|
||||
TEST_F(SerializationConstantCompressionTest, IdenticalConstantsI64) {
|
||||
@ -68,7 +69,24 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsI64) {
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int64_t));
|
||||
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int64_t));
|
||||
}
|
||||
|
||||
// Two f32 constants with identical contents are serialized after running
// CompressFloatConstants with postponed=true. The test expects the .bin file
// size to equal that of a single f16 copy of the data (unique_const_count == 1).
TEST_F(SerializationConstantCompressionTest, IdenticalConstantsFP32_COMPRESSED_TO_F16) {
|
||||
constexpr int unique_const_count = 1;
|
||||
const ov::Shape shape{2, 2, 2};
|
||||
|
||||
auto A = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
auto B = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::NodeVector{A, B}, ov::ParameterVector{});
|
||||
ov::pass::CompressFloatConstants(/*postponed=*/true).run_on_model(model);
|
||||
ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1).run_on_model(model);
|
||||
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
// Size is computed with sizeof(ov::float16), not sizeof(float): the stored blob is expected to be f16.
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
|
||||
}
|
||||
|
||||
TEST_F(SerializationConstantCompressionTest, IdenticalConstantsFP16) {
|
||||
@ -85,7 +103,7 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsFP16) {
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
|
||||
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
|
||||
}
|
||||
|
||||
TEST_F(SerializationConstantCompressionTest, IdenticalConstantsFP32) {
|
||||
@ -102,7 +120,7 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsFP32) {
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(float));
|
||||
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(float));
|
||||
}
|
||||
|
||||
TEST_F(SerializationConstantCompressionTest, NonIdenticalConstantsI64) {
|
||||
@ -120,7 +138,27 @@ TEST_F(SerializationConstantCompressionTest, NonIdenticalConstantsI64) {
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int64_t));
|
||||
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int64_t));
|
||||
}
|
||||
|
||||
// Four i64 constants holding two distinct value sets ({2, 2} and {0, 128}),
// each appearing twice. The test expects the .bin file to contain exactly two
// blobs (unique_const_count == 2).
TEST_F(SerializationConstantCompressionTest, NonIdenticalConstantsI64_CHECK_MULTIMAP) {
|
||||
constexpr int unique_const_count = 2;
|
||||
const ov::Shape shape{2};
|
||||
|
||||
// hash_combine returns the same hash for these two constants, so the contents of the arrays are checked as well
|
||||
auto A = ov::opset8::Constant::create(ov::element::i64, shape, {2, 2});
|
||||
auto B = ov::opset8::Constant::create(ov::element::i64, shape, {0, 128});
|
||||
auto C = ov::opset8::Constant::create(ov::element::i64, shape, {2, 2});
|
||||
auto D = ov::opset8::Constant::create(ov::element::i64, shape, {0, 128});
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::NodeVector{A, B, C, D}, ov::ParameterVector{});
|
||||
|
||||
ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1).run_on_model(model);
|
||||
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int64_t));
|
||||
}
|
||||
|
||||
TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwo) {
|
||||
@ -139,7 +177,26 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwo) {
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
}
|
||||
|
||||
// Two pairs of identical f32 constants (A == B, C == D) are serialized after
// running CompressFloatConstants with postponed=true. The test expects the
// .bin file size to equal two f16 copies of the shape (unique_const_count == 2).
TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwo_FP32_COMPRESSED_TO_FP16) {
|
||||
constexpr int unique_const_count = 2;
|
||||
const ov::Shape shape{2, 2, 2};
|
||||
|
||||
auto A = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
auto B = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
auto C = ov::opset8::Constant::create(ov::element::f32, shape, {0, 3, 1, 2, 5, 6, 25, 3});
|
||||
auto D = ov::opset8::Constant::create(ov::element::f32, shape, {0, 3, 1, 2, 5, 6, 25, 3});
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::NodeVector{A, B, C, D}, ov::ParameterVector{});
|
||||
ov::pass::CompressFloatConstants(/*postponed=*/true).run_on_model(model);
|
||||
ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1).run_on_model(model);
|
||||
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
// Size is computed with sizeof(ov::float16): the stored blobs are expected to be f16.
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
|
||||
}
|
||||
|
||||
TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwoMultipleOccurrences) {
|
||||
@ -160,7 +217,49 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwoMultipleO
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
}
|
||||
|
||||
// Six f32 constants alternating between two value sets (A == C == E,
// B == D == F) are serialized after running CompressFloatConstants with
// postponed=true. The test expects the .bin file size to equal two f16 copies
// of the shape (unique_const_count == 2).
TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwoMultipleOccurrences_FP32_COMPRESSED_TO_FP16) {
|
||||
constexpr int unique_const_count = 2;
|
||||
const ov::Shape shape{2, 2, 2};
|
||||
|
||||
auto A = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
auto B = ov::opset8::Constant::create(ov::element::f32, shape, {0, 3, 1, 2, 5, 6, 25, 3});
|
||||
auto C = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
auto D = ov::opset8::Constant::create(ov::element::f32, shape, {0, 3, 1, 2, 5, 6, 25, 3});
|
||||
auto E = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
auto F = ov::opset8::Constant::create(ov::element::f32, shape, {0, 3, 1, 2, 5, 6, 25, 3});
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::NodeVector{A, B, C, D, E, F}, ov::ParameterVector{});
|
||||
ov::pass::CompressFloatConstants(/*postponed=*/true).run_on_model(model);
|
||||
ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1).run_on_model(model);
|
||||
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
// Size is computed with sizeof(ov::float16): the stored blobs are expected to be f16.
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
|
||||
}
|
||||
|
||||
// Six f64 constants alternating between two value sets (A == C == E,
// B == D == F) are serialized after running CompressFloatConstants with
// postponed=true. The test expects the .bin file size to equal two f16 copies
// of the shape (unique_const_count == 2).
TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwoMultipleOccurrences_FP64_COMPRESSED_TO_FP16) {
|
||||
constexpr int unique_const_count = 2;
|
||||
const ov::Shape shape{2, 2, 2};
|
||||
|
||||
auto A = ov::opset8::Constant::create(ov::element::f64, shape, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
auto B = ov::opset8::Constant::create(ov::element::f64, shape, {0, 3, 1, 2, 5, 6, 25, 3});
|
||||
auto C = ov::opset8::Constant::create(ov::element::f64, shape, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
auto D = ov::opset8::Constant::create(ov::element::f64, shape, {0, 3, 1, 2, 5, 6, 25, 3});
|
||||
auto E = ov::opset8::Constant::create(ov::element::f64, shape, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
auto F = ov::opset8::Constant::create(ov::element::f64, shape, {0, 3, 1, 2, 5, 6, 25, 3});
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::NodeVector{A, B, C, D, E, F}, ov::ParameterVector{});
|
||||
ov::pass::CompressFloatConstants(/*postponed=*/true).run_on_model(model);
|
||||
ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1).run_on_model(model);
|
||||
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
// Size is computed with sizeof(ov::float16) even though the sources are f64: the stored blobs are expected to be f16.
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
|
||||
}
|
||||
|
||||
TEST_F(SerializationConstantCompressionTest, NonIdenticalConstants) {
|
||||
@ -177,7 +276,7 @@ TEST_F(SerializationConstantCompressionTest, NonIdenticalConstants) {
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
}
|
||||
|
||||
TEST_F(SerializationConstantCompressionTest, IdenticalConstantsDifferentTypesI32I64) {
|
||||
@ -194,7 +293,7 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsDifferentTypesI32
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
}
|
||||
|
||||
TEST_F(SerializationConstantCompressionTest, IdenticalConstantsDifferentTypesI32I8) {
|
||||
@ -211,5 +310,5 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsDifferentTypesI32
|
||||
std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
|
||||
std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
|
||||
|
||||
ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user