Resolve TODO in efficient compression: bring back hash-based deduplication for fp16-compressed constants (#21141)

* turn on compression by searching for duplicate blobs (by hash) for fp16
* style-fix
* apply comments
* added multimap
* style fix
2023-11-25 09:35:35 +01:00 · 2023-11-25 09:35:35 +01:00 · 493a338ad2
commit 493a338ad2
parent 8231d57c38
3 changed files with 152 additions and 56 deletions
--- a/src/core/reference/src/op/convert.cpp
+++ b/src/core/reference/src/op/convert.cpp
@ -496,7 +496,7 @@ void convert_from_f32_to_f16_with_clamp(const float* arg, float16* out, size_t c
 #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
    convert_impl<float, float16, true>(arg, out, count);
 #else
-    // FIXME: duplicate and stub for ARM, provide more optimized solution
+    // FIXME CVS-125496: duplicate and stub for ARM, provide optimized solution
    for (size_t i = 0; i < count; ++i) {
        if (arg[i] > std::numeric_limits<ov::float16>::max()) {
            out[i] = std::numeric_limits<ov::float16>::max();
--- a/src/core/src/pass/serialize.cpp
+++ b/src/core/src/pass/serialize.cpp
@ -89,7 +89,7 @@ class ConstantWriter {
 public:
    using FilePosition = int64_t;
    using HashValue = size_t;
-    using ConstWritePositions = std::unordered_map<HashValue, std::pair<FilePosition, void const*>>;
+    using ConstWritePositions = std::multimap<HashValue, std::pair<FilePosition, void const*>>;

    ConstantWriter(std::ostream& bin_data, bool enable_compression = true)
        : m_binary_output(bin_data),
@ -105,56 +105,53 @@ public:
        const auto offset = write_pos - m_blob_offset;
        *new_size = size;

-        if (!m_enable_compression || compress_to_fp16) {
-            write_with_optional_fp16_compression(ptr, size, new_size, compress_to_fp16, src_type);
+        if (!m_enable_compression) {
+            if (!compress_to_fp16) {
+                m_binary_output.write(ptr, size);
+            } else {
+                OPENVINO_ASSERT(size % src_type.size() == 0);
+                auto fp16_buffer = compress_data_to_fp16(ptr, size, src_type, new_size);
+                m_binary_output.write(fp16_buffer.get(), *new_size);
+            }
            return offset;
+        } else {
+            std::unique_ptr<char[]> fp16_buffer = nullptr;
+            if (compress_to_fp16) {
+                OPENVINO_ASSERT(size % src_type.size() == 0);
+                fp16_buffer = compress_data_to_fp16(ptr, size, src_type, new_size);
+            }
+            const char* ptr_to_write;
+            if (fp16_buffer) {
+                ptr_to_write = fp16_buffer.get();
+            } else {
+                ptr_to_write = ptr;
+            }
+
+            // This hash is weak (but efficient). For example, the current hash algorithm gives
+            // the same hash for the {2, 2} and {0, 128} arrays.
+            // But even strong hashing algorithms sometimes produce collisions.
+            // Therefore we always have to compare the values when a match is found in the hash multimap.
+            const HashValue hash = hash_combine(ptr_to_write, *new_size);
+            auto found = m_hash_to_file_positions.find(hash);
+            // scan from the entry returned by find(); NOTE(review): this runs to end(), not only over equal keys
+            while (found != m_hash_to_file_positions.end()) {
+                if (memcmp(ptr, found->second.second, size) == 0)
+                    return found->second.first;
+                found++;
+            }
+            // Since the fp16-compressed data is disposed of at the exit point and cannot be reread from the ostream,
+            // we store a pointer to the original uncompressed blob.
+            m_hash_to_file_positions.insert({hash, {offset, static_cast<void const*>(ptr)}});
+            m_binary_output.write(ptr_to_write, *new_size);
        }
-        // TODO: Find a way to keep both types of compression (m_enable_compression and compress_to_fp16)
-        // simultaneously. Disabled usual compression by m_enable_compression for those constants that are requested to
-        // be compressed by compress_to_fp16 for now. To implement both compression types applied simultaneously
-        // we need to save element_type for each constant in the cache together with the compression status
-        // that implies a wider impact and requires a more accurate implementation of cache handling.
-        // When FP16 compression is turned on together with the usual compression enabled by m_enable_compression, we
-        // can avoid comparing FP32 weights, but it would require comparing with data from a file, because on-the-fly
-        // converted FP16 constants are not kept in memory.
-
-        // This hash is weak (but efficient) and must be replace with some other
-        // more stable hash algorithm. For example current hash algorithms gives
-        // the same hash for {2, 2} and {0, 128} arrays. So we have to compare
-        // values when finding a match in hash map.
-        const HashValue hash = hash_combine(ptr, size);
-        const auto found = m_hash_to_file_positions.find(hash);
-        if (found != end(m_hash_to_file_positions) &&
-            memcmp(static_cast<void const*>(ptr), found->second.second, size) == 0) {
-            return found->second.first;
-        }
-
-        write_with_optional_fp16_compression(ptr, size, new_size, compress_to_fp16, src_type);
-        m_hash_to_file_positions.insert({hash, {offset, static_cast<void const*>(ptr)}});
-
        return offset;
    }

 private:
-    void write_with_optional_fp16_compression(const char* ptr,
-                                              size_t size,
-                                              size_t* new_size,
-                                              bool compress_to_fp16 = false,
-                                              ov::element::Type src_type = ov::element::dynamic) {
-        if (!compress_to_fp16) {
-            m_binary_output.write(ptr, size);
-        } else {
-            OPENVINO_ASSERT(size % src_type.size() == 0);
-            auto fp16_buffer = compress_data_to_fp16(ptr, size, src_type, new_size);
-            m_binary_output.write(fp16_buffer.get(), *new_size);
-            // Compressed data is disposed
-        }
-    }
-
-    std::unique_ptr<char[]> compress_data_to_fp16(const char* ptr,
-                                                  size_t size,
-                                                  ov::element::Type src_type,
-                                                  size_t* compressed_size) {
+    static std::unique_ptr<char[]> compress_data_to_fp16(const char* ptr,
+                                                         size_t size,
+                                                         ov::element::Type src_type,
+                                                         size_t* compressed_size) {
        auto num_src_elements = size / src_type.size();
        *compressed_size = num_src_elements * ov::element::f16.size();
        if (src_type == ov::element::f32) {
--- a/src/core/tests/pass/serialization/const_compression.cpp
+++ b/src/core/tests/pass/serialization/const_compression.cpp
@ -10,6 +10,7 @@
 #include "common_test_utils/test_common.hpp"
 #include "openvino/opsets/opset8.hpp"
 #include "openvino/pass/serialize.hpp"
+#include "transformations/common_optimizations/compress_float_constants.hpp"

 class SerializationConstantCompressionTest : public ov::test::TestsCommon {
 protected:
@ -51,7 +52,7 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsI32) {
    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);

-    ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
 }

 TEST_F(SerializationConstantCompressionTest, IdenticalConstantsI64) {
@ -68,7 +69,24 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsI64) {
    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);

-    ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int64_t));
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int64_t));
+}
+
+TEST_F(SerializationConstantCompressionTest, IdenticalConstantsFP32_COMPRESSED_TO_F16) {
+    constexpr int unique_const_count = 1;
+    const ov::Shape shape{2, 2, 2};
+
+    auto A = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
+    auto B = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
+
+    auto model = std::make_shared<ov::Model>(ov::NodeVector{A, B}, ov::ParameterVector{});
+    ov::pass::CompressFloatConstants(/*postponed=*/true).run_on_model(model);
+    ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1).run_on_model(model);
+
+    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
+    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
+
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
 }

 TEST_F(SerializationConstantCompressionTest, IdenticalConstantsFP16) {
@ -85,7 +103,7 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsFP16) {
    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);

-    ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
 }

 TEST_F(SerializationConstantCompressionTest, IdenticalConstantsFP32) {
@ -102,7 +120,7 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsFP32) {
    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);

-    ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(float));
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(float));
 }

 TEST_F(SerializationConstantCompressionTest, NonIdenticalConstantsI64) {
@ -120,7 +138,27 @@ TEST_F(SerializationConstantCompressionTest, NonIdenticalConstantsI64) {
    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);

-    ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int64_t));
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int64_t));
+}
+
+TEST_F(SerializationConstantCompressionTest, NonIdenticalConstantsI64_CHECK_MULTIMAP) {
+    constexpr int unique_const_count = 2;
+    const ov::Shape shape{2};
+
+    // hash_combine returns the same hash for these two constants, so the array contents must be compared as well
+    auto A = ov::opset8::Constant::create(ov::element::i64, shape, {2, 2});
+    auto B = ov::opset8::Constant::create(ov::element::i64, shape, {0, 128});
+    auto C = ov::opset8::Constant::create(ov::element::i64, shape, {2, 2});
+    auto D = ov::opset8::Constant::create(ov::element::i64, shape, {0, 128});
+
+    auto model = std::make_shared<ov::Model>(ov::NodeVector{A, B, C, D}, ov::ParameterVector{});
+
+    ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1).run_on_model(model);
+
+    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
+    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
+
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int64_t));
 }

 TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwo) {
@ -139,7 +177,26 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwo) {
    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);

-    ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
+}
+
+TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwo_FP32_COMPRESSED_TO_FP16) {
+    constexpr int unique_const_count = 2;
+    const ov::Shape shape{2, 2, 2};
+
+    auto A = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
+    auto B = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
+    auto C = ov::opset8::Constant::create(ov::element::f32, shape, {0, 3, 1, 2, 5, 6, 25, 3});
+    auto D = ov::opset8::Constant::create(ov::element::f32, shape, {0, 3, 1, 2, 5, 6, 25, 3});
+
+    auto model = std::make_shared<ov::Model>(ov::NodeVector{A, B, C, D}, ov::ParameterVector{});
+    ov::pass::CompressFloatConstants(/*postponed=*/true).run_on_model(model);
+    ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1).run_on_model(model);
+
+    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
+    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
+
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
 }

 TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwoMultipleOccurrences) {
@ -160,7 +217,49 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwoMultipleO
    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);

-    ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
+}
+
+TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwoMultipleOccurrences_FP32_COMPRESSED_TO_FP16) {
+    constexpr int unique_const_count = 2;
+    const ov::Shape shape{2, 2, 2};
+
+    auto A = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
+    auto B = ov::opset8::Constant::create(ov::element::f32, shape, {0, 3, 1, 2, 5, 6, 25, 3});
+    auto C = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
+    auto D = ov::opset8::Constant::create(ov::element::f32, shape, {0, 3, 1, 2, 5, 6, 25, 3});
+    auto E = ov::opset8::Constant::create(ov::element::f32, shape, {1, 2, 3, 4, 5, 6, 7, 8});
+    auto F = ov::opset8::Constant::create(ov::element::f32, shape, {0, 3, 1, 2, 5, 6, 25, 3});
+
+    auto model = std::make_shared<ov::Model>(ov::NodeVector{A, B, C, D, E, F}, ov::ParameterVector{});
+    ov::pass::CompressFloatConstants(/*postponed=*/true).run_on_model(model);
+    ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1).run_on_model(model);
+
+    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
+    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
+
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
+}
+
+TEST_F(SerializationConstantCompressionTest, IdenticalConstantsTimesTwoMultipleOccurrences_FP64_COMPRESSED_TO_FP16) {
+    constexpr int unique_const_count = 2;
+    const ov::Shape shape{2, 2, 2};
+
+    auto A = ov::opset8::Constant::create(ov::element::f64, shape, {1, 2, 3, 4, 5, 6, 7, 8});
+    auto B = ov::opset8::Constant::create(ov::element::f64, shape, {0, 3, 1, 2, 5, 6, 25, 3});
+    auto C = ov::opset8::Constant::create(ov::element::f64, shape, {1, 2, 3, 4, 5, 6, 7, 8});
+    auto D = ov::opset8::Constant::create(ov::element::f64, shape, {0, 3, 1, 2, 5, 6, 25, 3});
+    auto E = ov::opset8::Constant::create(ov::element::f64, shape, {1, 2, 3, 4, 5, 6, 7, 8});
+    auto F = ov::opset8::Constant::create(ov::element::f64, shape, {0, 3, 1, 2, 5, 6, 25, 3});
+
+    auto model = std::make_shared<ov::Model>(ov::NodeVector{A, B, C, D, E, F}, ov::ParameterVector{});
+    ov::pass::CompressFloatConstants(/*postponed=*/true).run_on_model(model);
+    ov::pass::Serialize(m_out_xml_path_1, m_out_bin_path_1).run_on_model(model);
+
+    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
+    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);
+
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(ov::float16));
 }

 TEST_F(SerializationConstantCompressionTest, NonIdenticalConstants) {
@ -177,7 +276,7 @@ TEST_F(SerializationConstantCompressionTest, NonIdenticalConstants) {
    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);

-    ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
 }

 TEST_F(SerializationConstantCompressionTest, IdenticalConstantsDifferentTypesI32I64) {
@ -194,7 +293,7 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsDifferentTypesI32
    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);

-    ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
 }

 TEST_F(SerializationConstantCompressionTest, IdenticalConstantsDifferentTypesI32I8) {
@ -211,5 +310,5 @@ TEST_F(SerializationConstantCompressionTest, IdenticalConstantsDifferentTypesI32
    std::ifstream xml_1(m_out_xml_path_1, std::ios::binary);
    std::ifstream bin_1(m_out_bin_path_1, std::ios::binary);

-    ASSERT_TRUE(file_size(bin_1) == unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
+    ASSERT_EQ(file_size(bin_1), unique_const_count * ov::shape_size(shape) * sizeof(int32_t));
 }