From 1e6855efe560934f5fab1e4980ffff4ec5690777 Mon Sep 17 00:00:00 2001
From: John Ralls <jralls@ceridwen.us>
Date: Sun, 9 Nov 2014 11:55:59 -0800
Subject: [PATCH] Implement operator*= for QofInt128

---
 src/libqof/qof/qofint128.cpp            | 102 ++++++++++++++++++++++++
 src/libqof/qof/qofint128.hpp            |   4 +
 src/libqof/qof/test/gtest-qofint128.cpp |  20 +++++
 3 files changed, 126 insertions(+)

diff --git a/src/libqof/qof/qofint128.cpp b/src/libqof/qof/qofint128.cpp
index 09084c6988..8cc05806f4 100644
--- a/src/libqof/qof/qofint128.cpp
+++ b/src/libqof/qof/qofint128.cpp
@@ -210,6 +210,108 @@ QofInt128::operator-= (const QofInt128& b) noexcept
 QofInt128&
 QofInt128::operator*= (const QofInt128& b) noexcept
 {
+    /* Test for 0 first */
+    if (isZero() || b.isZero())
+    {
+        m_hi = m_lo = 0;
+        return *this;
+    }
+    /* Test for overflow before spending time on the calculation */
+    if (m_hi && b.m_hi)
+    {
+        m_flags |= overflow;
+        return *this;
+    }
+    uint abits {}, bbits {};
+    uint64_t temp {m_lo};
+    do
+        ++abits;
+    while (temp >>= 1);
+    temp = m_hi;
+    do
+        ++abits;
+    while (temp >>= 1);
+    temp = b.m_lo;
+    do
+        ++bbits;
+    while (temp >>= 1);
+    temp = b.m_hi;
+    do
+        ++bbits;
+    while (temp >>= 1);
+    if (abits + bbits > maxbits)
+    {
+        m_flags |= overflow;
+        return *this;
+    }
+    /* Handle the sign; ^ flips if b is negative */
+    m_flags ^= (b.m_flags & neg);
+    /* The trivial case */
+    if (abits + bbits <= legbits)
+    {
+        m_lo *= b.m_lo;
+        return *this;
+    }
+
+/* This is Knuth's "classical" multi-precision multiplication algorithm
+ * truncated to a QofInt128 result with the loop unrolled for clarity and with
+ * overflow and zero checks beforehand to save time. See Donald Knuth, "The Art
+ * of Computer Programming Volume 2: Seminumerical Algorithms", Addison-Wesley,
+ * 1998, p 268.
+ *
+ * There are potentially faster algorithms (O(n^1.6) instead of O(n^2) for the
+ * full precision), but this is already close to that fast because of truncation
+ * and it's not clear that the truncation is applicable to the faster
+ * algorithms.
+ */
+
+    static const uint sublegs = numlegs * 2;
+    static const uint sublegbits = legbits / 2;
+    static const uint sublegmask = (UINT64_C(1) << sublegbits) - 1;
+    uint64_t av[sublegs] {(m_lo & sublegmask), (m_lo >> sublegbits),
+            (m_hi & sublegmask), (m_hi >> sublegbits)};
+    uint64_t bv[sublegs] {(b.m_lo & sublegmask), (b.m_lo >> sublegbits),
+            (b.m_hi & sublegmask), (b.m_hi >> sublegbits)};
+    uint64_t rv[sublegs] {};
+    int64_t carry {}, scratch {};
+
+    rv[0] = av[0] * bv[0];
+
+    rv[1] = av[1] * bv [0];
+    scratch = rv[1] + av[0] * bv[1];
+    carry = rv[1] > scratch ? 1 : 0;
+    rv[1] = scratch;
+
+    rv[2] = av[2] * bv[0] + carry; //0xffff^2 + 1 < 0xffffffff, can't overflow
+    scratch = rv[2] + av[1] * bv[1];
+    carry = rv[2] > scratch ? 1 : 0;
+    rv[2] = scratch + av[0] * bv[2];
+    carry += scratch > rv[2] ? 1 : 0;
+
+    rv[3] = av[3] * bv[0] + carry;
+    scratch = rv[3] + av[2] * bv[1];
+    carry = rv[3] > scratch ? 1 : 0;
+    rv[3] = scratch + av[1] * bv[2];
+    carry += scratch > rv[3] ? 1 : 0;
+    scratch = rv[3] + av[0] * bv[3];
+    carry += rv[3] > scratch ? 1 : 0;
+    rv[3] = scratch;
+
+    if (carry) //Shouldn't happen because of the checks above
+    {
+        m_flags |= overflow;
+        return *this;
+    }
+
+    m_lo = rv[0] + (rv[1] << sublegbits);
+    carry = rv[1] >> sublegbits;
+    carry += (rv[1] << sublegbits) > m_lo || rv[0] > m_lo ? 1 : 0;
+    m_hi = rv[2] + (rv[3] << sublegbits) + carry;
+    if ((rv[3] << sublegbits) > m_hi || rv[2] > m_hi || (rv[3] >> sublegbits))
+    {
+        m_flags |= overflow;
+        return *this;
+    }
     return *this;
 }
 
diff --git a/src/libqof/qof/qofint128.hpp b/src/libqof/qof/qofint128.hpp
index 03b2ac9fd0..6b365c51ff 100644
--- a/src/libqof/qof/qofint128.hpp
+++ b/src/libqof/qof/qofint128.hpp
@@ -50,6 +50,10 @@ class QofInt128
     uint64_t m_lo;
 
 public:
+static const uint numlegs = 2;
+static const uint legbits = 64;
+static const uint maxbits = legbits * numlegs;
+
 enum // Values for m_flags
 {
     pos = 0,
diff --git a/src/libqof/qof/test/gtest-qofint128.cpp b/src/libqof/qof/test/gtest-qofint128.cpp
index 18bd59b33c..bd090ba1b9 100644
--- a/src/libqof/qof/test/gtest-qofint128.cpp
+++ b/src/libqof/qof/test/gtest-qofint128.cpp
@@ -317,3 +317,23 @@ TEST(qofint128_functions, add_and_subtract)
     EXPECT_TRUE (bigger.isOverflow());
  }
 
+TEST(qofint128_functions, multiply)
+{
+    int64_t barg {INT64_C(4878849681579065407)};
+    int64_t sarg {INT64_C(4344522355275710400)};
+    uint64_t uarg {UINT64_C(13567894392130486208)};
+
+    QofInt128 smallest (sarg);
+    QofInt128 smaller (barg);
+    QofInt128 small (uarg);
+    QofInt128 big (sarg, barg);
+    QofInt128 bigger (static_cast<uint64_t>(barg), uarg);
+
+    small *= big;
+    EXPECT_TRUE (small.isOverflow());
+    big *= bigger;
+    EXPECT_TRUE (big.isOverflow());
+    EXPECT_EQ (QofInt128(UINT64_C(1149052180967758316), UINT64_C(6323251814974894144)), smallest *= smaller);
+    EXPECT_FALSE (smallest.isOverflow());
+
+}