From af7046de53bacf5a2d791ab004f1b26dc10b0cfa Mon Sep 17 00:00:00 2001
From: Quinton Miller <nicetas.c@gmail.com>
Date: Fri, 6 Sep 2024 20:10:01 +0800
Subject: [PATCH 1/2] Fix exponent overflow in `BigFloat#to_s`

---
 spec/std/big/big_float_spec.cr |  5 ++++
 src/big/big_float.cr           | 53 +++++++++++++++++++++++++++++++++-
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/spec/std/big/big_float_spec.cr b/spec/std/big/big_float_spec.cr
index 73c6bcf06de8..9deeb12f525a 100644
--- a/spec/std/big/big_float_spec.cr
+++ b/spec/std/big/big_float_spec.cr
@@ -345,6 +345,11 @@ describe "BigFloat" do
     it { assert_prints (0.1).to_big_f.to_s, "0.100000000000000005551" }
     it { assert_prints Float64::MAX.to_big_f.to_s, "1.79769313486231570815e+308" }
     it { assert_prints Float64::MIN_POSITIVE.to_big_f.to_s, "2.22507385850720138309e-308" }
+
+    # least power of two with a base-10 exponent greater than Int32::MAX
+    (2.to_big_f ** 7133786264).to_s.should end_with("e+2147483648")
+    # least power of two with a base-10 exponent less than Int32::MIN
+    (2.to_big_f ** -7133786264).to_s.should end_with("e-2147483649")
   end
 
   describe "#inspect" do
diff --git a/src/big/big_float.cr b/src/big/big_float.cr
index ff78b7de8290..d899a4f725f4 100644
--- a/src/big/big_float.cr
+++ b/src/big/big_float.cr
@@ -362,10 +362,12 @@ struct BigFloat < Float
   end
 
   def to_s(io : IO) : Nil
-    cstr = LibGMP.mpf_get_str(nil, out decimal_exponent, 10, 0, self)
+    cstr = LibGMP.mpf_get_str(nil, out orig_decimal_exponent, 10, 0, self)
     length = LibC.strlen(cstr)
     buffer = Slice.new(cstr, length)
 
+    decimal_exponent = fix_exponent_overflow(orig_decimal_exponent)
+
     # add negative sign
     if buffer[0]? == 45 # '-'
       io << '-'
@@ -415,6 +417,55 @@ struct BigFloat < Float
     end
   end
 
+  # The same `LibGMP::MpExp` is used in `LibGMP::MPF` to represent a
+  # `BigFloat`'s exponent in base `256 ** sizeof(LibGMP::MpLimb)`, and to return
+  # a base-10 exponent in `LibGMP.mpf_get_str`. The latter is around 9.6x the
+  # former when `MpLimb` is 32-bit, or around 19.3x when `MpLimb` is 64-bit.
+  # This means the base-10 exponent will overflow for the majority of `MpExp`'s
+  # domain, even though `BigFloat`s will work correctly in this exponent range
+  # otherwise. This method exists to recover the original exponent for `#to_s`.
+  #
+  # Note that if `MpExp` is 64-bit, which is the case for non-Windows 64-bit
+  # targets, then `mpf_get_str` will simply crash for values above
+  # `2 ** 0x1_0000_0000_0000_0080`; here `exponent10` is around 5.553e+18, and
+  # never overflows. Thus there is no need to check for overflow in that case.
+  private def fix_exponent_overflow(exponent10)
+    {% if LibGMP::MpExp == Int64 %}
+      exponent10
+    {% else %}
+      # When `self` is non-zero,
+      #
+      #     @mpf.@_mp_exp == Math.log(abs, 256.0 ** sizeof(LibGMP::MpLimb)).floor + 1
+      #     @mpf.@_mp_exp - 1 <= Math.log(abs, 256.0 ** sizeof(LibGMP::MpLimb)) < @mpf.@_mp_exp
+      #     @mpf.@_mp_exp - 1 <= Math.log10(abs) / Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) < @mpf.@_mp_exp
+      #     Math.log10(abs) >= (@mpf.@_mp_exp - 1) * Math.log10(256.0 ** sizeof(LibGMP::MpLimb))
+      #     Math.log10(abs) <   @mpf.@_mp_exp      * Math.log10(256.0 ** sizeof(LibGMP::MpLimb))
+      #
+      # And also,
+      #
+      #     exponent10 == Math.log10(abs).floor + 1
+      #     exponent10 - 1 <= Math.log10(abs) < exponent10
+      #
+      # When `exponent10` overflows, it differs from its real value by an
+      # integer multiple of `256.0 ** sizeof(LibGMP::MpExp)`. We have to recover
+      # the integer `overflow_n` such that:
+      #
+      #     LibGMP::MpExp::MIN <= exponent10 <= LibGMP::MpExp::MAX
+      #     Math.log10(abs) ~= exponent10 + overflow_n * 256.0 ** sizeof(LibGMP::MpExp)
+      #                     ~= @mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb))
+      #
+      # Because the possible intervals for the real `exponent10` are so far apart,
+      # it suffices to approximate `overflow_n` as follows:
+      #
+      #     overflow_n ~= (@mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) - exponent10) / 256.0 ** sizeof(LibGMP::MpExp)
+      #
+      # This value will be very close to an integer, which we then obtain with
+      # `#round`.
+      overflow_n = ((@mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) - exponent10) / 256.0 ** sizeof(LibGMP::MpExp))
+      exponent10.to_i64 + overflow_n.to_i64 * (256_i64 ** sizeof(LibGMP::MpExp))
+    {% end %}
+  end
+
   def clone
     self
   end

From f0167f1000afc12fd73afd7f9971974f63aa6f58 Mon Sep 17 00:00:00 2001
From: Quinton Miller <nicetas.c@gmail.com>
Date: Fri, 6 Sep 2024 20:17:44 +0800
Subject: [PATCH 2/2] fixup

---
 spec/std/big/big_float_spec.cr | 10 ++++++----
 src/big/big_float.cr           |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/spec/std/big/big_float_spec.cr b/spec/std/big/big_float_spec.cr
index 9deeb12f525a..1b5e4e3893fc 100644
--- a/spec/std/big/big_float_spec.cr
+++ b/spec/std/big/big_float_spec.cr
@@ -346,10 +346,12 @@ describe "BigFloat" do
     it { assert_prints Float64::MAX.to_big_f.to_s, "1.79769313486231570815e+308" }
     it { assert_prints Float64::MIN_POSITIVE.to_big_f.to_s, "2.22507385850720138309e-308" }
 
-    # least power of two with a base-10 exponent greater than Int32::MAX
-    (2.to_big_f ** 7133786264).to_s.should end_with("e+2147483648")
-    # least power of two with a base-10 exponent less than Int32::MIN
-    (2.to_big_f ** -7133786264).to_s.should end_with("e-2147483649")
+    it { (2.to_big_f ** 7133786264).to_s.should end_with("e+2147483648") }  # least power of two with a base-10 exponent greater than Int32::MAX
+    it { (2.to_big_f ** -7133786264).to_s.should end_with("e-2147483649") } # least power of two with a base-10 exponent less than Int32::MIN
+    it { (10.to_big_f ** 3000000000 * 1.5).to_s.should end_with("e+3000000000") }
+    it { (10.to_big_f ** -3000000000 * 1.5).to_s.should end_with("e-3000000000") }
+    it { (10.to_big_f ** 10000000000 * 1.5).to_s.should end_with("e+10000000000") }
+    it { (10.to_big_f ** -10000000000 * 1.5).to_s.should end_with("e-10000000000") }
   end
 
   describe "#inspect" do
diff --git a/src/big/big_float.cr b/src/big/big_float.cr
index d899a4f725f4..f6ab46def36d 100644
--- a/src/big/big_float.cr
+++ b/src/big/big_float.cr
@@ -462,7 +462,7 @@ struct BigFloat < Float
       # This value will be very close to an integer, which we then obtain with
       # `#round`.
       overflow_n = ((@mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) - exponent10) / 256.0 ** sizeof(LibGMP::MpExp))
-      exponent10.to_i64 + overflow_n.to_i64 * (256_i64 ** sizeof(LibGMP::MpExp))
+      exponent10.to_i64 + overflow_n.round.to_i64 * (256_i64 ** sizeof(LibGMP::MpExp))
     {% end %}
   end