Fix exponent overflow in BigFloat#to_s for very large values (#14982)

This is similar to #14971 where the base-10 exponent returned by `LibGMP.mpf_get_str` is not large enough to handle all values internally representable by GMP / MPIR.
crystal-lang · Sep 10, 2024 · b5f2468 · b5f2468
1 parent 4023f52
commit b5f2468
Show file tree

Hide file tree

Showing 2 changed files with 59 additions and 1 deletion.
diff --git a/spec/std/big/big_float_spec.cr b/spec/std/big/big_float_spec.cr
@@ -345,6 +345,13 @@ describe "BigFloat" do
     it { assert_prints (0.1).to_big_f.to_s, "0.100000000000000005551" }
     it { assert_prints Float64::MAX.to_big_f.to_s, "1.79769313486231570815e+308" }
     it { assert_prints Float64::MIN_POSITIVE.to_big_f.to_s, "2.22507385850720138309e-308" }
+
+    it { (2.to_big_f ** 7133786264).to_s.should end_with("e+2147483648") }  # least power of two with a base-10 exponent greater than Int32::MAX
+    it { (2.to_big_f ** -7133786264).to_s.should end_with("e-2147483649") } # least power of two with a base-10 exponent less than Int32::MIN
+    it { (10.to_big_f ** 3000000000 * 1.5).to_s.should end_with("e+3000000000") }
+    it { (10.to_big_f ** -3000000000 * 1.5).to_s.should end_with("e-3000000000") }
+    it { (10.to_big_f ** 10000000000 * 1.5).to_s.should end_with("e+10000000000") }
+    it { (10.to_big_f ** -10000000000 * 1.5).to_s.should end_with("e-10000000000") }
   end
 
   describe "#inspect" do

diff --git a/src/big/big_float.cr b/src/big/big_float.cr
@@ -362,10 +362,12 @@ struct BigFloat < Float
   end
 
   def to_s(io : IO) : Nil
-    cstr = LibGMP.mpf_get_str(nil, out decimal_exponent, 10, 0, self)
+    cstr = LibGMP.mpf_get_str(nil, out orig_decimal_exponent, 10, 0, self)
     length = LibC.strlen(cstr)
     buffer = Slice.new(cstr, length)
 
+    decimal_exponent = fix_exponent_overflow(orig_decimal_exponent)
+
     # add negative sign
     if buffer[0]? == 45 # '-'
       io << '-'
@@ -415,6 +417,55 @@ struct BigFloat < Float
     end
   end
 
+  # The same `LibGMP::MpExp` is used in `LibGMP::MPF` to represent a
+  # `BigFloat`'s exponent in base `256 ** sizeof(LibGMP::MpLimb)`, and to return
+  # a base-10 exponent in `LibGMP.mpf_get_str`. The latter is around 9.6x the
+  # former when `MpLimb` is 32-bit, or around 19.3x when `MpLimb` is 64-bit.
+  # This means the base-10 exponent will overflow for the majority of `MpExp`'s
+  # domain, even though `BigFloat`s will work correctly in this exponent range
+  # otherwise. This method exists to recover the original exponent for `#to_s`.
+  #
+  # Note that if `MpExp` is 64-bit, which is the case for non-Windows 64-bit
+  # targets, then `mpf_get_str` will simply crash for values above
+  # `2 ** 0x1_0000_0000_0000_0080`; here `exponent10` is around 5.553e+18, and
+  # never overflows. Thus there is no need to check for overflow in that case.
+  private def fix_exponent_overflow(exponent10)
+    {% if LibGMP::MpExp == Int64 %}
+      exponent10
+    {% else %}
+      # When `self` is non-zero,
+      #
+      #     @mpf.@_mp_exp == Math.log(abs, 256.0 ** sizeof(LibGMP::MpLimb)).floor + 1
+      #     @mpf.@_mp_exp - 1 <= Math.log(abs, 256.0 ** sizeof(LibGMP::MpLimb)) < @mpf.@_mp_exp
+      #     @mpf.@_mp_exp - 1 <= Math.log10(abs) / Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) < @mpf.@_mp_exp
+      #     Math.log10(abs) >= (@mpf.@_mp_exp - 1) * Math.log10(256.0 ** sizeof(LibGMP::MpLimb))
+      #     Math.log10(abs) <   @mpf.@_mp_exp      * Math.log10(256.0 ** sizeof(LibGMP::MpLimb))
+      #
+      # And also,
+      #
+      #     exponent10 == Math.log10(abs).floor + 1
+      #     exponent10 - 1 <= Math.log10(abs) < exponent10
+      #
+      # When `exponent10` overflows, it differs from its real value by an
+      # integer multiple of `256.0 ** sizeof(LibGMP::MpExp)`. We have to recover
+      # the integer `overflow_n` such that:
+      #
+      #     LibGMP::MpExp::MIN <= exponent10 <= LibGMP::MpExp::MAX
+      #     Math.log10(abs) ~= exponent10 + overflow_n * 256.0 ** sizeof(LibGMP::MpExp)
+      #                     ~= @mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb))
+      #
+      # Because the possible intervals for the real `exponent10` are so far apart,
+      # it suffices to approximate `overflow_n` as follows:
+      #
+      #     overflow_n ~= (@mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) - exponent10) / 256.0 ** sizeof(LibGMP::MpExp)
+      #
+      # This value will be very close to an integer, which we then obtain with
+      # `#round`.
+      overflow_n = ((@mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) - exponent10) / 256.0 ** sizeof(LibGMP::MpExp))
+      exponent10.to_i64 + overflow_n.round.to_i64 * (256_i64 ** sizeof(LibGMP::MpExp))
+    {% end %}
+  end
+
   def clone
     self
   end