From af7046de53bacf5a2d791ab004f1b26dc10b0cfa Mon Sep 17 00:00:00 2001 From: Quinton Miller Date: Fri, 6 Sep 2024 20:10:01 +0800 Subject: [PATCH 1/2] Fix exponent overflow in `BigFloat#to_s` --- spec/std/big/big_float_spec.cr | 5 ++++ src/big/big_float.cr | 53 +++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/spec/std/big/big_float_spec.cr b/spec/std/big/big_float_spec.cr index 73c6bcf06de8..9deeb12f525a 100644 --- a/spec/std/big/big_float_spec.cr +++ b/spec/std/big/big_float_spec.cr @@ -345,6 +345,11 @@ describe "BigFloat" do it { assert_prints (0.1).to_big_f.to_s, "0.100000000000000005551" } it { assert_prints Float64::MAX.to_big_f.to_s, "1.79769313486231570815e+308" } it { assert_prints Float64::MIN_POSITIVE.to_big_f.to_s, "2.22507385850720138309e-308" } + + # least power of two with a base-10 exponent greater than Int32::MAX + (2.to_big_f ** 7133786264).to_s.should end_with("e+2147483648") + # least power of two with a base-10 exponent less than Int32::MIN + (2.to_big_f ** -7133786264).to_s.should end_with("e-2147483649") end describe "#inspect" do diff --git a/src/big/big_float.cr b/src/big/big_float.cr index ff78b7de8290..d899a4f725f4 100644 --- a/src/big/big_float.cr +++ b/src/big/big_float.cr @@ -362,10 +362,12 @@ struct BigFloat < Float end def to_s(io : IO) : Nil - cstr = LibGMP.mpf_get_str(nil, out decimal_exponent, 10, 0, self) + cstr = LibGMP.mpf_get_str(nil, out orig_decimal_exponent, 10, 0, self) length = LibC.strlen(cstr) buffer = Slice.new(cstr, length) + decimal_exponent = fix_exponent_overflow(orig_decimal_exponent) + # add negative sign if buffer[0]? == 45 # '-' io << '-' @@ -415,6 +417,55 @@ struct BigFloat < Float end end + # The same `LibGMP::MpExp` is used in `LibGMP::MPF` to represent a + # `BigFloat`'s exponent in base `256 ** sizeof(LibGMP::MpLimb)`, and to return + # a base-10 exponent in `LibGMP.mpf_get_str`. The latter is around 9.6x the + # former when `MpLimb` is 32-bit, or around 19.3x when `MpLimb` is 64-bit. + # This means the base-10 exponent will overflow for the majority of `MpExp`'s + # domain, even though `BigFloat`s will work correctly in this exponent range + # otherwise. This method exists to recover the original exponent for `#to_s`. + # + # Note that if `MpExp` is 64-bit, which is the case for non-Windows 64-bit + # targets, then `mpf_get_str` will simply crash for values above + # `2 ** 0x1_0000_0000_0000_0080`; here `exponent10` is around 5.553e+18, and + # never overflows. Thus there is no need to check for overflow in that case. + private def fix_exponent_overflow(exponent10) + {% if LibGMP::MpExp == Int64 %} + exponent10 + {% else %} + # When `self` is non-zero, + # + # @mpf.@_mp_exp == Math.log(abs, 256.0 ** sizeof(LibGMP::MpLimb)).floor + 1 + # @mpf.@_mp_exp - 1 <= Math.log(abs, 256.0 ** sizeof(LibGMP::MpLimb)) < @mpf.@_mp_exp + # @mpf.@_mp_exp - 1 <= Math.log10(abs) / Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) < @mpf.@_mp_exp + # Math.log10(abs) >= (@mpf.@_mp_exp - 1) * Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) + # Math.log10(abs) < @mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) + # + # And also, + # + # exponent10 == Math.log10(abs).floor + 1 + # exponent10 - 1 <= Math.log10(abs) < exponent10 + # + # When `exponent10` overflows, it differs from its real value by an + # integer multiple of `256.0 ** sizeof(LibGMP::MpExp)`. We have to recover + # the integer `overflow_n` such that: + # + # LibGMP::MpExp::MIN <= exponent10 <= LibGMP::MpExp::MAX + # Math.log10(abs) ~= exponent10 + overflow_n * 256.0 ** sizeof(LibGMP::MpExp) + # ~= @mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) + # + # Because the possible intervals for the real `exponent10` are so far apart, + # it suffices to approximate `overflow_n` as follows: + # + # overflow_n ~= (@mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) - exponent10) / 256.0 ** sizeof(LibGMP::MpExp) + # + # This value will be very close to an integer, which we then obtain with + # `#round`. + overflow_n = ((@mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) - exponent10) / 256.0 ** sizeof(LibGMP::MpExp)) + exponent10.to_i64 + overflow_n.to_i64 * (256_i64 ** sizeof(LibGMP::MpExp)) + {% end %} + end + def clone self end From f0167f1000afc12fd73afd7f9971974f63aa6f58 Mon Sep 17 00:00:00 2001 From: Quinton Miller Date: Fri, 6 Sep 2024 20:17:44 +0800 Subject: [PATCH 2/2] fixup --- spec/std/big/big_float_spec.cr | 10 ++++++---- src/big/big_float.cr | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/spec/std/big/big_float_spec.cr b/spec/std/big/big_float_spec.cr index 9deeb12f525a..1b5e4e3893fc 100644 --- a/spec/std/big/big_float_spec.cr +++ b/spec/std/big/big_float_spec.cr @@ -346,10 +346,12 @@ describe "BigFloat" do it { assert_prints Float64::MAX.to_big_f.to_s, "1.79769313486231570815e+308" } it { assert_prints Float64::MIN_POSITIVE.to_big_f.to_s, "2.22507385850720138309e-308" } - # least power of two with a base-10 exponent greater than Int32::MAX - (2.to_big_f ** 7133786264).to_s.should end_with("e+2147483648") - # least power of two with a base-10 exponent less than Int32::MIN - (2.to_big_f ** -7133786264).to_s.should end_with("e-2147483649") + it { (2.to_big_f ** 7133786264).to_s.should end_with("e+2147483648") } # least power of two with a base-10 exponent greater than Int32::MAX + it { (2.to_big_f ** -7133786264).to_s.should end_with("e-2147483649") } # least power of two with a base-10 exponent less than Int32::MIN + it { (10.to_big_f ** 3000000000 * 1.5).to_s.should end_with("e+3000000000") } + it { (10.to_big_f ** -3000000000 * 1.5).to_s.should end_with("e-3000000000") } + it { (10.to_big_f ** 10000000000 * 1.5).to_s.should end_with("e+10000000000") } + it { (10.to_big_f ** -10000000000 * 1.5).to_s.should end_with("e-10000000000") } end describe "#inspect" do diff --git a/src/big/big_float.cr b/src/big/big_float.cr index d899a4f725f4..f6ab46def36d 100644 --- a/src/big/big_float.cr +++ b/src/big/big_float.cr @@ -462,7 +462,7 @@ struct BigFloat < Float # This value will be very close to an integer, which we then obtain with # `#round`. overflow_n = ((@mpf.@_mp_exp * Math.log10(256.0 ** sizeof(LibGMP::MpLimb)) - exponent10) / 256.0 ** sizeof(LibGMP::MpExp)) - exponent10.to_i64 + overflow_n.to_i64 * (256_i64 ** sizeof(LibGMP::MpExp)) + exponent10.to_i64 + overflow_n.round.to_i64 * (256_i64 ** sizeof(LibGMP::MpExp)) {% end %} end