Skip to content

Commit 983c27a

Browse files
committed
Base: make constructing a float from a rational exact
Implementation approach: 1. Convert the (numerator, denominator) pair to a (sign bit, integral significand, exponent) triplet using integer arithmetic. The integer type in question must be wide enough. 2. Convert the above triplet into an instance of the chosen FP type. There is special support for IEEE 754 floating-point and for `BigFloat`, otherwise a fallback using `ldexp` is used. As a bonus, constructing a `BigFloat` from a `Rational` should now be thread-safe when the rounding mode and precision are provided to the constructor, because there is no access to the global precision or rounding mode settings. Updates #45213 Updates #50940 Updates #52507 Fixes #52394 Closes #52395 Fixes #52859
1 parent 681816c commit 983c27a

File tree

8 files changed

+756
-11
lines changed

8 files changed

+756
-11
lines changed

base/Base.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ include("rounding.jl")
253253
include("div.jl")
254254
include("rawbigints.jl")
255255
include("float.jl")
256+
include("rational_to_float.jl")
256257
include("twiceprecision.jl")
257258
include("complex.jl")
258259
include("rational.jl")

base/docs/basedocs.jl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2777,6 +2777,25 @@ julia> 4/2
27772777
julia> 4.5/2
27782778
2.25
27792779
```
2780+
2781+
This function may convert integer arguments to a floating-point number type
2782+
([`AbstractFloat`](@ref)), potentially resulting in a loss of accuracy. To avoid this,
2783+
instead construct a [`Rational`](@ref) from the arguments, then convert the resulting
2784+
rational number to a specific floating-point type of your choice:
2785+
2786+
```jldoctest
2787+
julia> n = 100000000000000000
2788+
100000000000000000
2789+
2790+
julia> m = n + 6
2791+
100000000000000006
2792+
2793+
julia> n/m
2794+
1.0
2795+
2796+
julia> Float64(n//m) # `//` constructs a `Rational`
2797+
0.9999999999999999
2798+
```
27802799
"""
27812800
/(x, y)
27822801

base/mpfr.jl

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ import
1919
isone, big, _string_n, decompose, minmax,
2020
sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
2121
uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask,
22-
RawBigIntRoundingIncrementHelper, truncated, RawBigInt
22+
RawBigIntRoundingIncrementHelper, truncated, RawBigInt, unsafe_rational,
23+
RationalToFloat, rational_to_floating_point
2324

2425

2526
using .Base.Libc
@@ -310,12 +311,51 @@ BigFloat(x::Union{UInt8,UInt16,UInt32}, r::MPFRRoundingMode=ROUNDING_MODE[]; pre
310311
BigFloat(x::Union{Float16,Float32}, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) =
311312
BigFloat(Float64(x), r; precision=precision)
312313

313-
function BigFloat(x::Rational, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[])
314-
setprecision(BigFloat, precision) do
315-
setrounding_raw(BigFloat, r) do
316-
BigFloat(numerator(x))::BigFloat / BigFloat(denominator(x))::BigFloat
314+
function set_2exp!(z::BigFloat, n::BigInt, exp::Int, rm::MPFRRoundingMode)
315+
ccall(
316+
(:mpfr_set_z_2exp, libmpfr),
317+
Int32,
318+
(Ref{BigFloat}, Ref{BigInt}, Int, MPFRRoundingMode),
319+
z, n, exp, rm,
320+
)
321+
nothing
322+
end
323+
324+
function RationalToFloat.to_floating_point_impl(::Type{BigFloat}, ::Type{BigInt}, num, den, romo, prec)
325+
num_is_zero = iszero(num)
326+
den_is_zero = iszero(den)
327+
s = Int8(sign(num))
328+
sb = signbit(s)
329+
is_zero = num_is_zero & !den_is_zero
330+
is_inf = !num_is_zero & den_is_zero
331+
is_regular = !num_is_zero & !den_is_zero
332+
333+
if is_regular
334+
let rtfc = RationalToFloat.to_float_components
335+
c = rtfc(BigInt, num, den, prec, nothing, romo, sb)
336+
ret = BigFloat(precision = prec)
337+
mpfr_romo = convert(MPFRRoundingMode, romo)
338+
set_2exp!(ret, s * c.integral_significand, Int(c.exponent - prec + true), mpfr_romo)
339+
ret
317340
end
318-
end
341+
else
342+
if is_zero
343+
BigFloat(false, MPFRRoundToZero, precision = prec)
344+
elseif is_inf
345+
BigFloat(s * Inf, MPFRRoundToZero, precision = prec)
346+
else
347+
BigFloat(precision = prec)
348+
end
349+
end::BigFloat
350+
end
351+
352+
function BigFloat(x::Rational, r::RoundingMode; precision::Integer = DEFAULT_PRECISION[])
353+
rational_to_floating_point(BigFloat, x, r, precision)
354+
end
355+
356+
function BigFloat(x::Rational, r::MPFRRoundingMode = ROUNDING_MODE[];
357+
precision::Integer = DEFAULT_PRECISION[])
358+
rational_to_floating_point(BigFloat, x, r, precision)
319359
end
320360

321361
function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=DEFAULT_PRECISION[], rounding::MPFRRoundingMode=ROUNDING_MODE[])

base/rational.jl

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,21 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
140140
(::Type{T})(x::Rational) where {T<:Integer} = (isinteger(x) ? convert(T, x.num)::T :
141141
throw(InexactError(nameof(T), T, x)))
142142

143-
AbstractFloat(x::Rational) = (float(x.num)/float(x.den))::AbstractFloat
144-
function (::Type{T})(x::Rational{S}) where T<:AbstractFloat where S
145-
P = promote_type(T,S)
146-
convert(T, convert(P,x.num)/convert(P,x.den))::T
143+
function numerator_denominator_promoted(x)
144+
y = unsafe_rational(numerator(x), denominator(x))
145+
(numerator(y), denominator(y))
146+
end
147+
148+
function rational_to_floating_point(::Type{F}, x, rm = RoundNearest, prec = precision(F)) where {F}
149+
nd = numerator_denominator_promoted(x)
150+
RationalToFloat.to_floating_point(F, nd..., rm, prec)::F
151+
end
152+
153+
(::Type{F})(x::Rational) where {F<:AbstractFloat} = rational_to_floating_point(F, x)::F
154+
155+
function AbstractFloat(x::Q) where {Q<:Rational}
156+
T = float(Q)
157+
T(x)::T::AbstractFloat
147158
end
148159

149160
function Rational{T}(x::AbstractFloat) where T<:Integer

base/rational_to_float.jl

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
# This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
module RationalToFloat
4+
5+
const Rnd = Base.Rounding
6+
7+
# Performance optimization. Unlike raw `<<` or `>>>`, this is supposed
8+
# to compile to a single instruction, because the semantics correspond
9+
# to what hardware usually provides.
10+
function machine_shift(shift::S, a::T, b) where {S,T<:Base.BitInteger}
11+
@inline begin
12+
mask = 8*sizeof(T) - 1
13+
c = b & mask
14+
shift(a, c)
15+
end
16+
end
17+
18+
machine_shift(::S, a::Bool, ::Any) where {S} = error("unsupported")
19+
20+
# Fallback for `BigInt` etc.
21+
machine_shift(shift::S, a, b) where {S} = shift(a, b)
22+
23+
# Arguments are positive integers.
24+
function div_significand_with_remainder(num, den, minimum_significand_size)
25+
clamped = x -> max(zero(x), x)::typeof(x)
26+
bw = Base.top_set_bit # bit width
27+
shift = clamped(minimum_significand_size + bw(den) - bw(num) + 0x2)
28+
t = machine_shift(<<, num, shift)
29+
(divrem(t, den, RoundToZero)..., shift)
30+
end
31+
32+
# `divrem(n, 1<<k, RoundToZero)`
33+
function divrem_2(n, k)
34+
quo = machine_shift(>>>, n, k)
35+
tmp = machine_shift(<<, quo, k)
36+
rem = n - tmp
37+
(quo, rem)
38+
end
39+
40+
function to_float_components_impl(num, den, precision, max_subnormal_exp)
41+
# `+1` because we need an extra, "round", bit for some rounding modes.
42+
#
43+
# TODO: as a performance optimization, only do this when required
44+
# by the rounding mode
45+
prec_p_1 = precision + true
46+
47+
(quo0, rem0, shift) = div_significand_with_remainder(num, den, prec_p_1)
48+
width = Base.top_set_bit(quo0)
49+
excess_width = width - prec_p_1
50+
exp = width - shift - true
51+
52+
exp_underflow = if isnothing(max_subnormal_exp)
53+
zero(exp)
54+
else
55+
let d = max_subnormal_exp - exp, T = typeof(d), z = zero(d)::T
56+
(signbit(d) ? z : d + true)::T
57+
end
58+
end
59+
60+
(quo1, rem1) = divrem_2(quo0, exp_underflow + excess_width)
61+
integral_significand = quo1 >>> true
62+
round_bit = quo1 % Bool
63+
sticky_bit = !iszero(rem1) | !iszero(rem0)
64+
65+
(; integral_significand, exponent = exp, round_bit, sticky_bit)
66+
end
67+
68+
struct RoundingIncrementHelper
69+
final_bit::Bool
70+
round_bit::Bool
71+
sticky_bit::Bool
72+
end
73+
74+
(h::RoundingIncrementHelper)(::Rnd.FinalBit) = h.final_bit
75+
(h::RoundingIncrementHelper)(::Rnd.RoundBit) = h.round_bit
76+
(h::RoundingIncrementHelper)(::Rnd.StickyBit) = h.sticky_bit
77+
78+
function to_float_components_rounded(num, den, precision, max_subnormal_exp, romo, sign_bit)
79+
overflows = (x, p) -> x == machine_shift(<<, one(x), p)
80+
t = to_float_components_impl(num, den, precision, max_subnormal_exp)
81+
raw_significand = t.integral_significand
82+
rh = RoundingIncrementHelper(raw_significand % Bool, t.round_bit, t.sticky_bit)
83+
incr = Rnd.correct_rounding_requires_increment(rh, romo, sign_bit)
84+
rounded = raw_significand + incr
85+
(integral_significand, exponent) = let exp = t.exponent
86+
if overflows(rounded, precision)
87+
(rounded >>> true, exp + true)
88+
else
89+
(rounded, exp)
90+
end
91+
end
92+
(; integral_significand, exponent)
93+
end
94+
95+
function to_float_components(::Type{T}, num, den, precision, max_subnormal_exp, romo, sb) where {T}
96+
to_float_components_rounded(abs(T(num)), den, precision, max_subnormal_exp, romo, sb)
97+
end
98+
99+
function to_floating_point_fallback(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
100+
num_is_zero = iszero(num)
101+
den_is_zero = iszero(den)
102+
sb = signbit(num)
103+
is_zero = num_is_zero & !den_is_zero
104+
is_inf = !num_is_zero & den_is_zero
105+
is_regular = !num_is_zero & !den_is_zero
106+
if is_regular
107+
let
108+
c = to_float_components(S, num, den, prec, nothing, rm, sb)
109+
exp = c.exponent
110+
signif = T(c.integral_significand)::T
111+
let x = ldexp(signif, exp - prec + true)::T
112+
sb ? -x : x
113+
end::T
114+
end
115+
else
116+
if is_zero
117+
zero(T)::T
118+
elseif is_inf
119+
T(Inf)::T
120+
else
121+
T(NaN)::T
122+
end
123+
end::T
124+
end
125+
126+
function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
127+
to_floating_point_fallback(T, S, num, den, rm, prec)
128+
end
129+
130+
function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T<:Base.IEEEFloat,S}
131+
num_is_zero = iszero(num)
132+
den_is_zero = iszero(den)
133+
sb = signbit(num)
134+
is_zero = num_is_zero & !den_is_zero
135+
is_inf = !num_is_zero & den_is_zero
136+
is_regular = !num_is_zero & !den_is_zero
137+
(rm_is_to_zero, rm_is_from_zero) = if Rnd.rounds_to_nearest(rm)
138+
(false, false)
139+
else
140+
let from = Rnd.rounds_away_from_zero(rm, sb)
141+
(!from, from)
142+
end
143+
end::NTuple{2,Bool}
144+
exp_max = Base.exponent_max(T)
145+
exp_min = Base.exponent_min(T)
146+
ieee_repr = Base.ieee754_representation
147+
repr_zero = ieee_repr(T, sb, Val(:zero))
148+
repr_inf = ieee_repr(T, sb, Val(:inf))
149+
repr_nan = ieee_repr(T, sb, Val(:nan))
150+
U = typeof(repr_zero)
151+
repr_zero::U
152+
repr_inf::U
153+
repr_nan::U
154+
155+
ret_u = if is_regular
156+
let
157+
c = let e = exp_min - 1
158+
to_float_components(S, num, den, prec, e, rm, sb)
159+
end
160+
exp = c.exponent
161+
exp_diff = exp - exp_min
162+
is_normal = 0 exp_diff
163+
exp_is_huge_p = exp_max < exp
164+
exp_is_huge_n = signbit(exp_diff + prec)
165+
rounds_to_inf = exp_is_huge_p & !rm_is_to_zero
166+
rounds_to_zero = exp_is_huge_n & !rm_is_from_zero
167+
168+
if !rounds_to_zero & !exp_is_huge_p
169+
let signif = (c.integral_significand % U) & Base.significand_mask(T)
170+
exp_field = (max(exp_diff, zero(exp_diff)) + is_normal) % U
171+
ieee_repr(T, sb, exp_field, signif)::U
172+
end
173+
elseif rounds_to_zero
174+
repr_zero
175+
elseif rounds_to_inf
176+
repr_inf
177+
else
178+
ieee_repr(T, sb, Val(:omega))
179+
end
180+
end
181+
else
182+
if is_zero
183+
repr_zero
184+
elseif is_inf
185+
repr_inf
186+
else
187+
repr_nan
188+
end
189+
end::U
190+
191+
reinterpret(T, ret_u)::T
192+
end
193+
194+
# `BigInt` is a safe default.
195+
to_float_promote_type(::Type{F}, ::Type{S}) where {F,S} = BigInt
196+
197+
const BitIntegerOrBool = Union{Bool,Base.BitInteger}
198+
199+
# As an optimization, use an integer type narrower than `BigInt` when possible.
200+
function to_float_promote_type(::Type{F}, ::Type{S}) where {F<:Base.IEEEFloat,S<:BitIntegerOrBool}
201+
Max = if sizeof(F) sizeof(S)
202+
S
203+
else
204+
(S <: Signed) ? Base.inttype(F) : Base.uinttype(F)
205+
end
206+
widen(Max)
207+
end
208+
209+
function to_floating_point(::Type{F}, num::T, den::T, rm, prec) where {F,T}
210+
S = to_float_promote_type(F, T)
211+
to_floating_point_impl(F, S, num, den, rm, prec)
212+
end
213+
214+
end

test/choosetests.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ const TESTNAMES = [
1111
"char", "strings", "triplequote", "unicode", "intrinsics",
1212
"dict", "hashing", "iobuffer", "staged", "offsetarray",
1313
"arrayops", "tuple", "reduce", "reducedim", "abstractarray",
14-
"intfuncs", "simdloop", "vecelement", "rational",
14+
"intfuncs", "simdloop", "vecelement", "rational", "rational_to_float",
1515
"bitarray", "copy", "math", "fastmath", "functional", "iterators",
1616
"operators", "ordering", "path", "ccall", "parse", "loading", "gmp",
1717
"sorting", "spawn", "backtrace", "exceptions",

0 commit comments

Comments
 (0)