support UInt & BigInt in TOML (#47903)

* support parsing uint and long int Co-authored-by: Kristoffer Carlsson <[email protected]>
JuliaLang · Jan 12, 2023 · d61cfd2 · d61cfd2
1 parent d544e78
commit d61cfd2
Show file tree

Hide file tree

Showing 5 changed files with 139 additions and 24 deletions.
diff --git a/base/toml_parser.jl b/base/toml_parser.jl
@@ -823,15 +823,15 @@ function parse_number_or_date_start(l::Parser)
  elseif accept(l, 'x')
  parsed_sign && return ParserError(ErrSignInNonBase10Number)
  ate, contains_underscore = @try accept_batch_underscore(l, isvalid_hex)
- ate && return parse_int(l, contains_underscore)
+ ate && return parse_hex(l, contains_underscore)
  elseif accept(l, 'o')
  parsed_sign && return ParserError(ErrSignInNonBase10Number)
  ate, contains_underscore = @try accept_batch_underscore(l, isvalid_oct)
- ate && return parse_int(l, contains_underscore)
+ ate && return parse_oct(l, contains_underscore)
  elseif accept(l, 'b')
  parsed_sign && return ParserError(ErrSignInNonBase10Number)
  ate, contains_underscore = @try accept_batch_underscore(l, isvalid_binary)
- ate && return parse_int(l, contains_underscore)
+ ate && return parse_bin(l, contains_underscore)
  elseif accept(l, isdigit)
  return parse_local_time(l)
  end
@@ -899,15 +899,28 @@ function parse_float(l::Parser, contains_underscore)::Err{Float64}
  return v
 end
 
-function parse_int(l::Parser, contains_underscore, base=nothing)::Err{Int64}
- s = take_string_or_substring(l, contains_underscore)
- v = try
- Base.parse(Int64, s; base=base)
- catch e
- e isa Base.OverflowError && return(ParserError(ErrOverflowError))
- error("internal parser error: did not correctly discredit $(repr(s)) as an int")
+# Generate `parse_int`, `parse_hex`, `parse_oct` and `parse_bin`.
+#
+# For each generated parser, `T1` and `T2` are the fixed-width candidate types
+# and `n1` / `n2` are the largest token lengths (`length(s)`) for which `T1` /
+# `T2` is attempted; longer tokens go straight to `BigInt`.
+# NOTE(review): the thresholds look like they count the sign / `0x`-style
+# prefix as part of the token -- confirm against `take_string_or_substring`.
+#
+# Returns the parsed integer (`T1`, `T2` or `BigInt`). Raises an internal error
+# if the token is not a valid integer at all, which would mean the caller
+# accepted characters it should have rejected.
+for (name, T1, T2, n1, n2) in (("int", Int64,  Int128,  17,  33),
+                               ("hex", UInt64, UInt128, 18,  34),
+                               ("oct", UInt64, UInt128, 24,  45),
+                               ("bin", UInt64, UInt128, 66, 130),
+                               )
+    @eval function $(Symbol("parse_", name))(l::Parser, contains_underscore, base=nothing)::Err{Union{$(T1), $(T2), BigInt}}
+        s = take_string_or_substring(l, contains_underscore)
+        len = length(s)
+        # The token length only selects the first candidate type. A token can
+        # pass the length check and still not fit (octal digits carry 3 bits,
+        # so `0o7777777777777777777777` needs 66 bits), hence a failed
+        # `tryparse` widens to the next type instead of reporting an overflow.
+        if len ≤ $(n1)
+            v = Base.tryparse($(T1), s; base)
+            v === nothing || return v
+        end
+        if len ≤ $(n2)
+            v = Base.tryparse($(T2), s; base)
+            v === nothing || return v
+        end
+        v = Base.tryparse(BigInt, s; base)
+        v === nothing &&
+            error("internal parser error: did not correctly discredit $(repr(s)) as an int")
+        return v
  end
- return v
 end
 
 

diff --git a/stdlib/TOML/src/print.jl b/stdlib/TOML/src/print.jl
@@ -93,7 +93,7 @@ function printvalue(f::MbyFunc, io::IO, value::TOMLValue)
  value isa Dates.Time ? Base.print(io, Dates.format(value, Dates.dateformat"HH:MM:SS.sss")) :
  value isa Dates.Date ? Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd")) :
  value isa Bool ? Base.print(io, value ? "true" : "false") :
- value isa Integer ? Base.print(io, Int64(value)) : # TOML specifies 64-bit signed long range for integer
+ value isa Integer ? print_integer(io, value) : # Julia's own printing should be compatible with TOML on integers
  value isa AbstractFloat ? Base.print(io, isnan(value) ? "nan" :
  isinf(value) ? string(value > 0 ? "+" : "-", "inf") :
  Float64(value)) : # TOML specifies IEEE 754 binary64 for float
@@ -104,6 +104,14 @@ function printvalue(f::MbyFunc, io::IO, value::TOMLValue)
  error("internal error in TOML printing, unhandled value")
 end
 
+"""
+    print_integer(io::IO, value::Integer)
+
+Write `value` to `io`. `Signed` values are written with `Base.show`; every
+other integer is written as a `0x`-prefixed hexadecimal literal, zero-padded
+to two hex digits per base-256 digit of the value.
+"""
+function print_integer(io::IO, value::Integer)
+    if value isa Signed
+        return Base.show(io, value)
+    end
+    # Pad to whole bytes so the output has an even number of hex digits.
+    nbytes = ndigits(value; base=256)
+    hexdigits = string(value; base=16, pad=2 * nbytes)
+    Base.print(io, "0x", hexdigits)
+    return
+end
+
 function print_inline_table(f::MbyFunc, io::IO, value::AbstractDict)
  Base.print(io, "{")
  for (i, (k,v)) in enumerate(value)

diff --git a/stdlib/TOML/test/print.jl b/stdlib/TOML/test/print.jl
@@ -96,6 +96,19 @@ loaders = ["gzip", { driver = "csv", args = {delim = "\t"}}]
  """
 end
 
+# Each pair maps an unsigned value to the hex literal expected in the output:
+# `0x`-prefixed and zero-padded to an even number of hex digits.
+@testset "unsigned integers" for (value, expected) in [
+        0x1a0 => "0x01a0",
+        0x1aea8 => "0x01aea8",
+        0x1aeee8 => "0x1aeee8",
+        0x1aea01231 => "0x01aea01231",
+        0x1aea01231213ae13125 => "0x01aea01231213ae13125",
+    ]
+    table = Dict("x" => value)
+    rendered = toml_str(table)
+    @test rendered == """
+        x = $expected
+        """
+end
+
 struct Foo
  a::Int64
  b::Float64

diff --git a/stdlib/TOML/test/readme.jl b/stdlib/TOML/test/readme.jl
@@ -410,31 +410,90 @@ d = parse(str)
 @test d["oct2"] == 0o755
 @test d["bin1"] == 0b11010110
 
+str = """
+hex1 = 0x6E # UInt8
+hex2 = 0x8f1e # UInt16
+hex3 = 0x765f3173 # UInt32
+hex4 = 0xc13b830a807cc7f4 # UInt64
+hex5 = 0x937efe0a4241edb24a04b97bd90ef363 # UInt128
+hex6 = 0x937efe0a4241edb24a04b97bd90ef3632 # BigInt
+"""
+@test roundtrip(str)
+d = parse(str)
+@test d["hex1"] isa UInt64
+@test d["hex2"] isa UInt64
+@test d["hex3"] isa UInt64
+@test d["hex4"] isa UInt64
+@test d["hex5"] isa UInt128
+@test d["hex6"] isa BigInt
+
+str = """
+oct1 = 0o140 # UInt8
+oct2 = 0o46244 # UInt16
+oct3 = 0o32542120656 # UInt32
+oct4 = 0o1526535761042630654411 # UInt64
+oct5 = 0o3467204325743773607311464533371572447656531 # UInt128
+oct6 = 0o34672043257437736073114645333715724476565312 # BigInt
+"""
+@test roundtrip(str)
+d = parse(str)
+@test d["oct1"] isa UInt64
+@test d["oct2"] isa UInt64
+@test d["oct3"] isa UInt64
+@test d["oct4"] isa UInt64
+@test d["oct5"] isa UInt128
+@test d["oct6"] isa BigInt
+
+str = """
+bin1 = 0b10001010 # UInt8
+bin2 = 0b11111010001100 # UInt16
+bin3 = 0b11100011110000010101000010101 # UInt32
+bin4 = 0b10000110100111011010001000000111110110000011111101101110011011 # UInt64
+bin5 = 0b1101101101101100110001010110111011101000111010101110011000011100110100101111110001010001011001000001000001010010011101100100111 # UInt128
+bin6 = 0b110110110110110011000101011011101110100011101010111001100001110011010010111111000101000101100100000100000101001001110110010011111 # BigInt
+"""
+
+@test roundtrip(str)
+d = parse(str)
+@test d["bin1"] isa UInt64
+@test d["bin2"] isa UInt64
+@test d["bin3"] isa UInt64
+@test d["bin4"] isa UInt64
+@test d["bin5"] isa UInt128
+@test d["bin6"] isa BigInt
+
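 #Arbitrary 64-bit signed integers (from −2^63 to 2^63−1) should be accepted and
 #handled losslessly. If an integer cannot be represented losslessly, an error
 #must be thrown.
+#This parser goes beyond that minimum: the cases below check that values outside
+#the 64-bit range are widened to Int128 or BigInt instead of being rejected.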
 str = """
-low = -9_223_372_036_854_775_808
-high = 9_223_372_036_854_775_807
+low = -170_141_183_460_469_231_731_687_303_715_884_105_728
+high = 170_141_183_460_469_231_731_687_303_715_884_105_727
+"""
+@test roundtrip(str)
+d = parse(str)
+@test d["low"] == typemin(Int128)
+@test d["high"] == typemax(Int128)
+
+str = """
+low = -170_141_183_460_469_231_731_687_303_715_884_105_728_123
+high = 170_141_183_460_469_231_731_687_303_715_884_105_727_123
 """
 @test roundtrip(str)
 d = parse(str)
-@test d["low"] == -9_223_372_036_854_775_808
-@test d["high"] == 9_223_372_036_854_775_807
+@test d["low"] == big"-170_141_183_460_469_231_731_687_303_715_884_105_728_123"
+@test d["high"] == big"170_141_183_460_469_231_731_687_303_715_884_105_727_123"
 
 str = """
 toolow = -9_223_372_036_854_775_809
 """
-err = tryparse(str)
-@test err isa ParserError
-@test err.type == Internals.ErrOverflowError
+d = parse(str)
+@test d["toolow"] == -9223372036854775809
 
 str = """
 toohigh = 9_223_372_036_854_775_808
 """
-err = tryparse(str)
-@test err isa ParserError
-@test err.type == Internals.ErrOverflowError
+# One past typemax(Int64): must parse to the exact value instead of erroring.
+# The comparison has to be wrapped in @test, otherwise nothing is asserted.
+d = parse(str)
+@test d["toohigh"] == 9_223_372_036_854_775_808
 
 end
 

diff --git a/stdlib/TOML/test/values.jl b/stdlib/TOML/test/values.jl
@@ -23,8 +23,6 @@ end
  @test failval("00.0" , Internals.ErrParsingDateTime)
  @test failval("-00.0" , Internals.ErrParsingDateTime)
  @test failval("+00.0" , Internals.ErrParsingDateTime)
- @test failval("9223372036854775808" , Internals.ErrOverflowError)
- @test failval("-9223372036854775809" , Internals.ErrOverflowError)
 
  @test failval("0." , Internals.ErrNoTrailingDigitAfterDot)
  @test failval("0.e" , Internals.ErrNoTrailingDigitAfterDot)
@@ -54,6 +52,30 @@ end
  @test testval("+1_000" , 1000 |> Int64)
  @test testval("-1_000" , -1000 |> Int64)
 
+ @test testval("0x6E", 0x6E|> UInt64)
+ @test testval("0x8f1e", 0x8f1e|> UInt64)
+ @test testval("0x765f3173", 0x765f3173|> UInt64)
+ @test testval("0xc13b830a807cc7f4", 0xc13b830a807cc7f4|> UInt64)
+ @test testval("0x937efe_0a4241_edb24a04b97bd90ef363", 0x937efe0a4241edb24a04b97bd90ef363 |> UInt128)
+
+ @test testval("0o140", 0o140 |> UInt64) # UInt8
+ @test testval("0o46244", 0o46244 |> UInt64) # UInt16
+ @test testval("0o32542120656", 0o32542120656 |> UInt64) # UInt32
+ @test testval("0o1526535761042630654411", 0o1526535761042630654411 |> UInt64) # UInt64
+ @test testval("0o3467204325743773607311464533371572447656531", 0o3467204325743773607311464533371572447656531 |> UInt128) # UInt128
+ @test testval("0o34672043257437736073114645333715724476565312", 0o34672043257437736073114645333715724476565312 |> BigInt) # BigInt
+
+ @test testval("0b10001010",0b10001010 |> UInt64) # UInt8
+ @test testval("0b11111010001100",0b11111010001100 |> UInt64) # UInt16
+ @test testval("0b11100011110000010101000010101",0b11100011110000010101000010101 |> UInt64) # UInt32
+ @test testval("0b10000110100111011010001000000111110110000011111101101110011011",0b10000110100111011010001000000111110110000011111101101110011011 |> UInt64) # UInt64
+ @test testval(
+ "0b1101101101101100110001010110111011101000111010101110011000011100110100101111110001010001011001000001000001010010011101100100111",
+ 0b1101101101101100110001010110111011101000111010101110011000011100110100101111110001010001011001000001000001010010011101100100111 |> UInt128) # UInt128
+ @test testval(
+ "0b110110110110110011000101011011101110100011101010111001100001110011010010111111000101000101100100000100000101001001110110010011111",
+ 0b110110110110110011000101011011101110100011101010111001100001110011010010111111000101000101100100000100000101001001110110010011111 |> BigInt) # BigInt
+
  @test failval("0_" , Internals.ErrUnderscoreNotSurroundedByDigits)
  @test failval("0__0" , Internals.ErrUnderscoreNotSurroundedByDigits)
  @test failval("__0" , Internals.ErrUnexpectedStartOfValue)