diff --git a/base/toml_parser.jl b/base/toml_parser.jl
index 0e90f46315e5e..6c4ff6e2a52c0 100644
--- a/base/toml_parser.jl
+++ b/base/toml_parser.jl
@@ -823,15 +823,15 @@ function parse_number_or_date_start(l::Parser)
         elseif accept(l, 'x')
             parsed_sign && return ParserError(ErrSignInNonBase10Number)
             ate, contains_underscore = @try accept_batch_underscore(l, isvalid_hex)
-            ate && return parse_int(l, contains_underscore)
+            ate && return parse_hex(l, contains_underscore)
         elseif accept(l, 'o')
             parsed_sign && return ParserError(ErrSignInNonBase10Number)
             ate, contains_underscore = @try accept_batch_underscore(l, isvalid_oct)
-            ate && return parse_int(l, contains_underscore)
+            ate && return parse_oct(l, contains_underscore)
         elseif accept(l, 'b')
             parsed_sign && return ParserError(ErrSignInNonBase10Number)
             ate, contains_underscore = @try accept_batch_underscore(l, isvalid_binary)
-            ate && return parse_int(l, contains_underscore)
+            ate && return parse_bin(l, contains_underscore)
         elseif accept(l, isdigit)
             return parse_local_time(l)
         end
@@ -899,15 +899,29 @@ function parse_float(l::Parser, contains_underscore)::Err{Float64}
     return v
 end
 
-function parse_int(l::Parser, contains_underscore, base=nothing)::Err{Int64}
-    s = take_string_or_substring(l, contains_underscore)
-    v = try
-        Base.parse(Int64, s; base=base)
-    catch e
-        e isa Base.OverflowError && return(ParserError(ErrOverflowError))
-        error("internal parser error: did not correctly discredit $(repr(s)) as an int")
+# Generates `parse_int`, `parse_hex`, `parse_oct` and `parse_bin`. The length of the
+# token `s` selects the narrowest candidate type: up to `n1` characters try `T1`, up
+# to `n2` characters try `T2`, otherwise `BigInt`.
+for (name, T1, T2, n1, n2) in (("int", Int64, Int128, 17, 33),
+                               ("hex", UInt64, UInt128, 18, 34),
+                               ("oct", UInt64, UInt128, 24, 45),
+                               ("bin", UInt64, UInt128, 66, 130),
+                               )
+    @eval function $(Symbol("parse_", name))(l::Parser, contains_underscore, base=nothing)::Err{Union{$(T1), $(T2), BigInt}}
+        s = take_string_or_substring(l, contains_underscore)
+        len = length(s)
+        # Length only bounds the magnitude (22 octal digits span up to 66 bits), so a
+        # value that does not fit the length-selected type is retried in a wider one.
+        v = len ≤ $(n1) ? Base.tryparse($(T1), s; base) : nothing
+        if v === nothing && len ≤ $(n2)
+            v = Base.tryparse($(T2), s; base)
+        end
+        if v === nothing
+            v = Base.tryparse(BigInt, s; base)
+        end
+        v === nothing && error("internal parser error: did not correctly discredit $(repr(s)) as an int")
+        return v
     end
-    return v
 end
 
 
diff --git a/stdlib/TOML/src/print.jl b/stdlib/TOML/src/print.jl
index 61d13a8f4853e..f5bef8344f64f 100644
--- a/stdlib/TOML/src/print.jl
+++ b/stdlib/TOML/src/print.jl
@@ -93,7 +93,7 @@ function printvalue(f::MbyFunc, io::IO, value::TOMLValue)
         value isa Dates.Time ? Base.print(io, Dates.format(value, Dates.dateformat"HH:MM:SS.sss")) :
         value isa Dates.Date ? Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd")) :
         value isa Bool ? Base.print(io, value ? "true" : "false") :
-        value isa Integer ? Base.print(io, Int64(value)) : # TOML specifies 64-bit signed long range for integer
+        value isa Integer ? print_integer(io, value) : # Julia's own printing should be compatible with TOML on integers
         value isa AbstractFloat ? Base.print(io, isnan(value) ? "nan" :
                                                   isinf(value) ? string(value > 0 ? "+" : "-", "inf") :
                                                   Float64(value)) : # TOML specifies IEEE 754 binary64 for float
@@ -104,6 +104,16 @@ function printvalue(f::MbyFunc, io::IO, value::TOMLValue)
     error("internal error in TOML printing, unhandled value")
 end
 
+# Print `value` as a TOML integer: signed values go through `Base.show`, every other
+# integer is written as `0x`-prefixed hexadecimal zero-padded to whole bytes.
+function print_integer(io::IO, value::Integer)
+    value isa Signed && return Base.show(io, value)
+    # unsigned integers are printed as hex
+    n = 2 * ndigits(value, base=256)
+    Base.print(io, "0x", string(value, base=16, pad=n))
+    return
+end
+
 function print_inline_table(f::MbyFunc, io::IO, value::AbstractDict)
     Base.print(io, "{")
     for (i, (k,v)) in enumerate(value)
diff --git a/stdlib/TOML/test/print.jl b/stdlib/TOML/test/print.jl
index bbfce3b7d7474..765b6feb491a5 100644
--- a/stdlib/TOML/test/print.jl
+++ b/stdlib/TOML/test/print.jl
@@ -96,6 +96,19 @@ loaders = ["gzip", { driver = "csv", args = {delim = "\t"}}]
     """
 end
 
+@testset "unsigned integers" for (x, s) in [
+    0x1a0 => "0x01a0",
+    0x1aea8 => "0x01aea8",
+    0x1aeee8 => "0x1aeee8",
+    0x1aea01231 => "0x01aea01231",
+    0x1aea01231213ae13125 => "0x01aea01231213ae13125",
+    ]
+    d = Dict("x" => x)
+    @test toml_str(d) == """
+    x = $s
+    """
+end
+
 struct Foo
     a::Int64
     b::Float64
diff --git a/stdlib/TOML/test/readme.jl b/stdlib/TOML/test/readme.jl
index 50d47dafeec22..ee267414485ba 100644
--- a/stdlib/TOML/test/readme.jl
+++ b/stdlib/TOML/test/readme.jl
@@ -410,31 +410,90 @@ d = parse(str)
 @test d["oct2"] == 0o755
 @test d["bin1"] == 0b11010110
 
+str = """
+hex1 = 0x6E # UInt8
+hex2 = 0x8f1e # UInt16
+hex3 = 0x765f3173 # UInt32
+hex4 = 0xc13b830a807cc7f4 # UInt64
+hex5 = 0x937efe0a4241edb24a04b97bd90ef363 # UInt128
+hex6 = 0x937efe0a4241edb24a04b97bd90ef3632 # BigInt
+"""
+@test roundtrip(str)
+d = parse(str)
+@test d["hex1"] isa UInt64
+@test d["hex2"] isa UInt64
+@test d["hex3"] isa UInt64
+@test d["hex4"] isa UInt64
+@test d["hex5"] isa UInt128
+@test d["hex6"] isa BigInt
+
+str = """
+oct1 = 0o140 # UInt8
+oct2 = 0o46244 # UInt16
+oct3 = 0o32542120656 # UInt32
+oct4 = 0o1526535761042630654411 # UInt64
+oct5 = 0o3467204325743773607311464533371572447656531 # UInt128
+oct6 = 0o34672043257437736073114645333715724476565312 # BigInt
+"""
+@test roundtrip(str)
+d = parse(str)
+@test d["oct1"] isa UInt64
+@test d["oct2"] isa UInt64
+@test d["oct3"] isa UInt64
+@test d["oct4"] isa UInt64
+@test d["oct5"] isa UInt128
+@test d["oct6"] isa BigInt
+
+str = """
+bin1 = 0b10001010 # UInt8
+bin2 = 0b11111010001100 # UInt16
+bin3 = 0b11100011110000010101000010101 # UInt32
+bin4 = 0b10000110100111011010001000000111110110000011111101101110011011 # UInt64
+bin5 = 0b1101101101101100110001010110111011101000111010101110011000011100110100101111110001010001011001000001000001010010011101100100111 # UInt128
+bin6 = 0b110110110110110011000101011011101110100011101010111001100001110011010010111111000101000101100100000100000101001001110110010011111 # BigInt
+"""
+
+@test roundtrip(str)
+d = parse(str)
+@test d["bin1"] isa UInt64
+@test d["bin2"] isa UInt64
+@test d["bin3"] isa UInt64
+@test d["bin4"] isa UInt64
+@test d["bin5"] isa UInt128
+@test d["bin6"] isa BigInt
+
 #Arbitrary 64-bit signed integers (from −2^63 to 2^63−1) should be accepted and
 #handled losslessly. If an integer cannot be represented losslessly, an error
 #must be thrown.
 str = """
-low = -9_223_372_036_854_775_808
-high = 9_223_372_036_854_775_807
+low = -170_141_183_460_469_231_731_687_303_715_884_105_728
+high = 170_141_183_460_469_231_731_687_303_715_884_105_727
+"""
+@test roundtrip(str)
+d = parse(str)
+@test d["low"] == typemin(Int128)
+@test d["high"] == typemax(Int128)
+
+str = """
+low = -170_141_183_460_469_231_731_687_303_715_884_105_728_123
+high = 170_141_183_460_469_231_731_687_303_715_884_105_727_123
 """
 @test roundtrip(str)
 d = parse(str)
-@test d["low"] == -9_223_372_036_854_775_808
-@test d["high"] == 9_223_372_036_854_775_807
+@test d["low"] == big"-170_141_183_460_469_231_731_687_303_715_884_105_728_123"
+@test d["high"] == big"170_141_183_460_469_231_731_687_303_715_884_105_727_123"
 
 str = """
 toolow = -9_223_372_036_854_775_809
 """
-err = tryparse(str)
-@test err isa ParserError
-@test err.type == Internals.ErrOverflowError
+d = parse(str)
+@test d["toolow"] == -9223372036854775809
 
 str = """
 toohigh = 9_223_372_036_854_775_808
 """
-err = tryparse(str)
-@test err isa ParserError
-@test err.type == Internals.ErrOverflowError
+d = parse(str)
+@test d["toohigh"] == 9_223_372_036_854_775_808
 
 end
 
diff --git a/stdlib/TOML/test/values.jl b/stdlib/TOML/test/values.jl
index 8337bb5a54714..be2ed3acce5b5 100644
--- a/stdlib/TOML/test/values.jl
+++ b/stdlib/TOML/test/values.jl
@@ -23,8 +23,6 @@ end
     @test failval("00.0" , Internals.ErrParsingDateTime)
     @test failval("-00.0" , Internals.ErrParsingDateTime)
     @test failval("+00.0" , Internals.ErrParsingDateTime)
-    @test failval("9223372036854775808" , Internals.ErrOverflowError)
-    @test failval("-9223372036854775809" , Internals.ErrOverflowError)
 
     @test failval("0." , Internals.ErrNoTrailingDigitAfterDot)
     @test failval("0.e" , Internals.ErrNoTrailingDigitAfterDot)
@@ -54,6 +52,31 @@ end
     @test testval("+1_000" , 1000 |> Int64)
     @test testval("-1_000" , -1000 |> Int64)
 
+    @test testval("0x6E", 0x6E|> UInt64)
+    @test testval("0x8f1e", 0x8f1e|> UInt64)
+    @test testval("0x765f3173", 0x765f3173|> UInt64)
+    @test testval("0xc13b830a807cc7f4", 0xc13b830a807cc7f4|> UInt64)
+    @test testval("0x937efe_0a4241_edb24a04b97bd90ef363", 0x937efe0a4241edb24a04b97bd90ef363 |> UInt128)
+
+    @test testval("0o140", 0o140 |> UInt64) # UInt8
+    @test testval("0o46244", 0o46244 |> UInt64) # UInt16
+    @test testval("0o32542120656", 0o32542120656 |> UInt64) # UInt32
+    @test testval("0o1526535761042630654411", 0o1526535761042630654411 |> UInt64) # UInt64
+    @test testval("0o3467204325743773607311464533371572447656531", 0o3467204325743773607311464533371572447656531 |> UInt128) # UInt128
+    @test testval("0o34672043257437736073114645333715724476565312", 0o34672043257437736073114645333715724476565312 |> BigInt) # BigInt
+    @test testval("0o2000000000000000000000", 0x10000000000000000 |> UInt128) # 22 octal digits that overflow UInt64
+
+    @test testval("0b10001010",0b10001010 |> UInt64) # UInt8
+    @test testval("0b11111010001100",0b11111010001100 |> UInt64) # UInt16
+    @test testval("0b11100011110000010101000010101",0b11100011110000010101000010101 |> UInt64) # UInt32
+    @test testval("0b10000110100111011010001000000111110110000011111101101110011011",0b10000110100111011010001000000111110110000011111101101110011011 |> UInt64) # UInt64
+    @test testval(
+        "0b1101101101101100110001010110111011101000111010101110011000011100110100101111110001010001011001000001000001010010011101100100111",
+        0b1101101101101100110001010110111011101000111010101110011000011100110100101111110001010001011001000001000001010010011101100100111 |> UInt128) # UInt128
+    @test testval(
+        "0b110110110110110011000101011011101110100011101010111001100001110011010010111111000101000101100100000100000101001001110110010011111",
+        0b110110110110110011000101011011101110100011101010111001100001110011010010111111000101000101100100000100000101001001110110010011111 |> BigInt) # BigInt
+
     @test failval("0_" , Internals.ErrUnderscoreNotSurroundedByDigits)
     @test failval("0__0" , Internals.ErrUnderscoreNotSurroundedByDigits)
     @test failval("__0" , Internals.ErrUnexpectedStartOfValue)