From 2c327d3ffaa8a4b14219b7a2797230e80d78dedf Mon Sep 17 00:00:00 2001 From: tan Date: Sat, 20 Dec 2014 16:05:00 +0530 Subject: [PATCH] tryparse: parse string to Nullable Introduces the tryparse method: - tryparse{T<:Integer}(::Type{T},s::AbstractString) - tryparse(::Type{Float..},s::AbstractString) - a few variants of the above And: - tryparse(Float.., ...) calls the corresponding C functions jl_try_strtof, jl_try_substrtof, jl_try_strtod and jl_try_substrtod. - The parseint, parsefloat, float64_isvalid and float32_isvalid methods wrap the corresponding tryparse methods. - The jl_strtod, jl_strtof, ... functions are wrappers over the jl_try_str... functions. This should fix #10498 as well. Ref: discussions at #9316, #3631, #5704 --- base/base.jl | 1 - base/combinatorics.jl | 21 ----- base/exports.jl | 1 + base/gmp.jl | 18 +++-- base/nullable.jl | 4 +- base/string.jl | 129 +++++++++++++++++++------------ base/sysimg.jl | 26 ++++++- src/builtins.c | 174 ++++++++++++++++++++++++++++++------------ src/julia.h | 11 +++ test/strings.jl | 22 ++++++ 10 files changed, 277 insertions(+), 130 deletions(-) diff --git a/base/base.jl b/base/base.jl index 5940c40099257..545360216541f 100644 --- a/base/base.jl +++ b/base/base.jl @@ -277,4 +277,3 @@ immutable Nullable{T} Nullable() = new(true) Nullable(value::T) = new(false, value) end - diff --git a/base/combinatorics.jl b/base/combinatorics.jl index 765cd48149c5d..8abafc0e34570 100644 --- a/base/combinatorics.jl +++ b/base/combinatorics.jl @@ -3,25 +3,6 @@ const _fact_table64 = 87178291200,1307674368000,20922789888000,355687428096000,6402373705728000, 121645100408832000,2432902008176640000] -const _fact_table128 = - UInt128[0x00000000000000000000000000000001, 0x00000000000000000000000000000002, - 0x00000000000000000000000000000006, 0x00000000000000000000000000000018, - 0x00000000000000000000000000000078, 0x000000000000000000000000000002d0, - 0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80, - 
0x00000000000000000000000000058980, 0x00000000000000000000000000375f00, - 0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00, - 0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800, - 0x00000000000000000000013077775800, 0x00000000000000000000130777758000, - 0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000, - 0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000, - 0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000, - 0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000, - 0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000, - 0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000, - 0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000, - 0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000, - 0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000] - function factorial_lookup(n::Integer, table, lim) n < 0 && throw(DomainError()) n > lim && throw(OverflowError()) @@ -30,8 +11,6 @@ function factorial_lookup(n::Integer, table, lim) return oftype(n, f) end -factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33) -factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34) factorial(n::Union(Int64,UInt64)) = factorial_lookup(n, _fact_table64, 20) if Int === Int32 diff --git a/base/exports.jl b/base/exports.jl index f8cd1de2fa4f9..dcd350119f031 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -343,6 +343,7 @@ export fldmod, flipsign, float, + tryparse, floor, fma, frexp, diff --git a/base/gmp.jl b/base/gmp.jl index f885cdc452cba..79c32d22f3050 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -5,7 +5,7 @@ export BigInt import Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), ($), binomial, cmp, convert, div, divrem, factorial, fld, gcd, gcdx, lcm, mod, ndigits, promote_rule, rem, show, isqrt, string, isprime, powermod, - sum, trailing_zeros, 
trailing_ones, count_ones, base, parseint, + sum, trailing_zeros, trailing_ones, count_ones, base, parseint, tryparse_internal, serialize, deserialize, bin, oct, dec, hex, isequal, invmod, prevpow2, nextpow2, ndigits0z, widen, signed @@ -76,15 +76,23 @@ signed(x::BigInt) = x BigInt(x::BigInt) = x BigInt(s::AbstractString) = parseint(BigInt,s) -function Base.parseint_nocheck(::Type{BigInt}, s::AbstractString, base::Int) +function tryparse_internal(::Type{BigInt}, s::AbstractString, base::Int, raise::Bool) + _n = Nullable{BigInt}() s = bytestring(s) sgn, base, i = Base.parseint_preamble(true,s,base) + if i == 0 + raise && throw(ArgumentError("premature end of integer: $(repr(s))")) + return _n + end z = BigInt() err = ccall((:__gmpz_set_str, :libgmp), Int32, (Ptr{BigInt}, Ptr{UInt8}, Int32), &z, SubString(s,i), base) - err == 0 || throw(ArgumentError("invalid BigInt: $(repr(s))")) - return sgn < 0 ? -z : z + if err != 0 + raise && throw(ArgumentError("invalid BigInt: $(repr(s))")) + return _n + end + Nullable(sgn < 0 ? 
-z : z) end function BigInt(x::Union(Clong,Int32)) @@ -217,7 +225,7 @@ function serialize(s, n::BigInt) serialize(s, base(62,n)) end -deserialize(s, ::Type{BigInt}) = Base.parseint_nocheck(BigInt, deserialize(s), 62) +deserialize(s, ::Type{BigInt}) = get(tryparse_internal(BigInt, deserialize(s), 62, true)) # Binary ops for (fJ, fC) in ((:+, :add), (:-,:sub), (:*, :mul), diff --git a/base/nullable.jl b/base/nullable.jl index dd8e6d0591709..9facf184eb30c 100644 --- a/base/nullable.jl +++ b/base/nullable.jl @@ -17,9 +17,9 @@ convert( ::Type{Nullable }, ::Void) = Nullable{Union()}() function show{T}(io::IO, x::Nullable{T}) if x.isnull - @printf(io, "Nullable{%s}()", repr(T)) + println(io, "Nullable{$(repr(T))}()") else - @printf(io, "Nullable(%s)", repr(x.value)) + println(io, "Nullable($(repr(x.value)))") end end diff --git a/base/string.jl b/base/string.jl index 300b7d83b5cae..34169fd29b1e4 100644 --- a/base/string.jl +++ b/base/string.jl @@ -1487,7 +1487,7 @@ parseint{T<:Integer}(::Type{T}, c::Char, base::Integer) = convert(T,parseint(c,b parseint{T<:Integer}(::Type{T}, c::Char) = convert(T,parseint(c)) function parseint_next(s::AbstractString, i::Int=start(s)) - done(s,i) && throw(ArgumentError("premature end of integer: $(repr(s))")) + done(s,i) && (return Char(0), 0, 0) j = i c, i = next(s,i) c, i, j @@ -1495,9 +1495,12 @@ end function parseint_preamble(signed::Bool, s::AbstractString, base::Int) c, i, j = parseint_next(s) + while isspace(c) c, i, j = parseint_next(s,i) end + (j == 0) && (return 0, 0, 0) + sgn = 1 if signed if c == '-' || c == '+' @@ -1505,9 +1508,12 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int) c, i, j = parseint_next(s,i) end end + while isspace(c) c, i, j = parseint_next(s,i) end + (j == 0) && (return 0, 0, 0) + if base == 0 if c == '0' && !done(s,i) c, i = next(s,i) @@ -1522,23 +1528,40 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int) return sgn, base, j end -function 
parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int) +safe_add{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 > (typemax(T) - n2)) : (n1 < (typemin(T) - n2))) ? Nullable{T}() : Nullable{T}(n1 + n2) +safe_mul{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? ((n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))) : + (n2 < -1) ? ((n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))) : + ((n2 == -1) && n1 == typemin(T))) ? Nullable{T}() : Nullable{T}(n1 * n2) + +function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int, raise::Bool) + _n = Nullable{T}() sgn, base, i = parseint_preamble(T<:Signed,s,base) + if i == 0 + raise && throw(ArgumentError("premature end of integer: $(repr(s))")) + return _n + end c, i = parseint_next(s,i) + if i == 0 + raise && throw(ArgumentError("premature end of integer: $(repr(s))")) + return _n + end + base = convert(T,base) - ## FIXME: remove 128-bit specific code once 128-bit div doesn't rely on BigInt - m::T = T===UInt128 || T===Int128 ? typemax(T) : div(typemax(T)-base+1,base) + m::T = div(typemax(T)-base+1,base) n::T = 0 while n <= m d::T = '0' <= c <= '9' ? c-'0' : 'A' <= c <= 'Z' ? c-'A'+10 : 'a' <= c <= 'z' ? c-'a'+a : base - d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))")) + if d >= base + raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))")) + return _n + end n *= base n += d if done(s,i) n *= sgn - return n + return Nullable{T}(n) end c, i = next(s,i) isspace(c) && break @@ -1546,29 +1569,46 @@ function parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a (T <: Signed) && (n *= sgn) while !isspace(c) d::T = '0' <= c <= '9' ? c-'0' : - 'A' <= c <= 'Z' ? c-'A'+10 : - 'a' <= c <= 'z' ? c-'a'+a : base - d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))")) + 'A' <= c <= 'Z' ? c-'A'+10 : + 'a' <= c <= 'z' ? 
c-'a'+a : base + if d >= base + raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))")) + return _n + end (T <: Signed) && (d *= sgn) - n = checked_mul(n,base) - n = checked_add(n,d) - done(s,i) && return n + + safe_n = safe_mul(n, base) + isnull(safe_n) || (safe_n = safe_add(get(safe_n), d)) + if isnull(safe_n) + raise && throw(OverflowError()) + return _n + end + n = get(safe_n) + done(s,i) && return Nullable{T}(n) c, i = next(s,i) end while !done(s,i) c, i = next(s,i) - isspace(c) || throw(ArgumentError("extra characters after whitespace in $(repr(s))")) + if !isspace(c) + raise && throw(ArgumentError("extra characters after whitespace in $(repr(s))")) + return _n + end end - return n + return Nullable{T}(n) end -parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int) = - parseint_nocheck(T, s, base, base <= 36 ? 10 : 36) +tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool) = + tryparse_internal(T, s, base, base <= 36 ? 10 : 36, raise) +tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Int) = + 2 <= base <= 62 ? tryparse_internal(T,s,Int(base),false) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base")) +tryparse{T<:Integer}(::Type{T}, s::AbstractString) = tryparse_internal(T,s,0,false) -parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer) = - 2 <= base <= 62 ? 
parseint_nocheck(T,s,Int(base)) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base")) -parseint{T<:Integer}(::Type{T}, s::AbstractString) = parseint_nocheck(T,s,0) +function parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer) + (2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base")) + get(tryparse_internal(T, s, base, true)) +end +parseint{T<:Integer}(::Type{T}, s::AbstractString) = get(tryparse_internal(T, s, 0, true)) parseint(s::AbstractString, base::Integer) = parseint(Int,s,base) -parseint(s::AbstractString) = parseint_nocheck(Int,s,0) +parseint(s::AbstractString) = parseint(Int,s) ## stringifying integers more efficiently ## @@ -1576,40 +1616,33 @@ string(x::Union(Int8,Int16,Int32,Int64,Int128)) = dec(x) ## string to float functions ## -float64_isvalid(s::AbstractString, out::Array{Float64,1}) = - ccall(:jl_strtod, Int32, (Ptr{UInt8},Ptr{Float64}), s, out) == 0 -float32_isvalid(s::AbstractString, out::Array{Float32,1}) = - ccall(:jl_strtof, Int32, (Ptr{UInt8},Ptr{Float32}), s, out) == 0 - -float64_isvalid(s::SubString, out::Array{Float64,1}) = - ccall(:jl_substrtod, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float64}), s.string, s.offset, s.endof, out) == 0 -float32_isvalid(s::SubString, out::Array{Float32,1}) = - ccall(:jl_substrtof, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float32}), s.string, s.offset, s.endof, out) == 0 - -begin - local tmp::Array{Float64,1} = Array(Float64,1) - local tmpf::Array{Float32,1} = Array(Float32,1) - global parsefloat - function parsefloat(::Type{Float64}, s::AbstractString) - if !float64_isvalid(s, tmp) - throw(ArgumentError("parsefloat(Float64,::AbstractString): invalid number format $(repr(s))")) - end - return tmp[1] - end +tryparse(::Type{Float64}, s::AbstractString) = ccall(:jl_try_strtod, Nullable{Float64}, (Ptr{UInt8},), s) +tryparse(::Type{Float64}, s::SubString) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint), s.string, 
s.offset, s.endof) - function parsefloat(::Type{Float32}, s::AbstractString) - if !float32_isvalid(s, tmpf) - throw(ArgumentError("parsefloat(Float32,::AbstractString): invalid number format $(repr(s))")) - end - return tmpf[1] - end +tryparse(::Type{Float32}, s::AbstractString) = ccall(:jl_try_strtof, Nullable{Float32}, (Ptr{UInt8},), s) +tryparse(::Type{Float32}, s::SubString) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof) + +function parse{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString) + nf = tryparse(T, s) + isnull(nf) ? throw(ArgumentError("invalid number format $(repr(s)) for $T")) : get(nf) end -float(x::AbstractString) = parsefloat(x) -parsefloat(x::AbstractString) = parsefloat(Float64,x) +parsefloat{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString) = parse(T,s) + +float(x::AbstractString) = parse(Float64,x) +parsefloat(x::AbstractString) = parse(Float64,x) float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(float(0))), a) +function float_isvalid{T<:Union(Float32,Float64)}(s::AbstractString, out::Array{T,1}) + tf = tryparse(T, s) + isnull(tf) || (out[1] = get(tf)) + !isnull(tf) +end + +float32_isvalid(s::AbstractString, out::Array{Float32,1}) = float_isvalid(s, out) +float64_isvalid(s::AbstractString, out::Array{Float64,1}) = float_isvalid(s, out) + # find the index of the first occurrence of a value in a byte array typealias ByteArray Union(Array{UInt8,1},Array{Int8,1}) diff --git a/base/sysimg.jl b/base/sysimg.jl index 1f4e2ec77fad2..b176389c7ed76 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -107,6 +107,8 @@ include("env.jl") include("path.jl") include("intfuncs.jl") +# nullable types +include("nullable.jl") # I/O include("task.jl") @@ -176,6 +178,27 @@ big(n::Integer) = convert(BigInt,n) big(x::FloatingPoint) = convert(BigFloat,x) big(q::Rational) = big(num(q))//big(den(q)) +const _fact_table128 = + UInt128[0x00000000000000000000000000000001, 
0x00000000000000000000000000000002, + 0x00000000000000000000000000000006, 0x00000000000000000000000000000018, + 0x00000000000000000000000000000078, 0x000000000000000000000000000002d0, + 0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80, + 0x00000000000000000000000000058980, 0x00000000000000000000000000375f00, + 0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00, + 0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800, + 0x00000000000000000000013077775800, 0x00000000000000000000130777758000, + 0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000, + 0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000, + 0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000, + 0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000, + 0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000, + 0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000, + 0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000, + 0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000, + 0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000] +factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33) +factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34) + # more hashing definitions include("hashing2.jl") @@ -188,9 +211,6 @@ importall .Random include("printf.jl") importall .Printf -# nullable types -include("nullable.jl") - # concurrency and parallelism include("serialize.jl") include("multi.jl") diff --git a/src/builtins.c b/src/builtins.c index 31cdc992a8daa..eaa8659c2308c 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -733,13 +733,34 @@ DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a) // printing ------------------------------------------------------------------- -DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out) +int substr_isspace(char *p, char *pend) { + 
while (p != pend) { + if (!isspace((unsigned char)*p)) { + return 0; + } + p++; + } + return 1; +} + +int str_isspace(char *p) { + while (*p != '\0') { + if (!isspace((unsigned char)*p)) { + return 0; + } + p++; + } + return 1; +} + +DLLEXPORT jl_nullable_float64_t jl_try_substrtod(char *str, size_t offset, int len) { char *p; - errno = 0; char *bstr = str+offset; char *pend = bstr+len; int err = 0; + + errno = 0; if (!(*pend == '\0' || isspace((unsigned char)*pend) || *pend == ',')) { // confusing data outside substring. must copy. char *newstr = (char*)malloc(len+1); @@ -748,38 +769,65 @@ DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out) bstr = newstr; pend = bstr+len; } - *out = strtod_c(bstr, &p); - if (p == bstr || - (errno==ERANGE && (*out==0 || *out==HUGE_VAL || *out==-HUGE_VAL))) + double out = strtod_c(bstr, &p); + + if (errno==ERANGE && (out==0 || out==HUGE_VAL || out==-HUGE_VAL)) { err = 1; - // Deal with case where the substring might be something like "1 ", - // which is OK, and "1 X", which we don't allow. - while (p != pend) { - if (!isspace((unsigned char)*p)) { - err = 1; - break; - } - p++; } + else if (p == bstr) { + err = 1; + } + else { + // Deal with case where the substring might be something like "1 ", + // which is OK, and "1 X", which we don't allow. + err = substr_isspace(p, pend) ? 
0 : 1; + } + if (bstr != str+offset) free(bstr); - return err; + + return (jl_nullable_float64_t){(uint8_t)err, out}; } -DLLEXPORT int jl_strtod(char *str, double *out) +DLLEXPORT jl_nullable_float64_t jl_try_strtod(char *str) { char *p; + int err = 0; + errno = 0; - *out = strtod_c(str, &p); - if (p == str || - (errno==ERANGE && (*out==0 || *out==HUGE_VAL || *out==-HUGE_VAL))) - return 1; - while (*p != '\0') { - if (!isspace((unsigned char)*p)) - return 1; - p++; + double out = strtod_c(str, &p); + + if (errno==ERANGE && (out==0 || out==HUGE_VAL || out==-HUGE_VAL)) { + err = 1; + } + else if (p == str) { + err = 1; + } + else { + err = str_isspace(p) ? 0 : 1; } - return 0; + + return (jl_nullable_float64_t){(uint8_t)err, out}; +} + +DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out) +{ + jl_nullable_float64_t nd = jl_try_substrtod(str, offset, len); + if(0 == nd.isnull) { + *out = nd.value; + return 0; + } + return 1; +} + +DLLEXPORT int jl_strtod(char *str, double *out) +{ + jl_nullable_float64_t nd = jl_try_strtod(str); + if(0 == nd.isnull) { + *out = nd.value; + return 0; + } + return 1; } // MSVC pre-2013 did not define HUGE_VALF @@ -787,13 +835,14 @@ DLLEXPORT int jl_strtod(char *str, double *out) #define HUGE_VALF (1e25f * 1e25f) #endif -DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out) +DLLEXPORT jl_nullable_float32_t jl_try_substrtof(char *str, size_t offset, int len) { char *p; - errno = 0; char *bstr = str+offset; char *pend = bstr+len; int err = 0; + + errno = 0; if (!(*pend == '\0' || isspace((unsigned char)*pend) || *pend == ',')) { // confusing data outside substring. must copy. 
char *newstr = (char*)malloc(len+1); @@ -803,46 +852,71 @@ DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out) pend = bstr+len; } #if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_) - *out = (float)strtod_c(bstr, &p); + float out = (float)strtod_c(bstr, &p); #else - *out = strtof_c(bstr, &p); + float out = strtof_c(bstr, &p); #endif - if (p == bstr || - (errno==ERANGE && (*out==0 || *out==HUGE_VALF || *out==-HUGE_VALF))) + if (errno==ERANGE && (out==0 || out==HUGE_VALF || out==-HUGE_VALF)) { err = 1; - // Deal with case where the substring might be something like "1 ", - // which is OK, and "1 X", which we don't allow. - while (p != pend) { - if (!isspace((unsigned char)*p)) { - err = 1; - break; - } - p++; } + else if (p == bstr) { + err = 1; + } + else { + // Deal with case where the substring might be something like "1 ", + // which is OK, and "1 X", which we don't allow. + err = substr_isspace(p, pend) ? 0 : 1; + } + if (bstr != str+offset) free(bstr); - return err; + + return (jl_nullable_float32_t){(uint8_t)err, out}; } -DLLEXPORT int jl_strtof(char *str, float *out) +DLLEXPORT jl_nullable_float32_t jl_try_strtof(char *str) { char *p; + int err = 0; + errno = 0; #if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_) - *out = (float)strtod_c(str, &p); + float out = (float)strtod_c(str, &p); #else - *out = strtof_c(str, &p); + float out = strtof_c(str, &p); #endif - if (p == str || - (errno==ERANGE && (*out==0 || *out==HUGE_VALF || *out==-HUGE_VALF))) - return 1; - while (*p != '\0') { - if (!isspace((unsigned char)*p)) - return 1; - p++; + if (errno==ERANGE && (out==0 || out==HUGE_VALF || out==-HUGE_VALF)) { + err = 1; + } + else if (p == str) { + err = 1; } - return 0; + else { + err = str_isspace(p) ? 
0 : 1; + } + + return (jl_nullable_float32_t){(uint8_t)err, out}; +} + +DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out) +{ + jl_nullable_float32_t nf = jl_try_substrtof(str, offset, len); + if(0 == nf.isnull) { + *out = nf.value; + return 0; + } + return 1; +} + +DLLEXPORT int jl_strtof(char *str, float *out) +{ + jl_nullable_float32_t nf = jl_try_strtof(str); + if(0 == nf.isnull) { + *out = nf.value; + return 0; + } + return 1; } // showing -------------------------------------------------------------------- diff --git a/src/julia.h b/src/julia.h index 618ba84b121d3..c2f42fe4cd404 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1494,6 +1494,17 @@ DLLEXPORT extern int jl_ver_patch(void); DLLEXPORT extern int jl_ver_is_release(void); DLLEXPORT extern const char* jl_ver_string(void); +// nullable struct representations +typedef struct { + uint8_t isnull; + double value; +} jl_nullable_float64_t; + +typedef struct { + uint8_t isnull; + float value; +} jl_nullable_float32_t; + #ifdef __cplusplus } #endif diff --git a/test/strings.jl b/test/strings.jl index 5dbaabef9ba2d..12caa0909aa2b 100644 --- a/test/strings.jl +++ b/test/strings.jl @@ -1402,3 +1402,25 @@ gstr = Base.GenericString("12"); # issue #10307 @test typeof(map(Int16,String[])) == Vector{Int16} + +for T in [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128] + for i in [typemax(T), typemin(T)] + s = "$i" + @test get(tryparse(T, s)) == i + end +end + +for T in [Int8, Int16, Int32, Int64, Int128] + for i in [typemax(T), typemin(T)] + f = "$(i)0" + @test isnull(tryparse(T, f)) + end +end + +@test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890) +@test isnull(tryparse(BigInt, "1234567890-")) + +@test get(tryparse(Float64, "64")) == 64.0 +@test isnull(tryparse(Float64, "64o")) +@test get(tryparse(Float32, "32")) == 32.0f0 +@test isnull(tryparse(Float32, "32o"))