forked from JuliaLang/julia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.jl
323 lines (273 loc) · 10.5 KB
/
parse.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
# This file is a part of Julia. License is MIT: https://julialang.org/license
import Base.Checked: add_with_overflow, mul_with_overflow
## string to integer functions ##
"""
    parse(type, str, [base])

Parse a string as a number. If the type is an integer type, then a base can be specified
(the default is 10). If the type is a floating point type, the string is parsed as a decimal
floating point number. If the string does not contain a valid number, an error is raised.

When no base is given for an integer type, a leading `0b`, `0o` or `0x` prefix selects
base 2, 8 or 16 respectively.

```jldoctest
julia> parse(Int, "1234")
1234

julia> parse(Int, "1234", 5)
194

julia> parse(Int, "afc", 16)
2812

julia> parse(Float64, "1.2e-3")
0.0012
```
"""
parse(T::Type, str, base=Int)
# Convert a single character `c` to the digit value it denotes in `base` (default 36) and
# return it as a `T`.
#
# For bases up to 36 letters are case-insensitive ('a'/'A' == 10). For larger bases the
# upper-case letters denote 10..35 and the lower-case letters denote 36..61.
#
# Throws `ArgumentError` when `base` is outside 2..62, when `c` is not an ASCII letter or
# digit, or when the digit value is not smaller than `base`.
function parse(::Type{T}, c::Char, base::Integer=36) where T<:Integer
    if !(2 <= base <= 62)
        throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    end
    # Value assigned to 'a'; depends on whether the base distinguishes letter case.
    lowercase_origin = base <= 36 ? 10 : 36
    digit = if '0' <= c <= '9'
        c - '0'
    elseif 'A' <= c <= 'Z'
        c - 'A' + 10
    elseif 'a' <= c <= 'z'
        c - 'a' + lowercase_origin
    else
        throw(ArgumentError("invalid digit: $(repr(c))"))
    end
    if !(digit < base)
        throw(ArgumentError("invalid base $base digit $(repr(c))"))
    end
    return convert(T, digit)
end
# Fetch the character at `startpos` of `s`, provided `startpos` lies in `1..endpos`.
#
# Returns `(c, nextpos, pos)`: the character, the index following it and the index it was
# read from. When `startpos` is outside the range, `(Char(0), 0, 0)` is returned instead.
function parseint_next(s::AbstractString, startpos::Int, endpos::Int)
    if !(0 < startpos <= endpos)
        return Char(0), 0, 0
    end
    c, following = next(s, startpos)
    return c, following, startpos
end
# Consume the optional leading whitespace, sign and base prefix of an integer literal.
#
# Arguments:
#   signed   - whether a leading '+' or '-' is accepted
#   base     - requested base; 0 means "detect from a 0b / 0o / 0x prefix, otherwise 10"
#   s        - string being parsed
#   startpos - index of the first character to examine
#   endpos   - index of the last character to examine
#
# Returns `(sgn, base, j)`: the sign (1 or -1), the resolved base and the index of the
# first digit. `(0, 0, 0)` is returned when the range contains nothing except whitespace
# and an optional sign. With base detection, `j == 0` together with a nonzero base means
# that a base prefix was found but no character follows it inside the range.
function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos::Int, endpos::Int)
    c, i, j = parseint_next(s, startpos, endpos)

    # Skip leading whitespace.
    while isspace(c)
        c, i, j = parseint_next(s, i, endpos)
    end
    (j == 0) && (return 0, 0, 0)

    sgn = 1
    if signed
        if c == '-' || c == '+'
            (c == '-') && (sgn = -1)
            c, i, j = parseint_next(s, i, endpos)
        end
    end

    # Whitespace is also tolerated between the sign and the digits.
    while isspace(c)
        c, i, j = parseint_next(s, i, endpos)
    end
    (j == 0) && (return 0, 0, 0)

    if base == 0
        # Only peek at the prefix character when it lies inside the requested range.
        # Checking against the end of the whole string instead would let a character
        # beyond `endpos` be interpreted as a base prefix when parsing a sub-range.
        if c == '0' && i <= endpos
            c, i = next(s, i)
            base = c == 'b' ? 2 : c == 'o' ? 8 : c == 'x' ? 16 : 10
            if base != 10
                # Step past the prefix letter; `j` now points at the first digit.
                c, i, j = parseint_next(s, i, endpos)
            end
        else
            base = 10
        end
    end
    return sgn, base, j
end
# Parse the characters `startpos:endpos` of `s` as an integer of type `T`.
#
# Arguments:
#   s        - string to parse
#   startpos - index of the first character to consider
#   endpos   - index of the last character to consider
#   base_    - numeric base; 0 asks `parseint_preamble` to detect it from the literal
#   raise    - when `true`, failures throw; when `false`, a null `Nullable{T}` is returned
#
# Returns a `Nullable{T}` holding the parsed value, or a null `Nullable{T}` on failure when
# `raise` is `false`.
#
# Throws (only when `raise` is `true`) `ArgumentError` for empty or whitespace-only input,
# an invalid base, a missing digit sequence, an invalid digit or non-whitespace characters
# after trailing whitespace, and `OverflowError` when the value does not fit in `T`.
function tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base_::Integer, raise::Bool) where T<:Integer
# Shared "no result" value returned on every failure path.
_n = Nullable{T}()
# A sign is only accepted for signed target types.
sgn, base, i = parseint_preamble(T<:Signed, Int(base_), s, startpos, endpos)
# (0, 0, 0) is the preamble's marker for "nothing but whitespace / sign".
if sgn == 0 && base == 0 && i == 0
raise && throw(ArgumentError("input string is empty or only contains whitespace"))
return _n
end
if !(2 <= base <= 62)
raise && throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
return _n
end
# A base prefix with nothing after it leaves the digit index at 0.
if i == 0
raise && throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
return _n
end
# Read the first digit; `i` becomes the index of the character after it.
c, i = parseint_next(s,i,endpos)
if i == 0
raise && throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
return _n
end
base = convert(T,base)
# While n <= m, `n*base + d` with `d < base` cannot exceed typemax(T), so the first loop
# can use plain (unchecked) arithmetic.
m::T = div(typemax(T)-base+1,base)
n::T = 0
# Value of 'a': 10 when letters are case-insensitive (base <= 36), 36 otherwise.
a::Int = base <= 36 ? 10 : 36
# Phase 1: accumulate the magnitude without overflow checks.
while n <= m
# Characters that are not letters or digits map to `base`, which fails the check below.
d::T = '0' <= c <= '9' ? c-'0' :
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
if d >= base
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
return _n
end
n *= base
n += d
# All characters consumed: apply the sign and finish.
if i > endpos
n *= sgn
return Nullable{T}(n)
end
c, i = next(s,i)
# Whitespace terminates the digit sequence; only trailing whitespace may follow.
isspace(c) && break
end
# Phase 2: the value is close to the limit of `T`. The sign is applied to the accumulator
# and to every following digit, and each step is overflow-checked.
# NOTE(review): accumulating signed values presumably allows typemin(T) to be parsed -
# confirm.
(T <: Signed) && (n *= sgn)
while !isspace(c)
d::T = '0' <= c <= '9' ? c-'0' :
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
if d >= base
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
return _n
end
(T <: Signed) && (d *= sgn)
n, ov_mul = mul_with_overflow(n, base)
n, ov_add = add_with_overflow(n, d)
if ov_mul | ov_add
raise && throw(OverflowError("overflow parsing $(repr(SubString(s,startpos,endpos)))"))
return _n
end
(i > endpos) && return Nullable{T}(n)
c, i = next(s,i)
end
# Phase 3: everything after the first whitespace must itself be whitespace.
while i <= endpos
c, i = next(s,i)
if !isspace(c)
raise && throw(ArgumentError("extra characters after whitespace in $(repr(SubString(s,startpos,endpos)))"))
return _n
end
end
return Nullable{T}(n)
end
# Parse the characters `startpos:endpos` of `sbuff` as a `Bool`.
#
# After leading and trailing whitespace is removed, only the exact texts `true` and `false`
# are accepted.
#
# Arguments:
#   sbuff    - string to parse
#   startpos - index of the first character to consider
#   endpos   - index of the last character to consider
#   base     - accepted for signature parity with the integer method; not used here
#   raise    - when `true`, failures throw; when `false`, a null `Nullable{Bool}` is returned
#
# Returns `Nullable(true)` or `Nullable(false)` on success, and a null `Nullable{Bool}` on
# failure when `raise` is `false`.
#
# Throws `ArgumentError` (only when `raise` is `true`) for empty input, whitespace-only
# input, or any other text.
function tryparse_internal(::Type{Bool}, sbuff::Union{String,SubString},
                           startpos::Int, endpos::Int, base::Integer, raise::Bool)
    if isempty(sbuff)
        raise && throw(ArgumentError("input string is empty"))
        return Nullable{Bool}()
    end

    # Remember the untrimmed range so that error messages show the input as given.
    orig_start = startpos
    orig_end   = endpos

    # Ignore leading and trailing whitespace. The range test has to come before the
    # indexing: on whitespace-only input `startpos` walks past the end of the string and
    # indexing first would raise a BoundsError instead of reporting the bad input.
    while startpos <= endpos && isspace(sbuff[startpos])
        startpos = nextind(sbuff, startpos)
    end
    while endpos >= startpos && isspace(sbuff[endpos])
        endpos = prevind(sbuff, endpos)
    end

    # Compare the trimmed bytes directly against the two accepted spellings.
    len = endpos - startpos + 1
    p   = pointer(sbuff) + startpos - 1
    (len == 4) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt),
                              p, "true", 4)) && (return Nullable(true))
    (len == 5) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt),
                              p, "false", 5)) && (return Nullable(false))

    if raise
        substr = SubString(sbuff, orig_start, orig_end) # show input string in the error to avoid confusion
        if all(isspace, substr)
            throw(ArgumentError("input string only contains whitespace"))
        else
            throw(ArgumentError("invalid Bool representation: $(repr(substr))"))
        end
    end
    return Nullable{Bool}()
end
# Return `base` unchanged when it lies in 2..62; otherwise throw an `ArgumentError`.
@inline function check_valid_base(base)
    2 <= base <= 62 ||
        throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    return base
end
"""
    tryparse(type, str, [base])

Attempt to parse `str` as a number of the given `type`. This works like [`parse`](@ref),
except that the result is wrapped in a [`Nullable`](@ref): it is null when the string
does not contain a valid number, instead of an error being raised.

For integer types an explicit `base` outside the range 2 to 62 still throws an
`ArgumentError`.
"""
function tryparse(::Type{T}, s::AbstractString, base::Integer) where {T<:Integer}
    first_index = start(s)
    last_index = endof(s)
    valid_base = check_valid_base(base)
    return tryparse_internal(T, s, first_index, last_index, valid_base, false)
end

function tryparse(::Type{T}, s::AbstractString) where {T<:Integer}
    # Base 0 asks the parser to work the base out from the literal itself.
    first_index = start(s)
    last_index = endof(s)
    return tryparse_internal(T, s, first_index, last_index, 0, false)
end
# Parse all of `s` as an integer of type `T` written in the given `base`.
# Throws when `base` is outside 2..62 or when `s` is not a valid number.
function parse(::Type{T}, s::AbstractString, base::Integer) where T<:Integer
    first_index = start(s)
    last_index = endof(s)
    valid_base = check_valid_base(base)
    parsed = tryparse_internal(T, s, first_index, last_index, valid_base, true)
    return get(parsed)
end
# Parse all of `s` as an integer of type `T`, letting the parser determine the base.
# Throws when `s` is not a valid number.
function parse(::Type{T}, s::AbstractString) where T<:Integer
    first_index = start(s)
    last_index = endof(s)
    # Zero means, "figure it out"
    parsed = tryparse_internal(T, s, first_index, last_index, 0, true)
    return get(parsed)
end
## string to float functions ##
# Float64 from a `String`: hand the whole buffer to the runtime's `jl_try_substrtod`.
# NOTE(review): the three arguments look like (buffer, byte offset, length) - confirm
# against the C signature.
function tryparse(::Type{Float64}, s::String)
    return ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t),
                 s, 0, sizeof(s))
end

# Float64 from a `SubString{String}`: pass the parent string together with the substring's
# `offset` and `endof` fields, without copying.
function tryparse(::Type{Float64}, s::SubString{String})
    return ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t),
                 s.string, s.offset, s.endof)
end

# Float32 counterparts, backed by `jl_try_substrtof`.
function tryparse(::Type{Float32}, s::String)
    return ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t),
                 s, 0, sizeof(s))
end

function tryparse(::Type{Float32}, s::SubString{String})
    return ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t),
                 s.string, s.offset, s.endof)
end

# Any other string type is converted to a `String` first.
function tryparse(::Type{T}, s::AbstractString) where {T<:Union{Float32,Float64}}
    return tryparse(T, String(s))
end

# Float16 is parsed as a Float32 and the (possibly null) result is then converted.
function tryparse(::Type{Float16}, s::AbstractString)
    return convert(Nullable{Float16}, tryparse(Float32, s))
end
# Parse `s` as a floating point number of type `T`.
# Throws `ArgumentError` when `tryparse` reports that `s` is not a valid number.
function parse(::Type{T}, s::AbstractString) where T<:AbstractFloat
    parsed = tryparse(T, s)
    isnull(parsed) && throw(ArgumentError("cannot parse $(repr(s)) as $T"))
    return unsafe_get(parsed)
end
# Interpret a string as a `Float64`.
function float(x::AbstractString)
    return parse(Float64, x)
end

# Convert an array of strings element-wise, writing into a freshly allocated array that
# has the layout of `a` and the element type of `float(0)`.
function float(a::AbstractArray{<:AbstractString})
    dest = similar(a, typeof(float(0)))
    return map!(float, dest, a)
end
## interface to parser ##
"""
    ParseError(msg)

The expression passed to the `parse` function could not be interpreted as a valid Julia
expression.
"""
mutable struct ParseError <: Exception
# Description of the problem; the `parse` methods in this file fill it either with the
# parser's own error message or with a fixed text such as "end of input".
msg::AbstractString
end
"""
    parse(str, start; greedy=true, raise=true)

Parse the expression string `str`, beginning at index `start`, and return a tuple of the
resulting expression (which could later be passed to eval for execution) and the position
at which parsing stopped.

If `greedy` is `true` (default), `parse` will try to consume as much input as it can;
otherwise, it will stop as soon as it has parsed a valid expression. Incomplete but
otherwise syntactically valid expressions will return `Expr(:incomplete, "(error message)")`.
If `raise` is `true` (default), syntax errors other than incomplete expressions will raise
an error. If `raise` is `false`, `parse` will return an expression that will raise an error
upon evaluation.

```jldoctest
julia> parse("x = 3, y = 5", 7)
(:(y = 5), 13)

julia> parse("x = 3, y = 5", 5)
(:((3, y) = 5), 13)
```
"""
function parse(str::AbstractString, pos::Int; greedy::Bool=true, raise::Bool=true)
    # `pos` is a one-based byte offset, whereas the C parser works with zero-based offsets.
    bstr = String(str)
    greedy_flag = greedy ? 1 : 0
    result = ccall(:jl_parse_string, Any,
                   (Ptr{UInt8}, Csize_t, Int32, Int32),
                   bstr, sizeof(bstr), pos-1, greedy_flag)
    ex, stop_offset = result
    if ex === ()
        # The parser hands back `()` when there was nothing to parse.
        raise && throw(ParseError("end of input"))
        ex = Expr(:error, "end of input")
    elseif raise && isa(ex, Expr) && ex.head === :error
        throw(ParseError(ex.args[1]))
    end
    # Translate the zero-based stop offset back to a one-based position.
    return ex, stop_offset+1
end
"""
    parse(str; raise=true)

Greedily parse the whole expression string `str` and return the single expression it
contains. Additional characters after the first expression are an error. If `raise` is
`true` (default), syntax errors will raise an error; otherwise, `parse` will return an
expression that will raise an error upon evaluation.

```jldoctest
julia> parse("x = 3")
:(x = 3)

julia> parse("x = ")
:($(Expr(:incomplete, "incomplete: premature end of input")))

julia> parse("1.0.2")
ERROR: ParseError("invalid numeric constant \\\"1.0.\\\"")
Stacktrace:
[...]

julia> parse("1.0.2"; raise = false)
:($(Expr(:error, "invalid numeric constant \"1.0.\"")))
```
"""
function parse(str::AbstractString; raise::Bool=true)
    ex, pos = parse(str, 1, greedy=true, raise=raise)
    is_error_expr = isa(ex, Expr) && ex.head === :error
    # An error expression is passed through as is; otherwise the input must be used up.
    if is_error_expr || done(str, pos)
        return ex
    end
    raise && throw(ParseError("extra token after end of expression"))
    return Expr(:error, "extra token after end of expression")
end