Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

exclude delimiter by default in readuntil; rename chomp option to keep #25646

Merged
merged 2 commits into from
Jan 22, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ Breaking changes

This section lists changes that do not have deprecation warnings.

* `readuntil` now does *not* include the delimiter in its result, matching the
behavior of `readline`. Pass `keep=true` to get the old behavior ([#25633]).

* `getindex(s::String, r::UnitRange{Int})` now throws `UnicodeError` if `last(r)`
is not a valid index into `s` ([#22572]).

Expand Down
2 changes: 1 addition & 1 deletion base/client.jl
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ parse_input_line(s::AbstractString) = parse_input_line(String(s))
function parse_input_line(io::IO)
s = ""
while !eof(io)
s *= readline(io, chomp=false)
s *= readline(io, keep=true)
e = parse_input_line(s)
if !(isa(e,Expr) && e.head === :incomplete)
return e
Expand Down
2 changes: 1 addition & 1 deletion base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ end

@deprecate read(cmd::AbstractCmd, stdin::Redirectable) read(pipeline(stdin, cmd))
@deprecate readstring(cmd::AbstractCmd, stdin::Redirectable) readstring(pipeline(stdin, cmd))
@deprecate eachline(cmd::AbstractCmd, stdin; chomp::Bool=true) eachline(pipeline(stdin, cmd), chomp=chomp)
@deprecate eachline(cmd::AbstractCmd, stdin; kw...) eachline(pipeline(stdin, cmd), kw...)

@deprecate showall(x) show(x)
@deprecate showall(io, x) show(IOContext(io, :limit => false), x)
Expand Down
151 changes: 91 additions & 60 deletions base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,10 @@ flush(io::AbstractPipe) = flush(pipe_writer(io))
read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io), byte)
unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io), p, nb)
read(io::AbstractPipe) = read(pipe_reader(io))
readuntil(io::AbstractPipe, arg::UInt8) = readuntil(pipe_reader(io), arg)
readuntil(io::AbstractPipe, arg::Char) = readuntil(pipe_reader(io), arg)
readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::Char; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil_indexable(io::AbstractPipe, target#=::Indexable{T}=#, out) = readuntil_indexable(pipe_reader(io), target, out)

readavailable(io::AbstractPipe) = readavailable(pipe_reader(io))
Expand Down Expand Up @@ -297,10 +299,12 @@ function read! end
read!(filename::AbstractString, a) = open(io->read!(io, a), filename)

"""
readuntil(stream::IO, delim)
readuntil(filename::AbstractString, delim)
readuntil(stream::IO, delim; keep::Bool = false)
readuntil(filename::AbstractString, delim; keep::Bool = false)

Read a string from an I/O stream or a file, up to and including the given delimiter byte.
Read a string from an I/O stream or a file, up to the given delimiter.
The delimiter can be a `UInt8`, `Char`, string, or vector.
Keyword argument `keep` controls whether the delimiter is included in the result.
The text is assumed to be encoded in UTF-8.

# Examples
Expand All @@ -319,17 +323,17 @@ julia> readuntil("my_file.txt", '.')
julia> rm("my_file.txt")
```
"""
readuntil(filename::AbstractString, args...) = open(io->readuntil(io, args...), filename)
readuntil(filename::AbstractString, args...; kw...) = open(io->readuntil(io, args...; kw...), filename)

"""
readline(io::IO=STDIN; chomp::Bool=true)
readline(filename::AbstractString; chomp::Bool=true)
readline(io::IO=STDIN; keep::Bool=false)
readline(filename::AbstractString; keep::Bool=false)

Read a single line of text from the given I/O stream or file (defaults to `STDIN`).
When reading from a file, the text is assumed to be encoded in UTF-8. Lines in the
input end with `'\\n'` or `"\\r\\n"` or the end of an input stream. When `chomp` is
true (as it is by default), these trailing newline characters are removed from the
line before it is returned. When `chomp` is false, they are returned as part of the
input end with `'\\n'` or `"\\r\\n"` or the end of an input stream. When `keep` is
false (as it is by default), these trailing newline characters are removed from the
line before it is returned. When `keep` is true, they are returned as part of the
line.

# Examples
Expand All @@ -342,22 +346,30 @@ julia> open("my_file.txt", "w") do io
julia> readline("my_file.txt")
"JuliaLang is a GitHub organization."

julia> readline("my_file.txt", chomp=false)
julia> readline("my_file.txt", keep=true)
"JuliaLang is a GitHub organization.\\n"

julia> rm("my_file.txt")
```
"""
function readline(filename::AbstractString; chomp::Bool=true)
function readline(filename::AbstractString; chomp=nothing, keep::Bool=false)
if chomp !== nothing
keep = !chomp
depwarn("The `chomp=$chomp` argument to `readline` is deprecated in favor of `keep=$keep`.", :readline)
end
open(filename) do f
readline(f, chomp=chomp)
readline(f, keep=keep)
end
end

function readline(s::IO=STDIN; chomp::Bool=true)
line = readuntil(s, 0x0a)
function readline(s::IO=STDIN; chomp=nothing, keep::Bool=false)
if chomp !== nothing
keep = !chomp
depwarn("The `chomp=$chomp` argument to `readline` is deprecated in favor of `keep=$keep`.", :readline)
end
line = readuntil(s, 0x0a, keep=true)
i = length(line)
if !chomp || i == 0 || line[i] != 0x0a
if keep || i == 0 || line[i] != 0x0a
return String(line)
elseif i < 2 || line[i-1] != 0x0d
return String(resize!(line,i-1))
Expand All @@ -367,8 +379,8 @@ function readline(s::IO=STDIN; chomp::Bool=true)
end

"""
readlines(io::IO=STDIN; chomp::Bool=true)
readlines(filename::AbstractString; chomp::Bool=true)
readlines(io::IO=STDIN; keep::Bool=false)
readlines(filename::AbstractString; keep::Bool=false)

Read all lines of an I/O stream or a file as a vector of strings. Behavior is
equivalent to saving the result of reading [`readline`](@ref) repeatedly with the same
Expand All @@ -386,20 +398,20 @@ julia> readlines("my_file.txt")
"JuliaLang is a GitHub organization."
"It has many members."

julia> readlines("my_file.txt", chomp=false)
julia> readlines("my_file.txt", keep=true)
2-element Array{String,1}:
"JuliaLang is a GitHub organization.\\n"
"It has many members.\\n"

julia> rm("my_file.txt")
```
"""
function readlines(filename::AbstractString; chomp::Bool=true)
function readlines(filename::AbstractString; kw...)
open(filename) do f
readlines(f, chomp=chomp)
readlines(f; kw...)
end
end
readlines(s=STDIN; chomp::Bool=true) = collect(eachline(s, chomp=chomp))
readlines(s=STDIN; kw...) = collect(eachline(s; kw...))

## byte-order mark, ntoh & hton ##

Expand Down Expand Up @@ -618,41 +630,44 @@ end

# readuntil_string is useful below since it has
# an optimized method for s::IOStream
readuntil_string(s::IO, delim::UInt8) = String(readuntil(s, delim))
readuntil_string(s::IO, delim::UInt8, keep::Bool) = String(readuntil(s, delim, keep=keep))

function readuntil(s::IO, delim::Char)
function readuntil(s::IO, delim::Char; keep::Bool=false)
if delim ≤ '\x7f'
return readuntil_string(s, delim % UInt8)
return readuntil_string(s, delim % UInt8, keep)
end
out = IOBuffer()
while !eof(s)
c = read(s, Char)
write(out, c)
if c == delim
keep && write(out, c)
break
end
write(out, c)
end
return String(take!(out))
end

function readuntil(s::IO, delim::T) where T
function readuntil(s::IO, delim::T; keep::Bool=false) where T
out = (T === UInt8 ? StringVector(0) : Vector{T}())
while !eof(s)
c = read(s, T)
push!(out, c)
if c == delim
keep && push!(out, c)
break
end
push!(out, c)
end
return out
end

# requires that indices for target are small ordered integers bounded by start and endof
# returns whether the delimiter was matched
function readuntil_indexable(io::IO, target#=::Indexable{T}=#, out)
T = eltype(target)
first = start(target)
if done(target, first)
return
return true
end
len = endof(target)
local cache # will be lazy initialized when needed
Expand Down Expand Up @@ -693,39 +708,45 @@ function readuntil_indexable(io::IO, target#=::Indexable{T}=#, out)
pos = cache[pos] + first
end
end
done(target, pos) && break
done(target, pos) && return true
end
return false
end

function readuntil(io::IO, target::AbstractString)
function readuntil(io::IO, target::AbstractString; keep::Bool=false)
# small-string target optimizations
i = start(target)
done(target, i) && return ""
c, i = next(target, start(target))
if done(target, i) && c <= '\x7f'
return readuntil_string(io, c % UInt8)
return readuntil_string(io, c % UInt8, keep)
end
# decide how we can index target
if target isa String
# convert String to a utf8-byte-iterator
# convert String to a utf8-byte-iterator
if target isa String || target isa SubString{String}
target = codeunits(target)
#elseif applicable(codeunit, target)
# TODO: a more general version of above optimization
# would be to permit accessing any string via codeunit
# target = CodeUnitVector(target)
elseif !(target isa SubString{String})
# type with unknown indexing behavior: convert to array
target = collect(target)
end
out = (eltype(target) === UInt8 ? StringVector(0) : IOBuffer())
readuntil_indexable(io, target, out)
out = isa(out, IO) ? take!(out) : out
else
target = codeunits(String(target))
end
out = StringVector(0)
found = readuntil_indexable(io, target, out)
if !keep && found
lo, lt = length(out), length(target)
if lt <= lo
resize!(out, lo - lt)
end
end
return String(out)
end

function readuntil(io::IO, target::AbstractVector{T}) where T
function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T
out = (T === UInt8 ? StringVector(0) : Vector{T}())
readuntil_indexable(io, target, out)
found = readuntil_indexable(io, target, out)
if !keep && found
lo, lt = length(out), length(target)
if lt <= lo
resize!(out, lo - lt)
end
end
return out
end

Expand Down Expand Up @@ -797,20 +818,20 @@ read(s::IO, T::Type) = error("The IO stream does not support reading objects of
mutable struct EachLine
stream::IO
ondone::Function
chomp::Bool
keep::Bool

EachLine(stream::IO=STDIN; ondone::Function=()->nothing, chomp::Bool=true) =
new(stream, ondone, chomp)
EachLine(stream::IO=STDIN; ondone::Function=()->nothing, keep::Bool=false) =
new(stream, ondone, keep)
end

"""
eachline(io::IO=STDIN; chomp::Bool=true)
eachline(filename::AbstractString; chomp::Bool=true)
eachline(io::IO=STDIN; keep::Bool=false)
eachline(filename::AbstractString; keep::Bool=false)

Create an iterable `EachLine` object that will yield each line from an I/O stream
or a file. Iteration calls [`readline`](@ref) on the stream argument repeatedly with
`chomp` passed through, determining whether trailing end-of-line characters are
removed. When called with a file name, the file is opened once at the beginning of
`keep` passed through, determining whether trailing end-of-line characters are
retained. When called with a file name, the file is opened once at the beginning of
iteration and closed at the end. If iteration is interrupted, the file will be
closed when the `EachLine` object is garbage collected.

Expand All @@ -828,11 +849,21 @@ JuliaLang is a GitHub organization. It has many members.
julia> rm("my_file.txt");
```
"""
eachline(stream::IO=STDIN; chomp::Bool=true) = EachLine(stream, chomp=chomp)::EachLine
function eachline(stream::IO=STDIN; chomp=nothing, keep::Bool=false)
if chomp !== nothing
keep = !chomp
depwarn("The `chomp=$chomp` argument to `eachline` is deprecated in favor of `keep=$keep`.", :eachline)
end
EachLine(stream, keep=keep)::EachLine
end

function eachline(filename::AbstractString; chomp::Bool=true)
function eachline(filename::AbstractString; chomp=nothing, keep::Bool=false)
if chomp !== nothing
keep = !chomp
depwarn("The `chomp=$chomp` argument to `eachline` is deprecated in favor of `keep=$keep`.", :eachline)
end
s = open(filename)
EachLine(s, ondone=()->close(s), chomp=chomp)::EachLine
EachLine(s, ondone=()->close(s), keep=keep)::EachLine
end

start(itr::EachLine) = nothing
Expand All @@ -841,7 +872,7 @@ function done(itr::EachLine, ::Nothing)
itr.ondone()
true
end
next(itr::EachLine, ::Nothing) = (readline(itr.stream, chomp=itr.chomp), nothing)
next(itr::EachLine, ::Nothing) = (readline(itr.stream, keep=itr.keep), nothing)

eltype(::Type{EachLine}) = String

Expand Down
26 changes: 15 additions & 11 deletions base/iobuffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -444,26 +444,30 @@ function findfirst(delim::EqualTo{UInt8}, buf::GenericIOBuffer)
return nothing
end

function readuntil(io::GenericIOBuffer, delim::UInt8)
function readuntil(io::GenericIOBuffer, delim::UInt8; keep::Bool=false)
lb = 70
A = StringVector(lb)
n = 0
nread = 0
nout = 0
data = io.data
for i = io.ptr : io.size
n += 1
if n > lb
lb = n*2
resize!(A, lb)
end
@inbounds b = data[i]
@inbounds A[n] = b
nread += 1
if keep || b != delim
nout += 1
if nout > lb
lb = nout*2
resize!(A, lb)
end
@inbounds A[nout] = b
end
if b == delim
break
end
end
io.ptr += n
if lb != n
resize!(A, n)
io.ptr += nread
if lb != nout
resize!(A, nout)
end
A
end
Expand Down
Loading