Skip to content

Commit

Permalink
add keep option to readuntil; change default to exclude delimiter
Browse files Browse the repository at this point in the history
  • Loading branch information
JeffBezanson committed Jan 22, 2018
1 parent 51eea57 commit 7d3c538
Show file tree
Hide file tree
Showing 15 changed files with 131 additions and 101 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ Breaking changes

This section lists changes that do not have deprecation warnings.

* `readuntil` now does *not* include the delimiter in its result, matching the
behavior of `readline`. Pass `keep=true` to get the old behavior ([#25633]).

* `getindex(s::String, r::UnitRange{Int})` now throws `UnicodeError` if `last(r)`
is not a valid index into `s` ([#22572]).

Expand Down
79 changes: 46 additions & 33 deletions base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,10 @@ flush(io::AbstractPipe) = flush(pipe_writer(io))
read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io), byte)
unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io), p, nb)
read(io::AbstractPipe) = read(pipe_reader(io))
readuntil(io::AbstractPipe, arg::UInt8) = readuntil(pipe_reader(io), arg)
readuntil(io::AbstractPipe, arg::Char) = readuntil(pipe_reader(io), arg)
readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::Char; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil_indexable(io::AbstractPipe, target#=::Indexable{T}=#, out) = readuntil_indexable(pipe_reader(io), target, out)

readavailable(io::AbstractPipe) = readavailable(pipe_reader(io))
Expand Down Expand Up @@ -297,10 +299,12 @@ function read! end
read!(filename::AbstractString, a) = open(io->read!(io, a), filename)

"""
readuntil(stream::IO, delim)
readuntil(filename::AbstractString, delim)
readuntil(stream::IO, delim; keep::Bool = false)
readuntil(filename::AbstractString, delim; keep::Bool = false)
Read a string from an I/O stream or a file, up to and including the given delimiter byte.
Read a string from an I/O stream or a file, up to the given delimiter.
The delimiter can be a `UInt8`, `Char`, string, or vector.
Keyword argument `keep` controls whether the delimiter is included in the result.
The text is assumed to be encoded in UTF-8.
# Examples
Expand All @@ -319,7 +323,7 @@ julia> readuntil("my_file.txt", '.')
julia> rm("my_file.txt")
```
"""
readuntil(filename::AbstractString, args...) = open(io->readuntil(io, args...), filename)
readuntil(filename::AbstractString, args...; kw...) = open(io->readuntil(io, args...; kw...), filename)

"""
readline(io::IO=STDIN; keep::Bool=false)
Expand Down Expand Up @@ -363,7 +367,7 @@ function readline(s::IO=STDIN; chomp=nothing, keep::Bool=false)
keep = !chomp
depwarn("The `chomp=$chomp` argument to `readline` is deprecated in favor of `keep=$keep`.", :readline)
end
line = readuntil(s, 0x0a)
line = readuntil(s, 0x0a, keep=true)
i = length(line)
if keep || i == 0 || line[i] != 0x0a
return String(line)
Expand Down Expand Up @@ -626,41 +630,44 @@ end

# readuntil_string is useful below since it has
# an optimized method for s::IOStream
readuntil_string(s::IO, delim::UInt8) = String(readuntil(s, delim))
readuntil_string(s::IO, delim::UInt8, keep::Bool) = String(readuntil(s, delim, keep=keep))

function readuntil(s::IO, delim::Char)
function readuntil(s::IO, delim::Char; keep::Bool=false)
if delim <= '\x7f'
return readuntil_string(s, delim % UInt8)
return readuntil_string(s, delim % UInt8, keep)
end
out = IOBuffer()
while !eof(s)
c = read(s, Char)
write(out, c)
if c == delim
keep && write(out, c)
break
end
write(out, c)
end
return String(take!(out))
end

function readuntil(s::IO, delim::T) where T
function readuntil(s::IO, delim::T; keep::Bool=false) where T
out = (T === UInt8 ? StringVector(0) : Vector{T}())
while !eof(s)
c = read(s, T)
push!(out, c)
if c == delim
keep && push!(out, c)
break
end
push!(out, c)
end
return out
end

# requires that indices for target are small ordered integers bounded by start and endof
# returns whether the delimiter was matched
function readuntil_indexable(io::IO, target#=::Indexable{T}=#, out)
T = eltype(target)
first = start(target)
if done(target, first)
return
return true
end
len = endof(target)
local cache # will be lazy initialized when needed
Expand Down Expand Up @@ -701,39 +708,45 @@ function readuntil_indexable(io::IO, target#=::Indexable{T}=#, out)
pos = cache[pos] + first
end
end
done(target, pos) && break
done(target, pos) && return true
end
return false
end

function readuntil(io::IO, target::AbstractString)
function readuntil(io::IO, target::AbstractString; keep::Bool=false)
# small-string target optimizations
i = start(target)
done(target, i) && return ""
c, i = next(target, start(target))
if done(target, i) && c <= '\x7f'
return readuntil_string(io, c % UInt8)
return readuntil_string(io, c % UInt8, keep)
end
# decide how we can index target
if target isa String
# convert String to a utf8-byte-iterator
# convert String to a utf8-byte-iterator
if target isa String || target isa SubString{String}
target = codeunits(target)
#elseif applicable(codeunit, target)
# TODO: a more general version of above optimization
# would be to permit accessing any string via codeunit
# target = CodeUnitVector(target)
elseif !(target isa SubString{String})
# type with unknown indexing behavior: convert to array
target = collect(target)
end
out = (eltype(target) === UInt8 ? StringVector(0) : IOBuffer())
readuntil_indexable(io, target, out)
out = isa(out, IO) ? take!(out) : out
else
target = codeunits(String(target))
end
out = StringVector(0)
found = readuntil_indexable(io, target, out)
if !keep && found
lo, lt = length(out), length(target)
if lt <= lo
resize!(out, lo - lt)
end
end
return String(out)
end

function readuntil(io::IO, target::AbstractVector{T}) where T
function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T
out = (T === UInt8 ? StringVector(0) : Vector{T}())
readuntil_indexable(io, target, out)
found = readuntil_indexable(io, target, out)
if !keep && found
lo, lt = length(out), length(target)
if lt <= lo
resize!(out, lo - lt)
end
end
return out
end

Expand Down
26 changes: 15 additions & 11 deletions base/iobuffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -444,26 +444,30 @@ function findfirst(delim::EqualTo{UInt8}, buf::GenericIOBuffer)
return nothing
end

function readuntil(io::GenericIOBuffer, delim::UInt8)
function readuntil(io::GenericIOBuffer, delim::UInt8; keep::Bool=false)
lb = 70
A = StringVector(lb)
n = 0
nread = 0
nout = 0
data = io.data
for i = io.ptr : io.size
n += 1
if n > lb
lb = n*2
resize!(A, lb)
end
@inbounds b = data[i]
@inbounds A[n] = b
nread += 1
if keep || b != delim
nout += 1
if nout > lb
lb = nout*2
resize!(A, lb)
end
@inbounds A[nout] = b
end
if b == delim
break
end
end
io.ptr += n
if lb != n
resize!(A, n)
io.ptr += nread
if lb != nout
resize!(A, nout)
end
A
end
Expand Down
10 changes: 5 additions & 5 deletions base/iostream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -357,21 +357,21 @@ end
take!(s::IOStream) =
ccall(:jl_take_buffer, Vector{UInt8}, (Ptr{Cvoid},), s.ios)

function readuntil(s::IOStream, delim::UInt8)
ccall(:jl_readuntil, Array{UInt8,1}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 0, 0)
function readuntil(s::IOStream, delim::UInt8; keep::Bool=false)
ccall(:jl_readuntil, Array{UInt8,1}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 0, !keep)
end

# like readuntil, above, but returns a String without requiring a copy
function readuntil_string(s::IOStream, delim::UInt8)
ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 1, false)
function readuntil_string(s::IOStream, delim::UInt8, keep::Bool)
ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 1, !keep)
end

function readline(s::IOStream; chomp=nothing, keep::Bool=false)
if chomp !== nothing
keep = !chomp
depwarn("The `chomp=$chomp` argument to `readline` is deprecated in favor of `keep=$keep`.", :readline)
end
ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, !keep)
ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 0 : 2)
end

function readbytes_all!(s::IOStream, b::Array{UInt8}, nb)
Expand Down
2 changes: 1 addition & 1 deletion base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1051,7 +1051,7 @@ function create_expr_cache(input::String, output::String, concrete_deps::typeof(
rm(output, force=true) # Remove file if it exists
code_object = """
while !eof(STDIN)
code = chop(readuntil(STDIN, '\\0'))
code = readuntil(STDIN, '\\0')
eval(Main, Meta.parse(code))
end
"""
Expand Down
4 changes: 2 additions & 2 deletions base/pkg/entry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -635,8 +635,8 @@ function build!(pkgs::Vector, errs::Dict, seen::Set=Set())
mktemp() do errfile, f
build!(pkgs, seen, errfile)
while !eof(f)
pkg = chop(readuntil(f, '\0'))
err = chop(readuntil(f, '\0'))
pkg = readuntil(f, '\0')
err = readuntil(f, '\0')
errs[pkg] = err
end
end
Expand Down
4 changes: 2 additions & 2 deletions base/stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -840,11 +840,11 @@ function readavailable(this::LibuvStream)
return take!(buf)
end

function readuntil(this::LibuvStream, c::UInt8)
function readuntil(this::LibuvStream, c::UInt8; keep::Bool=false)
wait_readbyte(this, c)
buf = this.buffer
@assert buf.seekable == false
return readuntil(buf, c)
return readuntil(buf, c, keep=keep)
end

uv_write(s::LibuvStream, p::Vector{UInt8}) = uv_write(s, pointer(p), UInt(sizeof(p)))
Expand Down
21 changes: 13 additions & 8 deletions src/sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,24 +252,29 @@ JL_DLLEXPORT jl_array_t *jl_take_buffer(ios_t *s)
return a;
}

// str: if 1 return a string, otherwise return a Vector{UInt8}
// chomp:
// 0 - keep delimiter
// 1 - remove 1 byte delimiter
// 2 - remove the delimiter and, if present, a preceding '\r' (i.e. strip "\n" or "\r\n")
JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint8_t chomp)
{
jl_array_t *a;
// manually inlined common case
char *pd = (char*)memchr(s->buf + s->bpos, delim, (size_t)(s->size - s->bpos));
if (pd) {
size_t n = pd - (s->buf + s->bpos) + 1;
size_t nchomp = 0;
if (chomp) {
nchomp = chomp == 2 ? ios_nchomp(s, n) : 1;
}
if (str) {
size_t nchomp = 0;
if (chomp) {
nchomp = ios_nchomp(s, n);
}
jl_value_t *str = jl_pchar_to_string(s->buf + s->bpos, n - nchomp);
s->bpos += n;
return str;
}
a = jl_alloc_array_1d(jl_array_uint8_type, n);
memcpy(jl_array_data(a), s->buf + s->bpos, n);
a = jl_alloc_array_1d(jl_array_uint8_type, n - nchomp);
memcpy(jl_array_data(a), s->buf + s->bpos, n - nchomp);
s->bpos += n;
}
else {
Expand All @@ -278,9 +283,9 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint
ios_mem(&dest, 0);
ios_setbuf(&dest, (char*)a->data, 80, 0);
size_t n = ios_copyuntil(&dest, s, delim);
if (chomp && n > 0 && dest.buf[n - 1] == '\n') {
if (chomp && n > 0 && dest.buf[n - 1] == delim) {
n--;
if (n > 0 && dest.buf[n - 1] == '\r') {
if (chomp == 2 && n > 0 && dest.buf[n - 1] == '\r') {
n--;
}
int truncret = ios_trunc(&dest, n); // it should always be possible to truncate dest
Expand Down
6 changes: 2 additions & 4 deletions stdlib/REPL/src/LineEdit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1816,8 +1816,7 @@ function setup_search_keymap(hp)
# Bracketed paste mode
"\e[200~" => (s,data,c)-> begin
ps = state(s, mode(s))
str = readuntil(ps.terminal, "\e[201~")
input = str[1:prevind(str, end-5)]
input = readuntil(ps.terminal, "\e[201~", keep=false)
edit_insert(data.query_buffer, input); update_display_buffer(s, data)
end,
"*" => (s,data,c)->(edit_insert(data.query_buffer, c); update_display_buffer(s, data))
Expand Down Expand Up @@ -1881,8 +1880,7 @@ end

function bracketed_paste(s; tabwidth=options(s).tabwidth)
ps = state(s, mode(s))
str = readuntil(ps.terminal, "\e[201~")
input = str[1:prevind(str, end-5)]
input = readuntil(ps.terminal, "\e[201~")
input = replace(input, '\r' => '\n')
if position(buffer(s)) == 0
indent = Base.indentation(input; tabwidth=tabwidth)[1]
Expand Down
Loading

0 comments on commit 7d3c538

Please sign in to comment.