Skip to content

Commit

Permalink
do lazy reallocation after take!(iobuffer) (#48676)
Browse files Browse the repository at this point in the history
closes #27741. closes #48651
  • Loading branch information
stevengj committed Feb 17, 2023
1 parent 8e3e970 commit 12d329b
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 23 deletions.
60 changes: 47 additions & 13 deletions base/iobuffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Stateful string
mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
data::T # T should support: getindex, setindex!, length, copyto!, and resize!
reinit::Bool # if true, data needs to be re-allocated (after take!)
readable::Bool
writable::Bool
seekable::Bool # if not seekable, implementation is free to destroy (compact) past read data
Expand All @@ -17,7 +18,7 @@ mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
function GenericIOBuffer{T}(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool,
maxsize::Integer) where T<:AbstractVector{UInt8}
require_one_based_indexing(data)
new(data,readable,writable,seekable,append,length(data),maxsize,1,-1)
new(data,false,readable,writable,seekable,append,length(data),maxsize,1,-1)
end
end
const IOBuffer = GenericIOBuffer{Vector{UInt8}}
Expand Down Expand Up @@ -137,8 +138,12 @@ PipeBuffer(data::Vector{UInt8}=UInt8[]; maxsize::Int = typemax(Int)) =
GenericIOBuffer(data,true,true,false,true,maxsize)
PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringVector(maxsize), maxsize = maxsize); x.size=0; x)

_similar_data(b::GenericIOBuffer, len::Int) = similar(b.data, len)
_similar_data(b::IOBuffer, len::Int) = StringVector(len)

function copy(b::GenericIOBuffer)
ret = typeof(b)(b.writable ? copy(b.data) : b.data,
ret = typeof(b)(b.reinit ? _similar_data(b, 0) : b.writable ?
copyto!(_similar_data(b, length(b.data)), b.data) : b.data,
b.readable, b.writable, b.seekable, b.append, b.maxsize)
ret.size = b.size
ret.ptr = b.ptr
Expand Down Expand Up @@ -270,7 +275,10 @@ function truncate(io::GenericIOBuffer, n::Integer)
io.seekable || throw(ArgumentError("truncate failed, IOBuffer is not seekable"))
n < 0 && throw(ArgumentError("truncate failed, n bytes must be ≥ 0, got $n"))
n > io.maxsize && throw(ArgumentError("truncate failed, $(n) bytes is exceeds IOBuffer maxsize $(io.maxsize)"))
if n > length(io.data)
if io.reinit
io.data = _similar_data(io, n)
io.reinit = false
elseif n > length(io.data)
resize!(io.data, n)
end
io.data[io.size+1:n] .= 0
Expand Down Expand Up @@ -325,9 +333,14 @@ end
ensureroom_slowpath(io, nshort)
end
n = min((nshort % Int) + (io.append ? io.size : io.ptr-1), io.maxsize)
l = length(io.data)
if n > l
_growend!(io.data, (n - l) % UInt)
if io.reinit
io.data = _similar_data(io, n)
io.reinit = false
else
l = length(io.data)
if n > l
_growend!(io.data, (n - l) % UInt)
end
end
return io
end
Expand Down Expand Up @@ -390,18 +403,26 @@ end
function take!(io::IOBuffer)
ismarked(io) && unmark(io)
if io.seekable
data = io.data
if io.writable
maxsize = (io.maxsize == typemax(Int) ? 0 : min(length(io.data),io.maxsize))
io.data = StringVector(maxsize)
if io.reinit
data = StringVector(0)
else
data = resize!(io.data, io.size)
io.reinit = true
end
else
data = copy(data)
data = copyto!(StringVector(io.size), 1, io.data, 1, io.size)
end
resize!(data,io.size)
else
nbytes = bytesavailable(io)
a = StringVector(nbytes)
data = read!(io, a)
if io.writable
data = io.data
io.reinit = true
_deletebeg!(data, io.ptr-1)
resize!(data, nbytes)
else
data = read!(io, StringVector(nbytes))
end
end
if io.writable
io.ptr = 1
Expand All @@ -410,6 +431,19 @@ function take!(io::IOBuffer)
return data
end

"""
_unsafe_take!(io::IOBuffer)
This simply returns the raw resized `io.data`, with no checks to be
sure that `io` is readable etcetera, and leaves `io` in an inconsistent
state. This should only be used internally for performance-critical
`String` routines that immediately discard `io` afterwards, and it
*assumes* that `io` is writable and seekable.
It saves no allocations compared to `take!`, it just omits some checks.
"""
_unsafe_take!(io::IOBuffer) = resize!(io.data, io.size)

function write(to::IO, from::GenericIOBuffer)
if to === from
from.ptr = from.size + 1
Expand Down
2 changes: 1 addition & 1 deletion base/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ function filter(f, s::AbstractString)
for c in s
f(c) && write(out, c)
end
String(take!(out))
String(_unsafe_take!(out))
end

## string first and last ##
Expand Down
6 changes: 3 additions & 3 deletions base/strings/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ function sprint(f::Function, args...; context=nothing, sizehint::Integer=0)
else
f(s, args...)
end
String(resize!(s.data, s.size))
String(_unsafe_take!(s))
end

function _str_sizehint(x)
Expand Down Expand Up @@ -147,7 +147,7 @@ function print_to_string(xs...)
for x in xs
print(s, x)
end
String(resize!(s.data, s.size))
String(_unsafe_take!(s))
end

function string_with_env(env, xs...)
Expand All @@ -164,7 +164,7 @@ function string_with_env(env, xs...)
for x in xs
print(env_io, x)
end
String(resize!(s.data, s.size))
String(_unsafe_take!(s))
end

"""
Expand Down
7 changes: 4 additions & 3 deletions stdlib/REPL/src/LineEdit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -761,10 +761,11 @@ function edit_splice!(s::BufferLike, r::Region=region(s), ins::String = ""; rigi
elseif buf.mark >= B
buf.mark += sizeof(ins) - B + A
end
ensureroom(buf, B) # handle !buf.reinit from take!
ret = splice!(buf.data, A+1:B, codeunits(String(ins))) # position(), etc, are 0-indexed
buf.size = buf.size + sizeof(ins) - B + A
adjust_pos && seek(buf, position(buf) + sizeof(ins))
return String(ret)
return String(copy(ret))
end

edit_splice!(s::MIState, ins::AbstractString) = edit_splice!(s, region(s), ins)
Expand Down Expand Up @@ -1281,7 +1282,7 @@ end
# compute the number of spaces from b till the next non-space on the right
# (which can also be "end of line" or "end of buffer")
function leadingspaces(buf::IOBuffer, b::Int)
ls = something(findnext(_notspace, buf.data, b+1), 0)-1
@views ls = something(findnext(_notspace, buf.data[1:buf.size], b+1), 0)-1
ls == -1 && (ls = buf.size)
ls -= b
return ls
Expand Down Expand Up @@ -2238,7 +2239,7 @@ end

function move_line_end(buf::IOBuffer)
eof(buf) && return
pos = findnext(isequal(UInt8('\n')), buf.data, position(buf)+1)
@views pos = findnext(isequal(UInt8('\n')), buf.data[1:buf.size], position(buf)+1)
if pos === nothing
move_input_end(buf)
return
Expand Down
6 changes: 3 additions & 3 deletions stdlib/REPL/test/lineedit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -306,21 +306,21 @@ seek(buf,0)

## edit_delete_prev_word ##

buf = IOBuffer("type X\n ")
buf = IOBuffer(Vector{UInt8}("type X\n "), read=true, write=true)
seekend(buf)
@test !isempty(@inferred(LineEdit.edit_delete_prev_word(buf)))
@test position(buf) == 5
@test buf.size == 5
@test content(buf) == "type "

buf = IOBuffer("4 +aaa+ x")
buf = IOBuffer(Vector{UInt8}("4 +aaa+ x"), read=true, write=true)
seek(buf,8)
@test !isempty(LineEdit.edit_delete_prev_word(buf))
@test position(buf) == 3
@test buf.size == 4
@test content(buf) == "4 +x"

buf = IOBuffer("x = func(arg1,arg2 , arg3)")
buf = IOBuffer(Vector{UInt8}("x = func(arg1,arg2 , arg3)"), read=true, write=true)
seekend(buf)
LineEdit.char_move_word_left(buf)
@test position(buf) == 21
Expand Down

2 comments on commit 12d329b

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your package evaluation job has completed - possible new issues were detected.
A full report can be found here.

Please sign in to comment.