Make _insert_annotations! optimising

This is a rather important optimisation, since it prevents the annotation blow-up that can result from say writing to an AnnotatedIOBuffer char-by-char. Originally I was just going to pass the AnnotatedString produced when reading the AnnotatedIOBuffer through annotatedstring_optimize!, but now that's been removed, this seems like the best past forwards (it's also actually a better approach than applying annotatedstring_optimize!, just hard to justify when that code already existed).
JuliaLang · tecosaur · Apr 30, 2024 · Apr 28, 2024 · Apr 27, 2024 · Apr 27, 2024
commit 5c6245e2dc4261a1ce0d5c7732ee239496add8be
diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl
@@ -509,9 +509,45 @@ function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int},
 end
 
 function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io))
- for (region, annot) in annotations
- region = first(region)+offset:last(region)+offset
- push!(io.annotations, (region, annot))
+ # The most basic (but correct) approach would be just to push
+ # each of `annotations` to `io.annotations`, adjusting the region by
+ # `offset`. However, there is a specific common case probably worth
+ # optimising, which is when an existing styles are just extended.
+ # To handle this efficiently and conservatively, we look to see if
+ # there's a run at the end of `io.annotations` that matches annotations
+ # at the start of `annotations`. If so, this run of annotations is merged.
+ run = 0
+ if !isempty(io.annotations) && last(first(last(io.annotations))) == offset
+ for i in reverse(axes(annotations, 1))
+ annot = annotations[i]
+ first(first(annot)) == 1 || continue
+ if last(annot) == last(last(io.annotations))
+ valid_run = true
+ for runlen in 1:i
+ new_range, new_annot = annotations[begin+runlen-1]
+ old_range, old_annot = io.annotations[end-i+runlen]
+ if last(old_range) != offset || first(new_range) != 1 || old_annot != new_annot
+ valid_run = false
+ break
+ end
+ end
+ if valid_run
+ run = i
+ break
+ end
+ end
+ end
+ end
+ for runindex in 0:run-1
+ old_index = lastindex(io.annotations) - run + 1 + runindex
+ old_region, annot = io.annotations[old_index]
+ new_region, _ = annotations[begin+runindex]
+ io.annotations[old_index] = (first(old_region):last(new_region)+offset, annot)
+ end
+ for index in run+1:lastindex(annotations)
+ region, annot = annotations[index]
+ start, stop = first(region), last(region)
+ push!(io.annotations, (start+offset:stop+offset, annot))
  end
 end
 

diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl
@@ -191,4 +191,22 @@ end
  @test write(newaio, seek(aio, 5)) == 6
  @test read(seekstart(newaio), String) == "abyss abbie abbie"
  @test sort(Base.annotations(newaio)) == sort(vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)]))
+ # The `_insert_annotations!` cautious-merging optimisation
+ aio = Base.AnnotatedIOBuffer()
+ @test write(aio, Base.AnnotatedChar('a', [:a => 1, :b => 2])) == 1
+ @test Base.annotations(aio) == [(1:1, :a => 1), (1:1, :b => 2)]
+ @test write(aio, Base.AnnotatedChar('b', [:a => 1, :b => 2])) == 1
+ @test Base.annotations(aio) == [(1:2, :a => 1), (1:2, :b => 2)]
+ let aio2 = copy(aio) # A different start makes merging too risky to do.
+ @test write(aio2, Base.AnnotatedChar('c', [:a => 0, :b => 2])) == 1
+ @test Base.annotations(aio2) == [(1:2, :a => 1), (1:2, :b => 2), (3:3, :a => 0), (3:3, :b => 2)]
+ end
+ let aio2 = copy(aio) # Merging some run of the most recent annotations is fine though.
+ @test write(aio2, Base.AnnotatedChar('c', [:b => 2])) == 1
+ @test Base.annotations(aio2) == [(1:2, :a => 1), (1:3, :b => 2)]
+ end
+ let aio2 = copy(aio) # ...and any subsequent annotations after a matching run can just be copied over.
+ @test write(aio2, Base.AnnotatedChar('c', [:b => 2, :c => 3, :d => 4])) == 1
+ @test Base.annotations(aio2) == [(1:2, :a => 1), (1:3, :b => 2), (3:3, :c => 3), (3:3, :d => 4)]
+ end
 end