Skip to content

Commit

Permalink
improve string effects (JuliaLang#48996)
Browse files Browse the repository at this point in the history
  • Loading branch information
oscardssmith committed Mar 23, 2023
1 parent 20b8139 commit 56950e2
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 26 deletions.
2 changes: 1 addition & 1 deletion base/hashing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ end
const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed
const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81

function hash(s::String, h::UInt)
@assume_effects :total function hash(s::String, h::UInt)
h += memhash_seed
ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), s, sizeof(s), h % UInt32) + h
end
38 changes: 22 additions & 16 deletions base/strings/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,28 +113,28 @@ pointer(s::String, i::Integer) = pointer(s) + Int(i)::Int - 1
ncodeunits(s::String) = Core.sizeof(s)
codeunit(s::String) = UInt8

@inline function codeunit(s::String, i::Integer)
@assume_effects :foldable @inline function codeunit(s::String, i::Integer)
@boundscheck checkbounds(s, i)
b = GC.@preserve s unsafe_load(pointer(s, i))
return b
end

## comparison ##

_memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}, len) =
@assume_effects :total _memcmp(a::String, b::String) = @invoke _memcmp(a::Union{Ptr{UInt8},AbstractString},b::Union{Ptr{UInt8},AbstractString})

_memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}) = _memcmp(a, b, min(sizeof(a), sizeof(b)))
function _memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}, len::Int)
ccall(:memcmp, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), a, b, len % Csize_t) % Int
end

function cmp(a::String, b::String)
al, bl = sizeof(a), sizeof(b)
c = _memcmp(a, b, min(al,bl))
c = _memcmp(a, b)
return c < 0 ? -1 : c > 0 ? +1 : cmp(al,bl)
end

function ==(a::String, b::String)
pointer_from_objref(a) == pointer_from_objref(b) && return true
al = sizeof(a)
return al == sizeof(b) && 0 == _memcmp(a, b, al)
end
==(a::String, b::String) = a===b

typemin(::Type{String}) = ""
typemin(::String) = typemin(String)
Expand Down Expand Up @@ -284,23 +284,25 @@ getindex(s::String, r::AbstractUnitRange{<:Integer}) = s[Int(first(r)):Int(last(
return ss
end

length(s::String) = length_continued(s, 1, ncodeunits(s), ncodeunits(s))
# nothrow because we know the start and end indices are valid
@assume_effects :nothrow length(s::String) = length_continued(s, 1, ncodeunits(s), ncodeunits(s))

@inline function length(s::String, i::Int, j::Int)
# effects needed because @inbounds
@assume_effects :consistent :effect_free @inline function length(s::String, i::Int, j::Int)
@boundscheck begin
0 < i ncodeunits(s)+1 || throw(BoundsError(s, i))
0 j < ncodeunits(s)+1 || throw(BoundsError(s, j))
end
j < i && return 0
@inbounds i, k = thisind(s, i), i
c = j - i + (i == k)
length_continued(s, i, j, c)
@inbounds length_continued(s, i, j, c)
end

@inline function length_continued(s::String, i::Int, n::Int, c::Int)
@assume_effects :terminates_locally @inline @propagate_inbounds function length_continued(s::String, i::Int, n::Int, c::Int)
i < n || return c
@inbounds b = codeunit(s, i)
@inbounds while true
b = codeunit(s, i)
while true
while true
(i += 1) n || return c
0xc0 b 0xf7 && break
Expand Down Expand Up @@ -328,6 +330,10 @@ isvalid(s::String, i::Int) = checkbounds(Bool, s, i) && thisind(s, i) == i

isascii(s::String) = isascii(codeunits(s))

# don't assume effects for general integers since we cannot know their implementation
@assume_effects :foldable function repeat(c::Char, r::BitInteger)
@invoke repeat(c, r::Integer)
end
"""
repeat(c::AbstractChar, r::Integer) -> String
Expand All @@ -340,8 +346,8 @@ julia> repeat('A', 3)
"AAA"
```
"""
repeat(c::AbstractChar, r::Integer) = repeat(Char(c), r) # fallback
function repeat(c::Char, r::Integer)
function repeat(c::AbstractChar, r::Integer)
c = Char(c)
r == 0 && return ""
r < 0 && throw(ArgumentError("can't repeat a character $r times"))
u = bswap(reinterpret(UInt32, c))
Expand Down
34 changes: 26 additions & 8 deletions base/strings/substring.jl
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,12 @@ thisind(s::SubString{String}, i::Int) = _thisind_str(s, i)
nextind(s::SubString{String}, i::Int) = _nextind_str(s, i)

function ==(a::Union{String, SubString{String}}, b::Union{String, SubString{String}})
s = sizeof(a)
s == sizeof(b) && 0 == _memcmp(a, b, s)
sizeof(a) == sizeof(b) && _memcmp(a, b) == 0
end

function cmp(a::SubString{String}, b::SubString{String})
na = sizeof(a)
nb = sizeof(b)
c = _memcmp(a, b, min(na, nb))
return c < 0 ? -1 : c > 0 ? +1 : cmp(na, nb)
c = _memcmp(a, b)
return c < 0 ? -1 : c > 0 ? +1 : cmp(sizeof(a), sizeof(b))
end

# don't make unnecessary copies when passing substrings to C functions
Expand Down Expand Up @@ -209,19 +206,34 @@ end
return n
end

@inline function __unsafe_string!(out, s::Union{String, SubString{String}}, offs::Integer)
@assume_effects :nothrow @inline function __unsafe_string!(out, s::String, offs::Integer)
n = sizeof(s)
GC.@preserve s out unsafe_copyto!(pointer(out, offs), pointer(s), n)
return n
end

@inline function __unsafe_string!(out, s::Symbol, offs::Integer)
@inline function __unsafe_string!(out, s::SubString{String}, offs::Integer)
n = sizeof(s)
GC.@preserve s out unsafe_copyto!(pointer(out, offs), pointer(s), n)
return n
end

@assume_effects :nothrow @inline function __unsafe_string!(out, s::Symbol, offs::Integer)
n = sizeof(s)
GC.@preserve s out unsafe_copyto!(pointer(out, offs), unsafe_convert(Ptr{UInt8},s), n)
return n
end

# nothrow needed here because for v in a can't prove the indexing is inbounds.
@assume_effects :foldable :nothrow function string(a::Union{Char, String, Symbol}...)
_string(a...)
end

function string(a::Union{Char, String, SubString{String}, Symbol}...)
_string(a...)
end

function _string(a::Union{Char, String, SubString{String}, Symbol}...)
n = 0
for v in a
# 4 types is too many for automatic Union-splitting, so we split manually
Expand Down Expand Up @@ -250,6 +262,12 @@ function string(a::Union{Char, String, SubString{String}, Symbol}...)
return out
end

# don't assume effects for general integers since we cannot know their implementation
# not nothrow because r<0 throws
@assume_effects :foldable function repeat(s::String, r::BitInteger)
@invoke repeat(s, r::Integer)
end

function repeat(s::Union{String, SubString{String}}, r::Integer)
r < 0 && throw(ArgumentError("can't repeat a string $r times"))
r == 0 && return ""
Expand Down
3 changes: 2 additions & 1 deletion test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1384,7 +1384,8 @@ end

# sanity check `@allocations` returns what we expect in some very simple cases
@test (@allocations "a") == 0
@test (@allocations "a" * "b") == 1
@test (@allocations "a" * "b") == 0 # constant propagation
@test (@allocations "a" * Base.inferencebarrier("b")) == 1
end

@testset "in_finalizer" begin
Expand Down
39 changes: 39 additions & 0 deletions test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1192,3 +1192,42 @@ end
return a
end |> Core.Compiler.is_foldable
end

@testset "String Effects" begin
for (f, Ts) in [(*, (String, String)),
(*, (Char, String)),
(*, (Char, Char)),
(string, (Symbol, String, Char)),
(==, (String, String)),
(cmp, (String, String)),
(==, (Symbol, Symbol)),
(cmp, (Symbol, Symbol)),
(String, (Symbol,)),
(length, (String,)),
(hash, (String,UInt)),
(hash, (Char,UInt)),]
e = Base.infer_effects(f, Ts)
@test Core.Compiler.is_foldable(e) || (f, Ts)
@test Core.Compiler.is_removable_if_unused(e) || (f, Ts)
end
for (f, Ts) in [(^, (String, Int)),
(^, (Char, Int)),
(codeunit, (String, Int)),
]
e = Base.infer_effects(f, Ts)
@test Core.Compiler.is_foldable(e) || (f, Ts)
@test !Core.Compiler.is_removable_if_unused(e) || (f, Ts)
end
# Substrings don't have any nice effects because the compiler can
# invent fake indices leading to out of bounds
for (f, Ts) in [(^, (SubString{String}, Int)),
(string, (String, SubString{String})),
(string, (Symbol, SubString{String})),
(hash, (SubString{String},UInt)),
]
e = Base.infer_effects(f, Ts)
@test !Core.Compiler.is_foldable(e) || (f, Ts)
@test !Core.Compiler.is_removable_if_unused(e) || (f, Ts)
end
@test_throws ArgumentError Symbol("a\0a")
end

0 comments on commit 56950e2

Please sign in to comment.