Reland "Generalize broadcast!(f, ::BitVector) optimization to `BitArr…
Browse files Browse the repository at this point in the history
…ay`." (JuliaLang#52736) (JuliaLang#52776)

Reland "Generalize broadcast!(f, ::BitVector) optimization to `BitArray`." (JuliaLang#52736)
N5N3 committed Jan 7, 2024
2 parents ea085ea + dd7f1f8 commit 8fe383f
Showing 2 changed files with 63 additions and 55 deletions.
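For context, the relanded optimization extends the fast bit-packing `copyto!` path from `BitVector` destinations to all `BitArray` destinations. A minimal sketch of the user-visible pattern it speeds up (the sizes here are illustrative, not part of the commit):

```julia
# Broadcasted assignment into a BitMatrix; with this commit the results
# are packed 64 at a time into the destination's UInt64 chunks instead
# of being staged through a Vector{Bool} cache.
A = rand(1000, 1000)
dest = falses(1000, 1000)   # BitMatrix, i.e. a 2-d BitArray
dest .= A .> 0.5            # hits the generalized copyto! fast path
```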
89 changes: 37 additions & 52 deletions base/broadcast.jl
@@ -571,15 +571,15 @@ an `Int`.
 Any remaining indices in `I` beyond the length of the `keep` tuple are truncated. The `keep` and `default`
 tuples may be created by `newindexer(argument)`.
 """
-Base.@propagate_inbounds newindex(arg, I::CartesianIndex) = CartesianIndex(_newindex(axes(arg), I.I))
-Base.@propagate_inbounds newindex(arg, I::Integer) = CartesianIndex(_newindex(axes(arg), (I,)))
+Base.@propagate_inbounds newindex(arg, I::CartesianIndex) = to_index(_newindex(axes(arg), I.I))
+Base.@propagate_inbounds newindex(arg, I::Integer) = to_index(_newindex(axes(arg), (I,)))
 Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple) = (ifelse(length(ax[1]) == 1, ax[1][1], I[1]), _newindex(tail(ax), tail(I))...)
 Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple) = ()
 Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple{}) = (ax[1][1], _newindex(tail(ax), ())...)
 Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple{}) = ()
 
 # If dot-broadcasting were already defined, this would be `ifelse.(keep, I, Idefault)`.
-@inline newindex(I::CartesianIndex, keep, Idefault) = CartesianIndex(_newindex(I.I, keep, Idefault))
+@inline newindex(I::CartesianIndex, keep, Idefault) = to_index(_newindex(I.I, keep, Idefault))
 @inline newindex(i::Integer, keep::Tuple, idefault) = ifelse(keep[1], i, idefault[1])
 @inline newindex(i::Integer, keep::Tuple{}, idefault) = CartesianIndex(())
 @inline _newindex(I, keep, Idefault) =
@@ -599,18 +599,14 @@ Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple{}) = ()
     (Base.length(ind1)::Integer != 1, keep...), (first(ind1), Idefault...)
 end
 
-@inline function Base.getindex(bc::Broadcasted, I::Union{Integer,CartesianIndex})
+@inline function Base.getindex(bc::Broadcasted, Is::Vararg{Union{Integer,CartesianIndex},N}) where {N}
+    I = to_index(Base.IteratorsMD.flatten(Is))
     @boundscheck checkbounds(bc, I)
     @inbounds _broadcast_getindex(bc, I)
 end
-Base.@propagate_inbounds Base.getindex(
-    bc::Broadcasted,
-    i1::Union{Integer,CartesianIndex},
-    i2::Union{Integer,CartesianIndex},
-    I::Union{Integer,CartesianIndex}...,
-) =
-    bc[CartesianIndex((i1, i2, I...))]
-Base.@propagate_inbounds Base.getindex(bc::Broadcasted) = bc[CartesianIndex(())]
+to_index(::Tuple{}) = CartesianIndex()
+to_index(Is::Tuple{Any}) = Is[1]
+to_index(Is::Tuple) = CartesianIndex(Is)
 
 @inline Base.checkbounds(bc::Broadcasted, I::Union{Integer,CartesianIndex}) =
     Base.checkbounds_indices(Bool, axes(bc), (I,)) || Base.throw_boundserror(bc, (I,))
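Reading note (not part of the diff): the new `to_index` helper collapses an index tuple to its leanest form, which is why `newindex` above now returns a plain `Int` in the one-dimensional case rather than a 1-d `CartesianIndex`, and why the test expectations below change from `ci((2,))` to `2`. A standalone restatement for experimentation, with the helper renamed so it does not clash with `Base.to_index`:

```julia
# Same dispatch pattern as the (un-exported) helper added in this hunk.
bcast_to_index(::Tuple{}) = CartesianIndex()      # no indices -> 0-d index
bcast_to_index(Is::Tuple{Any}) = Is[1]            # one index  -> returned bare
bcast_to_index(Is::Tuple) = CartesianIndex(Is)    # several    -> CartesianIndex

@assert bcast_to_index((3,)) === 3
@assert bcast_to_index((1, 2)) === CartesianIndex(1, 2)
@assert bcast_to_index(()) === CartesianIndex()
```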
@@ -974,53 +970,42 @@ preprocess(dest, x) = extrude(broadcast_unalias(dest, x))
     return dest
 end
 
-# Performance optimization: for BitVector outputs, we cache the result
-# in a 64-bit register before writing into memory (to bypass LSQ)
-@inline function copyto!(dest::BitVector, bc::Broadcasted{Nothing})
-    axes(dest) == axes(bc) || throwdm(axes(dest), axes(bc))
-    ischunkedbroadcast(dest, bc) && return chunkedcopyto!(dest, bc)
-    destc = dest.chunks
-    bcp = preprocess(dest, bc)
-    length(bcp) <= 0 && return dest
-    len = Base.num_bit_chunks(Int(length(bcp)))
-    @inbounds for i = 0:(len - 2)
-        z = UInt64(0)
-        for j = 0:63
-            z |= UInt64(bcp[i*64 + j + 1]::Bool) << (j & 63)
-        end
-        destc[i + 1] = z
-    end
-    @inbounds let i = len - 1
-        z = UInt64(0)
-        for j = 0:((length(bcp) - 1) & 63)
-            z |= UInt64(bcp[i*64 + j + 1]::Bool) << (j & 63)
-        end
-        destc[i + 1] = z
-    end
-    return dest
-end
-
 # Performance optimization: for BitArray outputs, we cache the result
-# in a "small" Vector{Bool}, and then copy in chunks into the output
+# in a 64-bit register before writing into memory (to bypass LSQ)
 @inline function copyto!(dest::BitArray, bc::Broadcasted{Nothing})
     axes(dest) == axes(bc) || throwdm(axes(dest), axes(bc))
     ischunkedbroadcast(dest, bc) && return chunkedcopyto!(dest, bc)
-    length(dest) < 256 && return invoke(copyto!, Tuple{AbstractArray, Broadcasted{Nothing}}, dest, bc)
-    tmp = Vector{Bool}(undef, bitcache_size)
-    destc = dest.chunks
-    cind = 1
+    ndims(dest) == 0 && (dest[] = bc[]; return dest)
     bc′ = preprocess(dest, bc)
-    @inbounds for P in Iterators.partition(eachindex(bc′), bitcache_size)
-        ind = 1
-        @simd for I in P
-            tmp[ind] = bc′[I]
-            ind += 1
+    ax = axes(bc′)
+    ax1, out = ax[1], CartesianIndices(tail(ax))
+    destc, indc = dest.chunks, 0
+    bitst, remain = 0, UInt64(0)
+    for I in out
+        i = first(ax1) - 1
+        if ndims(bc) == 1 || bitst >= 64 - length(ax1)
+            if ndims(bc) > 1 && bitst != 0
+                @inbounds @simd for j = bitst:63
+                    remain |= UInt64(convert(Bool, bc′[i+=1, I])) << (j & 63)
+                end
+                @inbounds destc[indc+=1] = remain
+                bitst, remain = 0, UInt64(0)
+            end
+            while i <= last(ax1) - 64
+                z = UInt64(0)
+                @inbounds @simd for j = 0:63
+                    z |= UInt64(convert(Bool, bc′[i+=1, I])) << (j & 63)
+                end
+                @inbounds destc[indc+=1] = z
+            end
         end
-        @simd for i in ind:bitcache_size
-            tmp[i] = false
+        @inbounds @simd for j = i+1:last(ax1)
+            remain |= UInt64(convert(Bool, bc′[j, I])) << (bitst & 63)
+            bitst += 1
        end
-        dumpbitcache(destc, cind, tmp)
-        cind += bitcache_chunks
     end
+    @inbounds if bitst != 0
+        destc[indc+=1] = remain
+    end
     return dest
 end
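Why the rewrite helps (editorial note): the old path staged results through a `Vector{Bool}` cache and `dumpbitcache`; the new path ORs each `Bool` into a `UInt64` register and stores whole 64-bit words into `dest.chunks`, so memory sees one aligned store per 64 elements rather than per-element read-modify-write traffic through the load-store queue (LSQ). A minimal sketch of the packing idea under simplifying assumptions (1-d input, length a multiple of 64; `pack_to_chunks` is a hypothetical name, not an API from the commit):

```julia
# Pack 64 Bools at a time into UInt64 chunks, LSB first, matching the
# layout BitArray uses for its `chunks` field.
function pack_to_chunks(vals::Vector{Bool})
    @assert length(vals) % 64 == 0    # simplifying assumption for the sketch
    chunks = Vector{UInt64}(undef, length(vals) >>> 6)
    i = 0
    for c in eachindex(chunks)
        z = UInt64(0)
        for j in 0:63
            z |= UInt64(vals[i += 1]) << j   # set bit j of the register
        end
        chunks[c] = z                        # one 64-bit store per 64 inputs
    end
    return chunks
end

v = rand(Bool, 256)
@assert pack_to_chunks(v) == BitVector(v).chunks
```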
29 changes: 26 additions & 3 deletions test/broadcast.jl
@@ -49,9 +49,9 @@ ci(x) = CartesianIndex(x)
     @test @inferred(newindex(ci((2,2)), (true, false), (-1,-1))) == ci((2,-1))
     @test @inferred(newindex(ci((2,2)), (false, true), (-1,-1))) == ci((-1,2))
     @test @inferred(newindex(ci((2,2)), (false, false), (-1,-1))) == ci((-1,-1))
-    @test @inferred(newindex(ci((2,2)), (true,), (-1,-1))) == ci((2,))
-    @test @inferred(newindex(ci((2,2)), (true,), (-1,))) == ci((2,))
-    @test @inferred(newindex(ci((2,2)), (false,), (-1,))) == ci((-1,))
+    @test @inferred(newindex(ci((2,2)), (true,), (-1,-1))) == 2
+    @test @inferred(newindex(ci((2,2)), (true,), (-1,))) == 2
+    @test @inferred(newindex(ci((2,2)), (false,), (-1,))) == -1
     @test @inferred(newindex(ci((2,2)), (), ())) == ci(())
 
 end
@@ -592,6 +592,16 @@ end
     end
 end
 
+@testset "convert behavior of logical broadcast" begin
+    a = mod.(1:4, 2)
+    @test !isa(a, BitArray)
+    for T in (Array{Bool}, BitArray)
+        la = T(a)
+        la .= mod.(0:3, 2)
+        @test la == [false; true; false; true]
+    end
+end
+
 # Test that broadcast treats type arguments as scalars, i.e. containertype yields Any,
 # even for subtypes of abstract array. (https://github.com/JuliaStats/DataArrays.jl/issues/229)
 @testset "treat type arguments as scalars, DataArrays issue 229" begin
@@ -1165,3 +1175,16 @@ import Base.Broadcast: BroadcastStyle, DefaultArrayStyle
 
 f51129(v, x) = (1 .- (v ./ x) .^ 2)
 @test @inferred(f51129([13.0], 6.5)) == [-3.0]
+
+@testset "broadcast for `AbstractArray` without `CartesianIndex` support" begin
+    struct BVec52775 <: AbstractVector{Int}
+        a::Vector{Int}
+    end
+    Base.size(a::BVec52775) = size(a.a)
+    Base.getindex(a::BVec52775, i::Real) = a.a[i]
+    Base.getindex(a::BVec52775, i) = error("unsupported index!")
+    a = BVec52775([1,2,3])
+    bc = Base.broadcasted(identity, a)
+    @test bc[1] == bc[CartesianIndex(1)] == bc[1, CartesianIndex()]
+    @test a .+ [1 2] == a.a .+ [1 2]
+end
