Skip to content

Commit

Permalink
hvncat: Added inbounds annotations that improve performance (#41200)
Browse files Browse the repository at this point in the history
* Added judicious inbounds/inline decorations

* add inline to other one

* bump

* grammar

Co-authored-by: Jeff Bezanson <[email protected]>

* Remove `@inline`

* bump CI

* bump CI 2

* Merge fix

* Ensure `hvncat_fill!` can't execute when N < 2

* Bounds check in three-arg `hvncat_fill!`

* Narrow inbounds

* Moved bounds check up

---------

Co-authored-by: Jeff Bezanson <[email protected]>
  • Loading branch information
BioTurboNick and JeffBezanson committed Jul 1, 2023
1 parent 36a39b0 commit 27e21c8
Showing 1 changed file with 22 additions and 10 deletions.
32 changes: 22 additions & 10 deletions base/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2404,26 +2404,30 @@ function _typed_hvncat(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, xs::Num
end

function hvncat_fill!(A::Array, row_first::Bool, xs::Tuple)
nr, nc = size(A, 1), size(A, 2)
na = prod(size(A)[3:end])
len = length(xs)
nrc = nr * nc
if nrc * na != len
throw(ArgumentError("argument count $(len) does not match specified shape $(size(A))"))
end
# putting these in separate functions leads to unnecessary allocations
if row_first
nr, nc = size(A, 1), size(A, 2)
nrc = nr * nc
na = prod(size(A)[3:end])
k = 1
for d 1:na
dd = nrc * (d - 1)
for i 1:nr
Ai = dd + i
for j 1:nc
A[Ai] = xs[k]
@inbounds A[Ai] = xs[k]
k += 1
Ai += nr
end
end
end
else
for k eachindex(xs)
A[k] = xs[k]
@inbounds A[k] = xs[k]
end
end
end
Expand Down Expand Up @@ -2609,28 +2613,36 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as::
return A
end

function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int}, d1::Int, d2::Int, as::Tuple{Vararg}) where {T, N}
function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int},
d1::Int, d2::Int, as::Tuple) where {T, N}
N > 1 || throw(ArgumentError("dimensions of the destination array must be at least 2"))
length(scratch1) == length(scratch2) == N ||
throw(ArgumentError("scratch vectors must have as many elements as the destination array has dimensions"))
0 < d1 < 3 &&
0 < d2 < 3 &&
d1 != d2 ||
throw(ArgumentError("d1 and d2 must be either 1 or 2, exclusive."))
outdims = size(A)
offsets = scratch1
inneroffsets = scratch2
for a as
if isa(a, AbstractArray)
for ai a
Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
@inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
A[Ai] = ai

for j 1:N
@inbounds for j 1:N
inneroffsets[j] += 1
inneroffsets[j] < cat_size(a, j) && break
inneroffsets[j] = 0
end
end
else
Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
@inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
A[Ai] = a
end

for j (d1, d2, 3:N...)
@inbounds for j (d1, d2, 3:N...)
offsets[j] += cat_size(a, j)
offsets[j] < outdims[j] && break
offsets[j] = 0
Expand Down

2 comments on commit 27e21c8

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your package evaluation job has completed - possible new issues were detected.
A full report can be found here.

Please sign in to comment.