diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index 8b39011d383ab..b312782ed7dcd 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -4,7 +4,6 @@ ## BLAS cutoff threshold constants -const SCAL_CUTOFF = 2048 #TODO const DOT_CUTOFF = 128 const ASUM_CUTOFF = 32 const NRM2_CUTOFF = 32 @@ -14,27 +13,6 @@ const NRM2_CUTOFF = 32 # This constant should ideally be determined by the actual CPU cache size const ISONE_CUTOFF = 2^21 # 2M -function rmul!(X::Array{T}, s::T) where T<:BlasFloat - s == 0 && return fill!(X, zero(T)) - s == 1 && return X - if length(X) < SCAL_CUTOFF - generic_rmul!(X, s) - else - BLAS.scal!(length(X), s, X, 1) - end - X -end - -lmul!(s::T, X::Array{T}) where {T<:BlasFloat} = rmul!(X, s) - -rmul!(X::Array{T}, s::Number) where {T<:BlasFloat} = rmul!(X, convert(T, s)) -function rmul!(X::Array{T}, s::Real) where T<:BlasComplex - R = typeof(real(zero(T))) - GC.@preserve X BLAS.scal!(2*length(X), convert(R,s), convert(Ptr{R},pointer(X)), 1) - X -end - - function isone(A::StridedMatrix) m, n = size(A) m != n && return false # only square matrices can satisfy x == one(x) diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl index fa0238a739c2c..f63eb1b78764c 100644 --- a/stdlib/LinearAlgebra/src/generic.jl +++ b/stdlib/LinearAlgebra/src/generic.jl @@ -2,22 +2,6 @@ ## linalg.jl: Some generic Linear Algebra definitions -# For better performance when input and output are the same array -# See https://github.com/JuliaLang/julia/issues/8415#issuecomment-56608729 -function generic_rmul!(X::AbstractArray, s::Number) - @simd for I in eachindex(X) - @inbounds X[I] *= s - end - X -end - -function generic_lmul!(s::Number, X::AbstractArray) - @simd for I in eachindex(X) - @inbounds X[I] = s*X[I] - end - X -end - function generic_mul!(C::AbstractArray, X::AbstractArray, s::Number) if length(C) != length(X) throw(DimensionMismatch("first array has length $(length(C)) which does not match the length of the second, $(length(X)).")) @@ -42,6 +26,8 @@ end mul!(C::AbstractArray, s::Number, X::AbstractArray) = generic_mul!(C, X, s) mul!(C::AbstractArray, X::AbstractArray, s::Number) = generic_mul!(C, s, X) +# For better performance when input and output are the same array +# See https://github.com/JuliaLang/julia/issues/8415#issuecomment-56608729 """ rmul!(A::AbstractArray, b::Number) @@ -60,7 +46,13 @@ julia> rmul!(A, 2) 6 8 ``` """ -rmul!(A::AbstractArray, b::Number) = generic_rmul!(A, b) +function rmul!(X::AbstractArray, s::Number) + @simd for I in eachindex(X) + @inbounds X[I] *= s + end + X +end + """ lmul!(a::Number, B::AbstractArray) @@ -80,7 +72,12 @@ julia> lmul!(2, B) 6 8 ``` """ -lmul!(a::Number, B::AbstractArray) = generic_lmul!(a, B) +function lmul!(s::Number, X::AbstractArray) + @simd for I in eachindex(X) + @inbounds X[I] = s*X[I] + end + X +end """ cross(x, y) @@ -1361,18 +1358,17 @@ end # The largest positive floating point number whose inverse is less than infinity δ = inv(prevfloat(typemax(nrm))) - if nrm == Inf # Just divide since performance isn't important in this corner case - v ./= Inf - elseif nrm ≥ δ # Safe to multiply with inverse + if nrm ≥ δ # Safe to multiply with inverse invnrm = inv(nrm) rmul!(v, invnrm) - else # Scale elements to avoid overflow + + else # scale elements to avoid overflow εδ = eps(one(nrm))/δ rmul!(v, εδ) rmul!(v, inv(nrm*εδ)) end - return v + v end """ diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl index 47fbf290a79a7..8591a07aa9f8f 100644 --- a/stdlib/LinearAlgebra/test/generic.jl +++ b/stdlib/LinearAlgebra/test/generic.jl @@ -127,7 +127,7 @@ end @testset "Scaling with rmul! and lmul" begin @test rmul!(copy(a), 5.) == a*5 @test lmul!(5., copy(a)) == a*5 - b = randn(LinearAlgebra.SCAL_CUTOFF) # make sure we try BLAS path + b = randn(2048) subB = view(b, :, :) @test rmul!(copy(b), 5.) == b*5 @test rmul!(copy(subB), 5.) == subB*5