diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index 772cd95f9e47a..ec33b42e046bf 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -927,7 +927,7 @@ _getindex(::IndexStyle, A::AbstractArray, I...) =
 ## IndexLinear Scalar indexing: canonical method is one Int
 _getindex(::IndexLinear, A::AbstractArray, i::Int) = (@_propagate_inbounds_meta; getindex(A, i))
 _getindex(::IndexLinear, A::AbstractArray) = (@_propagate_inbounds_meta; getindex(A, _to_linear_index(A)))
-function _getindex(::IndexLinear, A::AbstractArray, I::Int...)
+function _getindex(::IndexLinear, A::AbstractArray, I::Vararg{Int,M}) where M
     @_inline_meta
     @boundscheck checkbounds(A, I...) # generally _to_linear_index requires bounds checking
     @inbounds r = getindex(A, _to_linear_index(A, I...))
@@ -944,7 +944,7 @@ function _getindex(::IndexCartesian, A::AbstractArray)
     @_propagate_inbounds_meta
     getindex(A, _to_subscript_indices(A)...)
 end
-function _getindex(::IndexCartesian, A::AbstractArray, I::Int...)
+function _getindex(::IndexCartesian, A::AbstractArray, I::Vararg{Int,M}) where M
     @_inline_meta
     @boundscheck checkbounds(A, I...) # generally _to_subscript_indices requires bounds checking
     @inbounds r = getindex(A, _to_subscript_indices(A, I...)...)
@@ -1011,7 +1011,7 @@ _setindex!(::IndexStyle, A::AbstractArray, v, I...) =
 ## IndexLinear Scalar indexing
 _setindex!(::IndexLinear, A::AbstractArray, v, i::Int) = (@_propagate_inbounds_meta; setindex!(A, v, i))
 _setindex!(::IndexLinear, A::AbstractArray, v) = (@_propagate_inbounds_meta; setindex!(A, v, _to_linear_index(A)))
-function _setindex!(::IndexLinear, A::AbstractArray, v, I::Int...)
+function _setindex!(::IndexLinear, A::AbstractArray, v, I::Vararg{Int,M}) where M
     @_inline_meta
     @boundscheck checkbounds(A, I...)
     @inbounds r = setindex!(A, v, _to_linear_index(A, I...))
@@ -1027,7 +1027,7 @@ function _setindex!(::IndexCartesian, A::AbstractArray, v)
     @_propagate_inbounds_meta
     setindex!(A, v, _to_subscript_indices(A)...)
 end
-function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Int...)
+function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Vararg{Int,M}) where M
     @_inline_meta
     @boundscheck checkbounds(A, I...)
     @inbounds r = setindex!(A, v, _to_subscript_indices(A, I...)...)
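
A note on the recurring signature change in this diff: a slurped `I::Int...` may not be specialized on the number of arguments under the compiler's heuristics, whereas `I::Vararg{Int,M} where M` makes the argument count a static type parameter, so each arity gets its own cheap, inlineable specialization. A minimal sketch of the difference, with hypothetical function names:

```julia
# Plain slurp: the compiler may decline to specialize on argument count.
slurp_count(I::Int...) = length(I)

# Vararg{Int,M}: M is part of the method's type parameters, so each
# argument count compiles separately and M is a compile-time constant.
vararg_count(I::Vararg{Int,M}) where M = M

slurp_count(1, 2, 3) == vararg_count(1, 2, 3) == 3  # true either way
```
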
diff --git a/base/abstractarraymath.jl b/base/abstractarraymath.jl
index 3815098bceeea..ab85a794a8a56 100644
--- a/base/abstractarraymath.jl
+++ b/base/abstractarraymath.jl
@@ -397,7 +397,7 @@ _rshps(shp, shp_i, sz, i, ::Tuple{}) =
 _reperr(s, n, N) = throw(ArgumentError("number of " * s * " repetitions " *
     "($n) cannot be less than number of dimensions of input ($N)"))

-@propagate_inbounds function _repeat(A::AbstractArray, inner, outer)
+@noinline function _repeat(A::AbstractArray, inner, outer)
     shape, inner_shape = rep_shapes(A, inner, outer)

     R = similar(A, shape)
diff --git a/base/array.jl b/base/array.jl
index 78cf82a4b5b37..52cda7ac637df 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -170,15 +170,15 @@ function reinterpret(::Type{T}, a::Array{S}) where T where S
 end

 function reinterpret(::Type{T}, a::Array{S}, dims::NTuple{N,Int}) where T where S where N
-    if !isbits(T)
-        throw(ArgumentError("cannot reinterpret Array{$(S)} to ::Type{Array{$(T)}}, type $(T) is not a bits type"))
-    end
-    if !isbits(S)
-        throw(ArgumentError("cannot reinterpret Array{$(S)} to ::Type{Array{$(T)}}, type $(S) is not a bits type"))
+    function throwbits(::Type{S}, ::Type{T}, ::Type{U}) where {S,T,U}
+        @_noinline_meta
+        throw(ArgumentError("cannot reinterpret Array{$(S)} to ::Type{Array{$(T)}}, type $(U) is not a bits type"))
     end
+    isbits(T) || throwbits(S, T, T)
+    isbits(S) || throwbits(S, T, S)
     nel = div(length(a)*sizeof(S),sizeof(T))
     if prod(dims) != nel
-        throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $(nel)"))
+        _throw_dmrsa(dims, nel)
     end
     ccall(:jl_reshape_array, Array{T,N}, (Any, Any, Any), Array{T,N}, a, dims)
 end
@@ -186,7 +186,7 @@ end
 # reshaping to same # of dimensions
 function reshape(a::Array{T,N}, dims::NTuple{N,Int}) where T where N
     if prod(dims) != length(a)
-        throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $(length(a))"))
+        _throw_dmrsa(dims, length(a))
     end
     if dims == size(a)
         return a
@@ -197,11 +197,16 @@ end
 # reshaping to different # of dimensions
 function reshape(a::Array{T}, dims::NTuple{N,Int}) where T where N
     if prod(dims) != length(a)
-        throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $(length(a))"))
+        _throw_dmrsa(dims, length(a))
     end
     ccall(:jl_reshape_array, Array{T,N}, (Any, Any, Any), Array{T,N}, a, dims)
 end

+function _throw_dmrsa(dims, len)
+    @_noinline_meta
+    throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $len"))
+end
+
 ## Constructors ##

 similar(a::Array{T,1}) where {T} = Array{T,1}(size(a,1))
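
The `throwbits`/`_throw_dmrsa` changes above all apply one pattern: move the `throw`, together with the string interpolation it drags in, out of the hot method and into a `@noinline` helper, so the happy path stays small enough for the inlining cost model introduced later in this diff. A minimal sketch of the pattern (names hypothetical):

```julia
# Cold path: compiled out of line, so the string building never
# bloats the caller's body.
@noinline _throw_badlen(n) =
    throw(ArgumentError("length must be positive, got $n"))

function checked_len(n::Int)
    # The branch stays in the hot method; only a bare call remains.
    n > 0 || _throw_badlen(n)
    return n
end
```
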
diff --git a/base/error.jl b/base/error.jl
index f26df61ee91d8..3d9a27eab6446 100644
--- a/base/error.jl
+++ b/base/error.jl
@@ -27,7 +27,10 @@ Raise an `ErrorException` with the given message.

 See also [`logging`](@ref).
 """
-error(s...) = throw(ErrorException(Main.Base.string(s...)))
+function error(s::Vararg{Any,N}) where {N}
+    @_noinline_meta
+    throw(ErrorException(Main.Base.string(s...)))
+end

 """
     rethrow([e])
@@ -53,7 +56,10 @@ Get the backtrace of the current exception, for use within `catch` blocks.
 """
 catch_backtrace() = ccall(:jl_get_backtrace, Array{Ptr{Void},1}, ())

 ## keyword arg lowering generates calls to this ##
-kwerr(kw, args...) = throw(MethodError(typeof(args[1]).name.mt.kwsorter, (kw,args...)))
+function kwerr(kw, args::Vararg{Any,N}) where {N}
+    @_noinline_meta
+    throw(MethodError(typeof(args[1]).name.mt.kwsorter, (kw,args...)))
+end

 ## system error handling ##

 """
diff --git a/base/inference.jl b/base/inference.jl
index c2fcdd04f04b6..9348bfcb29978 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -14,6 +14,9 @@ struct InferenceParams

     # optimization
     inlining::Bool
+    inline_cost_threshold::Int # number of CPU cycles beyond which it's not worth inlining
+    inline_nonleaf_penalty::Int # penalty for dynamic dispatch
+    inline_tupleret_bonus::Int # extra willingness for non-isbits tuple return types

     # parameters limiting potentially-infinite types (configurable)
     MAX_METHODS::Int
@@ -26,14 +29,18 @@ struct InferenceParams
     # reasonable defaults
     function InferenceParams(world::UInt;
                     inlining::Bool = inlining_enabled(),
+                    inline_cost_threshold::Int = 100,
+                    inline_nonleaf_penalty::Int = 1000,
+                    inline_tupleret_bonus::Int = 400,
                     max_methods::Int = 4,
                     tupletype_len::Int = 15,
                     tuple_depth::Int = 4,
                     tuple_splat::Int = 16,
                     union_splitting::Int = 4,
                     apply_union_enum::Int = 8)
-        return new(world, inlining, max_methods, tupletype_len,
-                   tuple_depth, tuple_splat, union_splitting, apply_union_enum)
+        return new(world, inlining, inline_cost_threshold, inline_nonleaf_penalty,
+                   inline_tupleret_bonus, max_methods, tupletype_len,
+                   tuple_depth, tuple_splat, union_splitting, apply_union_enum)
     end
 end
@@ -374,17 +381,22 @@ isconstType(t::ANY) = isType(t) && (isleaftype(t.parameters[1]) || t.parameters[
 const IInf = typemax(Int) # integer infinity
 const n_ifunc = reinterpret(Int32,arraylen)+1
 const t_ifunc = Array{Tuple{Int,Int,Any},1}(n_ifunc)
+const t_ifunc_cost = Array{Int,1}(n_ifunc)
 const t_ffunc_key = Array{Function,1}(0)
 const t_ffunc_val = Array{Tuple{Int,Int,Any},1}(0)
-function add_tfunc(f::IntrinsicFunction, minarg::Int, maxarg::Int, tfunc::ANY)
-    t_ifunc[reinterpret(Int32,f)+1] = (minarg, maxarg, tfunc)
+const t_ffunc_cost = Array{Int,1}(0)
+function add_tfunc(f::IntrinsicFunction, minarg::Int, maxarg::Int, tfunc::ANY, cost::Int)
+    idx = reinterpret(Int32,f)+1
+    t_ifunc[idx] = (minarg, maxarg, tfunc)
+    t_ifunc_cost[idx] = cost
 end
-function add_tfunc(f::Function, minarg::Int, maxarg::Int, tfunc::ANY)
+function add_tfunc(f::Function, minarg::Int, maxarg::Int, tfunc::ANY, cost::Int)
     push!(t_ffunc_key, f)
     push!(t_ffunc_val, (minarg, maxarg, tfunc))
+    push!(t_ffunc_cost, cost)
 end
-add_tfunc(throw, 1, 1, (x::ANY) -> Bottom)
+add_tfunc(throw, 1, 1, (x::ANY) -> Bottom, 0)

 # the inverse of typeof_tfunc
 function instanceof_tfunc(t::ANY)
@@ -427,104 +439,104 @@ function fptosi_tfunc(x::ANY)
 end

 ## conversion ##
-add_tfunc(bitcast, 2, 2, bitcast_tfunc)
-add_tfunc(sext_int, 2, 2, bitcast_tfunc)
-add_tfunc(zext_int, 2, 2, bitcast_tfunc)
-add_tfunc(trunc_int, 2, 2, bitcast_tfunc)
-add_tfunc(fptoui, 1, 2, fptoui_tfunc)
-add_tfunc(fptosi, 1, 2, fptosi_tfunc)
-add_tfunc(uitofp, 2, 2, bitcast_tfunc)
-add_tfunc(sitofp, 2, 2, bitcast_tfunc)
-add_tfunc(fptrunc, 2, 2, bitcast_tfunc)
-add_tfunc(fpext, 2, 2, bitcast_tfunc)
+add_tfunc(bitcast, 2, 2, bitcast_tfunc, 1)
+add_tfunc(sext_int, 2, 2, bitcast_tfunc, 1)
+add_tfunc(zext_int, 2, 2, bitcast_tfunc, 1)
+add_tfunc(trunc_int, 2, 2, bitcast_tfunc, 1)
+add_tfunc(fptoui, 1, 2, fptoui_tfunc, 1)
+add_tfunc(fptosi, 1, 2, fptosi_tfunc, 1)
+add_tfunc(uitofp, 2, 2, bitcast_tfunc, 1)
+add_tfunc(sitofp, 2, 2, bitcast_tfunc, 1)
+add_tfunc(fptrunc, 2, 2, bitcast_tfunc, 1)
+add_tfunc(fpext, 2, 2, bitcast_tfunc, 1)

 ## checked conversion ##
-add_tfunc(checked_trunc_sint, 2, 2, bitcast_tfunc)
-add_tfunc(checked_trunc_uint, 2, 2, bitcast_tfunc)
-add_tfunc(check_top_bit, 1, 1, math_tfunc)
+add_tfunc(checked_trunc_sint, 2, 2, bitcast_tfunc, 3)
+add_tfunc(checked_trunc_uint, 2, 2, bitcast_tfunc, 3)
+add_tfunc(check_top_bit, 1, 1, math_tfunc, 2)

 ## arithmetic ##
-add_tfunc(neg_int, 1, 1, math_tfunc)
-add_tfunc(add_int, 2, 2, math_tfunc)
-add_tfunc(sub_int, 2, 2, math_tfunc)
-add_tfunc(mul_int, 2, 2, math_tfunc)
-add_tfunc(sdiv_int, 2, 2, math_tfunc)
-add_tfunc(udiv_int, 2, 2, math_tfunc)
-add_tfunc(srem_int, 2, 2, math_tfunc)
-add_tfunc(urem_int, 2, 2, math_tfunc)
-add_tfunc(neg_float, 1, 1, math_tfunc)
-add_tfunc(add_float, 2, 2, math_tfunc)
-add_tfunc(sub_float, 2, 2, math_tfunc)
-add_tfunc(mul_float, 2, 2, math_tfunc)
-add_tfunc(div_float, 2, 2, math_tfunc)
-add_tfunc(rem_float, 2, 2, math_tfunc)
-add_tfunc(fma_float, 3, 3, math_tfunc)
-add_tfunc(muladd_float, 3, 3, math_tfunc)
+add_tfunc(neg_int, 1, 1, math_tfunc, 1)
+add_tfunc(add_int, 2, 2, math_tfunc, 1)
+add_tfunc(sub_int, 2, 2, math_tfunc, 1)
+add_tfunc(mul_int, 2, 2, math_tfunc, 4)
+add_tfunc(sdiv_int, 2, 2, math_tfunc, 30)
+add_tfunc(udiv_int, 2, 2, math_tfunc, 30)
+add_tfunc(srem_int, 2, 2, math_tfunc, 30)
+add_tfunc(urem_int, 2, 2, math_tfunc, 30)
+add_tfunc(neg_float, 1, 1, math_tfunc, 1)
+add_tfunc(add_float, 2, 2, math_tfunc, 1)
+add_tfunc(sub_float, 2, 2, math_tfunc, 1)
+add_tfunc(mul_float, 2, 2, math_tfunc, 4)
+add_tfunc(div_float, 2, 2, math_tfunc, 20)
+add_tfunc(rem_float, 2, 2, math_tfunc, 20)
+add_tfunc(fma_float, 3, 3, math_tfunc, 5)
+add_tfunc(muladd_float, 3, 3, math_tfunc, 5)

 ## fast arithmetic ##
-add_tfunc(neg_float_fast, 1, 1, math_tfunc)
-add_tfunc(add_float_fast, 2, 2, math_tfunc)
-add_tfunc(sub_float_fast, 2, 2, math_tfunc)
-add_tfunc(mul_float_fast, 2, 2, math_tfunc)
-add_tfunc(div_float_fast, 2, 2, math_tfunc)
-add_tfunc(rem_float_fast, 2, 2, math_tfunc)
+add_tfunc(neg_float_fast, 1, 1, math_tfunc, 1)
+add_tfunc(add_float_fast, 2, 2, math_tfunc, 1)
+add_tfunc(sub_float_fast, 2, 2, math_tfunc, 1)
+add_tfunc(mul_float_fast, 2, 2, math_tfunc, 2)
+add_tfunc(div_float_fast, 2, 2, math_tfunc, 10)
+add_tfunc(rem_float_fast, 2, 2, math_tfunc, 10)

 ## bitwise operators ##
-add_tfunc(and_int, 2, 2, math_tfunc)
-add_tfunc(or_int, 2, 2, math_tfunc)
-add_tfunc(xor_int, 2, 2, math_tfunc)
-add_tfunc(not_int, 1, 1, math_tfunc)
-add_tfunc(shl_int, 2, 2, math_tfunc)
-add_tfunc(lshr_int, 2, 2, math_tfunc)
-add_tfunc(ashr_int, 2, 2, math_tfunc)
-add_tfunc(bswap_int, 1, 1, math_tfunc)
-add_tfunc(ctpop_int, 1, 1, math_tfunc)
-add_tfunc(ctlz_int, 1, 1, math_tfunc)
-add_tfunc(cttz_int, 1, 1, math_tfunc)
-add_tfunc(checked_sdiv_int, 2, 2, math_tfunc)
-add_tfunc(checked_udiv_int, 2, 2, math_tfunc)
-add_tfunc(checked_srem_int, 2, 2, math_tfunc)
-add_tfunc(checked_urem_int, 2, 2, math_tfunc)
+add_tfunc(and_int, 2, 2, math_tfunc, 1)
+add_tfunc(or_int, 2, 2, math_tfunc, 1)
+add_tfunc(xor_int, 2, 2, math_tfunc, 1)
+add_tfunc(not_int, 1, 1, math_tfunc, 1)
+add_tfunc(shl_int, 2, 2, math_tfunc, 1)
+add_tfunc(lshr_int, 2, 2, math_tfunc, 1)
+add_tfunc(ashr_int, 2, 2, math_tfunc, 1)
+add_tfunc(bswap_int, 1, 1, math_tfunc, 1)
+add_tfunc(ctpop_int, 1, 1, math_tfunc, 1)
+add_tfunc(ctlz_int, 1, 1, math_tfunc, 1)
+add_tfunc(cttz_int, 1, 1, math_tfunc, 1)
+add_tfunc(checked_sdiv_int, 2, 2, math_tfunc, 40)
+add_tfunc(checked_udiv_int, 2, 2, math_tfunc, 40)
+add_tfunc(checked_srem_int, 2, 2, math_tfunc, 40)
+add_tfunc(checked_urem_int, 2, 2, math_tfunc, 40)

 ## functions ##
-add_tfunc(abs_float, 1, 1, math_tfunc)
-add_tfunc(copysign_float, 2, 2, math_tfunc)
-add_tfunc(flipsign_int, 2, 2, math_tfunc)
-add_tfunc(ceil_llvm, 1, 1, math_tfunc)
-add_tfunc(floor_llvm, 1, 1, math_tfunc)
-add_tfunc(trunc_llvm, 1, 1, math_tfunc)
-add_tfunc(rint_llvm, 1, 1, math_tfunc)
-add_tfunc(sqrt_llvm, 1, 1, math_tfunc)
-add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc)
+add_tfunc(abs_float, 1, 1, math_tfunc, 2)
+add_tfunc(copysign_float, 2, 2, math_tfunc, 2)
+add_tfunc(flipsign_int, 2, 2, math_tfunc, 1)
+add_tfunc(ceil_llvm, 1, 1, math_tfunc, 10)
+add_tfunc(floor_llvm, 1, 1, math_tfunc, 10)
+add_tfunc(trunc_llvm, 1, 1, math_tfunc, 10)
+add_tfunc(rint_llvm, 1, 1, math_tfunc, 10)
+add_tfunc(sqrt_llvm, 1, 1, math_tfunc, 20)
+add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc, 20)

 ## same-type comparisons ##
 cmp_tfunc(x::ANY, y::ANY) = Bool
-add_tfunc(eq_int, 2, 2, cmp_tfunc)
-add_tfunc(ne_int, 2, 2, cmp_tfunc)
-add_tfunc(slt_int, 2, 2, cmp_tfunc)
-add_tfunc(ult_int, 2, 2, cmp_tfunc)
-add_tfunc(sle_int, 2, 2, cmp_tfunc)
-add_tfunc(ule_int, 2, 2, cmp_tfunc)
-add_tfunc(eq_float, 2, 2, cmp_tfunc)
-add_tfunc(ne_float, 2, 2, cmp_tfunc)
-add_tfunc(lt_float, 2, 2, cmp_tfunc)
-add_tfunc(le_float, 2, 2, cmp_tfunc)
-add_tfunc(fpiseq, 2, 2, cmp_tfunc)
-add_tfunc(fpislt, 2, 2, cmp_tfunc)
-add_tfunc(eq_float_fast, 2, 2, cmp_tfunc)
-add_tfunc(ne_float_fast, 2, 2, cmp_tfunc)
-add_tfunc(lt_float_fast, 2, 2, cmp_tfunc)
-add_tfunc(le_float_fast, 2, 2, cmp_tfunc)
+add_tfunc(eq_int, 2, 2, cmp_tfunc, 1)
+add_tfunc(ne_int, 2, 2, cmp_tfunc, 1)
+add_tfunc(slt_int, 2, 2, cmp_tfunc, 1)
+add_tfunc(ult_int, 2, 2, cmp_tfunc, 1)
+add_tfunc(sle_int, 2, 2, cmp_tfunc, 1)
+add_tfunc(ule_int, 2, 2, cmp_tfunc, 1)
+add_tfunc(eq_float, 2, 2, cmp_tfunc, 2)
+add_tfunc(ne_float, 2, 2, cmp_tfunc, 2)
+add_tfunc(lt_float, 2, 2, cmp_tfunc, 2)
+add_tfunc(le_float, 2, 2, cmp_tfunc, 2)
+add_tfunc(fpiseq, 2, 2, cmp_tfunc, 1)
+add_tfunc(fpislt, 2, 2, cmp_tfunc, 1)
+add_tfunc(eq_float_fast, 2, 2, cmp_tfunc, 1)
+add_tfunc(ne_float_fast, 2, 2, cmp_tfunc, 1)
+add_tfunc(lt_float_fast, 2, 2, cmp_tfunc, 1)
+add_tfunc(le_float_fast, 2, 2, cmp_tfunc, 1)

 ## checked arithmetic ##
 chk_tfunc(x::ANY, y::ANY) = Tuple{widenconst(x), Bool}
-add_tfunc(checked_sadd_int, 2, 2, chk_tfunc)
-add_tfunc(checked_uadd_int, 2, 2, chk_tfunc)
-add_tfunc(checked_ssub_int, 2, 2, chk_tfunc)
-add_tfunc(checked_usub_int, 2, 2, chk_tfunc)
-add_tfunc(checked_smul_int, 2, 2, chk_tfunc)
-add_tfunc(checked_umul_int, 2, 2, chk_tfunc)
+add_tfunc(checked_sadd_int, 2, 2, chk_tfunc, 10)
+add_tfunc(checked_uadd_int, 2, 2, chk_tfunc, 10)
+add_tfunc(checked_ssub_int, 2, 2, chk_tfunc, 10)
+add_tfunc(checked_usub_int, 2, 2, chk_tfunc, 10)
+add_tfunc(checked_smul_int, 2, 2, chk_tfunc, 10)
+add_tfunc(checked_umul_int, 2, 2, chk_tfunc, 10)

 ## other, misc intrinsics ##
 add_tfunc(Core.Intrinsics.llvmcall, 3, IInf,
-          (fptr::ANY, rt::ANY, at::ANY, a...) -> instanceof_tfunc(rt))
+          (fptr::ANY, rt::ANY, at::ANY, a...) -> instanceof_tfunc(rt), 10)
 cglobal_tfunc(fptr::ANY) = Ptr{Void}
 cglobal_tfunc(fptr::ANY, t::ANY) = (isType(t) ? Ptr{t.parameters[1]} : Ptr)
 cglobal_tfunc(fptr::ANY, t::Const) = (isa(t.val, Type) ? Ptr{t.val} : Ptr)
-add_tfunc(Core.Intrinsics.cglobal, 1, 2, cglobal_tfunc)
+add_tfunc(Core.Intrinsics.cglobal, 1, 2, cglobal_tfunc, 5)
 add_tfunc(Core.Intrinsics.select_value, 3, 3,
     function (cnd::ANY, x::ANY, y::ANY)
         if isa(cnd, Const)
@@ -538,7 +550,7 @@ add_tfunc(Core.Intrinsics.select_value, 3, 3,
         end
         (Bool ⊑ cnd) || return Bottom
         return tmerge(x, y)
-    end)
+    end, 1)
 add_tfunc(===, 2, 2,
     function (x::ANY, y::ANY)
         if isa(x, Const) && isa(y, Const)
@@ -557,7 +569,7 @@ add_tfunc(===, 2, 2,
             x.val === true && return y
         end
         return Bool
-    end)
+    end, 1)
 function isdefined_tfunc(args...)
     arg1 = args[1]
     if isa(arg1, Const)
@@ -598,7 +610,7 @@ function isdefined_tfunc(args...)
     Bool
 end
 # TODO change IInf to 2 when deprecation is removed
-add_tfunc(isdefined, 1, IInf, isdefined_tfunc)
+add_tfunc(isdefined, 1, IInf, isdefined_tfunc, 1)
 _const_sizeof(x::ANY) = try
     # Constant Vector does not have constant size
     isa(x, Vector) && return Int
@@ -613,7 +625,7 @@ add_tfunc(Core.sizeof, 1, 1,
         isType(x) && return _const_sizeof(x.parameters[1])
         x !== DataType && isleaftype(x) && return _const_sizeof(x)
         return Int
-    end)
+    end, 0)
 add_tfunc(nfields, 1, 1,
     function (x::ANY)
         isa(x,Const) && return Const(nfields(x.val))
@@ -624,11 +636,11 @@ add_tfunc(nfields, 1, 1,
             return Const(length(x.types))
         end
         return Int
-    end)
-add_tfunc(Core._expr, 1, IInf, (args...)->Expr)
-add_tfunc(applicable, 1, IInf, (f::ANY, args...)->Bool)
-add_tfunc(Core.Intrinsics.arraylen, 1, 1, x->Int)
-add_tfunc(arraysize, 2, 2, (a::ANY, d::ANY)->Int)
+    end, 0)
+add_tfunc(Core._expr, 1, IInf, (args...)->Expr, 100)
+add_tfunc(applicable, 1, IInf, (f::ANY, args...)->Bool, 100)
+add_tfunc(Core.Intrinsics.arraylen, 1, 1, x->Int, 4)
+add_tfunc(arraysize, 2, 2, (a::ANY, d::ANY)->Int, 4)
 add_tfunc(pointerref, 3, 3,
     function (a::ANY, i::ANY, align::ANY)
         a = widenconst(a)
@@ -643,8 +655,8 @@ add_tfunc(pointerref, 3, 3,
             end
         end
         return Any
-    end)
-add_tfunc(pointerset, 4, 4, (a::ANY, v::ANY, i::ANY, align::ANY) -> a)
+    end, 4)
+add_tfunc(pointerset, 4, 4, (a::ANY, v::ANY, i::ANY, align::ANY) -> a, 5)

 function typeof_tfunc(t::ANY)
     if isa(t, Const)
@@ -678,7 +690,7 @@ function typeof_tfunc(t::ANY)
         return DataType # typeof(anything)::DataType
     end
 end
-add_tfunc(typeof, 1, 1, typeof_tfunc)
+add_tfunc(typeof, 1, 1, typeof_tfunc, 0)
 add_tfunc(typeassert, 2, 2,
     function (v::ANY, t::ANY)
         t = instanceof_tfunc(t)
@@ -695,7 +707,7 @@ add_tfunc(typeassert, 2, 2,
             return v
         end
         return typeintersect(v, t)
-    end)
+    end, 4)
 add_tfunc(isa, 2, 2,
     function (v::ANY, t::ANY)
         t = instanceof_tfunc(t)
@@ -708,7 +720,7 @@ add_tfunc(isa, 2, 2,
         end
         # TODO: handle non-leaftype(t) by testing against lower and upper bounds
         return Bool
-    end)
+    end, 0)
 add_tfunc(issubtype, 2, 2,
     function (a::ANY, b::ANY)
         if (isa(a,Const) || isType(a)) && (isa(b,Const) || isType(b))
@@ -719,7 +731,7 @@ add_tfunc(issubtype, 2, 2,
             end
         end
         return Bool
-    end)
+    end, 0)

 function type_depth(t::ANY)
     if t === Bottom
@@ -1194,8 +1206,8 @@ function getfield_tfunc(s00::ANY, name)
     # in the current type system
     return rewrap_unionall(limit_type_depth(R, MAX_TYPE_DEPTH), s00)
 end
-add_tfunc(getfield, 2, 2, (s::ANY, name::ANY) -> getfield_tfunc(s, name))
-add_tfunc(setfield!, 3, 3, (o::ANY, f::ANY, v::ANY) -> v)
+add_tfunc(getfield, 2, 2, (s::ANY, name::ANY) -> getfield_tfunc(s, name), 1)
+add_tfunc(setfield!, 3, 3, (o::ANY, f::ANY, v::ANY) -> v, 3)
 function fieldtype_tfunc(s0::ANY, name::ANY)
     if s0 === Any || s0 === Type || DataType ⊑ s0 || UnionAll ⊑ s0
         return Type
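
Each `add_tfunc` call now registers an approximate cycle count next to its transfer function, filling `t_ifunc_cost`/`t_ffunc_cost` in parallel with the existing tables. For intuition about the weights: a multiply plus an add costs 4 + 1 = 5, far under the default `inline_cost_threshold` of 100, while a single `checked_sdiv_int` at 40 already consumes almost half the budget. A toy illustration of the accounting (the `Dict` and helper here are hypothetical, not part of the diff):

```julia
# Hypothetical: sum per-operation costs the way the cost model does,
# then compare against the inlining threshold.
const COSTS = Dict(:mul_float => 4, :add_float => 1, :checked_sdiv_int => 40)

inline_ok(ops; threshold = 100) = sum(op -> COSTS[op], ops) <= threshold

inline_ok([:mul_float, :add_float])    # true: cost 5 <= 100
inline_ok(fill(:checked_sdiv_int, 3))  # false: cost 120 > 100
```
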
@@ -1255,7 +1267,7 @@ function fieldtype_tfunc(s0::ANY, name::ANY)
     end
     return Type{<:ft}
 end
-add_tfunc(fieldtype, 2, 2, fieldtype_tfunc)
+add_tfunc(fieldtype, 2, 2, fieldtype_tfunc, 0)

 function valid_tparam(x::ANY)
     if isa(x,Tuple)
@@ -1382,7 +1394,7 @@ function apply_type_tfunc(headtypetype::ANY, args::ANY...)
         end
     end
     return ans
 end
-add_tfunc(apply_type, 1, IInf, apply_type_tfunc)
+add_tfunc(apply_type, 1, IInf, apply_type_tfunc, 10)

 @pure function type_typeof(v::ANY)
     if isa(v, Type)
@@ -3245,11 +3257,12 @@ end

 #### finalize and record the result of running type inference ####

-function isinlineable(m::Method, src::CodeInfo)
+function isinlineable(m::Method, src::CodeInfo, mod::Module, params::InferenceParams, bonus::Int=0)
     # compute the cost (size) of inlining this code
     inlineable = false
-    cost = 1000
+    cost_threshold = params.inline_cost_threshold
     if m.module === _topmod(m.module)
+        # a few functions get special treatment
         name = m.name
         sig = m.sig
         if ((name === :+ || name === :* || name === :min || name === :max) &&
@@ -3258,11 +3271,11 @@ function isinlineable(m::Method, src::CodeInfo)
            inlineable = true
        elseif (name === :next || name === :done || name === :unsafe_convert ||
                name === :cconvert)
-            cost ÷= 4
+            cost_threshold *= 4
        end
    end
    if !inlineable
-        inlineable = inline_worthy_stmts(src.code, cost)
+        inlineable = inline_worthy(src.code, src, mod, params, cost_threshold + bonus)
    end
    return inlineable
 end
@@ -3359,7 +3372,11 @@ function optimize(me::InferenceState)
     if force_noinline
         me.src.inlineable = false
     elseif !me.src.inlineable && isa(def, Method)
-        me.src.inlineable = isinlineable(def, me.src)
+        bonus = 0
+        if me.bestguess ⊑ Tuple && !isbits(widenconst(me.bestguess))
+            bonus = me.params.inline_tupleret_bonus
+        end
+        me.src.inlineable = isinlineable(def, me.src, me.mod, me.params, bonus)
     end
     me.src.inferred = true
     nothing
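
The `bonus` computed in `optimize` generalizes the hard-coded `promote` special case that the next hunk deletes: any method whose inferred return type is a non-`isbits` tuple gets `inline_tupleret_bonus` of extra budget, because inlining such a method lets the caller elide the tuple allocation. Schematically (a hypothetical helper; the constants are the defaults defined above):

```julia
# How the effective inlining budget is raised for heap-tuple returns.
inline_budget(threshold, returns_heap_tuple::Bool, bonus) =
    threshold + (returns_heap_tuple ? bonus : 0)

inline_budget(100, false, 400)  # 100: ordinary method
inline_budget(100, true, 400)   # 500: non-isbits tuple return
```
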
@@ -4342,27 +4359,6 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference
     end
     ast = ast::Array{Any,1}

-    # `promote` is a tuple-returning function that is very important to inline
-    if isdefined(Main, :Base) && isdefined(Main.Base, :promote) &&
-        length(sv.src.slottypes) > 0 && sv.src.slottypes[1] ⊑ typeof(getfield(Main.Base, :promote))
-        # check for non-isbits Tuple return
-        if sv.bestguess ⊑ Tuple && !isbits(widenconst(sv.bestguess))
-            # See if inlining this call would change the enclosing function
-            # from inlineable to not inlineable.
-            # This heuristic is applied to functions that return non-bits
-            # tuples, since we want to be able to inline those functions to
-            # avoid the tuple allocation.
-            current_stmts = vcat(sv.src.code, pending_stmts)
-            if inline_worthy_stmts(current_stmts)
-                append!(current_stmts, ast)
-                if !inline_worthy_stmts(current_stmts)
-                    return invoke_NF(argexprs0, e.typ, atypes0, sv, atype_unlimited,
-                                     invoke_data)
-                end
-            end
-        end
-    end
-
     # create the backedge
     if isa(frame, InferenceState) && !frame.inferred && frame.cached
         # in this case, the actual backedge linfo hasn't been computed
@@ -4422,7 +4418,7 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference
         end
     end
     free = effect_free(aei, sv.src, sv.mod, true)
-    if ((occ==0 && aeitype===Bottom) || (occ > 1 && !inline_worthy(aei, occ*2000)) ||
+    if ((occ==0 && aeitype===Bottom) || (occ > 1 && !inline_worthy(aei, sv.src, sv.mod, sv.params)) ||
             (affect_free && !free) || (!affect_free && !effect_free(aei, sv.src, sv.mod, false)))
         if occ != 0
             vnew = newvar!(sv, aeitype)
@@ -4600,39 +4596,99 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference
     return (expr, stmts)
 end

-inline_worthy(body::ANY, cost::Integer) = true
+## Computing the cost of a function body

-# should the expression be part of the inline cost model
-function inline_ignore(ex::ANY)
-    if isa(ex, LineNumberNode) || ex === nothing
-        return true
+# saturating sum (inputs are nonnegative), prevents overflow with typemax(Int) below
+plus_saturate(x, y) = max(x, y, x+y)
+# known return type
+isknowntype(T) = (T == Union{}) || isleaftype(T)
+
+statement_cost(::Any, src::CodeInfo, mod::Module, params::InferenceParams) = 0
+statement_cost(qn::QuoteNode, src::CodeInfo, mod::Module, params::InferenceParams) =
+    statement_cost(qn.value, src, mod, params)
+function statement_cost(ex::Expr, src::CodeInfo, mod::Module, params::InferenceParams)
+    head = ex.head
+    if is_meta_expr(ex) || head == :copyast # not sure if copyast is right
+        return 0
     end
-    return isa(ex, Expr) && is_meta_expr(ex::Expr)
+    argcost = 0
+    for a in ex.args
+        argcost = plus_saturate(argcost, statement_cost(a, src, mod, params))
+    end
+    if head == :return || head == :(=)
+        return argcost
+    end
+    if head == :call
+        extyp = exprtype(ex.args[1], src, mod)
+        if isa(extyp, Type)
+            return argcost
+        end
+        if isa(extyp, Const)
+            f = (extyp::Const).val
+            if isa(f, IntrinsicFunction)
+                iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
+                if !isassigned(t_ifunc_cost, iidx)
+                    # unknown/unhandled intrinsic
+                    return plus_saturate(argcost, params.inline_nonleaf_penalty)
+                end
+                return plus_saturate(argcost, t_ifunc_cost[iidx])
+            end
+            if isa(f, Builtin)
+                # The efficiency of operations like a[i] and s.b
+                # depend strongly on whether the result can be
+                # inferred, so check ex.typ
+                if f == Main.Core.getfield || f == Main.Core.tuple
+                    # we might like to penalize non-inferrability, but
+                    # tuple iteration/destructuring makes that
+                    # impossible
+                    # return plus_saturate(argcost, isknowntype(ex.typ) ? 1 : params.inline_nonleaf_penalty)
+                    return argcost
+                elseif f == Main.Core.arrayref
+                    return plus_saturate(argcost, isknowntype(ex.typ) ? 4 : params.inline_nonleaf_penalty)
+                end
+                fidx = findfirst(t_ffunc_key, f::Function)
+                if fidx == 0
+                    # unknown/unhandled builtin or anonymous function
+                    # Use the generic cost of a direct function call
+                    return plus_saturate(argcost, 20)
+                end
+                return plus_saturate(argcost, t_ffunc_cost[fidx])
+            end
+        end
+        return plus_saturate(argcost, params.inline_nonleaf_penalty)
+    elseif head == :foreigncall || ex.head == :invoke
+        return plus_saturate(20, argcost)
+    elseif head == :llvmcall
+        return plus_saturate(10, argcost) # a wild guess at typical cost
+    elseif (head == :&)
+        return plus_saturate(length(ex.args), argcost)
+    end
+    argcost
 end

-function inline_worthy_stmts(stmts::Vector{Any}, cost::Integer = 1000)
-    body = Expr(:block)
-    body.args = stmts
-    return inline_worthy(body, cost)
+function inline_worthy(body::Array{Any,1}, src::CodeInfo, mod::Module,
+                       params::InferenceParams,
+                       cost_threshold::Integer=params.inline_cost_threshold)
+    bodycost = 0
+    for line = 1:length(body)
+        stmt = body[line]
+        thiscost = statement_cost(stmt, src, mod, params)
+        bodycost = plus_saturate(bodycost, thiscost)
+    end
+    bodycost <= cost_threshold
 end

-function inline_worthy(body::Expr, cost::Integer=1000) # precondition: 0 < cost; nominal cost = 1000
-    symlim = 1000 + 5_000_000 ÷ cost
-    nstmt = 0
-    for stmt in body.args
-        if !(isa(stmt, SSAValue) || inline_ignore(stmt))
-            nstmt += 1
-        end
-        isa(stmt, Expr) && stmt.head == :enter && return false # don't inline functions with try/catch
-    end
-    if nstmt < (symlim + 500) ÷ 1000
-        symlim *= 16
-        symlim ÷= 1000
-        if occurs_more(body, e->!inline_ignore(e), symlim) < symlim
-            return true
-        end
-    end
-    return false
+function inline_worthy(body::Expr, src::CodeInfo, mod::Module, params::InferenceParams,
+                       cost_threshold::Integer=params.inline_cost_threshold)
+    bodycost = statement_cost(body, src, mod, params)
+    bodycost <= cost_threshold
+end
+
+function inline_worthy(body::ANY, src::CodeInfo, mod::Module, params::InferenceParams,
+                       cost_threshold::Integer=params.inline_cost_threshold)
+    newbody = exprtype(body, src, mod)
+    !isa(newbody, Expr) && return true
+    inline_worthy(newbody, src, mod, params, cost_threshold)
 end

 ssavalue_increment(body::ANY, incr) = body
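
`plus_saturate` is the accumulator behind all of these costs: a statement's cost can be `typemax(Int)` (`IInf`), so a plain `+` could wrap negative and make an arbitrarily expensive body look free. For nonnegative inputs, `max(x, y, x+y)` clamps at `typemax(Int)` instead. A quick check of the property, using the definition from this diff:

```julia
plus_saturate(x, y) = max(x, y, x + y)

plus_saturate(3, 4)             # 7: ordinary addition
plus_saturate(typemax(Int), 1)  # typemax(Int): x + y wraps negative, max rescues it
```
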
diff --git a/base/reduce.jl b/base/reduce.jl
index a8b3e80d802be..1860541c43b68 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -33,7 +33,7 @@ r_promote(op, x::T) where {T} = convert(r_promote_type(op, T), x)

 ## foldl && mapfoldl

-function mapfoldl_impl(f, op, v0, itr, i)
+@noinline function mapfoldl_impl(f, op, v0, itr, i)
     # Unroll the while loop once; if v0 is known, the call to op may
     # be evaluated at compile time
     if done(itr, i)
@@ -174,7 +174,7 @@ foldr(op, itr) = mapfoldr(identity, op, itr)

 # This is a generic implementation of `mapreduce_impl()`,
 # certain `op` (e.g. `min` and `max`) may have their own specialized versions.
-function mapreduce_impl(f, op, A::AbstractArray, ifirst::Integer, ilast::Integer, blksize::Int=pairwise_blocksize(f, op))
+@noinline function mapreduce_impl(f, op, A::AbstractArray, ifirst::Integer, ilast::Integer, blksize::Int)
     if ifirst == ilast
         @inbounds a1 = A[ifirst]
         return r_promote(op, f(a1))
@@ -197,6 +197,9 @@ function mapreduce_impl(f, op, A::AbstractArray, ifirst::Integer, ilast::Integer
     end
 end

+mapreduce_impl(f, op, A::AbstractArray, ifirst::Integer, ilast::Integer) =
+    mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op))
+
 """
     mapreduce(f, op, itr)

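Note the shape of the `mapreduce_impl` change: the `blksize` default argument is split out into an explicit forwarding method. One plausible reading is that this keeps the `@noinline` policy on the recursive kernel only, while the tiny method that fills in `pairwise_blocksize(f, op)` remains an ordinary, inlineable forwarder. The same refactoring pattern in a generic sketch (names hypothetical):

```julia
# Instead of `@noinline kernel(x, n::Int = default_n(x)) = ...`,
# write the forwarder by hand so each method gets its own policy:
@noinline kernel(x, n::Int) = sum(x[1:n])  # large out-of-line body
kernel(x) = kernel(x, length(x))           # tiny, inlineable forwarder
```
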
diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl
index 44b01c413d8c2..1c63d5b5349c2 100644
--- a/base/reshapedarray.jl
+++ b/base/reshapedarray.jl
@@ -96,21 +96,21 @@ reshape(parent::AbstractArray, dims::Int...) = reshape(parent, dims)
 reshape(parent::AbstractArray, dims::Union{Int,Colon}...) = reshape(parent, dims)
 reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Int,Colon}}}) = _reshape(parent, _reshape_uncolon(parent, dims))
 @inline function _reshape_uncolon(A, dims)
+    @noinline throw1(dims) = throw(DimensionMismatch(string("new dimensions $(dims) ",
+        "may have at most one omitted dimension specified by `Colon()`")))
+    @noinline throw2(A, dims) = throw(DimensionMismatch(string("array size $(length(A)) ",
+        "must be divisible by the product of the new dimensions $dims")))
     pre = _before_colon(dims...)
     post = _after_colon(dims...)
-    if any(d -> d isa Colon, post)
-        throw(DimensionMismatch("new dimensions $(dims) may have at most one omitted dimension specified by Colon()"))
-    end
+    any(d -> d isa Colon, post) && throw1(dims)
     sz, remainder = divrem(length(A), prod(pre)*prod(post))
-    remainder == 0 || _throw_reshape_colon_dimmismatch(A, dims)
+    remainder == 0 || throw2(A, dims)
     (pre..., sz, post...)
 end
 @inline _before_colon(dim::Any, tail...) = (dim, _before_colon(tail...)...)
 @inline _before_colon(dim::Colon, tail...) = ()
 @inline _after_colon(dim::Any, tail...) = _after_colon(tail...)
 @inline _after_colon(dim::Colon, tail...) = tail
-@noinline _throw_reshape_colon_dimmismatch(A, dims) =
-    throw(DimensionMismatch("array size $(length(A)) must be divisible by the product of the new dimensions $dims"))

 reshape(parent::AbstractArray{T,N}, ndims::Val{N}) where {T,N} = parent
 function reshape(parent::AbstractArray, ndims::Val{N}) where N
@@ -124,8 +124,8 @@ rdims_trailing(l, inds...) = length(l) * rdims_trailing(inds...)
 rdims_trailing(l) = length(l)
 rdims(out::Val{N}, inds::Tuple) where {N} = rdims(ntuple(i -> OneTo(1), Val(N)), inds)
 rdims(out::Tuple{}, inds::Tuple{}) = () # N == 0, M == 0
-rdims(out::Tuple{}, inds::Tuple{Any}) = throw(ArgumentError("new dimensions cannot be empty")) # N == 0
-rdims(out::Tuple{}, inds::NTuple{M,Any}) where {M} = throw(ArgumentError("new dimensions cannot be empty")) # N == 0
+@noinline rdims(out::Tuple{}, inds::Tuple{Any}) = throw(ArgumentError("new dimensions cannot be empty")) # N == 0
+@noinline rdims(out::Tuple{}, inds::NTuple{M,Any}) where {M} = throw(ArgumentError("new dimensions cannot be empty")) # N == 0
 rdims(out::Tuple{Any}, inds::Tuple{}) = out # N == 1, M == 0
 rdims(out::NTuple{N,Any}, inds::Tuple{}) where {N} = out # N > 1, M == 0
 rdims(out::Tuple{Any}, inds::Tuple{Any}) = inds # N == 1, M == 1
@@ -142,16 +142,20 @@ _reshape(parent::Array, dims::Dims) = reshape(parent, dims)

 # When reshaping Vector->Vector, don't wrap with a ReshapedArray
 function _reshape(v::AbstractVector, dims::Dims{1})
     len = dims[1]
-    len == length(v) || throw(DimensionMismatch("parent has $(length(v)) elements, which is incompatible with length $len"))
+    len == length(v) || _throw_dmrs(_length(v), "length", len)
     v
 end
 # General reshape
 function _reshape(parent::AbstractArray, dims::Dims)
     n = _length(parent)
-    prod(dims) == n || throw(DimensionMismatch("parent has $n elements, which is incompatible with size $dims"))
+    prod(dims) == n || _throw_dmrs(n, "size", dims)
     __reshape((parent, IndexStyle(parent)), dims)
 end

+@noinline function _throw_dmrs(n, str, dims)
+    throw(DimensionMismatch("parent has $n elements, which is incompatible with $str $dims"))
+end
+
 # Reshaping a ReshapedArray
 _reshape(v::ReshapedArray{<:Any,1}, dims::Dims{1}) = _reshape(v.parent, dims)
 _reshape(R::ReshapedArray, dims::Dims) = _reshape(R.parent, dims)
@@ -189,7 +193,7 @@ end
     @inbounds ret = parent(A)[index]
     ret
 end
-@inline function getindex(A::ReshapedArray, indexes::Int...)
+@inline function getindex(A::ReshapedArray{T,N}, indexes::Vararg{Int,N}) where {T,N}
     @boundscheck checkbounds(A, indexes...)
     _unsafe_getindex(A, indexes...)
 end
@@ -199,21 +203,20 @@ end
     ret
 end

-@inline function _unsafe_getindex(A::ReshapedArray, indexes::Int...)
-    @inbounds ret = parent(A)[ind2sub_rs(A.mi, sub2ind(size(A), indexes...))...]
-    ret
-end
-@inline function _unsafe_getindex(A::ReshapedArrayLF, indexes::Int...)
-    @inbounds ret = parent(A)[sub2ind(size(A), indexes...)]
-    ret
+@inline function _unsafe_getindex(A::ReshapedArray{T,N}, indexes::Vararg{Int,N}) where {T,N}
+    i = sub2ind(size(A), indexes...)
+    I = ind2sub_rs(A.mi, i)
+    _unsafe_getindex_rs(parent(A), I)
 end
+_unsafe_getindex_rs(A, i::Integer) = (@inbounds ret = A[i]; ret)
+@inline _unsafe_getindex_rs(A, I) = (@inbounds ret = A[I...]; ret)

 @inline function setindex!(A::ReshapedArrayLF, val, index::Int)
     @boundscheck checkbounds(A, index)
     @inbounds parent(A)[index] = val
     val
 end
-@inline function setindex!(A::ReshapedArray, val, indexes::Int...)
+@inline function setindex!(A::ReshapedArray{T,N}, val, indexes::Vararg{Int,N}) where {T,N}
     @boundscheck checkbounds(A, indexes...)
     _unsafe_setindex!(A, val, indexes...)
 end
@@ -223,21 +226,17 @@ end
     val
 end

-@inline function _unsafe_setindex!(A::ReshapedArray, val, indexes::Int...)
+@inline function _unsafe_setindex!(A::ReshapedArray{T,N}, val, indexes::Vararg{Int,N}) where {T,N}
     @inbounds parent(A)[ind2sub_rs(A.mi, sub2ind(size(A), indexes...))...] = val
     val
 end
-@inline function _unsafe_setindex!(A::ReshapedArrayLF, val, indexes::Int...)
-    @inbounds parent(A)[sub2ind(size(A), indexes...)] = val
-    val
-end

 # helpful error message for a common failure case
 const ReshapedRange{T,N,A<:Range} = ReshapedArray{T,N,A,Tuple{}}
 setindex!(A::ReshapedRange, val, index::Int) = _rs_setindex!_err()
-setindex!(A::ReshapedRange, val, indexes::Int...) = _rs_setindex!_err()
+setindex!(A::ReshapedRange{T,N}, val, indexes::Vararg{Int,N}) where {T,N} = _rs_setindex!_err()
 setindex!(A::ReshapedRange, val, index::ReshapedIndex) = _rs_setindex!_err()
-_rs_setindex!_err() = error("indexed assignment fails for a reshaped range; consider calling collect")
+@noinline _rs_setindex!_err() = error("indexed assignment fails for a reshaped range; consider calling collect")

 unsafe_convert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = unsafe_convert(Ptr{T}, parent(a))
diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl
index 842a83359e45e..709016fa9678c 100644
--- a/base/sparse/sparsematrix.jl
+++ b/base/sparse/sparsematrix.jl
@@ -20,8 +20,10 @@ struct SparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrix{Tv,Ti}

     function SparseMatrixCSC{Tv,Ti}(m::Integer, n::Integer, colptr::Vector{Ti},
                                     rowval::Vector{Ti}, nzval::Vector{Tv}) where {Tv,Ti<:Integer}
-        m < 0 && throw(ArgumentError("number of rows (m) must be ≥ 0, got $m"))
-        n < 0 && throw(ArgumentError("number of columns (n) must be ≥ 0, got $n"))
+        @noinline throwsz(str, lbl, k) =
+            throw(ArgumentError("number of $str ($lbl) must be ≥ 0, got $k"))
+        m < 0 && throwsz("rows", 'm', m)
+        n < 0 && throwsz("columns", 'n', n)
         new(Int(m), Int(n), colptr, rowval, nzval)
     end
 end
@@ -798,7 +800,7 @@ position forward in `X.colptr`, computes `map(f, transpose(A[:,q]))` by appropri
 distributing `A.rowval` and `f`-transformed `A.nzval` into `X.rowval` and `X.nzval`
 respectively. Simultaneously fixes the one-position-forward shift in `X.colptr`.
 """
-function _distributevals_halfperm!(X::SparseMatrixCSC{Tv,Ti},
+@noinline function _distributevals_halfperm!(X::SparseMatrixCSC{Tv,Ti},
         A::SparseMatrixCSC{Tv,Ti}, q::AbstractVector{<:Integer}, f::Function) where {Tv,Ti}
     @inbounds for Xi in 1:A.n
         Aj = q[Xi]
diff --git a/base/subarray.jl b/base/subarray.jl
index d9371201294b5..ad591d6ad5e9c 100644
--- a/base/subarray.jl
+++ b/base/subarray.jl
@@ -107,14 +107,14 @@ julia> A # Note A has changed even though we modified b
  0  4
 ```
 """
-function view(A::AbstractArray, I...)
+function view(A::AbstractArray, I::Vararg{Any,N}) where {N}
     @_inline_meta
     J = to_indices(A, I)
     @boundscheck checkbounds(A, J...)
     unsafe_view(_maybe_reshape_parent(A, index_ndims(J...)), J...)
 end

-function unsafe_view(A::AbstractArray, I::ViewIndex...)
+function unsafe_view(A::AbstractArray, I::Vararg{ViewIndex,N}) where {N}
     @_inline_meta
     SubArray(A, I)
 end
@@ -124,7 +124,8 @@ end
 # might span multiple parent indices, making the reindex calculation very hard.
 # So we use _maybe_reindex to figure out if there are any arrays of
 # `CartesianIndex`, and if so, we punt and keep two layers of indirection.
-unsafe_view(V::SubArray, I::ViewIndex...) = (@_inline_meta; _maybe_reindex(V, I))
+unsafe_view(V::SubArray, I::Vararg{ViewIndex,N}) where {N} =
+    (@_inline_meta; _maybe_reindex(V, I))
 _maybe_reindex(V, I) = (@_inline_meta; _maybe_reindex(V, I, I))
 _maybe_reindex(V, I, ::Tuple{AbstractArray{<:AbstractCartesianIndex}, Vararg{Any}}) =
     (@_inline_meta; SubArray(V, I))
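
The `subarray.jl` hunks apply the same `Vararg{...,N}` treatment to `view`, making the index count a static parameter rather than a slurp. One way to spot-check this whole class of change interactively (a usage sketch, not part of the diff):

```julia
R = reshape(1:12, 3, 4)  # a ReshapedArray wrapping a range

# With the Vararg{Int,N} signatures the index count is static, so the
# emitted code for a two-index getindex is fully concrete:
@code_warntype getindex(R, 2, 3)

# and small wrappers stay cheap enough to inline under the cost model:
f(A) = A[2, 3] + 1
@code_llvm f(R)
```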