Adds AbstractRNG to various methods which utilise rand (see JuliaLang…

…#10345). Also utilise randexp instead of taking log.
jiahao · Mar 12, 2015 · de2d0d0 · de2d0d0
1 parent 7e8b10c
commit de2d0d0
Show file tree

Hide file tree

Showing 10 changed files with 200 additions and 114 deletions.
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
@@ -1401,43 +1401,3 @@ push!(A, a, b) = push!(push!(A, a), b)
 push!(A, a, b, c...) = push!(push!(A, a, b), c...)
 unshift!(A, a, b) = unshift!(unshift!(A, b), a)
 unshift!(A, a, b, c...) = unshift!(unshift!(A, c...), a, b)
-
-# Fill S (resized as needed) with a random subsequence of A, where
-# each element of A is included in S with independent probability p.
-# (Note that this is different from the problem of finding a random
-# size-m subset of A where m is fixed!)
-function randsubseq!(S::AbstractArray, A::AbstractArray, p::Real)
- 0 <= p <= 1 || throw(ArgumentError("probability $p not in [0,1]"))
- n = length(A)
- p == 1 && return copy!(resize!(S, n), A)
- empty!(S)
- p == 0 && return S
- nexpected = p * length(A)
- sizehint!(S, round(Int,nexpected + 5*sqrt(nexpected)))
- if p > 0.15 # empirical threshold for trivial O(n) algorithm to be better
- for i = 1:n
- rand() <= p && push!(S, A[i])
- end
- else
- # Skip through A, in order, from each element i to the next element i+s
- # included in S. The probability that the next included element is
- # s==k (k > 0) is (1-p)^(k-1) * p, and hence the probability (CDF) that
- # s is in {1,...,k} is 1-(1-p)^k = F(k). Thus, we can draw the skip s
- # from this probability distribution via the discrete inverse-transform
- # method: s = ceil(F^{-1}(u)) where u = rand(), which is simply
- # s = ceil(log(rand()) / log1p(-p)).
- L = 1 / log1p(-p)
- i = 0
- while true
- s = log(rand()) * L # note that rand() < 1, so s > 0
- s >= n - i && return S # compare before ceil to avoid overflow
- push!(S, A[i += ceil(Int,s)])
- end
- # [This algorithm is similar in spirit to, but much simpler than,
- # the one by Vitter for a related problem in "Faster methods for
- # random sampling," Comm. ACM Magazine 7, 703-718 (1984).]
- end
- return S
-end
-
-randsubseq{T}(A::AbstractArray{T}, p::Real) = randsubseq!(T[], A, p)
diff --git a/base/combinatorics.jl b/base/combinatorics.jl
@@ -104,39 +104,6 @@ function binomial{T<:Integer}(n::T, k::T)
 end
 
 ## other ordering related functions ##
-
-function shuffle!(a::AbstractVector)
- for i = length(a):-1:2
- j = rand(1:i)
- a[i], a[j] = a[j], a[i]
- end
- return a
-end
-
-shuffle(a::AbstractVector) = shuffle!(copy(a))
-
-function randperm(n::Integer)
- a = Array(typeof(n), n)
- a[1] = 1
- @inbounds for i = 2:n
- j = rand(1:i)
- a[i] = a[j]
- a[j] = i
- end
- return a
-end
-
-function randcycle(n::Integer)
- a = Array(typeof(n), n)
- a[1] = 1
- @inbounds for i = 2:n
- j = rand(1:i-1)
- a[i] = a[j]
- a[j] = i
- end
- return a
-end
-
 function nthperm!(a::AbstractVector, k::Integer)
  k -= 1 # make k 1-indexed
  k < 0 && throw(ArgumentError("permutation k must be ≥ 0, got $k"))

diff --git a/base/random.jl b/base/random.jl
@@ -8,7 +8,12 @@ export srand,
  randn, randn!,
  randexp, randexp!,
  bitrand,
- AbstractRNG, RNG, MersenneTwister, RandomDevice
+ randstring,
+ randsubseq,randsubseq!,
+ shuffle,shuffle!,
+ randperm, randcycle,
+ AbstractRNG, RNG, MersenneTwister, RandomDevice,
+ GLOBAL_RNG
 
 
 abstract AbstractRNG
@@ -1193,4 +1198,95 @@ end
 
 Base.show(io::IO, u::UUID) = write(io, Base.repr(u))
 
+# return a random string (often useful for temporary filenames/dirnames)
+let b = UInt8['0':'9';'A':'Z';'a':'z']
+ global randstring
+ randstring(r::AbstractRNG, n::Int) = ASCIIString(b[rand(r, 1:length(b), n)])
+ randstring(r::AbstractRNG) = randstring(r,8)
+ randstring(n::Int) = randstring(GLOBAL_RNG, n)
+ randstring() = randstring(GLOBAL_RNG)
+end
+
+
+
+# Fill S (resized as needed) with a random subsequence of A, where
+# each element of A is included in S with independent probability p.
+# (Note that this is different from the problem of finding a random
+# size-m subset of A where m is fixed!)
+function randsubseq!(r::AbstractRNG, S::AbstractArray, A::AbstractArray, p::Real)
+ 0 <= p <= 1 || throw(ArgumentError("probability $p not in [0,1]"))
+ n = length(A)
+ p == 1 && return copy!(resize!(S, n), A)
+ empty!(S)
+ p == 0 && return S
+ nexpected = p * length(A)
+ sizehint!(S, round(Int,nexpected + 5*sqrt(nexpected)))
+ if p > 0.15 # empirical threshold for trivial O(n) algorithm to be better
+ for i = 1:n
+ rand(r) <= p && push!(S, A[i])
+ end
+ else
+ # Skip through A, in order, from each element i to the next element i+s
+ # included in S. The probability that the next included element is
+ # s==k (k > 0) is (1-p)^(k-1) * p, and hence the probability (CDF) that
+ # s is in {1,...,k} is 1-(1-p)^k = F(k). Thus, we can draw the skip s
+ # from this probability distribution via the discrete inverse-transform
+ # method: s = ceil(F^{-1}(u)) where u = rand(), which is simply
+ # s = ceil(log(rand()) / log1p(-p)).
+ # -log(rand()) is an exponential variate, so can use randexp().
+ L = -1 / log1p(-p) # L > 0
+ i = 0
+ while true
+ s = randexp(r) * L
+ s >= n - i && return S # compare before ceil to avoid overflow
+ push!(S, A[i += ceil(Int,s)])
+ end
+ # [This algorithm is similar in spirit to, but much simpler than,
+ # the one by Vitter for a related problem in "Faster methods for
+ # random sampling," Comm. ACM Magazine 7, 703-718 (1984).]
+ end
+ return S
+end
+randsubseq!(S::AbstractArray, A::AbstractArray, p::Real) = randsubseq!(GLOBAL_RNG, S, A, p)
+
+randsubseq{T}(r::AbstractRNG, A::AbstractArray{T}, p::Real) = randsubseq!(r, T[], A, p)
+randsubseq(A::AbstractArray, p::Real) = randsubseq(GLOBAL_RNG, A, p)
+
+
+function shuffle!(r::AbstractRNG, a::AbstractVector)
+ for i = length(a):-1:2
+ j = rand(r, 1:i)
+ a[i], a[j] = a[j], a[i]
+ end
+ return a
+end
+shuffle!(a::AbstractVector) = shuffle!(GLOBAL_RNG, a)
+
+shuffle(r::AbstractRNG, a::AbstractVector) = shuffle!(r, copy(a))
+shuffle(a::AbstractVector) = shuffle(GLOBAL_RNG, a)
+
+function randperm(r::AbstractRNG, n::Integer)
+ a = Array(typeof(n), n)
+ a[1] = 1
+ @inbounds for i = 2:n
+ j = rand(r, 1:i)
+ a[i] = a[j]
+ a[j] = i
+ end
+ return a
+end
+randperm(n::Integer) = randperm(GLOBAL_RNG, n)
+
+function randcycle(r::AbstractRNG, n::Integer)
+ a = Array(typeof(n), n)
+ a[1] = 1
+ @inbounds for i = 2:n
+ j = rand(r, 1:i-1)
+ a[i] = a[j]
+ a[j] = i
+ end
+ return a
+end
+randcycle(n::Integer) = randcycle(GLOBAL_RNG, n)
+
 end # module
diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl
@@ -364,13 +364,13 @@ function findnz{Tv,Ti}(S::SparseMatrixCSC{Tv,Ti})
  return (I, J, V)
 end
 
-function sprand{T}(m::Integer, n::Integer, density::FloatingPoint,
- rng::Function,::Type{T}=eltype(rng(1)))
+
+
+import Base.Random.GLOBAL_RNG
+function sprand_IJ(r::AbstractRNG, m::Integer, n::Integer, density::FloatingPoint)
  ((m < 0) || (n < 0)) && throw(ArgumentError("invalid Array dimensions"))
  0 <= density <= 1 || throw(ArgumentError("$density not in [0,1]"))
  N = n*m
- N == 0 && return spzeros(T,m,n)
- N == 1 && return rand() <= density ? sparse(rng(1)) : spzeros(T,1,1)
 
  I, J = Array(Int, 0), Array(Int, 0) # indices of nonzero elements
  sizehint!(I, round(Int,N*density))
@@ -380,18 +380,18 @@ function sprand{T}(m::Integer, n::Integer, density::FloatingPoint,
  L = log1p(-density)
  coldensity = -expm1(m*L) # = 1 - (1-density)^m
  colsparsity = exp(m*L) # = 1 - coldensity
- L = 1/L
+ iL = 1/L
 
  rows = Array(Int, 0)
- for j in randsubseq(1:n, coldensity)
+ for j in randsubseq(r, 1:n, coldensity)
  # To get the right statistics, we *must* have a nonempty column j
  # even if p*m << 1. To do this, we use an approach similar to
  # the one in randsubseq to compute the expected first nonzero row k,
  # except given that at least one is nonzero (via Bayes' rule);
  # carefully rearranged to avoid excessive roundoff errors.
- k = ceil(log(colsparsity + rand()*coldensity) * L)
+ k = ceil(log(colsparsity + rand(r)*coldensity) * iL)
  ik = k < 1 ? 1 : k > m ? m : Int(k) # roundoff-error/underflow paranoia
- randsubseq!(rows, 1:m-ik, density)
+ randsubseq!(r, rows, 1:m-ik, density)
  push!(rows, m-ik+1)
  append!(I, rows)
  nrows = length(rows)
@@ -401,13 +401,37 @@ function sprand{T}(m::Integer, n::Integer, density::FloatingPoint,
  J[i] = j
  end
  end
- return sparse_IJ_sorted!(I, J, rng(length(I)), m, n, +) # it will never need to combine
+ I, J
+end
+
+function sprand{T}(r::AbstractRNG, m::Integer, n::Integer, density::FloatingPoint,
+ rfn::Function, ::Type{T}=eltype(rfn(r,1)))
+ N = m*n
+ N == 0 && return spzeros(T,m,n)
+ N == 1 && return rand(r) <= density ? sparse(rfn(r,1)) : spzeros(T,1,1)
+
+ I,J = sprand_IJ(r, m, n, density)
+ sparse_IJ_sorted!(I, J, rfn(r,length(I)), m, n, +) # it will never need to combine
 end
 
-sprand(m::Integer, n::Integer, density::FloatingPoint) = sprand(m,n,density,rand,Float64)
-sprandn(m::Integer, n::Integer, density::FloatingPoint) = sprand(m,n,density,randn,Float64)
-truebools(n::Integer) = ones(Bool, n)
-sprandbool(m::Integer, n::Integer, density::FloatingPoint) = sprand(m,n,density,truebools,Bool)
+function sprand{T}(m::Integer, n::Integer, density::FloatingPoint,
+ rfn::Function, ::Type{T}=eltype(rfn(1)))
+ N = m*n
+ N == 0 && return spzeros(T,m,n)
+ N == 1 && return rand() <= density ? sparse(rfn(1)) : spzeros(T,1,1)
+
+ I,J = sprand_IJ(GLOBAL_RNG, m, n, density)
+ sparse_IJ_sorted!(I, J, rfn(length(I)), m, n, +) # it will never need to combine
+end
+
+sprand(r::AbstractRNG, m::Integer, n::Integer, density::FloatingPoint) = sprand(r,m,n,density,rand,Float64)
+sprand(m::Integer, n::Integer, density::FloatingPoint) = sprand(GLOBAL_RNG,m,n,density)
+sprandn(r::AbstractRNG, m::Integer, n::Integer, density::FloatingPoint) = sprand(r,m,n,density,randn,Float64)
+sprandn( m::Integer, n::Integer, density::FloatingPoint) = sprandn(GLOBAL_RNG,m,n,density)
+
+truebools(r::AbstractRNG, n::Integer) = ones(Bool, n)
+sprandbool(r::AbstractRNG, m::Integer, n::Integer, density::FloatingPoint) = sprand(r,m,n,density,truebools,Bool)
+sprandbool(m::Integer, n::Integer, density::FloatingPoint) = sprandbool(GLOBAL_RNG,m,n,density)
 
 spones{T}(S::SparseMatrixCSC{T}) =
  SparseMatrixCSC(S.m, S.n, copy(S.colptr), copy(S.rowval), ones(T, S.colptr[end]-1))

diff --git a/base/string.jl b/base/string.jl
@@ -1656,13 +1656,6 @@ function rsearch(a::ByteArray, b::Char, i::Integer)
 end
 rsearch(a::ByteArray, b::Union(Int8,UInt8,Char)) = rsearch(a,b,length(a))
 
-# return a random string (often useful for temporary filenames/dirnames)
-let b = UInt8['0':'9';'A':'Z';'a':'z']
- global randstring
- randstring(n::Int) = ASCIIString(b[rand(1:length(b),n)])
- randstring() = randstring(8)
-end
-
 function hex2bytes(s::ASCIIString)
  len = length(s)
  iseven(len) || throw(ArgumentError("string length must be even: length($(repr(s))) == $len"))

diff --git a/doc/helpdb.jl b/doc/helpdb.jl
@@ -793,9 +793,11 @@ Any[
 
 "),
 
-("Base","randperm","randperm(n)
+("Base","randperm","randperm([rng], n)
 
- Construct a random permutation of the given length.
+ Construct a random permutation of length \"n\". The optional
+ \"rng\" argument specifies a random number generator, see *Random
+ Numbers*.
 
 "),
 
@@ -827,19 +829,22 @@ Any[
 
 "),
 
-("Base","randcycle","randcycle(n)
+("Base","randcycle","randcycle([rng], n)
 
- Construct a random cyclic permutation of the given length.
+ Construct a random cyclic permutation of length \"n\". The optional
+ \"rng\" argument specifies a random number generator, see *Random
+ Numbers*.
 
 "),
 
-("Base","shuffle","shuffle(v)
+("Base","shuffle","shuffle([rng], v)
 
- Return a randomly permuted copy of \"v\".
+ Return a randomly permuted copy of \"v\". The optional \"rng\"
+ argument specifies a random number generator, see *Random Numbers*.
 
 "),
 
-("Base","shuffle!","shuffle!(v)
+("Base","shuffle!","shuffle!([rng], v)
 
  In-place version of \"shuffle()\".
 
@@ -12493,10 +12498,12 @@ popdisplay(d::Display)
 
 "),
 
-("Base","randstring","randstring(len)
+("Base","randstring","randstring([rng], len=8)
 
  Create a random ASCII string of length \"len\", consisting of
- upper- and lower-case letters and the digits 0-9
+ upper- and lower-case letters and the digits 0-9. The optional
+ \"rng\" argument specifies a random number generator, see *Random
+ Numbers*.
 
 "),
 

diff --git a/doc/stdlib/arrays.rst b/doc/stdlib/arrays.rst
@@ -523,9 +523,10 @@ Combinatorics
 
  In-place version of :func:`nthperm`.
 
-.. function:: randperm(n)
+.. function:: randperm([rng,] n)
 
- Construct a random permutation of the given length.
+ Construct a random permutation of length ``n``. The optional ``rng`` argument
+ specifies a random number generator, see :ref:`Random Numbers <random-numbers>`.
 
 .. function:: invperm(v)
 
@@ -547,15 +548,19 @@ Combinatorics
 
  Like permute!, but the inverse of the given permutation is applied.
 
-.. function:: randcycle(n)
+.. function:: randcycle([rng,] n)
 
- Construct a random cyclic permutation of the given length.
+ Construct a random cyclic permutation of length ``n``. The optional ``rng``
+ argument specifies a random number generator, see :ref:`Random Numbers
+ <random-numbers>`.
 
-.. function:: shuffle(v)
+.. function:: shuffle([rng,] v)
 
- Return a randomly permuted copy of ``v``.
+ Return a randomly permuted copy of ``v``. The optional ``rng`` argument
+ specifies a random number generator, see :ref:`Random Numbers
+ <random-numbers>`.
 
-.. function:: shuffle!(v)
+.. function:: shuffle!([rng,] v)
 
  In-place version of :func:`shuffle`.
 
@@ -716,9 +721,9 @@ Sparse matrices support much of the same set of operations as dense matrices. Th
 
  Construct a sparse diagonal matrix. ``B`` is a tuple of vectors containing the diagonals and ``d`` is a tuple containing the positions of the diagonals. In the case the input contains only one diagonaly, ``B`` can be a vector (instead of a tuple) and ``d`` can be the diagonal position (instead of a tuple), defaulting to 0 (diagonal). Optionally, ``m`` and ``n`` specify the size of the resulting sparse matrix.
 
-.. function:: sprand(m,n,p[,rng])
+.. function:: sprand([rng,] m,n,p [,rfn])
 
- Create a random ``m`` by ``n`` sparse matrix, in which the probability of any element being nonzero is independently given by ``p`` (and hence the mean density of nonzeros is also exactly ``p``). Nonzero values are sampled from the distribution specified by ``rng``. The uniform distribution is used in case ``rng`` is not specified.
+ Create a random ``m`` by ``n`` sparse matrix, in which the probability of any element being nonzero is independently given by ``p`` (and hence the mean density of nonzeros is also exactly ``p``). Nonzero values are sampled from the distribution specified by ``rfn``. The uniform distribution is used in case ``rfn`` is not specified. The optional ``rng`` argument specifies a random number generator, see :ref:`Random Numbers <random-numbers>`.
 
 .. function:: sprandn(m,n,p)