Skip to content

Commit

Permalink
Adds AbstractRNG to various methods which utilise rand (see JuliaLang…
Browse files Browse the repository at this point in the history
…#10345).

Also utilise randexp instead of taking log.
  • Loading branch information
simonbyrne committed Mar 12, 2015
1 parent 7e8b10c commit de2d0d0
Show file tree
Hide file tree
Showing 10 changed files with 200 additions and 114 deletions.
40 changes: 0 additions & 40 deletions base/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1401,43 +1401,3 @@ push!(A, a, b) = push!(push!(A, a), b)
push!(A, a, b, c...) = push!(push!(A, a, b), c...)
unshift!(A, a, b) = unshift!(unshift!(A, b), a)
unshift!(A, a, b, c...) = unshift!(unshift!(A, c...), a, b)

# Fill S (resized as needed) with a random subsequence of A, where
# each element of A is included in S with independent probability p.
# (Note that this is different from the problem of finding a random
# size-m subset of A where m is fixed!)
function randsubseq!(S::AbstractArray, A::AbstractArray, p::Real)
0 <= p <= 1 || throw(ArgumentError("probability $p not in [0,1]"))
n = length(A)
p == 1 && return copy!(resize!(S, n), A)
empty!(S)
p == 0 && return S
nexpected = p * length(A)
sizehint!(S, round(Int,nexpected + 5*sqrt(nexpected)))
if p > 0.15 # empirical threshold for trivial O(n) algorithm to be better
for i = 1:n
rand() <= p && push!(S, A[i])
end
else
# Skip through A, in order, from each element i to the next element i+s
# included in S. The probability that the next included element is
# s==k (k > 0) is (1-p)^(k-1) * p, and hence the probability (CDF) that
# s is in {1,...,k} is 1-(1-p)^k = F(k). Thus, we can draw the skip s
# from this probability distribution via the discrete inverse-transform
# method: s = ceil(F^{-1}(u)) where u = rand(), which is simply
# s = ceil(log(rand()) / log1p(-p)).
L = 1 / log1p(-p)
i = 0
while true
s = log(rand()) * L # note that rand() < 1, so s > 0
s >= n - i && return S # compare before ceil to avoid overflow
push!(S, A[i += ceil(Int,s)])
end
# [This algorithm is similar in spirit to, but much simpler than,
# the one by Vitter for a related problem in "Faster methods for
# random sampling," Comm. ACM Magazine 7, 703-718 (1984).]
end
return S
end

randsubseq{T}(A::AbstractArray{T}, p::Real) = randsubseq!(T[], A, p)
33 changes: 0 additions & 33 deletions base/combinatorics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,39 +104,6 @@ function binomial{T<:Integer}(n::T, k::T)
end

## other ordering related functions ##

function shuffle!(a::AbstractVector)
for i = length(a):-1:2
j = rand(1:i)
a[i], a[j] = a[j], a[i]
end
return a
end

shuffle(a::AbstractVector) = shuffle!(copy(a))

function randperm(n::Integer)
a = Array(typeof(n), n)
a[1] = 1
@inbounds for i = 2:n
j = rand(1:i)
a[i] = a[j]
a[j] = i
end
return a
end

function randcycle(n::Integer)
a = Array(typeof(n), n)
a[1] = 1
@inbounds for i = 2:n
j = rand(1:i-1)
a[i] = a[j]
a[j] = i
end
return a
end

function nthperm!(a::AbstractVector, k::Integer)
k -= 1 # make k 1-indexed
k < 0 && throw(ArgumentError("permutation k must be ≥ 0, got $k"))
Expand Down
98 changes: 97 additions & 1 deletion base/random.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@ export srand,
randn, randn!,
randexp, randexp!,
bitrand,
AbstractRNG, RNG, MersenneTwister, RandomDevice
randstring,
randsubseq,randsubseq!,
shuffle,shuffle!,
randperm, randcycle,
AbstractRNG, RNG, MersenneTwister, RandomDevice,
GLOBAL_RNG


abstract AbstractRNG
Expand Down Expand Up @@ -1193,4 +1198,95 @@ end

Base.show(io::IO, u::UUID) = write(io, Base.repr(u))

# return a random string (often useful for temporary filenames/dirnames)
let b = UInt8['0':'9';'A':'Z';'a':'z']
global randstring
randstring(r::AbstractRNG, n::Int) = ASCIIString(b[rand(r, 1:length(b), n)])
randstring(r::AbstractRNG) = randstring(r,8)
randstring(n::Int) = randstring(GLOBAL_RNG, n)
randstring() = randstring(GLOBAL_RNG)
end



# Fill S (resized as needed) with a random subsequence of A, where
# each element of A is included in S with independent probability p.
# (Note that this is different from the problem of finding a random
# size-m subset of A where m is fixed!)
function randsubseq!(r::AbstractRNG, S::AbstractArray, A::AbstractArray, p::Real)
0 <= p <= 1 || throw(ArgumentError("probability $p not in [0,1]"))
n = length(A)
p == 1 && return copy!(resize!(S, n), A)
empty!(S)
p == 0 && return S
nexpected = p * length(A)
sizehint!(S, round(Int,nexpected + 5*sqrt(nexpected)))
if p > 0.15 # empirical threshold for trivial O(n) algorithm to be better
for i = 1:n
rand(r) <= p && push!(S, A[i])
end
else
# Skip through A, in order, from each element i to the next element i+s
# included in S. The probability that the next included element is
# s==k (k > 0) is (1-p)^(k-1) * p, and hence the probability (CDF) that
# s is in {1,...,k} is 1-(1-p)^k = F(k). Thus, we can draw the skip s
# from this probability distribution via the discrete inverse-transform
# method: s = ceil(F^{-1}(u)) where u = rand(), which is simply
# s = ceil(log(rand()) / log1p(-p)).
# -log(rand()) is an exponential variate, so can use randexp().
L = -1 / log1p(-p) # L > 0
i = 0
while true
s = randexp(r) * L
s >= n - i && return S # compare before ceil to avoid overflow
push!(S, A[i += ceil(Int,s)])
end
# [This algorithm is similar in spirit to, but much simpler than,
# the one by Vitter for a related problem in "Faster methods for
# random sampling," Comm. ACM Magazine 7, 703-718 (1984).]
end
return S
end
randsubseq!(S::AbstractArray, A::AbstractArray, p::Real) = randsubseq!(GLOBAL_RNG, S, A, p)

randsubseq{T}(r::AbstractRNG, A::AbstractArray{T}, p::Real) = randsubseq!(r, T[], A, p)
randsubseq(A::AbstractArray, p::Real) = randsubseq(GLOBAL_RNG, A, p)


function shuffle!(r::AbstractRNG, a::AbstractVector)
for i = length(a):-1:2
j = rand(r, 1:i)
a[i], a[j] = a[j], a[i]
end
return a
end
shuffle!(a::AbstractVector) = shuffle!(GLOBAL_RNG, a)

shuffle(r::AbstractRNG, a::AbstractVector) = shuffle!(r, copy(a))
shuffle(a::AbstractVector) = shuffle(GLOBAL_RNG, a)

function randperm(r::AbstractRNG, n::Integer)
a = Array(typeof(n), n)
a[1] = 1
@inbounds for i = 2:n
j = rand(r, 1:i)
a[i] = a[j]
a[j] = i
end
return a
end
randperm(n::Integer) = randperm(GLOBAL_RNG, n)

function randcycle(r::AbstractRNG, n::Integer)
a = Array(typeof(n), n)
a[1] = 1
@inbounds for i = 2:n
j = rand(r, 1:i-1)
a[i] = a[j]
a[j] = i
end
return a
end
randcycle(n::Integer) = randcycle(GLOBAL_RNG, n)

end # module
50 changes: 37 additions & 13 deletions base/sparse/sparsematrix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -364,13 +364,13 @@ function findnz{Tv,Ti}(S::SparseMatrixCSC{Tv,Ti})
return (I, J, V)
end

function sprand{T}(m::Integer, n::Integer, density::FloatingPoint,
rng::Function,::Type{T}=eltype(rng(1)))


import Base.Random.GLOBAL_RNG
function sprand_IJ(r::AbstractRNG, m::Integer, n::Integer, density::FloatingPoint)
((m < 0) || (n < 0)) && throw(ArgumentError("invalid Array dimensions"))
0 <= density <= 1 || throw(ArgumentError("$density not in [0,1]"))
N = n*m
N == 0 && return spzeros(T,m,n)
N == 1 && return rand() <= density ? sparse(rng(1)) : spzeros(T,1,1)

I, J = Array(Int, 0), Array(Int, 0) # indices of nonzero elements
sizehint!(I, round(Int,N*density))
Expand All @@ -380,18 +380,18 @@ function sprand{T}(m::Integer, n::Integer, density::FloatingPoint,
L = log1p(-density)
coldensity = -expm1(m*L) # = 1 - (1-density)^m
colsparsity = exp(m*L) # = 1 - coldensity
L = 1/L
iL = 1/L

rows = Array(Int, 0)
for j in randsubseq(1:n, coldensity)
for j in randsubseq(r, 1:n, coldensity)
# To get the right statistics, we *must* have a nonempty column j
# even if p*m << 1. To do this, we use an approach similar to
# the one in randsubseq to compute the expected first nonzero row k,
# except given that at least one is nonzero (via Bayes' rule);
# carefully rearranged to avoid excessive roundoff errors.
k = ceil(log(colsparsity + rand()*coldensity) * L)
k = ceil(log(colsparsity + rand(r)*coldensity) * iL)
ik = k < 1 ? 1 : k > m ? m : Int(k) # roundoff-error/underflow paranoia
randsubseq!(rows, 1:m-ik, density)
randsubseq!(r, rows, 1:m-ik, density)
push!(rows, m-ik+1)
append!(I, rows)
nrows = length(rows)
Expand All @@ -401,13 +401,37 @@ function sprand{T}(m::Integer, n::Integer, density::FloatingPoint,
J[i] = j
end
end
return sparse_IJ_sorted!(I, J, rng(length(I)), m, n, +) # it will never need to combine
I, J
end

function sprand{T}(r::AbstractRNG, m::Integer, n::Integer, density::FloatingPoint,
rfn::Function, ::Type{T}=eltype(rfn(r,1)))
N = m*n
N == 0 && return spzeros(T,m,n)
N == 1 && return rand(r) <= density ? sparse(rfn(r,1)) : spzeros(T,1,1)

I,J = sprand_IJ(r, m, n, density)
sparse_IJ_sorted!(I, J, rfn(r,length(I)), m, n, +) # it will never need to combine
end

sprand(m::Integer, n::Integer, density::FloatingPoint) = sprand(m,n,density,rand,Float64)
sprandn(m::Integer, n::Integer, density::FloatingPoint) = sprand(m,n,density,randn,Float64)
truebools(n::Integer) = ones(Bool, n)
sprandbool(m::Integer, n::Integer, density::FloatingPoint) = sprand(m,n,density,truebools,Bool)
function sprand{T}(m::Integer, n::Integer, density::FloatingPoint,
rfn::Function, ::Type{T}=eltype(rfn(1)))
N = m*n
N == 0 && return spzeros(T,m,n)
N == 1 && return rand() <= density ? sparse(rfn(1)) : spzeros(T,1,1)

I,J = sprand_IJ(GLOBAL_RNG, m, n, density)
sparse_IJ_sorted!(I, J, rfn(length(I)), m, n, +) # it will never need to combine
end

sprand(r::AbstractRNG, m::Integer, n::Integer, density::FloatingPoint) = sprand(r,m,n,density,rand,Float64)
sprand(m::Integer, n::Integer, density::FloatingPoint) = sprand(GLOBAL_RNG,m,n,density)
sprandn(r::AbstractRNG, m::Integer, n::Integer, density::FloatingPoint) = sprand(r,m,n,density,randn,Float64)
sprandn( m::Integer, n::Integer, density::FloatingPoint) = sprandn(GLOBAL_RNG,m,n,density)

truebools(r::AbstractRNG, n::Integer) = ones(Bool, n)
sprandbool(r::AbstractRNG, m::Integer, n::Integer, density::FloatingPoint) = sprand(r,m,n,density,truebools,Bool)
sprandbool(m::Integer, n::Integer, density::FloatingPoint) = sprandbool(GLOBAL_RNG,m,n,density)

spones{T}(S::SparseMatrixCSC{T}) =
SparseMatrixCSC(S.m, S.n, copy(S.colptr), copy(S.rowval), ones(T, S.colptr[end]-1))
Expand Down
7 changes: 0 additions & 7 deletions base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1656,13 +1656,6 @@ function rsearch(a::ByteArray, b::Char, i::Integer)
end
rsearch(a::ByteArray, b::Union(Int8,UInt8,Char)) = rsearch(a,b,length(a))

# return a random string (often useful for temporary filenames/dirnames)
let b = UInt8['0':'9';'A':'Z';'a':'z']
global randstring
randstring(n::Int) = ASCIIString(b[rand(1:length(b),n)])
randstring() = randstring(8)
end

function hex2bytes(s::ASCIIString)
len = length(s)
iseven(len) || throw(ArgumentError("string length must be even: length($(repr(s))) == $len"))
Expand Down
25 changes: 16 additions & 9 deletions doc/helpdb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -793,9 +793,11 @@ Any[
"),

("Base","randperm","randperm(n)
("Base","randperm","randperm([rng], n)
Construct a random permutation of the given length.
Construct a random permutation of length \"n\". The optional
\"rng\" argument specifies a random number generator, see *Random
Numbers*.
"),

Expand Down Expand Up @@ -827,19 +829,22 @@ Any[
"),

("Base","randcycle","randcycle(n)
("Base","randcycle","randcycle([rng], n)
Construct a random cyclic permutation of the given length.
Construct a random cyclic permutation of length \"n\". The optional
\"rng\" argument specifies a random number generator, see *Random
Numbers*.
"),

("Base","shuffle","shuffle(v)
("Base","shuffle","shuffle([rng], v)
Return a randomly permuted copy of \"v\".
Return a randomly permuted copy of \"v\". The optional \"rng\"
argument specifies a random number generator, see *Random Numbers*.
"),

("Base","shuffle!","shuffle!(v)
("Base","shuffle!","shuffle!([rng], v)
In-place version of \"shuffle()\".
Expand Down Expand Up @@ -12493,10 +12498,12 @@ popdisplay(d::Display)
"),

("Base","randstring","randstring(len)
("Base","randstring","randstring([rng], len=8)
Create a random ASCII string of length \"len\", consisting of
upper- and lower-case letters and the digits 0-9
upper- and lower-case letters and the digits 0-9. The optional
\"rng\" argument specifies a random number generator, see *Random
Numbers*.
"),

Expand Down
23 changes: 14 additions & 9 deletions doc/stdlib/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -523,9 +523,10 @@ Combinatorics

In-place version of :func:`nthperm`.

.. function:: randperm(n)
.. function:: randperm([rng,] n)

Construct a random permutation of the given length.
Construct a random permutation of length ``n``. The optional ``rng`` argument
specifies a random number generator, see :ref:`Random Numbers <random-numbers>`.

.. function:: invperm(v)

Expand All @@ -547,15 +548,19 @@ Combinatorics

Like permute!, but the inverse of the given permutation is applied.

.. function:: randcycle(n)
.. function:: randcycle([rng,] n)

Construct a random cyclic permutation of the given length.
Construct a random cyclic permutation of length ``n``. The optional ``rng``
argument specifies a random number generator, see :ref:`Random Numbers
<random-numbers>`.

.. function:: shuffle(v)
.. function:: shuffle([rng,] v)

Return a randomly permuted copy of ``v``.
Return a randomly permuted copy of ``v``. The optional ``rng`` argument
specifies a random number generator, see :ref:`Random Numbers
<random-numbers>`.

.. function:: shuffle!(v)
.. function:: shuffle!([rng,] v)

In-place version of :func:`shuffle`.

Expand Down Expand Up @@ -716,9 +721,9 @@ Sparse matrices support much of the same set of operations as dense matrices. Th

Construct a sparse diagonal matrix. ``B`` is a tuple of vectors containing the diagonals and ``d`` is a tuple containing the positions of the diagonals. In the case the input contains only one diagonaly, ``B`` can be a vector (instead of a tuple) and ``d`` can be the diagonal position (instead of a tuple), defaulting to 0 (diagonal). Optionally, ``m`` and ``n`` specify the size of the resulting sparse matrix.

.. function:: sprand(m,n,p[,rng])
.. function:: sprand([rng,] m,n,p [,rfn])

Create a random ``m`` by ``n`` sparse matrix, in which the probability of any element being nonzero is independently given by ``p`` (and hence the mean density of nonzeros is also exactly ``p``). Nonzero values are sampled from the distribution specified by ``rng``. The uniform distribution is used in case ``rng`` is not specified.
Create a random ``m`` by ``n`` sparse matrix, in which the probability of any element being nonzero is independently given by ``p`` (and hence the mean density of nonzeros is also exactly ``p``). Nonzero values are sampled from the distribution specified by ``rfn``. The uniform distribution is used in case ``rfn`` is not specified. The optional ``rng`` argument specifies a random number generator, see :ref:`Random Numbers <random-numbers>`.

.. function:: sprandn(m,n,p)

Expand Down
Loading

0 comments on commit de2d0d0

Please sign in to comment.