Skip to content

Commit

Permalink
ENH: added selectperm and selectperm! based on new PartialQuickSort a…
Browse files Browse the repository at this point in the history
…lgorithm

closes JuliaLang#10767
  • Loading branch information
sglyon authored and kmsquire committed Jul 24, 2015
1 parent fc89a82 commit 7e8b3a9
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 9 deletions.
3 changes: 3 additions & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ export
ObjectIdDict,
OrdinalRange,
Pair,
PartialQuickSort,
PollingFileWatcher,
ProcessGroup,
QuickSort,
Expand Down Expand Up @@ -594,6 +595,8 @@ export
sort!,
sort,
sortcols,
selectperm,
selectperm!,
sortperm,
sortperm!,
sortrows,
Expand Down
82 changes: 81 additions & 1 deletion base/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,17 @@ export # also exported by Base
# order & algorithm:
sort,
sort!,
selectperm,
selectperm!,
sortperm,
sortperm!,
sortrows,
sortcols,
# algorithms:
InsertionSort,
QuickSort,
MergeSort
MergeSort,
PartialQuickSort

export # not exported by Base
Algorithm,
Expand Down Expand Up @@ -247,6 +250,13 @@ abstract Algorithm
# Field-less algorithm tag types. Each is a subtype of `Algorithm`, and `sort!`
# methods are selected by dispatching on the tag passed as the algorithm argument.
immutable InsertionSortAlg <: Algorithm end
immutable QuickSortAlg <: Algorithm end
immutable MergeSortAlg <: Algorithm end
# Algorithm tag for a partial quicksort. `k` is the number of leading positions
# of the sorted range that must end up in their final sorted order; elements
# beyond position `k` may be left unsorted.
immutable PartialQuickSort <: Algorithm
    k::Int
end

# Partially sort far enough to cover every index contained in the range `r`.
# The larger endpoint is used instead of `last(r)` because for a descending
# range such as `4:-1:2` the last element is the *smallest* index, which would
# leave the higher positions of the range unsorted.
PartialQuickSort(r::OrdinalRange) = PartialQuickSort(max(first(r), last(r)))


# Shared instances of the field-less tag types; these are the names passed as
# the algorithm argument (e.g. `alg=QuickSort`).
const InsertionSort = InsertionSortAlg()
const QuickSort = QuickSortAlg()
Expand Down Expand Up @@ -351,6 +361,38 @@ function sort!(v::AbstractVector, lo::Int, hi::Int, a::MergeSortAlg, o::Ordering
return v
end

# Partial quicksort of `v[lo:hi]` under ordering `o`.
#
# Only the first `a.k` positions counted from `lo` are guaranteed to hold their
# final sorted values on return; the remainder of the range is left partitioned
# but not necessarily sorted. Ranges of at most `SMALL_THRESHOLD + 1` elements
# are sorted completely with `SMALL_ALGORITHM`. Returns `v`.
function sort!(v::AbstractVector, lo::Int, hi::Int, a::PartialQuickSort,
               o::Ordering)
    # `k` is relative to the current `lo`: it counts how many positions starting
    # at `lo` still have to be sorted, and shrinks whenever `lo` moves right.
    k = a.k
    while lo < hi
        hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
        pivot = v[(lo+hi)>>>1]
        i, j = lo, hi
        # Partition around the pivot. When the scans cross (i > j), nothing in
        # lo:j orders after the pivot and nothing in i:hi orders before it.
        while true
            while lt(o, v[i], pivot); i += 1; end
            while lt(o, pivot, v[j]); j -= 1; end
            i <= j || break
            v[i], v[j] = v[j], v[i]
            i += 1; j -= 1
        end
        # The left part always overlaps the requested prefix: sort it fully if
        # it fits within `k`, otherwise only its first `k` positions.
        if lo < j
            if j - lo <= k
                sort!(v, lo, j, QuickSort, o)
            else
                sort!(v, lo, j, PartialQuickSort(k), o)
            end
        end
        # Continue into the right part only if the requested prefix reaches
        # past position `i`; otherwise everything required is already sorted.
        if (i - lo + 1) <= k
            k -= j - lo + 1
            lo = i
        else
            break
        end
    end
    return v
end

## generic sorting methods ##

# Fallback default sorting algorithm for any array: `DEFAULT_STABLE`
# (a constant defined elsewhere in this file).
defalg(v::AbstractArray) = DEFAULT_STABLE
Expand All @@ -369,6 +411,40 @@ end

# Non-mutating sort: copy `v`, sort the copy in place with the given keyword
# arguments, and return the sorted copy. `v` itself is left untouched.
function sort(v::AbstractVector; kws...)
    return sort!(copy(v); kws...)
end


## selectperm: the permutation to sort the first k elements of an array ##

# Index (integer `k`) or indices (range `k`) into `v` of the element(s) that
# would sit at sorted position(s) `k`.
#
# Builds the identity index vector `1:length(v)` and hands it to `select!`
# with a `Perm` ordering constructed from `lt`, `by`, `rev`, `order` over `v`;
# the result of `select!` is returned unchanged. `v` is not modified.
function selectperm(v::AbstractVector,
                    k::Union(Int,OrdinalRange);
                    lt::Function=isless,
                    by::Function=identity,
                    rev::Bool=false,
                    order::Ordering=Base.Order.Forward)
    indices = collect(1:length(v))
    perm_order = Perm(ord(lt, by, rev, order), v)
    return select!(indices, k, perm_order)
end

# Variant of `selectperm` that works in a caller-supplied index vector `ix`.
#
# Unless `initialized` is true, `ix` is first overwritten with
# 1, 2, ..., length(ix). `ix` is then partially quicksorted (far enough to
# cover `k`) using a `Perm` ordering built from `lt`, `by`, `rev`, `order`
# over `v`, and `ix[k]` is returned. `ix` is mutated; `v` is not.
function selectperm!{I<:Integer}(ix::AbstractVector{I}, v::AbstractVector,
                                 k::Union(Int, OrdinalRange);
                                 lt::Function=isless,
                                 by::Function=identity,
                                 rev::Bool=false,
                                 order::Ordering=Forward,
                                 initialized::Bool=false)
    # When the caller already initialized `ix`, the range below is the empty
    # `1:0`, so the fill loop does nothing.
    nfill = initialized ? 0 : length(ix)
    @inbounds for pos = 1:nfill
        ix[pos] = pos
    end

    # do partial quicksort
    alg = PartialQuickSort(k)
    perm_order = Perm(ord(lt, by, rev, order), v)
    sort!(ix, alg, perm_order)

    # TODO: Not type stable. If k is an Int, this will return an Int; if it is
    #       an OrdinalRange it will return a Vector{Int}. This, however, seems
    #       to be the same behavior as `select`.
    return ix[k]
end

## sortperm: the permutation to sort an array ##

function sortperm(v::AbstractVector;
Expand Down Expand Up @@ -499,6 +575,10 @@ function fpsort!(v::AbstractVector, a::Algorithm, o::Ordering)
return v
end


# `fpsort!` specialization for `PartialQuickSort`: instead of going through the
# generic `fpsort!` method, forward straight to the range-based `sort!` over
# the whole vector (`1:length(v)`).
function fpsort!(v::AbstractVector, a::PartialQuickSort, o::Ordering)
    return sort!(v, 1, length(v), a, o)
end

# Route two cases to `fpsort!`:
#   1. vectors whose element type is one of `Floats`, sorted with a `DirectOrdering`;
#   2. `Vector{Int}` index vectors sorted with a `Perm` of a `DirectOrdering`
#      over a `Vector` whose element type is one of `Floats`.
# (`Floats` is defined elsewhere; presumably the floating-point types -- confirm.)
sort!{T<:Floats}(v::AbstractVector{T}, a::Algorithm, o::DirectOrdering) = fpsort!(v,a,o)
sort!{O<:DirectOrdering,T<:Floats}(v::Vector{Int}, a::Algorithm, o::Perm{O,Vector{T}}) = fpsort!(v,a,o)

Expand Down
32 changes: 31 additions & 1 deletion doc/stdlib/sort.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,14 +206,33 @@ Order-Related Functions
Variant of ``select!`` which copies ``v`` before partially sorting it, thereby
returning the same thing as ``select!`` but leaving ``v`` unmodified.

.. function:: selectperm(v, k, [by=<transform>,] [lt=<comparison>,] [rev=false])

Return a partial permutation of the vector ``v``, according to the order
specified by ``by``, ``lt`` and ``rev``, so that ``v[output]`` returns the
value(s) found at position(s) ``k`` of a fully sorted version of ``v``. If
``k`` is a single index (Integer), the single index of the element at sorted
position ``k`` is returned; if ``k`` is a range, an array of the indices for
those positions is returned. As with ``select``, an integer ``k`` therefore
yields a single value rather than a vector of ``k`` elements. Also note that
this is equivalent to, but more efficient than, calling ``sortperm(...)[k]``.

.. function:: selectperm!(ix, v, k, [by=<transform>,] [lt=<comparison>,] [rev=false,] [initialized=false])

Like ``selectperm``, but accepts a preallocated index vector ``ix``. If
``initialized`` is ``false`` (the default), ``ix`` is initialized to contain
the values ``1:length(ix)``.


Sorting Algorithms
------------------

There are currently three sorting algorithms available in base Julia:
There are currently four sorting algorithms available in base Julia:

- ``InsertionSort``
- ``QuickSort``
- ``PartialQuickSort(k)``
- ``MergeSort``

``InsertionSort`` is an O(n^2) stable sorting algorithm. It is efficient
Expand All @@ -225,6 +244,17 @@ equal will not remain in the same order in which they originally
appeared in the array to be sorted. ``QuickSort`` is the default
algorithm for numeric values, including integers and floats.

``PartialQuickSort(k)`` is similar to ``QuickSort``, but the output array
is only sorted up to index ``k``. For example::

x = rand(1:500, 100)
k = 50
s = sort(x; alg=QuickSort)
ps = sort(x; alg=PartialQuickSort(k))
map(issorted, (s, ps)) # => (true, false)
map(x->issorted(x[1:k]), (s, ps)) # => (true, true)
s[1:k] == ps[1:k] # => true

``MergeSort`` is an O(n log n) stable sorting algorithm but is not
in-place – it requires a temporary array of half the size of the
input array – and is typically not quite as fast as ``QuickSort``.
Expand Down
38 changes: 31 additions & 7 deletions test/sorting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@ end
@test reverse([2,3,1]) == [1,3,2]
# [3,6,30,1,9] sorted is [1,3,6,9,30]: position 3 holds 6, positions 3:4 hold 6 and 9
@test select([3,6,30,1,9],3) == 6
@test select([3,6,30,1,9],3:4) == [6,9]
# 6 and 9 live at indices 2 and 5 of the original vector
@test selectperm([3,6,30,1,9], 3:4) == [2,5]
@test selectperm!(collect(1:5), [3,6,30,1,9], 3:4) == [2,5]
# `a` is 1:10 and already sorted, so the selected values and their indices both
# equal the requested range itself (or 11 - r when the order is reversed).
# The list covers ascending, empty, descending and stepped ranges.
let a=[1:10;]
for r in Any[2:4, 1:2, 10:10, 4:2, 2:1, 4:-1:2, 2:-1:1, 10:-1:10, 4:1:3, 1:2:8, 10:-3:1]
@test select(a, r) == [r;]
@test selectperm(a, r) == [r;]
@test select(a, r, rev=true) == (11 .- [r;])
@test selectperm(a, r, rev=true) == (11 .- [r;])
end
end
@test sum(randperm(6)) == 21
Expand Down Expand Up @@ -171,15 +175,34 @@ for alg in [InsertionSort, MergeSort]
@test b == c
end

b = sort(a, alg=QuickSort)
@test issorted(b)
b = sort(a, alg=QuickSort, rev=true)
@test issorted(b, rev=true)
b = sort(a, alg=QuickSort, by=x->1/x)
@test issorted(b, by=x->1/x)
# unstable algorithms: only sortedness of the result is checked.
# PartialQuickSort with k = length(a) is expected to sort the whole array,
# so it is exercised exactly like QuickSort here.
for alg in [QuickSort, PartialQuickSort(length(a))]
b = sort(a, alg=alg)
@test issorted(b)
b = sort(a, alg=alg, rev=true)
@test issorted(b, rev=true)
b = sort(a, alg=alg, by=x->1/x)
@test issorted(b, by=x->1/x)
end

# test PartialQuickSort only does a partial sort:
# with k = length(a) ÷ 10 the first k elements must be sorted, while the array
# as a whole must not be.
# NOTE(review): the `!issorted` checks assume `a` (defined outside this view) is
# long and unordered enough that the partial sort leaves some of it unsorted --
# confirm against the definition of `a`.
let alg = PartialQuickSort(div(length(a), 10))
k = alg.k
b = sort(a, alg=alg)
c = sort(a, alg=alg, by=x->1/x)
d = sort(a, alg=alg, rev=true)
@test issorted(b[1:k])
@test issorted(c[1:k], by=x->1/x)
@test issorted(d[1:k], rev=true)
@test !issorted(b)
@test !issorted(c, by=x->1/x)
@test !issorted(d, rev=true)
end

# Both `rev=true` and `by=x->1/x` order the values as [30,9,6,3,1]; the second
# value is 9, which sits at index 5 of the original vector. With an integer `k`
# both select and selectperm return a single value, not a vector.
@test select([3,6,30,1,9], 2, rev=true) == 9
@test select([3,6,30,1,9], 2, by=x->1/x) == 9
@test selectperm([3,6,30,1,9], 2, rev=true) == 5
@test selectperm([3,6,30,1,9], 2, by=x->1/x) == 5

## more advanced sorting tests ##

Expand Down Expand Up @@ -227,7 +250,7 @@ for n in [0:10; 100; 101; 1000; 1001]
end

# unstable algorithms
for alg in [QuickSort]
for alg in [QuickSort, PartialQuickSort(n)]
p = sortperm(v, alg=alg, rev=rev)
@test p == sortperm(float(v), alg=alg, rev=rev)
@test isperm(p)
Expand All @@ -241,6 +264,7 @@ for n in [0:10; 100; 101; 1000; 1001]
end

v = randn_with_nans(n,0.1)
# TODO: alg = PartialQuickSort(n) fails here....
for alg in [InsertionSort, QuickSort, MergeSort],
rev in [false,true]
# test float sorting with NaNs
Expand Down

0 comments on commit 7e8b3a9

Please sign in to comment.