├── REQUIRE ├── test ├── test_trie.jl ├── bench_disjoint_set.jl ├── test_orderedset.jl ├── bench_heaps.jl ├── test_disjoint_set.jl ├── test_ordereddict.jl ├── test_stack_and_queue.jl ├── bench_deque.jl ├── test_accumulator.jl ├── test_classifiedcollections.jl ├── test_binheap.jl ├── test_defaultdict.jl ├── test_deque.jl └── test_mutable_binheap.jl ├── run_tests.jl ├── src ├── stack.jl ├── queue.jl ├── delegate.jl ├── DataStructures.jl ├── accumulator.jl ├── classifiedcollections.jl ├── trie.jl ├── ordereddict.jl ├── heaps.jl ├── tree.jl ├── list.jl ├── orderedset.jl ├── disjoint_set.jl ├── heaps │ ├── binary_heap.jl │ └── mutable_binary_heap.jl ├── deque.jl ├── defaultdict.jl └── hashdict.jl ├── .travis.yml ├── License.md └── README.md /REQUIRE: -------------------------------------------------------------------------------- 1 | julia 0.2- 2 | -------------------------------------------------------------------------------- /test/test_trie.jl: -------------------------------------------------------------------------------- 1 | using DataStructures 2 | using Base.Test 3 | 4 | t=Trie{Int}() 5 | 6 | t["amy"]=56 7 | t["ann"]=15 8 | t["emma"]=30 9 | t["rob"]=27 10 | t["roger"]=52 11 | 12 | @test haskey(t, "roger") 13 | @test get(t,"rob") == 27 14 | -------------------------------------------------------------------------------- /run_tests.jl: -------------------------------------------------------------------------------- 1 | tests = ["deque", 2 | "stack_and_queue", 3 | "accumulator", 4 | "classifiedcollections", 5 | "disjoint_set", 6 | "binheap", 7 | "mutable_binheap", 8 | "defaultdict", 9 | "ordereddict", 10 | "orderedset", 11 | "trie"] 12 | 13 | for t in tests 14 | fp = joinpath("test", "test_$t.jl") 15 | println("$fp ...") 16 | include(fp) 17 | end 18 | 19 | -------------------------------------------------------------------------------- /src/stack.jl: -------------------------------------------------------------------------------- 1 | # stacks 2 | 3 | type Stack{S} 
# S is the type of the internal deque instance 4 | store::S 5 | end 6 | 7 | Stack{T}(ty::Type{T}) = Stack(Deque{T}()) 8 | Stack{T}(ty::Type{T}, blksize::Integer) = Stack(Deque{T}(blksize)) 9 | 10 | isempty(s::Stack) = isempty(s.store) 11 | length(s::Stack) = length(s.store) 12 | 13 | top(s::Stack) = back(s.store) 14 | 15 | function push!(s::Stack, x) 16 | push!(s.store, x) 17 | s 18 | end 19 | 20 | pop!(s::Stack) = pop!(s.store) 21 | -------------------------------------------------------------------------------- /src/queue.jl: -------------------------------------------------------------------------------- 1 | # FIFO queue 2 | 3 | type Queue{S} # S is the type of internal deque 4 | store::S 5 | end 6 | 7 | Queue{T}(ty::Type{T}) = Queue(Deque{T}()) 8 | Queue{T}(ty::Type{T}, blksize::Integer) = Queue(Deque{T}(blksize)) 9 | 10 | isempty(s::Queue) = isempty(s.store) 11 | length(s::Queue) = length(s.store) 12 | 13 | front(s::Queue) = front(s.store) 14 | back(s::Queue) = back(s.store) 15 | 16 | function enqueue!(s::Queue, x) 17 | push!(s.store, x) 18 | s 19 | end 20 | 21 | dequeue!(s::Queue) = shift!(s.store) 22 | -------------------------------------------------------------------------------- /src/delegate.jl: -------------------------------------------------------------------------------- 1 | # by JMW 2 | macro delegate(source, targets) 3 | typename = esc(source.args[1]) 4 | fieldname = esc(Expr(:quote, source.args[2].args[1])) 5 | funcnames = targets.args 6 | n = length(funcnames) 7 | fdefs = Array(Any, n) 8 | for i in 1:n 9 | funcname = esc(funcnames[i]) 10 | fdefs[i] = quote 11 | ($funcname)(a::($typename), args...) = 12 | ($funcname)(a.($fieldname), args...) 13 | end 14 | end 15 | return Expr(:block, fdefs...) 
16 | end 17 | -------------------------------------------------------------------------------- /test/bench_disjoint_set.jl: -------------------------------------------------------------------------------- 1 | # Benchmark on disjoint set forests 2 | 3 | using DataStructures 4 | 5 | # do 10^6 random unions over a 2 * 10^6 element set 6 | 7 | const n = 2 * (10^6) 8 | const T0 = 10 9 | const T = 10^6 10 | 11 | function batch_union!(s::IntDisjointSets, x::Vector{Int}, y::Vector{Int}) 12 | for i = 1 : length(x) 13 | @inbounds union!(s, x[i], y[i]) 14 | end 15 | end 16 | 17 | s = IntDisjointSets(n) 18 | 19 | # warming 20 | 21 | x0 = rand(1:n, T0) 22 | y0 = rand(1:n, T0) 23 | 24 | batch_union!(s, x0, y0) 25 | 26 | # measure 27 | 28 | x = rand(1:n, T) 29 | y = rand(1:n, T) 30 | 31 | @time batch_union!(s, x, y) 32 | 33 | -------------------------------------------------------------------------------- /test/test_orderedset.jl: -------------------------------------------------------------------------------- 1 | using DataStructures 2 | using Base.Test 3 | 4 | # construction 5 | 6 | @test typeof(OrderedSet()) == OrderedSet{Any} 7 | @test typeof(OrderedSet('a')) == OrderedSet{Char} 8 | @test typeof(OrderedSet(1,2,3,4)) == OrderedSet{Int} 9 | 10 | # empty set 11 | d = OrderedSet{Char}() 12 | @test length(d) == 0 13 | @test isempty(d) 14 | @test !('c' in d) 15 | push!(d, 'c') 16 | @test !isempty(d) 17 | empty!(d) 18 | @test isempty(d) 19 | 20 | # access, modification 21 | 22 | for c in 'a':'z' 23 | push!(d, c) 24 | end 25 | 26 | for c in 'a':'z' 27 | @test c in d 28 | end 29 | 30 | @test collect(d) == ['a':'z'] 31 | 32 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | compiler: 3 | - gcc 4 | notifications: 5 | email: false 6 | env: 7 | matrix: 8 | - JULIAVERSION="juliareleases" 9 | - JULIAVERSION="julianightlies" 10 | before_install: 11 | 
- sudo add-apt-repository ppa:staticfloat/julia-deps -y 12 | - sudo add-apt-repository ppa:staticfloat/${JULIAVERSION} -y 13 | - sudo apt-get update -qq -y 14 | - sudo apt-get install libpcre3-dev julia -y 15 | - git config --global user.name "Dummy Travis User" 16 | - git config --global user.email "travis@example.net" 17 | script: 18 | - julia -e 'versioninfo(); Pkg.init(); run(`ln -s $(pwd()) $(Pkg.dir("DataStructures"))`); Pkg.pin("DataStructures"); Pkg.resolve()' 19 | - julia ./run_tests.jl 20 | -------------------------------------------------------------------------------- /test/bench_heaps.jl: -------------------------------------------------------------------------------- 1 | # Benchmark on heaps 2 | 3 | using DataStructures 4 | 5 | # benchmark function 6 | 7 | function benchmark_heap(title::ASCIIString, h::AbstractHeap, xs::Vector{Float64}) 8 | @assert isempty(h) 9 | 10 | # warming 11 | push!(h, 0.5) 12 | pop!(h) 13 | 14 | # bench 15 | n = length(xs) 16 | 17 | t1 = @elapsed for i = 1 : n 18 | push!(h, xs[i]) 19 | end 20 | t2 = @elapsed for i = 1 : n 21 | pop!(h) 22 | end 23 | 24 | @printf(" On %-24s: add.elapsed = %7.4fs pop.elapsed = %7.4fs\n", title, t1, t2) 25 | end 26 | 27 | 28 | # Benchmark on push! and pop! 
29 | 30 | xs = rand(10^6) 31 | 32 | h_bin = binary_minheap(Float64) 33 | h_mbin = mutable_binary_minheap(Float64) 34 | 35 | benchmark_heap("BinaryHeap", h_bin, xs) 36 | benchmark_heap("MutableBinaryHeap", h_mbin, xs) 37 | -------------------------------------------------------------------------------- /test/test_disjoint_set.jl: -------------------------------------------------------------------------------- 1 | # Test disjoint set 2 | 3 | using DataStructures 4 | using Base.Test 5 | 6 | s = DisjointSets{Int}(1:10) 7 | 8 | @test length(s) == 10 9 | @test num_groups(s) == 10 10 | 11 | r = [find_root(s, i) for i in 1 : 10] 12 | @test isa(r, Vector{Int}) 13 | @test isequal(r, [1:10]) 14 | 15 | for i = 1 : 5 16 | x = 2 * i - 1 17 | y = 2 * i 18 | union!(s, x, y) 19 | end 20 | 21 | @test length(s) == 10 22 | @test num_groups(s) == 5 23 | 24 | r0 = [1, 1, 3, 3, 5, 5, 7, 7, 9, 9] 25 | r = [find_root(s, i) for i in 1 : 10] 26 | @test isa(r, Vector{Int}) 27 | @test isequal(r, r0) 28 | 29 | union!(s, 1, 4) 30 | union!(s, 3, 5) 31 | union!(s, 7, 9) 32 | 33 | @test length(s) == 10 34 | @test num_groups(s) == 2 35 | 36 | r0 = [1, 1, 1, 1, 1, 1, 7, 7, 7, 7] 37 | r = [find_root(s, i) for i in 1 : 10] 38 | @test isa(r, Vector{Int}) 39 | @test isequal(r, r0) 40 | -------------------------------------------------------------------------------- /test/test_ordereddict.jl: -------------------------------------------------------------------------------- 1 | using DataStructures 2 | using Base.Test 3 | 4 | # construction 5 | 6 | @test typeof(OrderedDict()) == OrderedDict{Any,Any} 7 | @test typeof(OrderedDict('a',1)) == OrderedDict{Char,Int} 8 | @test typeof(OrderedDict([("a",1),("b",2)])) == OrderedDict{ASCIIString,Int} 9 | 10 | # empty dictionary 11 | d = OrderedDict(Char, Int) 12 | @test length(d) == 0 13 | @test isempty(d) 14 | @test_throws d['c'] == 1 15 | d['c'] = 1 16 | @test !isempty(d) 17 | empty!(d) 18 | @test isempty(d) 19 | 20 | # access, modification 21 | 22 | for c in 
'a':'z' 23 | d[c] = c-'a'+1 24 | end 25 | 26 | @test (d['a'] += 1) == 2 27 | @test 'a' in keys(d) 28 | @test haskey(d, 'a') 29 | @test get(d, 'B', 0) == 0 30 | @test !('B' in keys(d)) 31 | @test !haskey(d, 'B') 32 | @test pop!(d, 'a') == 2 33 | 34 | @test collect(keys(d)) == ['b':'z'] 35 | @test collect(values(d)) == [2:26] 36 | @test collect(d) == [(a,i) for (a,i) in zip('b':'z', 2:26)] 37 | 38 | -------------------------------------------------------------------------------- /License.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Dahua Lin 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /test/test_stack_and_queue.jl: -------------------------------------------------------------------------------- 1 | # test stacks and queues 2 | 3 | using DataStructures 4 | using Base.Test 5 | 6 | # Stack 7 | 8 | s = Stack(Int, 5) 9 | n = 100 10 | 11 | @test length(s) == 0 12 | @test isempty(s) 13 | @test_throws top(s) 14 | @test_throws pop!(s) 15 | 16 | for i = 1 : n 17 | push!(s, i) 18 | @test top(s) == i 19 | @test !isempty(s) 20 | @test length(s) == i 21 | end 22 | 23 | for i = 1 : n 24 | x = pop!(s) 25 | @test x == n - i + 1 26 | if i < n 27 | @test top(s) == n - i 28 | else 29 | @test_throws top(s) 30 | end 31 | @test isempty(s) == (i == n) 32 | @test length(s) == n - i 33 | end 34 | 35 | # Queue 36 | 37 | s = Queue(Int, 5) 38 | n = 100 39 | 40 | @test length(s) == 0 41 | @test isempty(s) 42 | @test_throws front(s) 43 | @test_throws back(s) 44 | @test_throws dequeue!(s) 45 | 46 | for i = 1 : n 47 | enqueue!(s, i) 48 | @test front(s) == 1 49 | @test back(s) == i 50 | @test !isempty(s) 51 | @test length(s) == i 52 | end 53 | 54 | for i = 1 : n 55 | x = dequeue!(s) 56 | @test x == i 57 | if i < n 58 | @test front(s) == i + 1 59 | @test back(s) == n 60 | else 61 | @test_throws front(s) 62 | @test_throws back(s) 63 | end 64 | @test isempty(s) == (i == n) 65 | @test length(s) == n - i 66 | end 67 | -------------------------------------------------------------------------------- /test/bench_deque.jl: -------------------------------------------------------------------------------- 1 | # benchmark of deque 2 | 3 | using DataStructures 4 | 5 | # push_back 6 | 7 | function batch_pushback!{Container,T}(v::Container, n::Int, e::T) 8 | for i = 1 : n 9 | push!(v, e) 10 | end 11 | end 12 | 13 | v = Int[] 14 | q = deque(Int) 15 | 16 | batch_pushback!(v, 10, 0) 17 | t1 = @elapsed batch_pushback!(v, 10^7, 0) 18 | 19 | batch_pushback!(q, 10, 0) 20 | t2 = @elapsed batch_pushback!(q, 10^7, 0) 21 | 22 
| println("push back 10^7 integers:") 23 | @printf(" Vector: elapsed = %8.4fs\n", t1) 24 | @printf(" Deque: elapsed = %8.4fs\n", t2) 25 | 26 | 27 | # push_front 28 | 29 | function batch_pushfront!{Container,T}(v::Container, n::Int, e::T) 30 | for i = 1 : n 31 | unshift!(v, e) 32 | end 33 | end 34 | 35 | v = Int[] 36 | q = deque(Int) 37 | 38 | batch_pushfront!(v, 10, 0) 39 | t1 = @elapsed batch_pushfront!(v, 10^7, 0) 40 | 41 | batch_pushfront!(q, 10, 0) 42 | t2 = @elapsed batch_pushfront!(q, 10^7, 0) 43 | 44 | println("push front 10^7 integers:") 45 | @printf(" Vector: elapsed = %8.4fs\n", t1) 46 | @printf(" Deque: elapsed = %8.4fs\n", t2) 47 | 48 | 49 | # traverse 50 | 51 | function traverse(container) 52 | for e in container 53 | end 54 | end 55 | 56 | traverse(v) 57 | t1 = @elapsed traverse(v) 58 | 59 | traverse(q) 60 | t2 = @elapsed traverse(q) 61 | 62 | println("traverse 10^7 integers:") 63 | @printf(" Vector: elapsed = %8.4fs\n", t1) 64 | @printf(" Deque: elapsed = %8.4fs\n", t2) 65 | 66 | -------------------------------------------------------------------------------- /test/test_accumulator.jl: -------------------------------------------------------------------------------- 1 | # Test of accumulators 2 | 3 | using DataStructures 4 | using Base.Test 5 | 6 | ct = counter(ASCIIString) 7 | @assert isa(ct, Accumulator{ASCIIString,Int}) 8 | 9 | @test ct["abc"] == 0 10 | @test !haskey(ct, "abc") 11 | @test isempty(collect(keys(ct))) 12 | 13 | add!(ct, "a") 14 | @test haskey(ct, "a") 15 | @test ct["a"] == 1 16 | 17 | add!(ct, "b", 2) 18 | @test haskey(ct, "b") 19 | @test ct["b"] == 2 20 | 21 | add!(ct, "b", 3) 22 | @test ct["b"] == 5 23 | 24 | @test !haskey(ct, "abc") 25 | @test ct["abc"] == 0 26 | 27 | @test length(ct) == 2 28 | @test length(collect(ct)) == 2 29 | @test length(collect(keys(ct))) == 2 30 | 31 | ct2 = counter(["a", "a", "b", "b", "a", "c", "c"]) 32 | @test isa(ct2, Accumulator{ASCIIString,Int}) 33 | @test haskey(ct2, "a") 34 | @test haskey(ct2, "b") 
35 | @test haskey(ct2, "c") 36 | @test ct2["a"] == 3 37 | @test ct2["b"] == 2 38 | @test ct2["c"] == 2 39 | 40 | add!(ct, ct2) 41 | @test ct["a"] == 4 42 | @test ct["b"] == 7 43 | @test ct["c"] == 2 44 | 45 | ct3 = counter((ASCIIString=>Int)["a"=>10, "b"=>20]) 46 | @test isa(ct3, Accumulator{ASCIIString,Int}) 47 | @test haskey(ct3, "a") 48 | @test haskey(ct3, "b") 49 | @test ct3["a"] == 10 50 | @test ct3["b"] == 20 51 | 52 | ctm = merge(ct2, ct3) 53 | @test isa(ctm, Accumulator{ASCIIString,Int}) 54 | @test haskey(ctm, "a") 55 | @test haskey(ctm, "b") 56 | @test haskey(ctm, "c") 57 | @test ctm["a"] == 13 58 | @test ctm["b"] == 22 59 | @test ctm["c"] == 2 60 | 61 | @test pop!(ctm, "b") == 22 62 | @test !haskey(ctm, "b") 63 | @test ctm["b"] == 0 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /src/DataStructures.jl: -------------------------------------------------------------------------------- 1 | module DataStructures 2 | 3 | import Base: length, isempty, start, next, done, 4 | show, dump, empty!, getindex, setindex!, get, get!, 5 | in, haskey, keys, merge, copy, 6 | push!, pop!, shift!, unshift!, add!, 7 | union!, delete!, similar, sizehint, 8 | isequal, hash 9 | 10 | export Deque, Stack, Queue 11 | export deque, enqueue!, dequeue!, update! 12 | export capacity, num_blocks, front, back, top, sizehint 13 | 14 | export Accumulator, counter 15 | export ClassifiedCollections 16 | export classified_lists, classified_sets, classified_counters 17 | 18 | export IntDisjointSets, DisjointSets, num_groups, find_root, in_same_set 19 | 20 | export AbstractHeap, compare, extract_all! 
21 | export BinaryHeap, binary_minheap, binary_maxheap 22 | export MutableBinaryHeap, mutable_binary_minheap, mutable_binary_maxheap 23 | 24 | export OrderedDict, OrderedSet 25 | export DefaultDict, DefaultOrderedDict 26 | export Trie, subtrie, keys_with_prefix 27 | 28 | include("delegate.jl") 29 | 30 | include("deque.jl") 31 | include("stack.jl") 32 | include("queue.jl") 33 | include("accumulator.jl") 34 | include("classifiedcollections.jl") 35 | include("disjoint_set.jl") 36 | include("heaps.jl") 37 | 38 | include("hashdict.jl") 39 | include("ordereddict.jl") 40 | include("orderedset.jl") 41 | include("defaultdict.jl") 42 | include("trie.jl") 43 | 44 | @deprecate stack Stack 45 | @deprecate queue Queue 46 | end 47 | -------------------------------------------------------------------------------- /test/test_classifiedcollections.jl: -------------------------------------------------------------------------------- 1 | # Test classified collections 2 | 3 | using DataStructures 4 | using Base.Test 5 | 6 | # classified lists 7 | 8 | c = classified_lists(ASCIIString, Int) 9 | 10 | add!(c, "low", 1) 11 | add!(c, "low", 2) 12 | add!(c, "low", 3) 13 | add!(c, "high", 4) 14 | add!(c, "high", 5) 15 | 16 | @test haskey(c, "low") 17 | @test haskey(c, "high") 18 | @test !haskey(c, "mid") 19 | 20 | @test c["low"] == [1, 2, 3] 21 | @test c["high"] == [4, 5] 22 | 23 | # classified sets 24 | 25 | c = classified_sets(ASCIIString, Int) 26 | 27 | add!(c, "low", 1) 28 | add!(c, "low", 2) 29 | add!(c, "low", 3) 30 | add!(c, "low", 1) 31 | add!(c, "low", 2) 32 | 33 | add!(c, "high", 4) 34 | add!(c, "high", 5) 35 | add!(c, "high", 5) 36 | 37 | @test haskey(c, "low") 38 | @test haskey(c, "high") 39 | @test !haskey(c, "mid") 40 | 41 | @test isa(c["low"], Set{Int}) 42 | @test isa(c["high"], Set{Int}) 43 | 44 | @test sort(collect(c["low"])) == [1, 2, 3] 45 | @test sort(collect(c["high"])) == [4, 5] 46 | 47 | # classified counters 48 | 49 | c = classified_counters(ASCIIString, Float64) 50 | 51 
| add!(c, "low", 1.) 52 | add!(c, "low", 2.) 53 | add!(c, "low", 3.) 54 | add!(c, "low", 1.) 55 | add!(c, "low", 2.) 56 | add!(c, "low", 2.) 57 | 58 | add!(c, "high", 4.) 59 | add!(c, "high", 5.) 60 | add!(c, "high", 5.) 61 | 62 | @test haskey(c, "low") 63 | @test haskey(c, "high") 64 | @test !haskey(c, "mid") 65 | 66 | cl = c["low"] 67 | ch = c["high"] 68 | 69 | @test isa(cl, Accumulator{Float64, Int}) 70 | @test isa(ch, Accumulator{Float64, Int}) 71 | 72 | @test cl[1.] == 2 73 | @test cl[2.] == 3 74 | @test cl[3.] == 1 75 | @test ch[4.] == 1 76 | @test ch[5.] == 2 77 | 78 | -------------------------------------------------------------------------------- /src/accumulator.jl: -------------------------------------------------------------------------------- 1 | # A counter type 2 | 3 | type Accumulator{T, V<:Number} 4 | map::Dict{T,V} 5 | end 6 | 7 | ## constructors 8 | 9 | Accumulator{T,V<:Number}(::Type{T}, ::Type{V}) = Accumulator{T,V}((T=>V)[]) 10 | counter(T::Type) = Accumulator(T,Int) 11 | 12 | Accumulator{T,V<:Number}(dct::Dict{T,V}) = Accumulator{T,V}(copy(dct)) 13 | counter{T}(dct::Dict{T,Int}) = Accumulator{T,Int}(copy(dct)) 14 | 15 | function counter{T}(seq::AbstractArray{T}) 16 | ct = counter(T) 17 | for x in seq 18 | add!(ct, x) 19 | end 20 | return ct 21 | end 22 | 23 | copy{T,V<:Number}(ct::Accumulator{T,V}) = Accumulator{T,V}(copy(ct.map)) 24 | 25 | length(a::Accumulator) = length(a.map) 26 | 27 | ## retrieval 28 | 29 | getindex{T,V}(ct::Accumulator{T,V}, x::T) = get(ct.map, x, zero(V)) 30 | 31 | haskey{T,V}(ct::Accumulator{T,V}, x::T) = haskey(ct.map, x) 32 | 33 | keys(ct::Accumulator) = keys(ct.map) 34 | 35 | 36 | ## iteration 37 | 38 | start(ct::Accumulator) = start(ct.map) 39 | next(ct::Accumulator, state) = next(ct.map, state) 40 | done(ct::Accumulator, state) = done(ct.map, state) 41 | 42 | 43 | # manipulation 44 | 45 | add!{T,V<:Number}(ct::Accumulator{T,V}, x::T, a::V) = (ct.map[x] = ct[x] + a) 46 | 
add!{T,V<:Number,V2<:Number}(ct::Accumulator{T,V}, x::T, a::V2) = add!(ct, x, convert(V,a)) 47 | add!{T,V<:Number}(ct::Accumulator{T,V}, x::T) = add!(ct, x, one(V)) 48 | push!{T,V<:Number}(ct::Accumulator{T,V}, x::T) = add!(ct, x) 49 | 50 | function add!{T,V<:Number,V2<:Number}(ct::Accumulator{T,V}, r::Accumulator{T,V2}) 51 | for (x::T, v::V2) in r 52 | add!(ct, x, v) 53 | end 54 | ct 55 | end 56 | 57 | pop!{T,V<:Number}(ct::Accumulator{T,V}, x::T) = pop!(ct.map, x) 58 | 59 | merge{T,V<:Number}(ct1::Accumulator{T,V}, ct2::Accumulator{T,V}) = add!(copy(ct1), ct2) 60 | 61 | -------------------------------------------------------------------------------- /src/classifiedcollections.jl: -------------------------------------------------------------------------------- 1 | # A Classified Collection is a map which associates a collection to each key 2 | # 3 | # The collection can be either an array or a set, a counter, or other data structures 4 | # that support the push! method 5 | # 6 | 7 | type ClassifiedCollections{K, Collection} 8 | map::Dict{K, Collection} 9 | end 10 | 11 | ## constructors 12 | 13 | ClassifiedCollections(K::Type, C::Type) = ClassifiedCollections{K, C}((K=>C)[]) 14 | 15 | classified_lists(K::Type, V::Type) = ClassifiedCollections(K, Vector{V}) 16 | classified_sets(K::Type, V::Type) = ClassifiedCollections(K, Set{V}) 17 | classified_counters(K::Type, T::Type) = ClassifiedCollections(K, Accumulator{T, Int}) 18 | 19 | _create_empty{T}(::Type{Vector{T}}) = Array(T, 0) 20 | _create_empty{T}(::Type{Set{T}}) = Set{T}() 21 | _create_empty{T,V}(::Type{Accumulator{T,V}}) = Accumulator(T, V) 22 | 23 | copy{K, C}(cc::ClassifiedCollections{K, C}) = ClassifiedCollections{K, C}(copy(cc.map)) 24 | 25 | length(cc::ClassifiedCollections) = length(cc.map) 26 | 27 | ## retrieval 28 | 29 | getindex{T,C}(cc::ClassifiedCollections{T,C}, x::T) = cc.map[x] 30 | 31 | haskey{T,C}(cc::ClassifiedCollections{T,C}, x::T) = haskey(cc.map, x) 32 | 33 | keys(cc::ClassifiedCollections) 
= keys(cc.map) 34 | 35 | ## iteration 36 | 37 | start(cc::ClassifiedCollections) = start(cc.map) 38 | next(cc::ClassifiedCollections, state) = next(cc.map, state) 39 | done(cc::ClassifiedCollections, state) = done(cc.map, state) 40 | 41 | # manipulation 42 | 43 | function add!{K, C}(cc::ClassifiedCollections{K, C}, key::K, e) 44 | c = get(cc.map, key, nothing) 45 | if is(c, nothing) 46 | c = _create_empty(C) 47 | cc.map[key] = c 48 | end 49 | push!(c, e) 50 | end 51 | 52 | pop!{K}(cc::ClassifiedCollections{K}, key::K) = pop!(cc.map, key) # remove `key` and return its collection 53 | 54 | -------------------------------------------------------------------------------- /src/trie.jl: -------------------------------------------------------------------------------- 1 | type Trie{T} 2 | value::T 3 | children::Dict{Char,Trie{T}} 4 | is_key::Bool 5 | 6 | function Trie() 7 | self = new() 8 | self.children = (Char=>Trie{T})[] 9 | self.is_key = false 10 | self 11 | end 12 | end 13 | 14 | Trie() = Trie{Any}() 15 | 16 | function setindex!{T}(t::Trie{T}, val::T, key::String) 17 | node = t 18 | for char in key 19 | if !haskey(node.children, char) 20 | node.children[char] = Trie{T}() 21 | end 22 | node = node.children[char] 23 | end 24 | node.is_key = true 25 | node.value = val 26 | end 27 | 28 | function subtrie(t::Trie, prefix::String) 29 | node = t 30 | for char in prefix 31 | if !haskey(node.children, char) 32 | return nothing 33 | else 34 | node = node.children[char] 35 | end 36 | end 37 | node 38 | end 39 | 40 | function haskey(t::Trie, key::String) 41 | node = subtrie(t, key) 42 | node != nothing && node.is_key 43 | end 44 | 45 | get(t::Trie, key::String) = get(t, key, nothing) 46 | function get(t::Trie, key::String, notfound) 47 | node = subtrie(t, key) 48 | if node != nothing && node.is_key 49 | return node.value 50 | end 51 | notfound 52 | end 53 | 54 | function keys(t::Trie, prefix::String, found) # append to `found` every key stored at or below `t`, each prepended with `prefix` 55 | if t.is_key 56 | push!(found, prefix) # this node terminates a key 57 | end 58 | for (char,child) in t.children 59 | keys(child, string(prefix,char), 
found) 60 | end 61 | end 62 | keys(t::Trie, prefix::String) = (found=String[]; keys(t, prefix, found); found) 63 | keys(t::Trie) = keys(t, "") 64 | 65 | function keys_with_prefix(t::Trie, prefix::String) 66 | st = subtrie(t, prefix) 67 | st != nothing ? keys(st,prefix) : String[] # no match: empty result with the same element type as keys(t, prefix) 68 | end 69 | 70 | -------------------------------------------------------------------------------- /src/ordereddict.jl: -------------------------------------------------------------------------------- 1 | # ordered dict 2 | 3 | import Base: haskey, get, get!, getkey, delete!, push!, pop!, empty!, 4 | setindex!, getindex, sizehint, length, isempty, start, 5 | next, done, keys, values, setdiff, setdiff!, 6 | union, union!, intersect, isequal, filter, filter!, 7 | hash, eltype 8 | 9 | # This is just a simple wrapper around a HashDict, which is a modified 10 | # implementation of the Dict implementation in Base allowing ordering 11 | # information to be maintained. 12 | 13 | # In particular, the HashDict stored in an OrderedDict is a 14 | # HashDict{K,V,Ordered} 15 | 16 | # A HashDict{K,V,Unordered} would be equivalent to Base.Dict 17 | 18 | immutable OrderedDict{K,V} <: Associative{K,V} 19 | d::HashDict{K,V,Ordered} 20 | 21 | OrderedDict() = new(HashDict{K,V,Ordered}()) 22 | OrderedDict(kv) = new(HashDict{K,V,Ordered}(kv)) 23 | OrderedDict(ks,vs) = new(HashDict{K,V,Ordered}(ks,vs)) 24 | end 25 | 26 | OrderedDict() = OrderedDict{Any,Any}() 27 | 28 | OrderedDict{K,V}(ks::AbstractArray{K}, vs::AbstractArray{V}) = OrderedDict{K,V}(ks,vs) 29 | OrderedDict{K,V}(::Type{K},::Type{V}) = OrderedDict{K,V}() 30 | OrderedDict(ks,vs) = OrderedDict{eltype(ks),eltype(vs)}(ks, vs) 31 | 32 | OrderedDict{K,V}(kv::AbstractArray{(K,V)}) = OrderedDict{K,V}(kv) 33 | 34 | ## Functions 35 | 36 | ## Most functions are simply delegated to the wrapped HashDict 37 | 38 | @delegate OrderedDict.d [ haskey, get, get!, getkey, delete!, pop!, 39 | empty!, setindex!, getindex, sizehint, 40 | length, isempty, start, next, done, 
keys, 41 | values ] 42 | 43 | similar{K,V}(d::OrderedDict{K,V}) = OrderedDict{K,V}() 44 | in{T<:OrderedDict}(key, v::Base.KeyIterator{T}) = key in keys(v.dict.d.d) -------------------------------------------------------------------------------- /src/heaps.jl: -------------------------------------------------------------------------------- 1 | # Various heap implementation 2 | 3 | ########################################################### 4 | # 5 | # Heap interface specification 6 | # 7 | # Each heap is associated with a handle type (H), and 8 | # a value type v. 9 | # 10 | # Here, the value type must be comparable, and a handle 11 | # is an object through which one can refer to a specific 12 | # node of the heap and thus update its value. 13 | # 14 | # Each heap type must implement all of the following 15 | # functions. Here, let h be a heap, i be a handle, and 16 | # v be a value. 17 | # 18 | # - length(h) returns the number of elements 19 | # 20 | # - isempty(h) returns whether the heap is 21 | # empty 22 | # 23 | # - push!(h, v) add a value to the heap 24 | # 25 | # - sizehint(h) set size hint to a heap 26 | # 27 | # - top(h) return the top value of a heap 28 | # 29 | # - pop!(h) removes the top value, and 30 | # returns it 31 | # 32 | # For mutable heaps, it should also support 33 | # 34 | # - push!(h, v) adds a value to the heap and 35 | # returns a handle to v 36 | # 37 | # - update!(h, i, v) updates the value of an element 38 | # (referred to by the handle i) 39 | # 40 | ########################################################### 41 | 42 | # HT: handle type 43 | # VT: value type 44 | 45 | abstract AbstractHeap{VT} 46 | 47 | abstract AbstractMutableHeap{VT,HT} <: AbstractHeap{VT} 48 | 49 | # comparer 50 | 51 | immutable LessThan 52 | end 53 | 54 | immutable GreaterThan 55 | end 56 | 57 | compare(c::LessThan, x, y) = x < y 58 | compare(c::GreaterThan, x, y) = x > y 59 | 60 | # heap implementations 61 | 62 | include("heaps/binary_heap.jl") 63 | 
include("heaps/mutable_binary_heap.jl") 64 | 65 | # generic functions 66 | 67 | function extract_all!{VT}(h::AbstractHeap{VT}) 68 | n = length(h) 69 | r = Array(VT, n) 70 | for i = 1 : n 71 | r[i] = pop!(h) 72 | end 73 | r 74 | end 75 | -------------------------------------------------------------------------------- /test/test_binheap.jl: -------------------------------------------------------------------------------- 1 | # Test of binary heaps 2 | 3 | using DataStructures 4 | using Base.Test 5 | 6 | # test make heap 7 | 8 | vs = [4, 1, 3, 2, 16, 9, 10, 14, 8, 7] 9 | h = binary_minheap(vs) 10 | 11 | @test length(h) == 10 12 | @test !isempty(h) 13 | @test top(h) == 1 14 | @test isequal(h.valtree, [1, 2, 3, 4, 7, 9, 10, 14, 8, 16]) 15 | 16 | 17 | h = binary_maxheap(vs) 18 | 19 | @test length(h) == 10 20 | @test !isempty(h) 21 | @test top(h) == 16 22 | @test isequal(h.valtree, [16, 14, 10, 8, 7, 3, 9, 1, 4, 2]) 23 | 24 | # test push! 25 | 26 | hmin = binary_minheap(Int) 27 | @test length(hmin) == 0 28 | @test isempty(hmin) 29 | 30 | ss = { 31 | [4], 32 | [1, 4], 33 | [1, 4, 3], 34 | [1, 2, 3, 4], 35 | [1, 2, 3, 4, 16], 36 | [1, 2, 3, 4, 16, 9], 37 | [1, 2, 3, 4, 16, 9, 10], 38 | [1, 2, 3, 4, 16, 9, 10, 14], 39 | [1, 2, 3, 4, 16, 9, 10, 14, 8], 40 | [1, 2, 3, 4, 7, 9, 10, 14, 8, 16]} 41 | 42 | for i = 1 : length(vs) 43 | push!(hmin, vs[i]) 44 | @test length(hmin) == i 45 | @test !isempty(hmin) 46 | @test isequal(hmin.valtree, ss[i]) 47 | end 48 | 49 | hmax = binary_maxheap(Int) 50 | @test length(hmax) == 0 51 | @test isempty(hmax) 52 | 53 | ss = { 54 | [4], 55 | [4, 1], 56 | [4, 1, 3], 57 | [4, 2, 3, 1], 58 | [16, 4, 3, 1, 2], 59 | [16, 4, 9, 1, 2, 3], 60 | [16, 4, 10, 1, 2, 3, 9], 61 | [16, 14, 10, 4, 2, 3, 9, 1], 62 | [16, 14, 10, 8, 2, 3, 9, 1, 4], 63 | [16, 14, 10, 8, 7, 3, 9, 1, 4, 2]} 64 | 65 | for i = 1 : length(vs) 66 | push!(hmax, vs[i]) 67 | @test length(hmax) == i 68 | @test !isempty(hmax) 69 | @test isequal(hmax.valtree, ss[i]) 70 | end 71 | 72 | # test 
pop! 73 | 74 | @test isequal(extract_all!(hmin), [1, 2, 3, 4, 7, 8, 9, 10, 14, 16]) 75 | @test isempty(hmin) 76 | 77 | @test isequal(extract_all!(hmax), [16, 14, 10, 9, 8, 7, 4, 3, 2, 1]) 78 | @test isempty(hmax) 79 | 80 | # test hybrid add! and pop! 81 | 82 | h = binary_minheap(Int) 83 | 84 | push!(h, 5) 85 | push!(h, 10) 86 | @test isequal(h.valtree, [5, 10]) 87 | 88 | @test pop!(h) == 5 89 | @test isequal(h.valtree, [10]) 90 | 91 | push!(h, 7) 92 | push!(h, 2) 93 | @test isequal(h.valtree, [2, 10, 7]) 94 | 95 | @test pop!(h) == 2 96 | @test isequal(h.valtree, [7, 10]) 97 | -------------------------------------------------------------------------------- /src/tree.jl: -------------------------------------------------------------------------------- 1 | import Base: haskey, getindex, setindex!, delete! 2 | 3 | export Tree, EmptyTree, TreeNode, BinaryTree 4 | 5 | abstract Tree{K,V} 6 | 7 | type EmptyTree{K,V} <: Tree{K,V} 8 | end 9 | 10 | type TreeNode{K,V} <: Tree{K,V} 11 | key:: K 12 | data:: V 13 | left:: Tree{K,V} 14 | right::Tree{K,V} 15 | end 16 | 17 | type BinaryTree{K,V} 18 | root:: Tree{K,V} 19 | 20 | BinaryTree() = new(EmptyTree{K,V}()) 21 | end 22 | 23 | haskey(t::EmptyTree, key) = false 24 | haskey(t::BinaryTree, key) = haskey(t.root, key) 25 | 26 | function haskey(t::TreeNode, key) 27 | if t.key == key 28 | true 29 | elseif key < t.key 30 | haskey(t.left, key) 31 | else 32 | haskey(t.right, key) 33 | end 34 | end 35 | 36 | getindex(t::EmptyTree, k) = throw(KeyError(k)) 37 | getindex(t::BinaryTree, k) = t.root[k] 38 | 39 | function getindex(t::TreeNode, key) 40 | if t.key == key 41 | t.data 42 | elseif key < t.key 43 | t.left[key] 44 | else 45 | t.right[key] 46 | end 47 | end 48 | 49 | setindex!{K,V}(t::EmptyTree{K,V}, v, k) = TreeNode{K,V}(k, v, t, t) 50 | setindex!(t::BinaryTree, v, k) = (t.root = setindex!(t.root, v, k); t) 51 | 52 | function setindex!(t::TreeNode, v, k) 53 | if t.key == k 54 | t.data = v 55 | elseif k < t.key 56 | t.left = 
# Remove key `k` from the subtree rooted at `t` and return the new subtree root.
# Reaching an EmptyTree (key absent) is handled by the EmptyTree method, which
# throws a KeyError.
function delete!(t::TreeNode, k)
    if t.key != k
        # not this node: recurse into the side that can contain `k`
        if k < t.key
            t.left = delete!(t.left, k)
        else
            t.right = delete!(t.right, k)
        end
        return t
    end

    # `t` is the node to drop; with at most one child, that child replaces it
    isa(t.right, EmptyTree) && return t.left
    isa(t.left, EmptyTree) && return t.right

    # two children: the left subtree becomes the root and the right subtree
    # is re-attached inside it
    survivor = t.left
    treeinsert!(survivor, t.right)
    survivor
end
== [1:26] 47 | 48 | # Starting from an existing dictionary 49 | # Note: dictionary is copied upon construction 50 | e = ['a'=>1, 'b'=>3, 'c'=>5] 51 | f = DefaultDict(0, e) 52 | @test f['d'] == 0 53 | @test_throws e['d'] 54 | e['e'] = 9 55 | @test e['e'] == 9 56 | @test f['e'] == 0 57 | 58 | 59 | ##################### 60 | # DefaultOrderedDicts 61 | ##################### 62 | 63 | # construction 64 | @test_throws DefaultOrderedDict() 65 | @test_throws DefaultOrderedDict(String, Int) 66 | 67 | # empty dictionary 68 | d = DefaultOrderedDict(Char, Int, 1) 69 | @test length(d) == 0 70 | @test isempty(d) 71 | @test d['c'] == 1 72 | @test !isempty(d) 73 | empty!(d) 74 | @test isempty(d) 75 | 76 | # access, modification 77 | @test (d['a'] += 1) == 2 78 | @test 'a' in keys(d) 79 | @test haskey(d, 'a') 80 | @test get(d, 'b', 0) == 0 81 | @test !('b' in keys(d)) 82 | @test !haskey(d, 'b') 83 | @test pop!(d, 'a') == 2 84 | @test isempty(d) 85 | 86 | for c in 'a':'z' 87 | d[c] = c-'a'+1 88 | end 89 | 90 | @test d['z'] == 26 91 | @test d['@'] == 1 92 | @test length(d) == 27 93 | delete!(d, '@') 94 | @test length(d) == 26 95 | 96 | for (k,v) in d 97 | @test v == k-'a'+1 98 | end 99 | 100 | @test collect(keys(d)) == ['a':'z'] 101 | @test collect(values(d)) == [1:26] 102 | -------------------------------------------------------------------------------- /src/list.jl: -------------------------------------------------------------------------------- 1 | import Base: length, map, show, copy, cat, start, done, next 2 | 3 | export List, Nil, Cons, cons, nil, head, tail, list 4 | 5 | abstract List{T} 6 | 7 | type Nil{T} <: List{T} 8 | end 9 | 10 | type Cons{T} <: List{T} 11 | head::T 12 | tail::List{T} 13 | end 14 | 15 | cons{T}(h, t::List{T}) = Cons{T}(h, t) 16 | 17 | nil(T) = Nil{T}() 18 | nil() = nil(Any) 19 | 20 | head(x::Cons) = x.head 21 | tail(x::Cons) = x.tail 22 | 23 | function show{T}(io::IO, l::List{T}) 24 | if isa(l,Nil) 25 | if is(T,Any) 26 | print(io, "nil()") 27 | else 28 | 
length(l::Nil) = 0

# Number of elements in a non-empty list.
# Walks the cells in a loop instead of recursing once per element, so the call
# depth no longer grows with the length of the list.
function length(l::Cons)
    n = 0
    while isa(l, Cons)
        n += 1
        l = tail(l)
    end
    # `l` is now the terminating tail (a Nil, whose length is 0)
    n + length(l)
end
# Iteration protocol for lists.
# The iteration state is the not-yet-visited remainder of the list: it starts as
# the list itself and iteration is finished once the remainder is a Nil.
# The methods accept any List (not only Cons) so that iterating an empty list
# simply yields no elements instead of failing with a missing `start` method.
start{T}(l::List{T}) = l
done{T}(l::List{T}, state::Cons{T}) = false
done{T}(l::List{T}, state::Nil{T}) = true
next{T}(l::List{T}, state::Cons{T}) = (state.head, state.tail)
# Remove and return the first element of `s` in iteration order.
#
# `start(s.dict)` is an iteration state, i.e. a position in `s.dict.order`; the
# `next` method for OrderedSet maps such a position through `s.dict.order`
# before indexing `s.dict.keys`, and the same mapping is applied here.
# Throws an ArgumentError when the set is empty.
function pop!(s::OrderedSet)
    isempty(s) && throw(ArgumentError("set must be non-empty"))
    val = s.dict.keys[s.dict.order[start(s.dict)]]
    delete!(s.dict, val)
    val
end
# Hash an OrderedSet consistently with `isequal`.
#
# `isequal` on OrderedSets compares the lengths and checks inclusion, so two
# sets holding the same elements in a different insertion order are equal and
# must hash alike. The element hashes are therefore combined with `+`, which
# does not depend on the order of traversal.
# NOTE(review): assumes `hash(x)` returns a fixed-width integer so the sum
# wraps instead of growing -- confirm for the targeted Julia version.
function hash(s::OrderedSet)
    h = hash(length(s))
    for x in s
        h += hash(x)
    end
    h
end
div(n-i-1, 3) + 1 49 | @test x == n - i + 1 50 | 51 | if !isempty(q) 52 | @test front(q) == 1 53 | @test back(q) == n - i 54 | else 55 | @test_throws front(q) 56 | @test_throws back(q) 57 | end 58 | 59 | cq = collect(q) 60 | @test cq == [1:n-i] 61 | end 62 | 63 | # push front 64 | 65 | q = Deque{Int}(3) 66 | 67 | for i = 1 : n 68 | unshift!(q, i) 69 | @test length(q) == i 70 | @test isempty(q) == false 71 | @test num_blocks(q) == div(i-1, 3) + 1 72 | 73 | @test front(q) == i 74 | @test back(q) == 1 75 | 76 | cq = collect(q) 77 | @test isa(cq, Vector{Int}) 78 | @test cq == [i:-1:1] 79 | end 80 | 81 | # pop front 82 | 83 | for i = 1 : n 84 | x = shift!(q) 85 | @test length(q) == n - i 86 | @test isempty(q) == (i == n) 87 | @test num_blocks(q) == div(n-i-1, 3) + 1 88 | @test x == n - i + 1 89 | 90 | if !isempty(q) 91 | @test front(q) == n - i 92 | @test back(q) == 1 93 | else 94 | @test_throws front(q) 95 | @test_throws back(q) 96 | end 97 | 98 | cq = collect(q) 99 | @test cq == [n-i:-1:1] 100 | end 101 | 102 | # random operations 103 | 104 | q = Deque{Int}(5) 105 | r = Int[] 106 | m = 100 107 | 108 | for k = 1 : m 109 | la = rand(1:20) 110 | x = rand(1:1000, la) 111 | 112 | for i = 1 : la 113 | if randbool() 114 | push!(r, x[i]) 115 | push!(q, x[i]) 116 | else 117 | unshift!(r, x[i]) 118 | unshift!(q, x[i]) 119 | end 120 | end 121 | 122 | @test length(q) == length(r) 123 | @test collect(q) == r 124 | 125 | lr = rand(1:length(r)) 126 | for i = 1 : lr 127 | if randbool() 128 | pop!(r) 129 | pop!(q) 130 | else 131 | shift!(r) 132 | shift!(q) 133 | end 134 | end 135 | 136 | @test length(q) == length(r) 137 | @test collect(q) == r 138 | end 139 | 140 | 141 | -------------------------------------------------------------------------------- /src/disjoint_set.jl: -------------------------------------------------------------------------------- 1 | # Disjoint sets 2 | 3 | ############################################################ 4 | # 5 | # A forest of disjoint sets of 
# find the root element of the subset that contains x
# path compression is implemented here
#
# `x` comes from the caller, so the first lookup is bounds-checked and an
# out-of-range element raises a BoundsError. Values stored in `parents` are
# always valid indices (the constructor fills it with 1:n and only roots are
# written back), so the remaining accesses stay unchecked.
function find_root(s::IntDisjointSets, x::Integer)
    parents = s.parents
    p::Int = parents[x]
    @inbounds if parents[p] != p
        parents[x] = p = find_root(s, p)
    end
    p
end
# Disjoint sets over arbitrary values of type T.
#   intmap   -- maps each value to its integer index in `internal`
#   internal -- the integer forest that does the actual union/find work
type DisjointSets{T}
    intmap::Dict{T,Int}
    internal::IntDisjointSets

    # Build one singleton set per distinct value in `xs`.
    # `xs` must be iterable and support `length` (used as a size hint only).
    # A value that occurs more than once keeps its first index, so repeated
    # values do not create extra, unreachable singletons in the forest.
    function DisjointSets(xs)
        imap = Dict{T,Int}()
        sizehint(imap, length(xs))
        id = 0
        for x in xs
            if !haskey(imap, x)
                imap[x] = (id += 1)
            end
        end
        new(imap, IntDisjointSets(id))
    end
end
# Build the value array of a binary heap from the collection `xs`.
#
# The values are copied into a fresh Vector{T} (converting each one to `T`)
# rather than with `copy(xs)`, so any iterable with a length -- e.g. a range --
# can be used and the caller's collection is never modified. Each position is
# then bubbled up in turn, which leaves the array in heap order for `comp`.
function _make_binary_heap{Comp,T}(comp::Comp, ty::Type{T}, xs)
    valtree = Array(T, 0)
    sizehint(valtree, length(xs))
    for x in xs
        push!(valtree, x)
    end
    for i = 2 : length(valtree)
        _heap_bubble_up!(comp, valtree, i)
    end
    valtree
end
# Check the heap property of `h`: returns false as soon as some child compares
# ahead of its parent under the heap's comparer, true otherwise.
function verify_heap{VT,Comp}(h::MutableBinaryHeap{VT,Comp})
    comp = h.comparer
    nodes = h.nodes
    n = length(h)
    # integer division keeps the loop variable an Int, so it is a valid index
    for i = 1 : div(n, 2)
        v = nodes[i].value
        lc = i * 2
        if lc <= n && compare(comp, nodes[lc].value, v)
            return false
        end
        rc = lc + 1
        if rc <= n && compare(comp, nodes[rc].value, v)
            return false
        end
    end
    return true
end
isequal(list_values(h), vs) 67 | @test isequal(heap_values(h), [1, 2, 3, 4, 7, 9, 10, 14, 8, 16]) 68 | 69 | 70 | h = mutable_binary_maxheap(vs) 71 | 72 | @test length(h) == 10 73 | @test !isempty(h) 74 | @test top(h) == 16 75 | @test isequal(list_values(h), vs) 76 | @test isequal(heap_values(h), [16, 14, 10, 8, 7, 3, 9, 1, 4, 2]) 77 | 78 | # test push! 79 | 80 | hmin = mutable_binary_minheap(Int) 81 | @test length(hmin) == 0 82 | @test isempty(hmin) 83 | 84 | ss = { 85 | [4], 86 | [1, 4], 87 | [1, 4, 3], 88 | [1, 2, 3, 4], 89 | [1, 2, 3, 4, 16], 90 | [1, 2, 3, 4, 16, 9], 91 | [1, 2, 3, 4, 16, 9, 10], 92 | [1, 2, 3, 4, 16, 9, 10, 14], 93 | [1, 2, 3, 4, 16, 9, 10, 14, 8], 94 | [1, 2, 3, 4, 7, 9, 10, 14, 8, 16]} 95 | 96 | for i = 1 : length(vs) 97 | ia = push!(hmin, vs[i]) 98 | @test ia == i 99 | @test length(hmin) == i 100 | @test !isempty(hmin) 101 | @test isequal(list_values(hmin), vs[1:i]) 102 | @test isequal(heap_values(hmin), ss[i]) 103 | end 104 | 105 | hmax = mutable_binary_maxheap(Int) 106 | @test length(hmax) == 0 107 | @test isempty(hmax) 108 | 109 | ss = { 110 | [4], 111 | [4, 1], 112 | [4, 1, 3], 113 | [4, 2, 3, 1], 114 | [16, 4, 3, 1, 2], 115 | [16, 4, 9, 1, 2, 3], 116 | [16, 4, 10, 1, 2, 3, 9], 117 | [16, 14, 10, 4, 2, 3, 9, 1], 118 | [16, 14, 10, 8, 2, 3, 9, 1, 4], 119 | [16, 14, 10, 8, 7, 3, 9, 1, 4, 2]} 120 | 121 | for i = 1 : length(vs) 122 | ia = push!(hmax, vs[i]) 123 | @test ia == i 124 | @test length(hmax) == i 125 | @test !isempty(hmax) 126 | @test isequal(list_values(hmax), vs[1:i]) 127 | @test isequal(heap_values(hmax), ss[i]) 128 | end 129 | 130 | # test pop! 131 | 132 | @test isequal(extract_all!(hmin), [1, 2, 3, 4, 7, 8, 9, 10, 14, 16]) 133 | @test isempty(hmin) 134 | 135 | @test isequal(extract_all!(hmax), [16, 14, 10, 9, 8, 7, 4, 3, 2, 1]) 136 | @test isempty(hmax) 137 | 138 | # test hybrid add! and pop! 
139 | 140 | h = mutable_binary_minheap(Int) 141 | 142 | push!(h, 5) 143 | push!(h, 10) 144 | @test isequal(heap_values(h), [5, 10]) 145 | @test isequal(list_values(h), [5, 10]) 146 | 147 | @test pop!(h) == 5 148 | @test isequal(heap_values(h), [10]) 149 | @test isequal(list_values(h), [10]) 150 | 151 | push!(h, 7) 152 | push!(h, 2) 153 | @test isequal(heap_values(h), [2, 10, 7]) 154 | @test isequal(list_values(h), [10, 7, 2]) 155 | 156 | @test pop!(h) == 2 157 | @test isequal(heap_values(h), [7, 10]) 158 | @test isequal(list_values(h), [10, 7]) 159 | 160 | 161 | # test update! 162 | 163 | xs = rand(100) 164 | hmin = mutable_binary_minheap(xs) 165 | hmax = mutable_binary_maxheap(xs) 166 | 167 | @test length(hmin) == 100 168 | @test length(hmax) == 100 169 | @test verify_heap(hmin) 170 | @test verify_heap(hmax) 171 | 172 | for t = 1 : 1000 173 | i = rand(1:100) 174 | v = rand() 175 | xs[i] = v 176 | update!(hmin, i, v) 177 | update!(hmax, i, v) 178 | @test length(hmin) == 100 179 | @test length(hmax) == 100 180 | @test verify_heap(hmin) 181 | @test verify_heap(hmax) 182 | @test isequal(list_values(hmin), xs) 183 | @test isequal(list_values(hmax), xs) 184 | end 185 | 186 | -------------------------------------------------------------------------------- /src/heaps/mutable_binary_heap.jl: -------------------------------------------------------------------------------- 1 | # Binary heap 2 | 3 | immutable MutableBinaryHeapNode{T} 4 | value::T 5 | handle::Int 6 | end 7 | 8 | ################################################# 9 | # 10 | # core implementation 11 | # 12 | ################################################# 13 | 14 | function _heap_bubble_up!{Comp, T}(comp::Comp, 15 | nodes::Vector{MutableBinaryHeapNode{T}}, nodemap::Vector{Int}, nd_id::Int) 16 | 17 | @inbounds nd = nodes[nd_id] 18 | v::T = nd.value 19 | 20 | swapped = true # whether swap happens at last step 21 | i = nd_id 22 | 23 | while swapped && i > 1 # nd is not root 24 | p = i >> 1 25 | @inbounds nd_p = 
# Sift the node stored at position `nd_id` down the tree until neither child
# compares ahead of it, keeping `nodemap` (handle => position) in sync with
# every node that is moved.
function _heap_bubble_down!{Comp, T}(comp::Comp,
    nodes::Vector{MutableBinaryHeapNode{T}}, nodemap::Vector{Int}, nd_id::Int)

    @inbounds moving = nodes[nd_id]
    v::T = moving.value

    n = length(nodes)
    last_parent = n >> 1
    pos = nd_id

    while pos <= last_parent
        # pick the child that compares ahead of its sibling (left if there is
        # no right child)
        child = pos << 1
        if child < n
            @inbounds if compare(comp, nodes[child + 1].value, nodes[child].value)
                child += 1
            end
        end

        # stop as soon as that child does not compare ahead of the moving value
        @inbounds nd_c = nodes[child]
        compare(comp, nd_c.value, v) || break

        # pull the child up one level and continue from its old position
        @inbounds nodes[pos] = nd_c
        @inbounds nodemap[nd_c.handle] = pos
        pos = child
    end

    # write the moving node only if it actually changed position
    if pos != nd_id
        @inbounds nodes[pos] = moving
        @inbounds nodemap[moving.handle] = pos
    end
end
# Build the node array and the handle map of a mutable binary heap from
# `values`. The k-th value receives handle k; afterwards every position is
# bubbled up in turn so that `nodes` is in heap order for `comp`.
function _make_mutable_binary_heap{Comp,T}(comp::Comp, ty::Type{T}, values)
    n = length(values)
    nodes = Array(MutableBinaryHeapNode{T}, n)
    nodemap = Array(Int, n)

    # initially node k sits at position k and carries handle k
    for (k, v) in enumerate(values)
        @inbounds nodes[k] = MutableBinaryHeapNode{T}(v, k)
        @inbounds nodemap[k] = k
    end

    for pos = 1 : n
        _heap_bubble_up!(comp, nodes, nodemap, pos)
    end
    return nodes, nodemap
end
# Replace the value stored under handle `i` with `v` and restore heap order.
#
# Popping a node sets its `node_map` entry to 0, so a zero entry means the
# handle no longer refers to a node in the heap; this is reported with an
# ArgumentError instead of failing on an out-of-range node index.
function update!{T}(h::MutableBinaryHeap{T}, i::Int, v::T)
    nodes = h.nodes
    nodemap = h.node_map
    comp = h.comparer

    nd_id = nodemap[i]
    if nd_id == 0
        throw(ArgumentError("Attempted to update a handle that is no longer in the heap."))
    end

    v0 = nodes[nd_id].value
    nodes[nd_id] = MutableBinaryHeapNode(v, i)
    # a value that compares ahead of the old one can only move towards the
    # root; any other value can only move towards the leaves
    if compare(comp, v, v0)
        _heap_bubble_up!(comp, nodes, nodemap, nd_id)
    else
        _heap_bubble_down!(comp, nodes, nodemap, nd_id)
    end
end
# Return (without removing) the element at the front of the deque.
# Throws an ArgumentError when the deque is empty.
function front(q::Deque)
    isempty(q) && throw(ArgumentError("Attempted to front at an empty deque."))
    first_blk = q.head
    first_blk.data[first_blk.front]
end
# Remove every element from the deque and return it.
# All blocks except the head are released; the head block itself is kept and
# reset so that the deque can be reused without allocating.
function empty!{T}(q::Deque{T})
    # release all blocks except the head
    if q.nblocks > 1
        cb::DequeBlock{T} = q.rear
        while !is(cb, q.head)
            empty!(cb.data)
            cb = cb.prev
        end
    end

    # clean the head block (but retain the block itself); this also makes its
    # prev/next links point back to itself
    reset!(q.head, 1)

    # reset queue fields: the head is now the only block, hence also the rear
    q.nblocks = 1
    q.len = 0
    q.rear = q.head
    q
end
rear = q.rear 193 | 194 | if isempty(rear) 195 | rear.front = 1 196 | rear.back = 0 197 | end 198 | 199 | if rear.back < rear.capa 200 | @inbounds rear.data[rear.back += 1] = convert(T, x) 201 | else 202 | new_rear = rear_deque_block(T, q.blksize) 203 | new_rear.back = 1 204 | new_rear.data[1] = convert(T, x) 205 | new_rear.prev = rear 206 | q.rear = rear.next = new_rear 207 | q.nblocks += 1 208 | end 209 | q.len += 1 210 | q 211 | end 212 | 213 | function unshift!{T}(q::Deque{T}, x) # push front 214 | head = q.head 215 | 216 | if isempty(head) # an empty head block is repositioned so that it fills from the last slot backwards 217 | n = head.capa 218 | head.front = n + 1 219 | head.back = n 220 | end 221 | 222 | if head.front > 1 223 | @inbounds head.data[head.front -= 1] = convert(T, x) 224 | else # no free slot before head.front: link a new block in front of the current head 225 | n::Int = q.blksize 226 | new_head = head_deque_block(T, n) 227 | new_head.front = n 228 | new_head.data[n] = convert(T, x) 229 | new_head.next = head 230 | q.head = head.prev = new_head 231 | q.nblocks += 1 232 | end 233 | q.len += 1 234 | q 235 | end 236 | 237 | function pop!{T}(q::Deque{T}) # pop back; throws ArgumentError when the deque is empty 238 | if isempty(q) 239 | throw(ArgumentError("Attempted to pop from an empty deque.")) 240 | end 241 | 242 | rear = q.rear 243 | @assert rear.back >= rear.front 244 | 245 | @inbounds x = rear.data[rear.back] 246 | rear.back -= 1 247 | if rear.back < rear.front 248 | if q.nblocks > 1 249 | # release and detach the rear block 250 | empty!(rear.data) 251 | q.rear = rear.prev::DequeBlock{T} 252 | q.rear.next = q.rear # the new rear links to itself, which marks the end of the chain 253 | q.nblocks -= 1 254 | end 255 | end 256 | q.len -= 1 257 | x 258 | end 259 | 260 | 261 | function shift!{T}(q::Deque{T}) # pop front; throws ArgumentError when the deque is empty 262 | if isempty(q) 263 | throw(ArgumentError("Attempted to pop from an empty deque.")) 264 | end 265 | 266 | head = q.head 267 | @assert head.back >= head.front 268 | 269 | @inbounds x = head.data[head.front] 270 | head.front += 1 271 | if head.back < head.front 272 | if q.nblocks > 1 273 | # release and detach the head block 274 | empty!(head.data) 275 | q.head = head.next::DequeBlock{T} 276 | 
q.head.prev = q.head 277 | q.nblocks -= 1 278 | end 279 | end 280 | q.len -= 1 281 | x 282 | end 283 | 284 | -------------------------------------------------------------------------------- /src/defaultdict.jl: -------------------------------------------------------------------------------- 1 | # Dictionary which returns (and sets) a default value for a requested item not 2 | # already in the dictionary 3 | 4 | # DefaultDictBase is the main class used in Default*Dicts. 5 | # 6 | # Each related (immutable) Default*Dict class contains a single 7 | # DefaultDictBase object as a member, and delegates almost all 8 | # functions to this object. 9 | # 10 | # The main rationale for doing this instead of using type aliases is 11 | # that this way, we can have actual class names and constructors for 12 | # each of the DefaultDictBase "subclasses", in some sense getting 13 | # around the Julia limitation of not allowing concrete classes to be 14 | # subclassed. 15 | # 16 | 17 | immutable DefaultDictBase{K,V,F,D<:Associative{K,V}} <: Associative{K,V} 18 | default::F # default value, or a callable producing it (see getindex) 19 | d::D # the wrapped dictionary 20 | 21 | DefaultDictBase(x::F, kv::AbstractArray{(K,V)}) = new(x, D(kv)) 22 | DefaultDictBase(x::F, d::DefaultDictBase) = DefaultDictBase(x, d.d) 23 | DefaultDictBase(x::F, d::D=D()) = new(x, d) # with no dictionary given, wraps a freshly constructed D() 24 | DefaultDictBase(x, ks, vs) = new(x, D(ks,vs)) 25 | 26 | end 27 | 28 | # Constructors 29 | 30 | DefaultDictBase() = error("no default specified") 31 | DefaultDictBase(k,v) = error("no default specified") 32 | 33 | # TODO: these mimic similar Dict constructors, but may not be needed 34 | DefaultDictBase{K,V,F}(default::F, ks::AbstractArray{K}, vs::AbstractArray{V}) = 35 | DefaultDictBase{K,V,F,Dict{K,V}}(default,ks,vs) 36 | DefaultDictBase{F}(default::F,ks,vs) = DefaultDictBase{Any,Any,F,Dict}(default, ks, vs) 37 | 38 | # syntax entry points 39 | DefaultDictBase{F}(default::F) = DefaultDictBase{Any,Any,F,Dict}(default) 40 | DefaultDictBase{K,V,F}(::Type{K}, ::Type{V}, default::F) = 
DefaultDictBase{K,V,F,Dict}(default) 41 | DefaultDictBase{K,V,F}(default::F, kv::AbstractArray{(K,V)}) = DefaultDictBase{K,V,F,Dict}(default, kv) 42 | 43 | DefaultDictBase{F,D<:Associative}(default::F, d::D) = ((K,V)=eltype(d); DefaultDictBase{K,V,F,D}(default, d)) 44 | 45 | # Functions 46 | 47 | # most functions are simply delegated to the wrapped dictionary 48 | @delegate DefaultDictBase.d [ sizehint, empty!, setindex!, get, haskey, 49 | getkey, pop!, delete!, start, done, next, 50 | isempty, length ] 51 | 52 | similar{K,V,F,D}(d::DefaultDictBase{K,V,F,D}) = DefaultDictBase{K,V,F,D}(d.default) # empty dictionary of the same type with the same default; there is no zero-argument constructor to call 53 | in{T<:DefaultDictBase}(key, v::Base.KeyIterator{T}) = key in keys(v.dict.d) 54 | next{T<:DefaultDictBase}(v::Base.KeyIterator{T}, i) = (v.dict.d.keys[i], Base.skip_deleted(v.dict.d,i+1)) 55 | next{T<:DefaultDictBase}(v::Base.ValueIterator{T}, i) = (v.dict.d.vals[i], Base.skip_deleted(v.dict.d,i+1)) 56 | 57 | getindex(d::DefaultDictBase, key) = get!(d.d, key, d.default) # a missing key is inserted with the default and that value is returned 58 | 59 | # TODO: remove these if/when https://github.com/JuliaLang/julia/pull/5519 is committed 60 | if !applicable(get!, (Dict,)) 61 | global getindex 62 | function getindex{K,V,F<:Base.Callable}(d::DefaultDictBase{K,V,F,Dict}, key) 63 | if !haskey(d.d, key) 64 | return (d.d[key] = d.default()) 65 | end 66 | return d.d[key] 67 | end 68 | 69 | function getindex{K,V,F}(d::DefaultDictBase{K,V,F,Dict}, key) 70 | if !haskey(d.d, key) 71 | return (d.d[key] = d.default) 72 | end 73 | return d.d[key] 74 | end 75 | end 76 | 77 | 78 | ################ 79 | 80 | # Here begins the actual definition of the DefaultDict and 81 | # DefaultOrderedDict classes. 
As noted above, these are simply 82 | # wrappers around a DefaultDictBase object, and delegate all functions 83 | # to that object 84 | 85 | for (DefaultDict,O) in [(:DefaultDict, :Unordered), (:DefaultOrderedDict, :Ordered)] 86 | @eval begin 87 | immutable $DefaultDict{K,V,F} <: Associative{K,V} 88 | d::DefaultDictBase{K,V,F,HashDict{K,V,$O}} 89 | 90 | $DefaultDict(x, kv::AbstractArray{(K,V)}) = new(DefaultDictBase{K,V,F,HashDict{K,V,$O}}(x, kv)) 91 | $DefaultDict(x, d::$DefaultDict) = $DefaultDict(x, d.d) 92 | $DefaultDict(x, d::HashDict) = new(DefaultDictBase{K,V,F,HashDict{K,V,$O}}(x, d)) 93 | $DefaultDict(x) = new(DefaultDictBase{K,V,F,HashDict{K,V,$O}}(x)) 94 | $DefaultDict(x, ks, vs) = new(DefaultDictBase{K,V,F,HashDict{K,V,$O}}(x,ks,vs)) 95 | end 96 | 97 | ## Constructors 98 | 99 | $DefaultDict() = error("$DefaultDict: no default specified") 100 | $DefaultDict(k,v) = error("$DefaultDict: no default specified") 101 | 102 | # TODO: these mimic similar Dict constructors, but may not be needed 103 | $DefaultDict{K,V,F}(default::F, ks::AbstractArray{K}, vs::AbstractArray{V}) = $DefaultDict{K,V,F}(default,ks,vs) 104 | $DefaultDict{F}(default::F,ks,vs) = $DefaultDict{Any,Any,F}(default, ks, vs) 105 | 106 | # syntax entry points 107 | $DefaultDict{F}(default::F) = $DefaultDict{Any,Any,F}(default) 108 | $DefaultDict{K,V,F}(::Type{K}, ::Type{V}, default::F) = $DefaultDict{K,V,F}(default) 109 | $DefaultDict{K,V,F}(default::F, kv::AbstractArray{(K,V)}) = $DefaultDict{K,V,F}(default, kv) 110 | 111 | $DefaultDict{F}(default::F, d::Associative) = ((K,V)=eltype(d); $DefaultDict{K,V,F}(default, HashDict(d))) 112 | 113 | ## Functions 114 | 115 | # Most functions are simply delegated to the wrapped DefaultDictBase object 116 | @delegate $DefaultDict.d [ sizehint, empty!, setindex!, 117 | getindex, get, get!, haskey, 118 | getkey, pop!, delete!, start, next, 119 | done, isempty, length] 120 | 121 | similar{K,V,F}(d::$DefaultDict{K,V,F}) = $DefaultDict{K,V,F}(d.d.default) # empty dictionary with the same default; the type has no zero-argument constructor 122 | 
in{T<:$DefaultDict}(key, v::Base.KeyIterator{T}) = key in keys(v.dict.d.d) 123 | end 124 | end 125 | 126 | 127 | ## If/when a SortedDict becomes available, this should be uncommented to provide a DefaultSortedDict 128 | 129 | # immutable DefaultSortedDict{K,V,F} <: Associative{K,V} 130 | # d::DefaultDictBase{K,V,F,SortedDict{K,V}} 131 | 132 | # DefaultSortedDict(x, kv::AbstractArray{(K,V)}) = new(DefaultDictBase{K,V,F,SortedDict{K,V}}(x, kv)) 133 | # DefaultSortedDict(x, d::DefaultSortedDict) = DefaultSortedDict(x, d.d) 134 | # DefaultSortedDict(x, d::SortedDict) = new(DefaultDictBase{K,V,F,SortedDict{K,V}}(x, d)) 135 | # DefaultSortedDict(x) = new(DefaultDictBase{K,V,F,SortedDict{K,V}}(x)) 136 | # DefaultSortedDict(x, ks, vs) = new(DefaultDictBase{K,V,F,SortedDict{K,V}}(x,ks,vs)) 137 | # end 138 | 139 | ## Constructors 140 | 141 | # DefaultSortedDict() = error("DefaultSortedDict: no default specified") 142 | # DefaultSortedDict(k,v) = error("DefaultSortedDict: no default specified") 143 | 144 | # # TODO: these mimic similar Dict constructors, but may not be needed 145 | # DefaultSortedDict{K,V,F}(default::F, ks::AbstractArray{K}, vs::AbstractArray{V}) = DefaultSortedDict{K,V,F}(default,ks,vs) 146 | # DefaultSortedDict{F}(default::F,ks,vs) = DefaultSortedDict{Any,Any,F}(default, ks, vs) 147 | 148 | # # syntax entry points 149 | # DefaultSortedDict{F}(default::F) = DefaultSortedDict{Any,Any,F}(default) 150 | # DefaultSortedDict{K,V,F}(::Type{K}, ::Type{V}, default::F) = DefaultSortedDict{K,V,F}(default) 151 | # DefaultSortedDict{K,V,F}(default::F, kv::AbstractArray{(K,V)}) = DefaultSortedDict{K,V,F}(default, kv) 152 | 153 | # DefaultSortedDict{F}(default::F, d::Associative) = ((K,V)=eltype(d); DefaultSortedDict{K,V,F}(default, SortedDict(d))) 154 | 155 | ## Functions 156 | 157 | ## Most functions are simply delegated to the wrapped DefaultDictBase object 158 | 159 | # @delegate DefaultSortedDict.d [ sizehint, empty!, setindex!, 160 | # getindex, get, get!, haskey, 
161 | # getkey, pop!, delete!, start, next, 162 | # done, next, isempty, length] 163 | 164 | # similar{K,V,F}(d::DefaultSortedDict{K,V,F}) = DefaultSortedDict{K,V,F}() 165 | # in{T<:DefaultSortedDict}(key, v::Base.KeyIterator{T}) = key in keys(v.dict.d.d) 166 | 167 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataStructures.jl 2 | 3 | [![Build Status](https://travis-ci.org/JuliaLang/DataStructures.jl.png)](https://travis-ci.org/JuliaLang/DataStructures.jl) 4 | 5 | This package implements a variety of data structures, including 6 | 7 | * Deque (based on block-list) 8 | * Stack 9 | * Queue 10 | * Disjoint Sets 11 | * Binary Heap 12 | * Mutable Binary Heap 13 | * Ordered Dicts and Sets 14 | * Dictionaries with Defaults 15 | 16 | ## Deque 17 | 18 | The ``Deque`` type implements a double-ended queue using a list of blocks. This data structure supports constant-time insertion/removal of elements at both ends of a sequence. 19 | 20 | Usage: 21 | ```julia 22 | a = Deque{Int}() 23 | isempty(a) # test whether the deque is empty 24 | length(a) # get the number of elements 25 | push!(a, 10) # add an element to the back 26 | pop!(a) # remove an element from the back 27 | unshift!(a, 20) # add an element to the front 28 | shift!(a) # remove an element from the front 29 | front(a) # get the element at the front 30 | back(a) # get the element at the back 31 | ``` 32 | 33 | *Note:* Julia's ``Vector`` type also provides this interface, and thus can be used as a deque. However, the ``Deque`` type in this package is implemented as a list of contiguous blocks (default block size = 1024 elements). As a deque grows, new blocks may be created and linked to existing blocks. This avoids the copying that happens when a vector grows. 
34 | 35 | Benchmarks show that the performance of ``Deque`` is comparable to ``Vector`` on ``push!``, and is noticeably faster on ``unshift!`` (by about 30% to 40%). 36 | 37 | 38 | ## Stack and Queue 39 | 40 | The ``Stack`` and ``Queue`` types are light-weight wrappers around a deque, and respectively provide interfaces for FILO and FIFO access. 41 | 42 | Usage of Stack: 43 | ``` 44 | s = Stack(Int) 45 | push!(s, x) 46 | x = top(s) 47 | x = pop!(s) 48 | ``` 49 | 50 | Usage of Queue: 51 | ``` 52 | q = Queue(Int) 53 | enqueue!(q, x) 54 | x = front(q) 55 | x = back(q) 56 | x = dequeue!(q) 57 | ``` 58 | 59 | ## Accumulators and Counters 60 | 61 | An accumulator, as defined below, is a data structure that maintains an accumulated number for each key. This is a counter when the accumulated values reflect the counts. 62 | 63 | ```julia 64 | type Accumulator{K, V<:Number} 65 | map::Dict{K, V} 66 | end 67 | ``` 68 | 69 | There are different ways to construct an accumulator/counter: 70 | 71 | ```julia 72 | a = accumulator(K, V) # construct an accumulator with key-type K and 73 | # accumulated value type V 74 | 75 | a = accumulator(dict) # construct an accumulator from a dictionary 76 | 77 | a = counter(K) # construct a counter, i.e. an accumulator with 78 | # key type K and value type Int 79 | 80 | 81 | a = counter(dict) # construct a counter from a dictionary 82 | 83 | a = counter(seq) # construct a counter by counting keys in a sequence 84 | ``` 85 | 86 | Usage of an accumulator/counter: 87 | 88 | ```julia 89 | # let a and a2 be accumulators/counters 90 | 91 | a[x] # get the current value/count for x. 
92 | # if x was not added to a, it returns zero(V) 93 | 94 | add!(a, x) # increase the value/count for x by 1 95 | add!(a, x, v) # increase the value/count for x by v 96 | add!(a, a2) # add all counts from a2 to a 97 | 98 | pop!(a, x) # remove a key x from a, and return its current value 99 | 100 | merge(a, a2) # return a new accumulator/counter that combines the 101 | # values/counts in both a and a2 102 | ``` 103 | 104 | 105 | ## Disjoint Sets 106 | 107 | Some algorithms, such as finding connected components in an undirected graph and Kruskal's method of finding a minimum spanning tree, require a data structure that can efficiently represent a collection of disjoint subsets. 108 | A widely used data structure for this purpose is the *Disjoint set forest*. 109 | 110 | Usage: 111 | ``` 112 | a = IntDisjointSet(10) # creates a forest comprised of 10 singletons 113 | union!(a, 3, 5) # merges the sets that contain 3 and 5 into one 114 | in_same_set(a, x, y) # determines whether x and y are in the same set 115 | ``` 116 | 117 | One may also use other element types: 118 | ``` 119 | a = DisjointSet{String}(["a", "b", "c", "d"]) 120 | union!(a, "a", "b") 121 | in_same_set(a, "c", "d") 122 | ``` 123 | 124 | Note that the internal implementation of ``IntDisjointSet`` is based on vectors, and is very efficient. ``DisjointSet{T}`` is a wrapper of ``IntDisjointSet``, which uses a dictionary to map input elements to an internal index. 125 | 126 | 127 | ## Heaps 128 | 129 | Heaps are data structures that efficiently maintain the minimum (or maximum) for a set of data that may dynamically change. 130 | 131 | All heaps in this package are derived from ``AbstractHeap``, and provide the following interface: 132 | 133 | ```julia 134 | let h be a heap, i be a handle, and v be a value. 
135 | 136 | - length(h) # returns the number of elements 137 | 138 | - isempty(h) # returns whether the heap is empty 139 | 140 | - push!(h, v) # adds a value to the heap 141 | 142 | - top(h) # returns the top value of a heap 143 | 144 | - pop!(h) # removes the top value, and returns it 145 | ``` 146 | 147 | Mutable heaps (values can be changed after being pushed to a heap) are derived from 148 | ``AbstractMutableHeap <: AbstractHeap``, and additionally provide the following interface: 149 | 150 | ```julia 151 | - i = push!(h, v) # adds a value to the heap and returns a handle to v 152 | 153 | - update!(h, i, v) # updates the value of an element (referred to by the handle i) 154 | ``` 155 | 156 | Currently, both min/max versions of binary heap (type ``BinaryHeap``) and mutable binary heap (type ``MutableBinaryHeap``) have been implemented. 157 | 158 | Examples of constructing a heap: 159 | ```julia 160 | h = binary_minheap(Int) 161 | h = binary_maxheap(Int) # create an empty min/max binary heap of integers 162 | 163 | h = binary_minheap([1,4,3,2]) 164 | h = binary_maxheap([1,4,3,2]) # create a min/max heap from a vector 165 | 166 | h = mutable_binary_minheap(Int) 167 | h = mutable_binary_maxheap(Int) # create an empty mutable min/max heap 168 | 169 | h = mutable_binary_minheap([1,4,3,2]) 170 | h = mutable_binary_maxheap([1,4,3,2]) # create a mutable min/max heap from a vector 171 | ``` 172 | 173 | ## OrderedDicts and OrderedSets 174 | 175 | ``OrderedDicts`` are simply dictionaries whose entries have a 176 | particular order. For ``OrderedDicts`` (and ``OrderedSets``), order 177 | refers to *insertion order*, which allows deterministic iteration over 178 | the dictionary or set. 
179 | 180 | ```julia 181 | d = OrderedDict(Char,Int) 182 | for c in 'a':'e' 183 | d[c] = c-'a'+1 184 | end 185 | collect(d) # => [('a',1),('b',2),('c',3),('d',4),('e',5)] 186 | 187 | s = OrderedSet(π,e,γ,catalan,φ) 188 | collect(s) # => [π = 3.1415926535897..., 189 | # e = 2.7182818284590..., 190 | # γ = 0.5772156649015..., 191 | # catalan = 0.9159655941772..., 192 | # φ = 1.6180339887498...] 193 | ``` 194 | 195 | All standard ``Associative`` and ``Dict`` functions are available for 196 | ``OrderedDicts``, and all ``Set`` operations are available for 197 | OrderedSets. 198 | 199 | Note that to create an OrderedSet of a particular type, you must 200 | specify the type in curly-braces: 201 | 202 | ```julia 203 | # create an OrderedSet of Strings 204 | strs = OrderedSet{String}() 205 | ``` 206 | 207 | 208 | ## DefaultDict and DefaultOrderedDict 209 | 210 | A DefaultDict allows specification of a default value to return when a requested key is not in a dictionary. 211 | 212 | While the implementation is slightly different, a ``DefaultDict`` can be thought to provide a normal ``Dict`` 213 | with a default value. A ``DefaultOrderedDict`` does the same for an ``OrderedDict``. 
214 | 215 | Constructors: 216 | ```julia 217 | DefaultDict(default, kv) # create a DefaultDict with a default value or function, 218 | # optionally wrapping an existing dictionary 219 | # or array of key-value pairs 220 | 221 | DefaultDict(KeyType, ValueType, default) # create a DefaultDict with Dict type (KeyType,ValueType) 222 | 223 | DefaultOrderedDict(default, kv) # create a DefaultOrderedDict with a default value or function, 224 | # optionally wrapping an existing dictionary 225 | # or array of key-value pairs 226 | 227 | DefaultOrderedDict(KeyType, ValueType, default) # create a DefaultOrderedDict with Dict type (KeyType,ValueType) 228 | ``` 229 | 230 | Examples using ``DefaultDict``: 231 | ```julia 232 | dd = DefaultDict(1) # create an (Any=>Any) DefaultDict with a default value of 1 233 | dd = DefaultDict(String, Int, 0) # create a (String=>Int) DefaultDict with a default value of 0 234 | 235 | d = ['a'=>1, 'b'=>2] 236 | dd = DefaultDict(0, d) # provide a default value to an existing dictionary 237 | dd['c'] == 0 # true 238 | #d['c'] == 0 # false 239 | 240 | dd = DefaultOrderedDict(time) # call time() to provide the default value for an OrderedDict 241 | dd = DefaultDict(Dict) # Create a dictionary of dictionaries 242 | # Dict() is called to provide the default value 243 | dd = DefaultDict(()->myfunc()) # call function myfunc to provide the default value 244 | 245 | # create a Dictionary of type String=>DefaultDict{String, Int}, where the default of the 246 | # inner set of DefaultDicts is zero 247 | dd = DefaultDict(String, DefaultDict, ()->DefaultDict(String,Int,0)) 248 | ``` 249 | 250 | Note that in the last example, we need to use a function to create each new ``DefaultDict``. 
251 | If we forget, we will end up using the same ``DefaultDict`` for all default values: 252 | 253 | ```julia 254 | julia> dd = DefaultDict(String, DefaultDict, DefaultDict(String,Int,0)); 255 | 256 | julia> dd["a"] 257 | DefaultDict{String,Int64,Int64,Dict{K,V}}() 258 | 259 | julia> dd["b"]["a"] = 1 260 | 1 261 | 262 | julia> dd["a"] 263 | ["a"=>1] 264 | 265 | ``` 266 | 267 | ## Trie 268 | 269 | An implementation of the `Trie` data structure. This is an associative structure, with `String` keys. 270 | 271 | ```julia 272 | t=Trie{Int}() 273 | t["Rob"]=42 274 | t["Roger"]=24 275 | haskey(t, "Rob") #true 276 | get(t, "Rob") #42 277 | ``` 278 | -------------------------------------------------------------------------------- /src/hashdict.jl: -------------------------------------------------------------------------------- 1 | # HashDict 2 | 3 | import Base: KeyIterator, ValueIterator, haskey, get, getkey, delete!, 4 | pop!, empty!, filter!, setindex!, getindex, similar, 5 | sizehint, length, filter, isempty, start, next, done, 6 | keys, values, _tablesz, skip_deleted 7 | 8 | typealias Unordered Nothing 9 | typealias Ordered Int 10 | 11 | type HashDict{K,V,O<:Union(Ordered,Unordered)} <: Associative{K,V} 12 | slots::Array{Uint8,1} 13 | keys::Array{K,1} 14 | vals::Array{V,1} 15 | idxs::Array{O,1} 16 | order::Array{O,1} 17 | ndel::Int 18 | count::Int 19 | deleter::Function 20 | 21 | function HashDict() 22 | n = 16 23 | new(zeros(Uint8,n), Array(K,n), Array(V,n), Array(O,n), Array(O,0), 0, 0, identity) 24 | end 25 | function HashDict(ks, vs) 26 | n = length(ks) 27 | h = HashDict{K,V,O}() 28 | for i=1:n 29 | h[ks[i]] = vs[i] 30 | end 31 | return h 32 | end 33 | function HashDict(kv) 34 | h = HashDict{K,V,O}() 35 | for (k,v) in kv 36 | h[k] = v 37 | end 38 | return h 39 | end 40 | end 41 | 42 | HashDict() = HashDict{Any,Any,Unordered}() 43 | 44 | HashDict{K,V}(ks::AbstractArray{K}, vs::AbstractArray{V}) = HashDict{K,V,Unordered}(ks,vs) 45 | HashDict(ks, vs) = 
HashDict{Any,Any,Unordered}(ks, vs) 46 | HashDict{K,V}(kv::AbstractArray{(K,V)}) = HashDict{K,V,Unordered}(kv) 47 | 48 | # TODO: these could be more efficient 49 | HashDict{K,V,O}(d::HashDict{K,V,O}) = HashDict{K,V,O}(collect(d)) # copy constructor: collect the argument `d` (the previous `kv` was an undefined name) 50 | HashDict{K,V}(d::Associative{K,V}) = HashDict{K,V,Unordered}(collect(d)) 51 | 52 | similar{K,V,O}(d::HashDict{K,V,O}) = HashDict{K,V,O}() 53 | 54 | function serialize(s, t::HashDict) # writes the type, the entry count as Int32, then each key/value pair 55 | serialize_type(s, typeof(t)) 56 | write(s, int32(length(t))) 57 | for (k,v) in t 58 | serialize(s, k) 59 | serialize(s, v) 60 | end 61 | end 62 | 63 | function deserialize{K,V,O}(s, T::Type{HashDict{K,V,O}}) 64 | n = read(s, Int32) 65 | t = T(); sizehint(t, n) 66 | for i = 1:n 67 | k = deserialize(s) 68 | v = deserialize(s) 69 | t[k] = v 70 | end 71 | return t 72 | end 73 | 74 | hashindex(key, sz) = (int(hash(key)) & (sz-1)) + 1 75 | 76 | isslotempty(h::HashDict, i::Int) = h.slots[i] == 0x0 77 | isslotfilled(h::HashDict, i::Int) = h.slots[i] == 0x1 78 | isslotmissing(h::HashDict, i::Int) = h.slots[i] == 0x2 79 | 80 | function rehash{K,V}(h::HashDict{K,V,Unordered}, newsz) 81 | newsz = _tablesz(newsz) 82 | 83 | if h.count == 0 84 | resize!(h.slots, newsz) 85 | fill!(h.slots, 0) 86 | resize!(h.keys, newsz) 87 | resize!(h.vals, newsz) 88 | h.ndel = 0 89 | return h 90 | end 91 | 92 | olds = h.slots 93 | oldk = h.keys 94 | oldv = h.vals 95 | sz = length(olds) 96 | 97 | slots = zeros(Uint8,newsz) 98 | keys = Array(K, newsz) 99 | vals = Array(V, newsz) 100 | count0 = h.count 101 | count = 0 102 | 103 | for i = 1:sz 104 | if olds[i] == 0x1 105 | k = oldk[i] 106 | v = oldv[i] 107 | index = hashindex(k, newsz) 108 | while slots[index] != 0 109 | index = (index & (newsz-1)) + 1 110 | end 111 | slots[index] = 0x1 112 | keys[index] = k 113 | vals[index] = v 114 | count += 1 115 | 116 | if h.count != count0 117 | # if items are removed by finalizers, retry 118 | return rehash(h, newsz) 119 | end 120 | end 121 | end 122 | 123 | h.slots = slots 124 | h.keys = keys 
125 | h.vals = vals 126 | h.count = count 127 | h.ndel = 0 128 | 129 | return h 130 | end 131 | 132 | function rehash{K,V}(h::HashDict{K,V,Ordered}, newsz) 133 | newsz = _tablesz(newsz) 134 | 135 | if h.count == 0 136 | resize!(h.slots, newsz) 137 | fill!(h.slots, 0) 138 | resize!(h.keys, newsz) 139 | resize!(h.vals, newsz) 140 | resize!(h.idxs, newsz) 141 | resize!(h.order, 0) 142 | h.ndel = 0 143 | return h 144 | end 145 | 146 | _compact_order(h) # drop deleted entries first so that every idxs value lies in 1:h.count 147 | 148 | olds = h.slots 149 | oldk = h.keys 150 | oldv = h.vals 151 | oldi = h.idxs 152 | oldo = h.order 153 | sz = length(olds) 154 | 155 | slots = zeros(Uint8,newsz) 156 | keys = Array(K, newsz) 157 | vals = Array(V, newsz) 158 | idxs = Array(Int, newsz) 159 | order = Array(Int, h.count) 160 | count0 = h.count 161 | count = 0 162 | 163 | for i = 1:sz 164 | if olds[i] == 0x1 165 | k = oldk[i] 166 | v = oldv[i] 167 | idx = oldi[i] 168 | index = hashindex(k, newsz) 169 | while slots[index] != 0 170 | index = (index & (newsz-1)) + 1 171 | end 172 | slots[index] = 0x1 173 | keys[index] = k 174 | vals[index] = v 175 | idxs[index] = idx 176 | order[idx] = index 177 | count += 1 178 | 179 | if h.count != count0 180 | # if items are removed by finalizers, retry 181 | return rehash(h, newsz) 182 | end 183 | end 184 | end 185 | 186 | h.slots = slots 187 | h.keys = keys 188 | h.vals = vals 189 | h.idxs = idxs 190 | h.order = order 191 | h.count = count 192 | h.ndel = 0 193 | 194 | return h 195 | end 196 | 197 | 198 | function _compact_order{K,V}(h::HashDict{K,V,Ordered}) # squeeze the zero (deleted) entries out of h.order and renumber h.idxs to match 199 | if h.count == length(h.order) 200 | return 201 | end 202 | 203 | i = 1 204 | while h.order[i] > 0; i += 1; end 205 | 206 | j = i+1 207 | while j <= length(h.order) && h.order[j] == 0; j += 1; end # bounded: every entry after the first gap may be deleted, e.g. after removing the newest key 208 | 209 | for k = j:length(h.order) 210 | idx = h.order[k] 211 | if idx > 0 212 | h.order[i] = idx 213 | h.idxs[idx] = i 214 | i += 1 215 | end 216 | end 217 | 218 | resize!(h.order, h.count) 219 | 220 | nothing 221 | end 222 | 223 | function sizehint(d::HashDict, newsz) 224 | oldsz = 
length(d.slots) 225 | if newsz <= oldsz 226 | # todo: shrink 227 | # be careful: rehash() assumes everything fits. it was only designed 228 | # for growing. 229 | return d 230 | end 231 | # grow at least 25% 232 | newsz = max(newsz, (oldsz*5)>>2) 233 | rehash(d, newsz) 234 | end 235 | 236 | function empty!{K,V}(h::HashDict{K,V}) # remove all entries, keeping the current table size 237 | fill!(h.slots, 0x0) # 0x0 marks a slot as empty (see isslotempty) 238 | sz = length(h.slots) 239 | h.keys = Array(K, sz) 240 | h.vals = Array(V, sz) 241 | h.ndel = 0 242 | h.count = 0 243 | return h 244 | end 245 | 246 | function empty!{K,V}(h::HashDict{K,V,Ordered}) # ordered variant: also resets the idxs/order bookkeeping 247 | sz = length(h.slots) 248 | fill!(h.slots, 0x0) 249 | h.keys = Array(K, sz) 250 | h.vals = Array(V, sz) 251 | h.idxs = Array(Int, sz) 252 | h.order = Array(Int, 0) 253 | h.ndel = 0 254 | h.count = 0 255 | return h 256 | end 257 | 258 | # get the index where a key is stored, or -1 if not present 259 | function ht_keyindex{K,V}(h::HashDict{K,V}, key) 260 | sz = length(h.keys) 261 | iter = 0 262 | maxprobe = max(16, sz>>6) 263 | index = hashindex(key, sz) 264 | keys = h.keys 265 | 266 | while true 267 | if isslotempty(h,index) 268 | break 269 | end 270 | if !isslotmissing(h,index) && isequal(key,keys[index]) 271 | return index 272 | end 273 | 274 | index = (index & (sz-1)) + 1 # advance to the next slot, wrapping from sz back to 1 (assumes sz is a power of two -- TODO confirm _tablesz guarantees this) 275 | iter+=1 276 | iter > maxprobe && break # give up after maxprobe probes and report the key as absent 277 | end 278 | 279 | return -1 280 | end 281 | 282 | # get the index where a key is stored, or -pos if not present 283 | # and the key would be inserted at pos 284 | # This version is for use by setindex! and get! 285 | function ht_keyindex2{K,V}(h::HashDict{K,V}, key) 286 | sz = length(h.keys) 287 | 288 | if h.ndel >= ((3*sz)>>2) || h.count*3 > sz*2 289 | # > 3/4 deleted or > 2/3 full 290 | rehash(h, h.count > 64000 ? h.count*2 : h.count*4) 291 | sz = length(h.keys) # rehash may resize the table at this point! 
292 | end 293 | 294 | iter = 0 295 | maxprobe = max(16, sz>>6) 296 | index = hashindex(key, sz) 297 | avail = 0 298 | keys = h.keys 299 | 300 | while true 301 | if isslotempty(h,index) 302 | avail < 0 && return avail 303 | return -index 304 | end 305 | 306 | if isslotmissing(h,index) 307 | if avail == 0 308 | # found an available slot, but need to keep scanning 309 | # in case "key" already exists in a later collided slot. 310 | avail = -index 311 | end 312 | elseif isequal(key, keys[index]) 313 | return index 314 | end 315 | 316 | index = (index & (sz-1)) + 1 317 | iter+=1 318 | iter > maxprobe && break 319 | end 320 | 321 | avail < 0 && return avail 322 | 323 | rehash(h, h.count > 64000 ? sz*2 : sz*4) # probe limit reached with no reusable slot: grow the table and retry 324 | 325 | return ht_keyindex2(h, key) 326 | end 327 | 328 | function _setindex!(h::HashDict, v, key, index) # store a new entry in slot `index` and mark the slot as filled 329 | h.slots[index] = 0x1 330 | h.keys[index] = key 331 | h.vals[index] = v 332 | h.count += 1 333 | return h 334 | end 335 | 336 | function _setindex!{K,V}(h::HashDict{K,V,Ordered}, v, key, index) 337 | h.slots[index] = 0x1 338 | h.keys[index] = key 339 | h.vals[index] = v 340 | push!(h.order, index) # order[n] is the slot of the n-th inserted key 341 | h.idxs[index] = length(h.order) # idxs maps the slot back to its position in order 342 | h.count += 1 343 | return h 344 | end 345 | 346 | function setindex!{K,V}(h::HashDict{K,V}, v0, key0) 347 | key = convert(K,key0) 348 | if !isequal(key,key0) 349 | error(key0, " is not a valid key for type ", K) 350 | end 351 | v = convert(V, v0) 352 | 353 | index = ht_keyindex2(h, key) 354 | 355 | if index > 0 # positive: key already present in this slot; otherwise -index is the insertion slot 356 | h.vals[index] = v 357 | else 358 | _setindex!(h, v, key, -index) 359 | end 360 | 361 | return h 362 | end 363 | 364 | function get!{K,V}(h::HashDict{K,V}, key0, default) 365 | key = convert(K,key0) 366 | if !isequal(key,key0) 367 | error(key0, " is not a valid key for type ", K) 368 | end 369 | 370 | index = ht_keyindex2(h, key) 371 | 372 | index > 0 && return h.vals[index] 373 | 374 | v = convert(V, default) 375 | _setindex!(h, v, key, -index) 376 | return v 377 | end # NOTE(review): the get!(h, key0, default) method defined next (lines 379-392) repeats this definition verbatim 378 | 379 | function 
get!{K,V}(h::HashDict{K,V}, key0, default) 380 | key = convert(K,key0) 381 | if !isequal(key,key0) 382 | error(key0, " is not a valid key for type ", K) 383 | end 384 | 385 | index = ht_keyindex2(h, key) 386 | 387 | index > 0 && return h.vals[index] 388 | 389 | v = convert(V, default) 390 | _setindex!(h, v, key, -index) 391 | return v 392 | end 393 | 394 | # TODO: this makes it challenging to have V<:Base.Callable 395 | function get!{K,V,F<:Base.Callable}(h::HashDict{K,V}, key0, default::F) 396 | key = convert(K,key0) 397 | if !isequal(key,key0) 398 | error(key0, " is not a valid key for type ", K) 399 | end 400 | 401 | index = ht_keyindex2(h, key) 402 | 403 | index > 0 && return h.vals[index] 404 | 405 | v = convert(V, default()) # the default is called only when the key is missing 406 | _setindex!(h, v, key, -index) 407 | return v 408 | end 409 | 410 | function getindex{K,V}(h::HashDict{K,V}, key) 411 | index = ht_keyindex(h, key) 412 | return (index<0) ? throw(KeyError(key)) : h.vals[index]::V # ht_keyindex returns -1 when the key is not present 413 | end 414 | 415 | function get{K,V}(h::HashDict{K,V}, key, deflt) 416 | index = ht_keyindex(h, key) 417 | return (index<0) ? deflt : h.vals[index]::V 418 | end 419 | 420 | haskey(h::HashDict, key) = (ht_keyindex(h, key) >= 0) 421 | contains{T<:HashDict}(v::KeyIterator{T}, key) = (ht_keyindex(v.dict, key) >= 0) 422 | 423 | function getkey{K,V}(h::HashDict{K,V}, key, deflt) # return the stored key equal to `key`, or deflt when absent 424 | index = ht_keyindex(h, key) 425 | return (index<0) ? deflt : h.keys[index]::K 426 | end 427 | 428 | function _pop!(h::HashDict, index) 429 | val = h.vals[index] # read the value before _delete! unsets the slot 430 | _delete!(h, index) 431 | return val 432 | end 433 | 434 | function pop!(h::HashDict, key) 435 | index = ht_keyindex(h, key) 436 | index > 0 ? _pop!(h, index) : throw(KeyError(key)) 437 | end 438 | 439 | function pop!(h::HashDict, key, default) 440 | index = ht_keyindex(h, key) 441 | index > 0 ? 
_pop!(h, index) : default 442 | end 443 | 444 | 445 | function _delete!(h::HashDict, index) 446 | h.slots[index] = 0x2 447 | ccall(:jl_arrayunset, Void, (Any, Uint), h.keys, index-1) 448 | ccall(:jl_arrayunset, Void, (Any, Uint), h.vals, index-1) 449 | h.ndel += 1 450 | h.count -= 1 451 | return h 452 | end 453 | 454 | function _delete!{K,V}(h::HashDict{K,V,Ordered}, index) 455 | h.slots[index] = 0x2 456 | ccall(:jl_arrayunset, Void, (Any, Uint), h.keys, index-1) 457 | ccall(:jl_arrayunset, Void, (Any, Uint), h.vals, index-1) 458 | h.order[h.idxs[index]] = 0 459 | h.ndel += 1 460 | h.count -= 1 461 | return h 462 | end 463 | 464 | function delete!(h::HashDict, key) 465 | index = ht_keyindex(h, key) 466 | index > 0 && _delete!(h, index) 467 | return h 468 | end 469 | 470 | function skip_deleted{K,V,O}(h::HashDict{K,V,O}, i) 471 | L = length(h.slots) 472 | while i<=L && !isslotfilled(h,i) 473 | i += 1 474 | end 475 | return i 476 | end 477 | 478 | function skip_deleted{K,V}(h::HashDict{K,V,Ordered}, i) 479 | L = length(h.order) 480 | while i<=L && h.order[i] == 0 481 | i += 1 482 | end 483 | return i 484 | end 485 | 486 | start(t::HashDict) = skip_deleted(t, 1) 487 | done(t::HashDict, i) = done(t.vals, i) 488 | next(t::HashDict, i) = ((t.keys[i],t.vals[i]), skip_deleted(t,i+1)) 489 | 490 | done{K,V}(t::HashDict{K,V,Ordered}, i) = done(t.order, i) 491 | next{K,V}(t::HashDict{K,V,Ordered}, i) = ((t.keys[t.order[i]],t.vals[t.order[i]]), skip_deleted(t,i+1)) 492 | 493 | isempty(t::HashDict) = (t.count == 0) 494 | length(t::HashDict) = t.count 495 | 496 | next(v::KeyIterator{HashDict}, i) = (v.dict.keys[i], skip_deleted(v.dict,i+1)) 497 | next(v::ValueIterator{HashDict}, i) = (v.dict.vals[i], skip_deleted(v.dict,i+1)) 498 | 499 | next{K,V}(v::KeyIterator{HashDict{K,V,Ordered}}, i) = (v.dict.keys[v.dict.order[i]], skip_deleted(v.dict,i+1)) 500 | next{K,V}(v::ValueIterator{HashDict{K,V,Ordered}}, i) = (v.dict.vals[v.dict.order[i]], skip_deleted(v.dict,i+1)) 501 | # NOTE(review): the KeyIterator{HashDict} / ValueIterator{HashDict} methods above name the unparameterized type; confirm they are meant to match HashDict{K,V,O} instances (compare the {T<:HashDict} form used by `contains`)
--------------------------------------------------------------------------------