├── Finite_Domain_Sorting ├── bucket_sort.py ├── counting_sort.py ├── counting_sort_complex.py ├── radix_sort.py └── sort_test.py ├── General_Sorting ├── mergesort.py └── quicksort.py ├── LICENSE ├── README.md ├── Searching ├── binary_search.py ├── interpolation_search.py ├── quadratic_binary_search.py └── search_test.py ├── Selection ├── selection_det.py ├── selection_rand.py └── selection_test.py └── Tree └── 23Tree.py /Finite_Domain_Sorting/bucket_sort.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Sorting of n data that is within bounded limits so we can map all data to an interval of 0..1 ! 5 | """ 6 | __author__ = "Maurice Tollmien" 7 | __maintainer__ = "Maurice Tollmien" 8 | __email__ = "maurice.tollmien@gmail.com" 9 | 10 | 11 | def sort(inputList, mappingFactor, bucketCount): 12 | buckets = [[] for i in range(bucketCount)] 13 | outputList = [0 for i in inputList] 14 | 15 | for e in inputList: 16 | buckets[int(e*mappingFactor)].append(e) 17 | 18 | for i in range(bucketCount): 19 | buckets[i].sort() 20 | 21 | index = 0 22 | for l in buckets: 23 | for e in l: 24 | outputList[index] = e 25 | index += 1 26 | 27 | return outputList 28 | 29 | def bucket_sort(inputList): 30 | uniqueValues = len(set(inputList)) 31 | 32 | return sort(inputList, (uniqueValues-1)/float(max(inputList)), uniqueValues) 33 | 34 | -------------------------------------------------------------------------------- /Finite_Domain_Sorting/counting_sort.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Sorting of elements with a finite amount k of different values!! This is very important. Runs in Theta(n+k). 5 | """ 6 | __author__ = "Maurice Tollmien" 7 | __maintainer__ = "Maurice Tollmien" 8 | __email__ = "maurice.tollmien@gmail.com" 9 | 10 | def sort(inputList, maxElem): 11 | outputList = [0 for i in inputList] 12 | countingList = [0 for i in range(maxElem)] 13 | 14 | for e in inputList: 15 | countingList[e] += 1 16 | 17 | index = 0 18 | for e in range(len(countingList)): 19 | for i in range(countingList[e]): 20 | outputList[index] = e 21 | index += 1 22 | return outputList 23 | 24 | def counting_sort(inputList): 25 | return sort(inputList, max(inputList)+1) 26 | 27 | -------------------------------------------------------------------------------- /Finite_Domain_Sorting/counting_sort_complex.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Sorting of elements with a finite amount k of different values!! This is very important. Runs in Theta(n+k). 5 | """ 6 | __author__ = "Maurice Tollmien" 7 | __maintainer__ = "Maurice Tollmien" 8 | __email__ = "maurice.tollmien@gmail.com" 9 | 10 | def sort(inputList, maxElem): 11 | outputList = [0 for i in inputList] 12 | countingList = [0 for i in range(maxElem)] 13 | 14 | for e in inputList: 15 | countingList[e] += 1 16 | 17 | for i in range(len(countingList)): 18 | if i > 0: 19 | countingList[i] += countingList[i-1] 20 | 21 | for i in reversed(range(len(inputList))): 22 | outputList[countingList[inputList[i]]-1] = inputList[i] 23 | countingList[inputList[i]] -= 1 24 | 25 | return outputList 26 | 27 | # Simple copy&paste. Should be reworked to make it look nicer! 28 | # But more of a quick and dirty version. So not bothering. 29 | def sort_tuple(inputList, maxElem): 30 | outputList = [0 for i in inputList] 31 | countingList = [0 for i in range(maxElem)] 32 | 33 | for e in inputList: 34 | countingList[e[0]] += 1 35 | 36 | for i in range(len(countingList)): 37 | if i > 0: 38 | countingList[i] += countingList[i-1] 39 | 40 | for i in reversed(range(len(inputList))): 41 | outputList[countingList[inputList[i][0]]-1] = inputList[i] 42 | countingList[inputList[i][0]] -= 1 43 | 44 | return outputList 45 | 46 | # So this can be used in Radix-sort! 47 | def counting_sort_complex_tuple(inputList): 48 | maxElem = max(e[0] for e in inputList) 49 | return sort_tuple(inputList, maxElem+1) 50 | 51 | def counting_sort_complex(inputList): 52 | return sort(inputList, max(inputList)+1) 53 | 54 | -------------------------------------------------------------------------------- /Finite_Domain_Sorting/radix_sort.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Radix-Sort. Uses a complex version of Countingsort! 5 | The used countingsort is not very thought through... Could have been modelled better. 6 | Please provide a pull request if you have a good idea. 7 | """ 8 | __author__ = "Maurice Tollmien" 9 | __maintainer__ = "Maurice Tollmien" 10 | __email__ = "maurice.tollmien@gmail.com" 11 | 12 | import string 13 | from counting_sort_complex import counting_sort_complex_tuple 14 | 15 | def sort(inputList, maxWordLen): 16 | listToSort = [(-1, e) for e in inputList] 17 | for i in reversed(range(maxWordLen)): 18 | tmp = [(ord(listToSort[j][1][i]), listToSort[j][1]) for j in range(len(listToSort))] 19 | listToSort = counting_sort_complex_tuple(tmp) 20 | return listToSort 21 | 22 | def radix_sort(inputList, fillLeft): 23 | maxWordLen = max([len(e) for e in inputList]) 24 | if fillLeft: 25 | inputListFilled = [e.rjust(maxWordLen) for e in inputList] 26 | else: 27 | inputListFilled = [e.ljust(maxWordLen) for e in inputList] 28 | sortedInput = sort(inputListFilled, maxWordLen) 29 | return [string.strip(e[1]) for e in sortedInput] 30 | 31 | # Can be used directly, as Radixsort sorts words 32 | def radix_sort_str(inputList): 33 | return radix_sort(inputList, False) 34 | 35 | # Must be converted to 'words' so the basic Radix-sort can be applied. 36 | # But we can now sort numbers. 37 | def radix_sort_int(inputList): 38 | inputListStr = [str(e) for e in inputList] 39 | sortedList = radix_sort(inputListStr, True) 40 | return [int(e) for e in sortedList] 41 | 42 | 43 | -------------------------------------------------------------------------------- /Finite_Domain_Sorting/sort_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Testing sorting algorithms for elements in finite universes. 5 | Careful, it prints out the results. 6 | """ 7 | __author__ = "Maurice Tollmien" 8 | __maintainer__ = "Maurice Tollmien" 9 | __email__ = "maurice.tollmien@gmail.com" 10 | 11 | from counting_sort import counting_sort 12 | from counting_sort_complex import counting_sort_complex 13 | from bucket_sort import bucket_sort 14 | from radix_sort import radix_sort_int, radix_sort_str 15 | import random 16 | 17 | if __name__ == '__main__': 18 | listSize = 100 19 | bigList = [random.randint(0, listSize) for i in xrange(listSize)] 20 | 21 | print "Finished creating .." 22 | 23 | # Test all sorting 24 | print counting_sort(bigList) 25 | print counting_sort_complex(bigList) 26 | print bucket_sort(bigList) 27 | print radix_sort_int(bigList) 28 | 29 | # convert numbers to String so we can use radix-sort directly on words. 30 | stringList = [] 31 | maxWordSize = 15 32 | for i in range(listSize): 33 | word = "" 34 | for j in range(maxWordSize): 35 | if random.randint(0,10) <= 4: 36 | word += chr(random.randint(48,91)) 37 | stringList.append(word) 38 | 39 | print radix_sort_str(stringList) 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /General_Sorting/mergesort.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Mergesort. Not much to explain here... 5 | """ 6 | __author__ = "Maurice Tollmien" 7 | __maintainer__ = "Maurice Tollmien" 8 | __email__ = "maurice.tollmien@gmail.com" 9 | 10 | import random 11 | import time 12 | 13 | def merge (l1, l2): 14 | i = 0 15 | j = 0 16 | newList = [] 17 | while i < len(l1) and j < len(l2): 18 | if l1[i] < l2[j]: 19 | newList.append(l1[i]) 20 | i = i+1 21 | else: 22 | newList.append(l2[j]) 23 | j = j+1 24 | while i < len(l1): 25 | newList.append(l1[i]) 26 | i = i+1 27 | while j < len(l2): 28 | newList.append(l2[j]) 29 | j = j+1 30 | return newList 31 | 32 | def mergeSort (l): 33 | 34 | if len(l) <= 1: 35 | return l 36 | 37 | firstHalf = mergeSort (l[:len(l)/2]) 38 | secondHalf = mergeSort (l[len(l)/2:]) 39 | 40 | return merge(firstHalf, secondHalf) 41 | 42 | 43 | # Some tests 44 | if __name__ == '__main__': 45 | size = 1000000 46 | unsortedList = range(size) 47 | #random.shuffle(unsortedList) 48 | 49 | #unsortedList.reverse() 50 | 51 | t0 = time.time() 52 | 53 | res = mergeSort (unsortedList) 54 | 55 | print time.time() - t0 56 | 57 | #print res 58 | -------------------------------------------------------------------------------- /General_Sorting/quicksort.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Quicksort... You know, right? :) 5 | """ 6 | __author__ = "Maurice Tollmien" 7 | __maintainer__ = "Maurice Tollmien" 8 | __email__ = "maurice.tollmien@gmail.com" 9 | 10 | from random import randint 11 | import random 12 | import time 13 | 14 | def split(leftI, rightI): 15 | pivot = rightI 16 | i = leftI 17 | j = rightI 18 | while i < j: 19 | while l[i] <= l[pivot] and i < rightI: 20 | i = i+1 21 | while l[j] >= l[pivot] and j >= leftI: 22 | j = j-1 23 | if i < j: 24 | tmp = l[j] 25 | l[j] = l[i] 26 | l[i] = tmp 27 | if l[i] > l[pivot]: 28 | tmp = l[pivot] 29 | l[pivot] = l[i] 30 | l[i] = tmp 31 | return i 32 | 33 | def quickSort (leftI, rightI): 34 | if leftI < rightI: 35 | splitter = split(leftI, rightI) 36 | quickSort(leftI, splitter-1) 37 | quickSort(splitter+1, rightI) 38 | 39 | 40 | # Some small tests 41 | if __name__ == '__main__': 42 | size = 1000000 43 | l = range(size) 44 | #random.shuffle(l) 45 | 46 | #l.reverse() 47 | 48 | t0 = time.time() 49 | 50 | quickSort (0, size-1) 51 | 52 | print time.time() - t0 53 | 54 | #print l 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, 2016 Maurice Tollmien 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Advanced Algorithms 2 | 3 | This repository contains some more advanced algorithms for sorting, sorting in finite universes, searching and trees. 4 | For some algorithms there are short or more advanced tests. 5 | 6 | Please see the appropriate file for the exact algorithm. 7 | 8 | These implementations were done accompanying a masters lecture on algorithmics. 9 | Some of these algorithms are just implemented quick-and-dirty while others (2,3-tree) have gotten some more thought put into. 10 | I couldn't always find time. 11 | 12 | None of the algorithms are fit and ready for production use. But most of them are available as robust implementations in standard or other libraries. Educational purpose only. 13 | 14 | ## Tree 15 | 16 | Algorithm | Runtime 17 | --- | --- 18 | [2,3-tree](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/Tree/23Tree.py) (special case of the a,b-tree) | All operations in θ(logn) w.c. and a.c. 19 | 20 | ## General Sorting 21 | 22 | Algorithm | Runtime | 23 | --- | --- 24 | [Mergesort](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/General_Sorting/mergesort.py) | O(n logn) 25 | [Quicksort](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/General_Sorting/quicksort.py) | O(n^2) 26 | 27 | ## Sorting in finite domains 28 | 29 | n = number of values 30 | 31 | k = number of different values 32 | 33 | s = max word length (Radixsort) 34 | 35 | Algorithm | Runtime 36 | --- | --- 37 | [Countingsort](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/Finite_Domain_Sorting/counting_sort.py) | θ(n+k) w.c. and a.c. 38 | [Advanced Countingsort](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/Finite_Domain_Sorting/counting_sort_complex.py) | θ(n+k) w.c. and a.c. 39 | [Bucketsort](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/Finite_Domain_Sorting/bucket_sort.py) | θ(n+k) w.c. and a.c. 40 | [Radixsort](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/Finite_Domain_Sorting/radix_sort.py) | θ(s*(n+k)) w.c. and a.c. 41 | 42 | ## Order statistics (Select algorithms) 43 | 44 | Searching for the n-th element in a not sorted list. 45 | 46 | Algorithm | Runtime 47 | --- | --- 48 | [Randomised Algorithm](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/Selection/selection_rand.py) | θ(n^2) w.c. θ(n) a.c. 49 | [Deterministic Algorithm](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/Selection/selection_det.py) | θ(n) w.c. and a.c. 50 | 51 | ## Searching in sorted Arrays 52 | 53 | Algorithm | Runtime 54 | --- | --- 55 | [Binary Search](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/Searching/binary_search.py) | θ(logn) 56 | [Interpolation Search](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/Searching/interpolation_search.py) | θ(n) w.c. θ(log(logn)) a.c. 57 | [Quadratic Binary Search](https://github.com/MauriceGit/Advanced_Algorithms/blob/master/Searching/quadratic_binary_search.py) | θ(sqrt(n)) w.c. θ(log(logn)) a.c. 58 | -------------------------------------------------------------------------------- /Searching/binary_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Search for the index of an element in a sorted list. Basic algorithm. 5 | """ 6 | __author__ = "Maurice Tollmien" 7 | __maintainer__ = "Maurice Tollmien" 8 | __email__ = "maurice.tollmien@gmail.com" 9 | 10 | def search(S, e, l, r, count): 11 | if l > r: 12 | return -1, count 13 | 14 | index = (r+l)/2 15 | 16 | if e == S[index]: 17 | return index, count 18 | if e < S[index]: 19 | return search(S, e, l, index-1, count+1) 20 | return search(S, e, index+1, r, count+1) 21 | 22 | def binary_search(S, e): 23 | return search(S, e, 0, len(S)-1, 1) 24 | 25 | -------------------------------------------------------------------------------- /Searching/interpolation_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Search for the index of an element in a sorted list. 5 | """ 6 | __author__ = "Maurice Tollmien" 7 | __maintainer__ = "Maurice Tollmien" 8 | __email__ = "maurice.tollmien@gmail.com" 9 | 10 | def search(S, e, l, r, count): 11 | # second check to avoid possible division by zero later on 12 | # thanks mbolivar 13 | if e == S[l]: 14 | return l, count 15 | if l > r or S[l] == S[r]: 16 | return -1, count 17 | 18 | percentage = float(e-S[l]) / float(S[r]-S[l]) 19 | 20 | # When the element we are looking for is outside the given range 21 | if percentage > 1.0 or percentage < 0.0: 22 | return -1, count 23 | 24 | index = int(round(percentage * (r-l) + l)) 25 | 26 | if e == S[index]: 27 | return index, count 28 | if e < S[index]: 29 | return search(S, e, l, index-1, count+1) 30 | return search(S, e, index+1, r, count+1) 31 | 32 | def interpolation_search(S, e): 33 | return search(S, e, 0, len(S)-1, 1) 34 | 35 | -------------------------------------------------------------------------------- /Searching/quadratic_binary_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Find the index of an element in a sorted List. 5 | """ 6 | __author__ = "Maurice Tollmien" 7 | __maintainer__ = "Maurice Tollmien" 8 | __email__ = "maurice.tollmien@gmail.com" 9 | 10 | import math 11 | 12 | def findLeftInterval(S, e, stepSize, start): 13 | thisStep = start-stepSize 14 | while e < S[thisStep] and thisStep > 0: 15 | thisStep -= stepSize 16 | 17 | return max(0, thisStep) 18 | 19 | def findRightInterval(S, e, stepSize, start): 20 | length = len(S) 21 | thisStep = start+stepSize 22 | while thisStep < length and e > S[thisStep]: 23 | thisStep += stepSize 24 | 25 | return min(thisStep, length-1) 26 | 27 | def search(S, e, l, r, count): 28 | # second check to avoid possible division by zero later on 29 | if l > r or S[l] == S[r]: 30 | return -1, count 31 | 32 | percentage = float(e-S[l]) / float(S[r]-S[l]) 33 | 34 | # When the element we are looking for is outside the given range 35 | if percentage > 1.0 or percentage < 0.0: 36 | return -1, count 37 | 38 | index = int(round(percentage * (r-l) + l)) 39 | 40 | if e == S[index]: 41 | return index, count 42 | if e < S[index]: 43 | return search(S, e, findLeftInterval(S, e, int(math.sqrt(index-1-l)), index-1), index-1, count+1) 44 | return search(S, e, index+1, findRightInterval(S, e, int(math.sqrt(r-(index+1))), index+1), count+1) 45 | 46 | def quadratic_binary_search(S, e): 47 | return search(S, e, 0, len(S)-1, 1) 48 | 49 | -------------------------------------------------------------------------------- /Searching/search_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Test some search-algorithms. Looking for random elements in an unsorted list. 5 | And create some statistics about it. 6 | """ 7 | __author__ = "Maurice Tollmien" 8 | __maintainer__ = "Maurice Tollmien" 9 | __email__ = "maurice.tollmien@gmail.com" 10 | 11 | from binary_search import binary_search 12 | from interpolation_search import interpolation_search 13 | from quadratic_binary_search import quadratic_binary_search 14 | import random 15 | 16 | if __name__ == '__main__': 17 | listSize = 100000000 18 | element = random.randint(0, listSize) 19 | bigList = [random.randint(0, listSize) for i in xrange(listSize)] 20 | bigList.sort() 21 | 22 | print "Finished creating and sorting .." 23 | print "Looking for", element, "in a list of", listSize, "elements." 24 | 25 | binCount = 0 26 | intCount = 0 27 | quadCount = 0 28 | testCount = 100 29 | 30 | for i in range(testCount): 31 | element = random.randint(0, listSize) 32 | binOut, c1 = binary_search(bigList, element) 33 | intOut, c2 = interpolation_search(bigList, element) 34 | quaOut, c3 = quadratic_binary_search(bigList, element) 35 | binCount += c1 36 | intCount += c2 37 | quadCount += c3 38 | print "binOut: (_, %s), intOut: (_, %s), quaOut: (_, %s)" % (c1, c2, c3) 39 | 40 | print "Over %s tests and %s elements, the average try-count is:\nBinary:\t%s\n Interpolation:\t%s\nQuadratic:\t%s" % \ 41 | (testCount, listSize, binCount/float(testCount), intCount/float(testCount), quadCount/float(testCount)) 42 | 43 | 44 | # For 10k Elements, it takes: 45 | # Binary: 12.95 46 | # Interpolation: 3.48 47 | # Quadratic: 3.21 48 | # For 100k Elements, it takes: 49 | # Binary: 16.13 50 | # Interpolation: 3.94 51 | # Quadratic: 3.24 52 | # For 1m Elements, it takes: 53 | # Binary: 19.43 54 | # Interpolation: 4.12 55 | # Quadratic: 3.6 56 | # For 10m Elements, it takes: 57 | # Binary: 22.66 58 | # Interpolation: 4.49 59 | # Quadratic: 3.97 60 | # For 100m Elements, it takes: 61 | # Binary: 26.02 62 | # Interpolation: 4.82 63 | # Quadratic: 4.22 64 | -------------------------------------------------------------------------------- /Selection/selection_det.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Search for the k-th element of an unsorted list. 5 | """ 6 | __author__ = "Maurice Tollmien" 7 | __maintainer__ = "Maurice Tollmien" 8 | __email__ = "maurice.tollmien@gmail.com" 9 | 10 | import statistics 11 | 12 | # Basically Quicksort partition 13 | def partition(pivot, A): 14 | smaller = [] 15 | equal = [] 16 | larger = [] 17 | for p in A: 18 | if p < pivot: 19 | smaller.append(p) 20 | else: 21 | if p == pivot: 22 | equal.append(p) 23 | else: 24 | larger.append(p) 25 | return smaller, equal, larger 26 | 27 | # Yay, generators :) 28 | def chunks(l, n): 29 | for i in range(0, len(l), n): 30 | yield l[i:i+n] 31 | 32 | def sortSubListsAndMedian(A): 33 | sortedList = [] 34 | medianList = [] 35 | for smallList in A: 36 | sortedList.append(sorted(smallList)) 37 | medianList.append(statistics.median_low(smallList)) 38 | return sortedList, medianList 39 | 40 | def select(k, A): 41 | 42 | bigList = chunks(A, 5) 43 | 44 | subSorted, medians = sortSubListsAndMedian(bigList) 45 | 46 | # yes, we have at least one element in this list. 47 | medianPivot = medians[0] 48 | if len(medians) > 1: 49 | medianPivot = select(int(len(medians)/2), medians) 50 | 51 | smaller, equal, larger = partition(medianPivot, A) 52 | 53 | if k <= len(smaller): 54 | return select(k, smaller) 55 | 56 | if k <= (len(smaller) + len(equal)): 57 | return medianPivot 58 | 59 | return select(k - (len(smaller) + len(equal)), larger) 60 | 61 | def select_det(k, A): 62 | return select(k,A) 63 | 64 | if __name__ == '__main__': 65 | print(select(3, [1,2,3,4,5,6,76,87,89])) 66 | -------------------------------------------------------------------------------- /Selection/selection_rand.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Search for the k-th element of an unsorted list. 5 | """ 6 | __author__ = "Maurice Tollmien" 7 | __maintainer__ = "Maurice Tollmien" 8 | __email__ = "maurice.tollmien@gmail.com" 9 | 10 | import random 11 | 12 | # Basically Quicksort partition 13 | def partition(pivot, A): 14 | smaller = [] 15 | equal = [] 16 | larger = [] 17 | for p in A: 18 | if p < pivot: 19 | smaller.append(p) 20 | else: 21 | if p == pivot: 22 | equal.append(p) 23 | else: 24 | larger.append(p) 25 | return smaller, equal, larger 26 | 27 | def select(k, A): 28 | pivotIndex = random.randint(0, len(A)-1) 29 | pivot = A[pivotIndex] 30 | 31 | smaller, equal, larger = partition(pivot, A) 32 | 33 | if k > len(smaller) and k <= (len(smaller) + len(equal)): 34 | return pivot 35 | 36 | if k <= len(smaller): 37 | return select(k, smaller) 38 | 39 | return select(k - (len(smaller) + len(equal)), larger) 40 | 41 | def select_rand(k, A): 42 | return select(k, A) 43 | 44 | -------------------------------------------------------------------------------- /Selection/selection_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Testing two selection algorithms (random and deterministic). 5 | Find a random index. 6 | """ 7 | __author__ = "Maurice Tollmien" 8 | __maintainer__ = "Maurice Tollmien" 9 | __email__ = "maurice.tollmien@gmail.com" 10 | 11 | 12 | from selection_det import select_det 13 | from selection_rand import select_rand 14 | import random 15 | import time 16 | 17 | if __name__ == '__main__': 18 | start = time.clock() 19 | listSize = 1000000 20 | index = random.randint(0, listSize/2) 21 | bigList = [random.randint(0, listSize) for i in range(listSize)] 22 | 23 | print("{} time: {}".format(index, time.clock()-start)) 24 | start = time.clock() 25 | print("Selection rand: {}, time: {}".format(select_rand(index, bigList), time.clock()-start)) 26 | start = time.clock() 27 | print("Selection det: {}, time: {}".format(select_det(index, bigList), time.clock()-start)) 28 | 29 | -------------------------------------------------------------------------------- /Tree/23Tree.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Complete object-oriented implementation of a 2,3-Tree. 5 | Implemented with just numbers in mind. Have a look through the code, if you want to put anything else in there! 6 | """ 7 | __author__ = "Maurice Tollmien" 8 | __maintainer__ = "Maurice Tollmien" 9 | __email__ = "maurice.tollmien@gmail.com" 10 | 11 | 12 | class Tree23: 13 | def __init__(self): 14 | # ([maxLeft, maxRight], [pointerLeftChild, pointerRightChild], elem) 15 | self.treeInstance = None 16 | 17 | def _isLeaf(self, tree): 18 | return tree[0] == [] 19 | 20 | def _newLeaf(self, elem): 21 | return ([], [], elem) 22 | 23 | def _max(self, tree): 24 | if self._isLeaf(tree): 25 | return tree[2] 26 | return max(tree[0]) 27 | 28 | def _nodeFromChildrenList(self, children): 29 | newNode = ([],[],0) 30 | for i in range(len(children)): 31 | newNode[0].append(self._max(children[i])) 32 | newNode[1].append(children[i]) 33 | return newNode 34 | 35 | def _multipleNodesFromChildrenList(self, children): 36 | 37 | cLen = len(children) 38 | if cLen == 0: 39 | return [self._nodeFromChildrenList(children)] 40 | 41 | if cLen <= 3: 42 | return [self._nodeFromChildrenList(children)] 43 | 44 | if 4 <= cLen <= 6: 45 | half = cLen/2 46 | c1 = self._nodeFromChildrenList(children[:half]) 47 | c2 = self._nodeFromChildrenList(children[half:]) 48 | return [c1, c2] 49 | 50 | if 7 <= cLen <= 9: 51 | third = cLen/3 52 | c1 = self._nodeFromChildrenList(children[:third]) 53 | c2 = self._nodeFromChildrenList(children[third:2*third]) 54 | c3 = self._nodeFromChildrenList(children[2*third:]) 55 | return [c1, c2, c3] 56 | 57 | print "should never get here." 58 | 59 | def _insertInto(self, tree, elem): 60 | biggest = -1 61 | subTree = -1 62 | for i in range(len(tree[0])): 63 | # smallest child bigger than elem 64 | if (subTree == -1 and tree[0][i] > elem) or (tree[0][i] > elem and tree[0][i] < tree[0][subTree]): 65 | subTree = i 66 | if tree[0][i] > biggest: 67 | biggest = i 68 | if subTree == -1: 69 | return biggest 70 | return subTree 71 | 72 | 73 | def _distributeFourChildren(self, c1, c2, c3, c4): 74 | child1 = ([], [], 0) 75 | child1[0].append(self._max(c1)) 76 | child1[1].append(c1) 77 | child1[0].append(self._max(c2)) 78 | child1[1].append(c2) 79 | 80 | child2 = ([], [], 0) 81 | child2[0].append(self._max(c3)) 82 | child2[1].append(c3) 83 | child2[0].append(self._max(c4)) 84 | child2[1].append(c4) 85 | 86 | newRoot = ([], [], 0) 87 | newRoot[0].append(self._max(child1)) 88 | newRoot[1].append(child1) 89 | newRoot[0].append(self._max(child2)) 90 | newRoot[1].append(child2) 91 | 92 | return newRoot 93 | 94 | def _insertRec(self, tree, elem): 95 | if self._isLeaf(tree): 96 | return [tree, self._newLeaf(elem)] 97 | subTree = self._insertInto(tree, elem) 98 | newChildren = self._insertRec(tree[1][subTree], elem) 99 | if len(newChildren[0]) == 1: 100 | tree[0][subTree] = self._max(newChildren[0]) 101 | tree[1][subTree] = newChildren[0] 102 | return [tree] 103 | if len(tree[0]) == 2: 104 | tree[0][subTree] = self._max(newChildren[0]) 105 | tree[1][subTree] = newChildren[0] 106 | tree[0].append(self._max(newChildren[1])) 107 | tree[1].append(newChildren[1]) 108 | return [tree] 109 | 110 | del tree[0][subTree] 111 | del tree[1][subTree] 112 | tmpRoot = self._distributeFourChildren(tree[1][0], tree[1][1], newChildren[0], newChildren[1]) 113 | 114 | return [tmpRoot[1][0], tmpRoot[1][1]] 115 | 116 | def insert(self, elem): 117 | 118 | if self.treeInstance == None: 119 | self.treeInstance = self._newLeaf(elem) 120 | return 121 | 122 | if self._isLeaf(self.treeInstance): 123 | newRoot = ([], [], 0) 124 | newChild = self._newLeaf(elem) 125 | newRoot[0].append(self.treeInstance[2]) 126 | newRoot[1].append(self.treeInstance) 127 | newRoot[0].append(newChild[2]) 128 | newRoot[1].append(newChild) 129 | self.treeInstance = newRoot 130 | return 131 | 132 | subTree = self._insertInto(self.treeInstance, elem) 133 | newChildren = self._insertRec(self.treeInstance[1][subTree], elem) 134 | 135 | if len(newChildren) == 1: 136 | self.treeInstance[0][subTree] = self._max(newChildren[0]) 137 | self.treeInstance[1][subTree] = newChildren[0] 138 | return 139 | 140 | # we get two new children and have one old (subTree is overwritten). 141 | if len(self.treeInstance[0]) == 2: 142 | # overwrite old child 143 | self.treeInstance[0][subTree] = self._max(newChildren[0]) 144 | self.treeInstance[1][subTree] = newChildren[0] 145 | # add new child 146 | self.treeInstance[0].append(self._max(newChildren[1])) 147 | self.treeInstance[1].append(newChildren[1]) 148 | return 149 | 150 | # delete old child 151 | del self.treeInstance[0][subTree] 152 | del self.treeInstance[1][subTree] 153 | 154 | newRoot = self._distributeFourChildren(self.treeInstance[1][0], self.treeInstance[1][1], newChildren[0], newChildren[1]) 155 | 156 | # set new root as global root 157 | self.treeInstance = newRoot 158 | 159 | def _deleteFrom(self, tree, elem): 160 | subTree = -1 161 | for i in range(len(tree[0])): 162 | # smallest child bigger or equal than elem 163 | if (subTree == -1 and tree[0][i] >= elem) or (tree[0][i] >= elem and tree[0][i] < tree[0][subTree]): 164 | subTree = i 165 | return subTree 166 | 167 | def _deleteRec(self, tree, elem): 168 | 169 | allLeaves = True 170 | for c in tree[1]: 171 | allLeaves = allLeaves and self._isLeaf(c) 172 | if allLeaves: 173 | newChildren = [] 174 | for c in tree[1]: 175 | if c[2] != elem: 176 | newChildren.append(c) 177 | return newChildren 178 | 179 | deleteFrom = self._deleteFrom(tree, elem) 180 | if deleteFrom == -1: 181 | return tree[1] 182 | 183 | oldChildren = tree[1][:deleteFrom] + tree[1][deleteFrom+1:] 184 | oldGrandchildren = [] 185 | for c in oldChildren: 186 | for i in range(len(c[1])): 187 | oldGrandchildren.append(c[1][i]) 188 | 189 | children = self._deleteRec(tree[1][deleteFrom], elem) 190 | 191 | nodes = self._multipleNodesFromChildrenList(oldGrandchildren + children) 192 | return nodes 193 | 194 | def delete(self, elem): 195 | 196 | if self.treeInstance == None or self._isLeaf(self.treeInstance) and self.treeInstance[2] == elem: 197 | self.treeInstance = None 198 | return 199 | 200 | children = self._deleteRec(self.treeInstance, elem) 201 | 202 | if len(children[0]) == 1: 203 | self.treeInstance = children[0] 204 | return 205 | 206 | self.treeInstance = self._nodeFromChildrenList(children) 207 | 208 | def _findInTree(self, tree, elem): 209 | if tree == None: 210 | return None 211 | if self._isLeaf(tree): 212 | if tree[2] == elem: 213 | return tree 214 | else: 215 | return None 216 | 217 | subTree = self._deleteFrom(tree, elem) 218 | 219 | return self._findInTree(tree[1][subTree], elem) 220 | 221 | def find(self, elem): 222 | return self._findInTree(self.treeInstance, elem) 223 | 224 | def _minmaxDepth(self, tree): 225 | if tree == None: 226 | return (0, 0) 227 | if self._isLeaf(tree): 228 | return (1, 1) 229 | 230 | depths = ([], []) 231 | 232 | for i in range(len(tree[1])): 233 | tmpDepth = self._minmaxDepth(tree[1][i]) 234 | depths[0].append(tmpDepth[0]+1) 235 | depths[1].append(tmpDepth[1]+1) 236 | 237 | return (min(depths[0]), max(depths[1])) 238 | 239 | def depths(self): 240 | return self._minmaxDepth(self.treeInstance) 241 | 242 | def invariant(self): 243 | depths = self.depths() 244 | return depths[0] == depths[1] 245 | 246 | def _pprint(self, tree): 247 | if self._isLeaf(tree): 248 | print tree[2], 249 | return 250 | for i in range(len(tree[1])): 251 | self._pprint(tree[1][i]) 252 | 253 | def pprint(self): 254 | print "Elements:", 255 | self._pprint(self.treeInstance) 256 | print "" 257 | 258 | # Some tests 259 | if __name__ == '__main__': 260 | 261 | tree = Tree23() 262 | 263 | print "Invariant:", tree.invariant() 264 | 265 | print "________________________ INSERT ___________________________" 266 | print "insert 4" 267 | tree.insert(4) 268 | print "insert 3" 269 | tree.insert(3) 270 | print "insert 5" 271 | tree.insert(5) 272 | print "insert 6" 273 | tree.insert(6) 274 | print "insert 7" 275 | tree.insert(7) 276 | tree.pprint() 277 | print "Invariant:", tree.invariant() 278 | 279 | print "________________________ FIND _____________________________" 280 | print "find 7" 281 | print tree.find(7) 282 | print "find 8" 283 | print tree.find(8) 284 | print "find 3" 285 | print tree.find(3) 286 | tree.pprint() 287 | print "Invariant:", tree.invariant() 288 | 289 | print "________________________ DELETE ___________________________" 290 | print "delete 5" 291 | tree.delete(5) 292 | print "delete 4" 293 | tree.delete(4) 294 | tree.pprint() 295 | print "delete 8" 296 | tree.delete(8) 297 | tree.pprint() 298 | print "delete 7" 299 | tree.delete(7) 300 | tree.pprint() 301 | 302 | print "Invariant:", tree.invariant() 303 | 304 | 305 | 306 | 307 | 308 | --------------------------------------------------------------------------------