├── data ├── sort │ ├── 3.out │ ├── 4.out │ ├── 3.in │ ├── 4.in │ ├── 5.out │ ├── 5.in │ ├── 8.out │ ├── 8.in │ └── README.md ├── sum_array │ ├── 16.out │ ├── 3.out │ ├── 4.out │ ├── 3.in │ ├── 4.in │ ├── 16.in │ └── README.md ├── compare_delimiter │ ├── 3.out │ ├── 3-less.out │ ├── 3-less.in │ └── 3.in ├── map │ ├── 1.out │ ├── 1.in │ └── README.md ├── tac │ ├── 0.in │ ├── 0.out │ ├── 1.in │ └── 1.out └── README.md ├── data-gen ├── .gitignore ├── Makefile ├── README.md └── sum_array ├── src ├── cpp │ ├── interactive │ │ ├── Makefile │ │ ├── common.hpp │ │ ├── README.md │ │ ├── cpu_bound.hpp │ │ ├── sum_array.hpp │ │ ├── cpu_bound.cpp │ │ ├── cpu_bound_parallel.cpp │ │ └── sum_array_parallel.cpp │ ├── Makefile │ ├── README.md │ ├── common.hpp │ ├── quick_sort.cpp │ ├── hanoi.cpp │ ├── map.hpp │ ├── merge_sort.cpp │ ├── map.cpp │ ├── make_change.cpp │ ├── longest_common_subsequence.cpp │ ├── longest_increasing_subsequence.cpp │ ├── knapsack01.cpp.off │ ├── hanoi_generalized.cpp.off │ └── hash_map.hpp ├── c │ ├── common.h │ ├── README.md │ ├── Makefile │ ├── stack.c │ ├── compare_delimiter.c │ ├── version_string.c │ ├── most_frequent.c │ ├── trie.c │ └── tac.c ├── java │ ├── lib │ │ ├── IntArrayConsumer.java │ │ ├── Searcher.java │ │ ├── Sort.java │ │ └── StringSearch.java │ ├── README.md │ ├── Makefile │ ├── HeapSort.java │ ├── StringSearchNaive.java │ ├── QuickSort.java │ ├── KnuthMorrisPrattAlgs4.java │ ├── QuickSortTail.java │ ├── WellNestedOpenClose.java.off │ ├── StringIsomorphism.java.off │ ├── LinkedList.java.off │ ├── WellNestedOpenClose.java │ ├── BinaryTree.java │ ├── Heap.java │ ├── KnuthMorrisPratt.java │ └── Tac.java ├── python │ └── newton.py └── README.md ├── logo.dot ├── newton-method.md ├── crit-bit-tree.md ├── btree.png ├── logo.png ├── b-plus-tree.md ├── human-factors.md ├── skip-list.md ├── b-star.md ├── stack-overflow-data-dump ├── .gitignore ├── checksums ├── README.adoc ├── users_rep_view_dat.gnuplot ├── users_rep_view_dat_to_html.py ├── 
users_xml_to_rep_view_dat.py └── users_rep_view_dat_to_matplotlib_svg.py ├── misc.md ├── disjoint-set.md ├── error-detection.md ├── pde.md ├── recursive-descent-parser.md ├── CONTRIBUTING.md ├── type-theory.md ├── bellman-ford.md ├── maximum-subarray.md ├── set.md ├── peg-grammar.md ├── regular-grammar.md ├── .gitignore ├── longest-increasing-subsequence.md ├── string-search.md ├── parallel.md ├── .travis.yml ├── longest-common-subsequence.md ├── matrix-multiplication.md ├── statistics.md ├── radix-tree.md ├── xor-swap.md ├── push-down-automaton.md ├── md5sum.md ├── heap-sort.md ├── regular-language.md ├── decimal-data-type.md ├── busy-beaver.md ├── map.md ├── hanoi-tower.md ├── bittorrent.md ├── topological-sort.md ├── knapsack.md ├── big-o-notation.md ├── getting-started.md ├── undecidability.md ├── a-star.md ├── change-making.md ├── insertion-sort.md ├── amortized-analysis.md ├── b-tree.md ├── bst.md ├── programming-languages.md ├── dfa.md ├── ll-parser.md ├── ecdsa.md ├── rb-tree.md ├── out-of-core.md ├── trie.md ├── tree.md ├── bubble-sort.md ├── formal-language.md ├── hash-map.md ├── recurrence-relations.md ├── sort.md ├── base64.md ├── recursive-algorithms.md ├── probabilistic-turing-machine.md ├── hash-function.md ├── bibliography.md ├── turing-machine.md ├── hardware.md ├── type-systems.md ├── tac.md ├── context-free-grammar.md ├── string-algorithms.md ├── heap.md ├── dijkstra.md ├── design-patterns.md ├── p-vs-np.md ├── quantum-computing.md ├── regular-expression.md ├── README.md └── graph.md /data/sort/3.out: -------------------------------------------------------------------------------- 1 | 0 1 2 2 | -------------------------------------------------------------------------------- /data/sort/4.out: -------------------------------------------------------------------------------- 1 | 0 1 2 3 2 | -------------------------------------------------------------------------------- /data/sum_array/16.out: 
-------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /data/sum_array/3.out: -------------------------------------------------------------------------------- 1 | 6 2 | -------------------------------------------------------------------------------- /data/sum_array/4.out: -------------------------------------------------------------------------------- 1 | 5 2 | -------------------------------------------------------------------------------- /data/sort/3.in: -------------------------------------------------------------------------------- 1 | 3 2 | 1 2 0 3 | -------------------------------------------------------------------------------- /data/sort/4.in: -------------------------------------------------------------------------------- 1 | 4 2 | 1 3 2 0 3 | -------------------------------------------------------------------------------- /data/sort/5.out: -------------------------------------------------------------------------------- 1 | 0 1 2 3 4 2 | -------------------------------------------------------------------------------- /data-gen/.gitignore: -------------------------------------------------------------------------------- 1 | *.in 2 | *.out 3 | -------------------------------------------------------------------------------- /data/compare_delimiter/3.out: -------------------------------------------------------------------------------- 1 | 0 2 | -------------------------------------------------------------------------------- /data/sort/5.in: -------------------------------------------------------------------------------- 1 | 5 2 | 1 4 0 2 3 3 | -------------------------------------------------------------------------------- /data/sort/8.out: -------------------------------------------------------------------------------- 1 | 0 1 2 3 4 5 6 7 2 | -------------------------------------------------------------------------------- /data/sum_array/3.in: 
-------------------------------------------------------------------------------- 1 | 3 2 | 1 2 3 3 | -------------------------------------------------------------------------------- /data/sum_array/4.in: -------------------------------------------------------------------------------- 1 | 4 2 | 1 -1 2 3 3 | -------------------------------------------------------------------------------- /data/compare_delimiter/3-less.out: -------------------------------------------------------------------------------- 1 | -1 2 | -------------------------------------------------------------------------------- /data/sort/8.in: -------------------------------------------------------------------------------- 1 | 8 2 | 4 5 6 2 1 3 0 7 3 | -------------------------------------------------------------------------------- /src/cpp/interactive/Makefile: -------------------------------------------------------------------------------- 1 | ../Makefile -------------------------------------------------------------------------------- /src/cpp/interactive/common.hpp: -------------------------------------------------------------------------------- 1 | ../common.hpp -------------------------------------------------------------------------------- /data/map/1.out: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 1 2 4 | 1 5 | 0 6 | -------------------------------------------------------------------------------- /data/compare_delimiter/3-less.in: -------------------------------------------------------------------------------- 1 | 1 2 3 2 | 1 2 4 3 | -------------------------------------------------------------------------------- /data/compare_delimiter/3.in: -------------------------------------------------------------------------------- 1 | 1 2 3 0 2 | 1 2 3 0 3 | -------------------------------------------------------------------------------- /data/tac/0.in: -------------------------------------------------------------------------------- 1 | 1 2 | 2123 3 
| 3 4 | 5 | 512345 6 | 6 7 | -------------------------------------------------------------------------------- /data/tac/0.out: -------------------------------------------------------------------------------- 1 | 6 2 | 512345 3 | 4 | 3 5 | 2123 6 | 1 7 | -------------------------------------------------------------------------------- /data/tac/1.in: -------------------------------------------------------------------------------- 1 | 1 2 | 2123 3 | 3 4 | 5 | 512345 6 | 6 7 | -------------------------------------------------------------------------------- /data/tac/1.out: -------------------------------------------------------------------------------- 1 | 6 2 | 512345 3 | 4 | 3 5 | 2123 6 | 1 7 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # data 2 | 3 | Language agnostic test data. 4 | -------------------------------------------------------------------------------- /logo.dot: -------------------------------------------------------------------------------- 1 | digraph logo { 2 | 1 -> 0; 3 | 1 -> 2; 4 | } 5 | -------------------------------------------------------------------------------- /newton-method.md: -------------------------------------------------------------------------------- 1 | # Newton method 2 | 3 | TODO: convergence. 
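Convergence order can at least be observed numerically. A minimal sketch (a hypothetical helper, separate from src/python/newton.py, using a fixed iteration count instead of an eps loop): at a simple root Newton converges quadratically, while at a multiple root such as the double root of x^2 (one of the examples in newton.py) it only converges linearly.

```python
def newton_iterates(f, fp, x0, steps):
    """Run a fixed number of Newton steps and return all iterates."""
    xs = [x0]
    for _ in range(steps):
        x0 = x0 - f(x0) / fp(x0)
        xs.append(x0)
    return xs

# Simple root of x^2 - 2 at sqrt(2): the error roughly squares each step.
simple = newton_iterates(lambda x: x * x - 2.0, lambda x: 2.0 * x, 2.0, 6)

# Double root of x^2 at 0: each step merely halves x, i.e. linear convergence.
double = newton_iterates(lambda x: x * x, lambda x: 2.0 * x, 1.0, 6)
```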
4 | -------------------------------------------------------------------------------- /data/map/1.in: -------------------------------------------------------------------------------- 1 | find 1 2 | add 1 2 3 | find 1 4 | remove 1 5 | find 1 6 | -------------------------------------------------------------------------------- /data/sum_array/16.in: -------------------------------------------------------------------------------- 1 | 16 2 | 1 2 3 4 -1 -2 -3 -4 5 6 7 8 -5 -6 -7 -8 3 | -------------------------------------------------------------------------------- /crit-bit-tree.md: -------------------------------------------------------------------------------- 1 | # Crit-bit tree 2 | 3 | WIP 4 | 5 | TODO: definition? 6 | -------------------------------------------------------------------------------- /btree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cirosantilli/algorithm-cheat/HEAD/btree.png -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cirosantilli/algorithm-cheat/HEAD/logo.png -------------------------------------------------------------------------------- /b-plus-tree.md: -------------------------------------------------------------------------------- 1 | # B+ tree TODO 2 | 3 | 4 | -------------------------------------------------------------------------------- /data-gen/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | 3 | .PHONY: clean 4 | 5 | clean: 6 | rm *.in *.out 7 | -------------------------------------------------------------------------------- /human-factors.md: -------------------------------------------------------------------------------- 1 | # Human factors 2 | 3 | 4 | -------------------------------------------------------------------------------- 
/skip-list.md: -------------------------------------------------------------------------------- 1 | # Skip list 2 | 3 | WIP 4 | 5 | 6 | -------------------------------------------------------------------------------- /b-star.md: -------------------------------------------------------------------------------- 1 | # B star TODO 2 | 3 | # B* Algorithm 4 | 5 | 6 | -------------------------------------------------------------------------------- /stack-overflow-data-dump/.gitignore: -------------------------------------------------------------------------------- 1 | *.7z 2 | *.csv 3 | *.dat 4 | *.html 5 | *.png 6 | *.xml 7 | -------------------------------------------------------------------------------- /misc.md: -------------------------------------------------------------------------------- 1 | Amazing: they allow you to run your own queries: 2 | -------------------------------------------------------------------------------- /disjoint-set.md: -------------------------------------------------------------------------------- 1 | # Disjoint set 2 | 3 | 4 | 5 | TODO 6 | -------------------------------------------------------------------------------- /error-detection.md: -------------------------------------------------------------------------------- 1 | # Error detection 2 | 3 | WIP 4 | 5 | 6 | -------------------------------------------------------------------------------- /pde.md: -------------------------------------------------------------------------------- 1 | # PDE 2 | 3 | ## FDM 4 | 5 | - heat equation example: 6 | -------------------------------------------------------------------------------- /recursive-descent-parser.md: -------------------------------------------------------------------------------- 1 | # Recursive descent parser 2 | 3 | 4 | 5 | TODO 6 | -------------------------------------------------------------------------------- /src/c/common.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 |
#include 4 | #include 5 | #include 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing guidelines 2 | 3 | Use the following Markdown style: 4 | -------------------------------------------------------------------------------- /src/java/lib/IntArrayConsumer.java: -------------------------------------------------------------------------------- 1 | package lib; 2 | 3 | public interface IntArrayConsumer { 4 | public void accept(int[] in); 5 | } 6 | -------------------------------------------------------------------------------- /type-theory.md: -------------------------------------------------------------------------------- 1 | # Type theory 2 | 3 | 4 | 5 | TODO: what is this. What is all the fuss about? 6 | -------------------------------------------------------------------------------- /src/java/lib/Searcher.java: -------------------------------------------------------------------------------- 1 | package lib; 2 | 3 | public interface Searcher { 4 | public void preProcess(int[] pattern); 5 | public int search(int[] text); 6 | } 7 | -------------------------------------------------------------------------------- /bellman-ford.md: -------------------------------------------------------------------------------- 1 | # Bellman-Ford TODO 2 | 3 | Solves the same shortest-path problem as Dijkstra, but: 4 | 5 | - larger time complexity: $O(VE)$ 6 | - also works for graphs with negative weights 7 | -------------------------------------------------------------------------------- /src/java/README.md: -------------------------------------------------------------------------------- 1 | # Java 2 | 3 | All Java files in this directory have a `main` method. 4 | 5 | Shared classes for which there is no `main` method are under [lib/](lib).
6 | -------------------------------------------------------------------------------- /maximum-subarray.md: -------------------------------------------------------------------------------- 1 | # Maximum subarray 2 | 3 | 4 | 5 | ## Implementations 6 | 7 | - [kadane.cpp](src/cpp/kadane.cpp) 8 | -------------------------------------------------------------------------------- /src/java/Makefile: -------------------------------------------------------------------------------- 1 | LIB_DIR ?= lib 2 | OUT_EXT ?= .class 3 | 4 | .PHONY: all clean 5 | 6 | all: 7 | javac -g *.java 8 | 9 | clean: 10 | rm -f *$(OUT_EXT) $(LIB_DIR)/*$(OUT_EXT) 11 | -------------------------------------------------------------------------------- /src/c/README.md: -------------------------------------------------------------------------------- 1 | # C 2 | 3 | 1. [BST](bst.c) 4 | 1. [tac](tac.c) 5 | 1. [Version string](version_string.c) 6 | 1. [Trie](trie.c) 7 | 1. [Most frequent](most_frequent.c) 8 | 1. [Sprite alpha blend](sprite_alpha_blend.c) 9 | -------------------------------------------------------------------------------- /stack-overflow-data-dump/checksums: -------------------------------------------------------------------------------- 1 | 52d0fb47b097ea52d79b3c89e0528056fc11a9de4a19a38c75a8ec189b009c72 stackoverflow.com-Users.7z 2 | 07a9265096f88e191d215cfdca1f5a67aee04bfa0c4bf2ec62886fba239620e5 users_rep_view.dat.7z 3 | -------------------------------------------------------------------------------- /set.md: -------------------------------------------------------------------------------- 1 | # Set 2 | 3 | 4 | 5 | Basically a map where all the values are the same: only key presence matters. 6 | 7 | So we just remove the value as well to save some space. 8 | -------------------------------------------------------------------------------- /peg-grammar.md: -------------------------------------------------------------------------------- 1 | # PEG grammar 2 | 3 | Parsing expression grammar. 
4 | 5 | 6 | 7 | Deterministic subset of CFG. TODO vs LL, LR? 8 | 9 | TODO example. Contrast with CFG. 10 | -------------------------------------------------------------------------------- /regular-grammar.md: -------------------------------------------------------------------------------- 1 | # Regular grammar 2 | 3 | A regular grammar is either left regular or right regular: 4 | 5 | - `B → a` 6 | - `B → aC` or `Ca` (right and left regular grammars respectively) 7 | - `B → ε` 8 | 9 | TODO vs regular expression. 10 | -------------------------------------------------------------------------------- /data/sort/README.md: -------------------------------------------------------------------------------- 1 | # Sort 2 | 3 | Input format: 4 | 5 | - number of elements 6 | - newline 7 | - space separated integer input 8 | - newline 9 | 10 | Output format: 11 | 12 | - space separated integer sorted output 13 | - newline 14 | -------------------------------------------------------------------------------- /data/sum_array/README.md: -------------------------------------------------------------------------------- 1 | # Sum array 2 | 3 | Input format: 4 | 5 | - number of elements (integer) 6 | - newline 7 | - space separated integer input 8 | - newline 9 | 10 | Output format: 11 | 12 | - the sum (integer) 13 | - newline 14 | -------------------------------------------------------------------------------- /src/cpp/interactive/README.md: -------------------------------------------------------------------------------- 1 | # Interactive 2 | 3 | Tests that take too long (like benchmarks) or that require user intervention. 4 | 5 | They would slow down small correctness tests too much otherwise. 6 | 7 | 1.
[`sum_array_parallel`](sum_array_parallel) 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled languages 2 | *.o 3 | # Must be in src, because the `.out` in data/ should be included. 4 | /src/**/*.out 5 | gmon.out 6 | 7 | # Java 8 | *.class 9 | 10 | # Eclipse 11 | .classpath 12 | .project 13 | 14 | tmp.* 15 | *.bak 16 | -------------------------------------------------------------------------------- /longest-increasing-subsequence.md: -------------------------------------------------------------------------------- 1 | # Longest increasing subsequence 2 | 3 | 4 | 5 | ## Implementations 6 | 7 | - [longest_increasing_subsequence.cpp](src/cpp/longest_increasing_subsequence.cpp) 8 | -------------------------------------------------------------------------------- /string-search.md: -------------------------------------------------------------------------------- 1 | # String search 2 | 3 | ## Implementations 4 | 5 | - [KnuthMorrisPratt.java](src/java/KnuthMorrisPratt.java) 6 | - [KnuthMorrisPrattAlgs4.java](src/java/KnuthMorrisPrattAlgs4.java) 7 | - [StringSearchNaive.java](src/java/StringSearchNaive.java) 8 | -------------------------------------------------------------------------------- /parallel.md: -------------------------------------------------------------------------------- 1 | # Parallel 2 | 3 | ## Primitives 4 | 5 | 6 | 7 | - language agnostic: spinlock 8 | - x86: `lock`, `xadd`, `cmpxchg` (CAS) 9 | - Linux system calls: `futex`, `semctl` 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | install: 3 | - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test 4 | - sudo apt-get update -y 5 | - sudo apt-get install -y gcc-4.8 g++-4.8 6 | - sudo update-alternatives
--install /usr/bin/gcc gcc /usr/bin/gcc-4.8 50 7 | - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 50 8 | script: ./test 9 | -------------------------------------------------------------------------------- /longest-common-subsequence.md: -------------------------------------------------------------------------------- 1 | # Longest common subsequence 2 | 3 | 4 | 5 | Generalizations: 6 | 7 | - up to N errors. 8 | 9 | ## Implementations 10 | 11 | - [longest_common_subsequence.cpp](src/cpp/longest_common_subsequence.cpp) 12 | -------------------------------------------------------------------------------- /matrix-multiplication.md: -------------------------------------------------------------------------------- 1 | # Matrix multiplication 2 | 3 | ## Applications 4 | 5 | - 6 | - 7 | - 8 | -------------------------------------------------------------------------------- /statistics.md: -------------------------------------------------------------------------------- 1 | # Statistics 2 | 3 | - https://www.evanmiller.org/how-not-to-sort-by-average-rating.html 4 | 5 | How to rank stuff that gets up and down votes. Don't use ratio, use Wilson. 6 | 7 | Not considering that the top posts get much more upvotes: 8 | 9 | https://www.effectivediscussions.org/-9qb49/solving-problem-first-comment-gets-all-upvotes 10 | -------------------------------------------------------------------------------- /radix-tree.md: -------------------------------------------------------------------------------- 1 | # Radix tree 2 | 3 | 4 | 5 | Vs trie: internal nodes that have only a single child can contain multiple letters. This reduces spaces usage. 6 | 7 | 8 | 9 | TODO: any disadvantage compared to tries? 10 | -------------------------------------------------------------------------------- /xor-swap.md: -------------------------------------------------------------------------------- 1 | # XOR swap 2 | 3 | 4 | 5 | 6 | 7 | Way to swap two values without temporary variables. 
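The trick itself is three XOR assignments; sketched here in Python (the same three lines work verbatim in C for unsigned integers):

```python
def xor_swap(a, b):
    """Swap two integers using XOR, without a temporary variable."""
    a ^= b  # a == a0 ^ b0
    b ^= a  # b == b0 ^ (a0 ^ b0) == a0
    a ^= b  # a == (a0 ^ b0) ^ a0 == b0
    return a, b
```

Caveat: in languages with real memory locations, if both operands alias the same address, the first XOR zeroes the value and it is lost for good.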
8 | 9 | Major application: impress interviewers. In practice, the buffered swap is faster in most cases, and always more readable. 10 | -------------------------------------------------------------------------------- /src/cpp/Makefile: -------------------------------------------------------------------------------- 1 | G ?= gdb3 2 | IN_EXT ?= .cpp 3 | O ?= 0 4 | OUT_EXT ?= .out 5 | 6 | INS := $(wildcard *$(IN_EXT)) 7 | OUTS_NOEXT := $(basename $(INS)) 8 | OUTS := $(addsuffix $(OUT_EXT), $(OUTS_NOEXT)) 9 | 10 | .PHONY: all clean 11 | 12 | all: $(OUTS) 13 | 14 | %$(OUT_EXT): %$(IN_EXT) 15 | g++ -g'$(G)' -O'$(O)' -pg -pthread -std=c++11 -Wextra -o '$@' '$<' 16 | 17 | clean: 18 | rm -f *$(OUT_EXT) 19 | -------------------------------------------------------------------------------- /push-down-automaton.md: -------------------------------------------------------------------------------- 1 | # Push down automaton 2 | 3 | Automaton for CFGs. 4 | 5 | By default, PDA implies non-deterministic, 6 | as that matches context free grammars, which is a very popular topic. 7 | 8 | ## DPDA 9 | 10 | Deterministic PDAs (DPDA) are strictly less powerful. A DPDA with one state recognizes LL(1). 11 | 12 | Sénizergues (1997), Gödel Prize 2002, proved that equivalence of DPDAs is decidable.
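For intuition, the stack discipline of a deterministic one-state PDA can be sketched on the well-nested parentheses language (an illustration only, not a general PDA simulator; cf. WellNestedOpenClose.java under src/java):

```python
def accepts_well_nested(s):
    """One-state DPDA sketch: push on '(', pop on ')', accept on empty stack."""
    stack = []
    for c in s:
        if c == '(':
            stack.append(c)
        elif c == ')':
            if not stack:
                return False  # pop on an empty stack: reject
            stack.pop()
        else:
            return False  # symbol outside the input alphabet
    return not stack  # accept iff everything opened was closed
```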
13 | -------------------------------------------------------------------------------- /stack-overflow-data-dump/README.adoc: -------------------------------------------------------------------------------- 1 | = Stack Overflow data dump 2 | 3 | == Find users with low reputation and high profile view count 4 | 5 | Helper scripts for: https://stats.stackexchange.com/questions/376361/how-to-find-the-sample-points-that-have-statistically-meaningful-large-ratios-be 6 | 7 | * link:users_rep_view_dat.gnuplot[] 8 | * link:users_rep_view_dat_to_html.py[] 9 | * link:users_xml_to_rep_view_dat.py[] 10 | -------------------------------------------------------------------------------- /data-gen/README.md: -------------------------------------------------------------------------------- 1 | # data-gen 2 | 3 | Executables in this directory generate test data to output files. 4 | 5 | Those scripts generate large inputs / output, random or not, that would be infeasible to write manually. 6 | 7 | They are often used for performance benchmarking. 8 | 9 | TODO: 10 | 11 | - allow the `test` script to use the generated data and do the benchmark 12 | - discard the cost of disk read from benchmarks 13 | -------------------------------------------------------------------------------- /src/cpp/interactive/cpu_bound.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CPU_BOUND 2 | #define CPU_BOUND 3 | 4 | constexpr static uint64_t nIters = 0x10000000; 5 | 6 | /* Any funky operation that the compiler is unlikely to optimize away. 
*/ 7 | static void cpu_bound(uint64_t *io, uint64_t iterations) { 8 | uint64_t out = *io; 9 | for (uint64_t i = 0; i < iterations; ++i) 10 | out += out*out + 2*out + 1; 11 | *io = out; 12 | } 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /md5sum.md: -------------------------------------------------------------------------------- 1 | # md5sum 2 | 3 | Generates / checks MD5 checksums. 4 | 5 | echo a > a 6 | echo b > b 7 | md5sum a b > f 8 | cat f 9 | 10 | 60b725f10c9c85c70d97880dfe8191b3 a 11 | 3b5d5c3712955042212316173ccf37be b 12 | md5sum -c f 13 | 14 | Sample output: 15 | 16 | a: OK 17 | b: OK 18 | 19 | ## Application 20 | 21 | Have I downloaded the right file? 22 | 23 | Finding another file that matches a given checksum (a preimage) is still hard, but MD5 is no longer collision resistant, so it should not be used for security. 24 | -------------------------------------------------------------------------------- /data/map/README.md: -------------------------------------------------------------------------------- 1 | # Map 2 | 3 | Each input line produces exactly one output line. 4 | 5 | Input lines are of the form: 6 | 7 | - `add 1 2` add a key value pair to the map. 8 | 9 | Output: `0` if not previously present, `1` otherwise. 10 | 11 | - `find 1` find the value in the map. 12 | 13 | Output: `0` if not present, `1 value` if present. 14 | 15 | - `remove 1` remove key 1 from the map. 16 | 17 | Output: `0` if not present, `1` otherwise.
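A minimal reference implementation of this protocol, sketched with a Python dict (a hypothetical helper, not one of the implementations under src/); it reproduces data/map/1.out from data/map/1.in:

```python
def run_map_commands(lines):
    """Execute map test commands; return one output line per input line."""
    m = {}
    out = []
    for line in lines:
        parts = line.split()
        cmd, key = parts[0], parts[1]
        if cmd == 'add':
            out.append('1' if key in m else '0')
            m[key] = parts[2]
        elif cmd == 'find':
            out.append('1 ' + m[key] if key in m else '0')
        elif cmd == 'remove':
            out.append('1' if key in m else '0')
            m.pop(key, None)
    return out

# data/map/1.in yields data/map/1.out:
# ['0', '0', '1 2', '1', '0']
```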
18 | -------------------------------------------------------------------------------- /src/c/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | 3 | IN_EXT ?= .c 4 | OUT_EXT ?= .out 5 | 6 | INS := $(wildcard *$(IN_EXT)) 7 | OUTS_NOEXT := $(basename $(INS)) 8 | OUTS := $(addsuffix $(OUT_EXT), $(OUTS_NOEXT)) 9 | 10 | .PHONY: all clean run 11 | 12 | all: $(OUTS) 13 | 14 | %$(OUT_EXT): %$(IN_EXT) 15 | gcc -O0 -Wall -ggdb3 -pedantic-errors -o '$@' -std=c99 '$<' 16 | 17 | clean: 18 | rm -f *$(OUT_EXT) 19 | 20 | run: all 21 | ./"$(RUN)$(OUT_EXT)" 22 | -------------------------------------------------------------------------------- /src/cpp/interactive/sum_array.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SUM_ARRAY 2 | #define SUM_ARRAY 3 | 4 | static uintmax_t repeat = 0x1000; 5 | 6 | void initInput(std::vector &input) { 7 | /* 8 | 16MiB. Way larger than CPU cache, 9 | but not so large that it takes a long time to allocate. 10 | */ 11 | constexpr uint64_t size = 0x100000; 12 | input.resize(size); 13 | for (uint64_t i = 0; i < size; ++i) 14 | input[i] = i; 15 | } 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /heap-sort.md: -------------------------------------------------------------------------------- 1 | # Heap sort 2 | 3 | Convert to a heap, then delete from the top until the heap is empty. 4 | 5 | Time: $O(n log(n))$ worst and average. Creating the heap is $O(n)$, deleting the top is $O(log(n))$, and since we delete $n$ times we have $n log(n)$. 6 | 7 | Space: $O(1)$ worst (in place).
To do it: 8 | 9 | - swap the top element with the last one 10 | - reduce the heap size by 1 11 | - sift down the new top element to restore the heap 12 | -------------------------------------------------------------------------------- /regular-language.md: -------------------------------------------------------------------------------- 1 | # Regular language 2 | 3 | Equivalent to what is recognized by a regular grammar and a DFA. 4 | 5 | Definition: 6 | 7 | - languages consisting only of a single terminal are regular. 8 | 9 | - set union of two regular languages is regular. 10 | 11 | - concatenation of two regular languages is regular. 12 | Concatenation of sets of strings is concatenating 13 | each pair of strings in the set. 14 | 15 | - Kleene star of a regular language is regular: zero or more concatenations of the language with itself. 16 | -------------------------------------------------------------------------------- /decimal-data-type.md: -------------------------------------------------------------------------------- 1 | # Decimal data type 2 | 3 | 4 | 5 | Made to represent decimals precisely, especially for financial operations. 6 | 7 | Subset of fraction data types. 8 | 9 | As of 2015, mostly implemented at the software level. [IEEE 754:2008](http://en.wikipedia.org/wiki/IEEE_floating_point) however has defined an efficient binary representation that could be used by hardware implementations. The previous version IEEE 754:1985 did not define them.
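Python's standard `decimal` module is one such software-level implementation; a minimal illustration of the rounding problem it avoids:

```python
from decimal import Decimal

# Binary floating point cannot represent 0.1 exactly, so cents drift:
assert 0.1 + 0.2 != 0.3

# A decimal type keeps base-10 arithmetic exact:
assert Decimal('0.1') + Decimal('0.2') == Decimal('0.3')
# Note: construct from strings; Decimal(0.1) would inherit the binary error.
```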
10 | -------------------------------------------------------------------------------- /src/python/newton.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | 5 | def newton(f, fp, x0, eps): 6 | x = x0 7 | niter = 0 8 | xs = [] 9 | while abs(f(x)) > eps: 10 | x = x - f(x)/fp(x) 11 | niter += 1 12 | xs.append(x) 13 | return x, niter, xs 14 | 15 | if len(sys.argv) > 1: 16 | eps = float(sys.argv[1]) 17 | else: 18 | eps = 1E-6 19 | print(newton(lambda x: x*x, lambda x: 2.0*x, 5.0, eps)) 20 | print(newton(lambda x: (x - 5) ** 3, lambda x: 3 * (x - 5) ** 2, 9.0, eps)) 21 | -------------------------------------------------------------------------------- /busy-beaver.md: -------------------------------------------------------------------------------- 1 | # Busy Beaver 2 | 3 | Largest number of 1's that can be printed by a halting Turing machine with N states. 4 | 5 | 6 | 7 | 8 | 9 | The function grows faster than any computable function, and only finitely many of its values can be proved in any fixed axiom system. 10 | 11 | Only the values for up to 4 states are known; the value for 7,918 states is independent of ZFC (assuming some likely hypothesis): 12 | -------------------------------------------------------------------------------- /map.md: -------------------------------------------------------------------------------- 1 | # Map 2 | 3 | 4 | 5 | Map is an interface which can be implemented with many data structures. 6 | 7 | A map takes one element of one type and returns a second one of another type. 8 | 9 | Average operation time: 10 | 11 | | | insert | lookup | delete | 12 | |---------------------|--------|--------|--------| 13 | | Binary Search Tree | log n | log n | log n | 14 | | Hash Map | 1 | 1 | 1 | 15 | 16 | Worst case operation time: `n` for all.
17 | -------------------------------------------------------------------------------- /hanoi-tower.md: -------------------------------------------------------------------------------- 1 | # Hanoi tower 2 | 3 | 4 | 5 | Generalizations TODO: 6 | 7 | - any start and end position 8 | 9 | - more than 3 pegs (Reve's puzzle). 10 | 11 | No algorithm has been proved optimal for more than 3 pegs, but it is conjectured that the Frame–Stewart algorithm solves those cases optimally, and this has been checked up to a large number of disks. 12 | 13 | 14 | 15 | ## Implementations 16 | 17 | - [hanoi.cpp](src/cpp/hanoi.cpp) 18 | -------------------------------------------------------------------------------- /bittorrent.md: -------------------------------------------------------------------------------- 1 | # BitTorrent 2 | 3 | Requires centralized trackers to find other users who have the missing pieces, 4 | and only then downloads the pieces directly from peers. 5 | 6 | Communication with the tracker may happen while the transfer is going on 7 | to try and find better peers, but this is optional. 8 | 9 | Sample magnet URI: 10 | 11 | magnet:?xt=urn:btih:1257A35748C96FBAE381B065E300B091CD35EEEF&dn=not+cool+2014+720p+web+dl+x264+etrg&tr=udp%3A%2F%2Ftracker.istole.it%3A80%2Fannounce&tr=udp%3A%2F%2Fopen.demonii.com%3A1337 12 | 13 | TODO understand. 14 | -------------------------------------------------------------------------------- /topological-sort.md: -------------------------------------------------------------------------------- 1 | # Topological sort TODO 2 | 3 | Linearize a digraph in a way that related nodes keep their order. 4 | 5 | Not unique. 6 | 7 | Exists iff there is no cycle, i.e., iff the graph is a Directed Acyclic Graph (DAG). 8 | 9 | Applications: 10 | 11 | - `git log` must somehow linearize the commit tree to show it in the terminal (it cannot be cyclic because a cycle would require a SHA conflict: TODO check). In normal repositories it is a DAG.
Topological sort is the default option, the other major intuitive method being ordering by timestamp. 12 | -------------------------------------------------------------------------------- /knapsack.md: -------------------------------------------------------------------------------- 1 | # Knapsack TODO 2 | 3 | 4 | 5 | There are entire books written about this subset of linear programming. 6 | 7 | With unlimited supply, the greedy algorithm that takes the largest ratios first is guaranteed to reach $m/2$. With limited supply, it is far from optimal. TODO prove. 8 | 9 | ## Bibliography 10 | 11 | - 12 | 13 | Very readable. 14 | 15 | - 16 | -------------------------------------------------------------------------------- /src/cpp/interactive/cpu_bound.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Dummy operation that uses a lot of CPU, but very little memory. 3 | 4 | Used as a control for the parallel version, which should be scalably faster. 5 | */ 6 | 7 | #include "common.hpp" 8 | #include "cpu_bound.hpp" 9 | 10 | int main() { 11 | unsigned int i; 12 | unsigned int nThreads = std::thread::hardware_concurrency(); 13 | std::vector<unsigned int> ios(nThreads); 14 | for (i = 0; i < nThreads; ++i) 15 | ios[i] = i; 16 | for (i = 0; i < nThreads; ++i) 17 | cpu_bound(&ios[i], nIters); 18 | for (i = 0; i < nThreads; ++i) 19 | std::cout << ios[i] << '\n'; 20 | } 21 | -------------------------------------------------------------------------------- /src/java/HeapSort.java: -------------------------------------------------------------------------------- 1 | import lib.IntArrayConsumer; 2 | import lib.Sort; 3 | 4 | import java.util.Arrays; 5 | 6 | public class HeapSort implements IntArrayConsumer { 7 | 8 | public void accept(final int[] in) { 9 | Heap.heapify(in); 10 | int last = in.length - 1; 11 | while(last >= 0) { 12 | int buf = in[last]; 13 | in[last] = in[0]; 14 | in[0] = buf; 15 | Heap.maxHeapify(in, 0, last); 16 | last--; 17 | } 18 | } 19 |
20 | public static void main(String[] args) throws Throwable { 21 | Sort.test(args[0], new HeapSort()); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/cpp/README.md: -------------------------------------------------------------------------------- 1 | # C++ 2 | 3 | The preferred source organization is: 4 | 5 | - every `.cpp` file has a runnable main 6 | - everything that is shared across multiple `.cpp` files gets implemented in an `.hpp` 7 | 8 | Advantages: 9 | 10 | - since each test has a main, we can test each algorithm independently from the others 11 | - simpler to write the Makefile as there are no `.o` involved 12 | 13 | Disadvantages: 14 | 15 | - shared `.hpp` code gets recompiled for every `.cpp` since there are no reusable object files, increasing compilation time 16 | - we cannot reuse the main runners. The ideal reuse would be through a polymorphic interface. But then we'd lose the advantage of being able to compile and run each file independently. 17 | -------------------------------------------------------------------------------- /big-o-notation.md: -------------------------------------------------------------------------------- 1 | # Big O notation 2 | 3 | The major concept used to classify the complexity of algorithms. 4 | 5 | Also known, especially in physics, as Landau notation. 6 | 7 | Each is evaluated at the limit as $x$ tends to some point, and this point is always taken by default to be infinity in complexity calculations. 8 | 9 | - $f = O(g)$ iff there exist $k > 0$ and $x_0$ in $R$ such that $f(x) <= k*g(x)$ for all $x > x_0$. 10 | 11 | Intuitively, $f$ grows *at most* as fast as $g$. 12 | 13 | - $f = Omega(g)$ iff there exist $k > 0$ and $x_0$ in $R$ such that $f(x) >= k*g(x)$ for all $x > x_0$. 14 | 15 | Intuitively, $f$ grows *at least* as fast as $g$. 16 | 17 | - $f = Theta(g)$ iff there exist $k1$, $k2$ and $x_0$ such that $k1*g(x) <= f(x) <= k2*g(x)$ for all $x > x_0$. 18 | 19 | Intuitively, $f$ grows *at the same rate* as $g$.
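The definitions can be checked numerically; a small added example (not from the original notes): $3x^2 + 10x = O(x^2)$, with witnesses $k = 4$ and $x_0 = 10$:

```python
def f(x):
    return 3 * x**2 + 10 * x

def g(x):
    return x**2

# f = O(g): f(x) <= k * g(x) for every x past the threshold x0,
# since 10x <= x^2 whenever x >= 10
k, x0 = 4, 10
assert all(f(x) <= k * g(x) for x in range(x0 + 1, 10000))

# the bound fails for small x, which is why the definition only
# requires it for sufficiently large x:
assert f(1) > k * g(1)
```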
20 | -------------------------------------------------------------------------------- /getting-started.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | ## Dependencies 4 | 5 | - `make` (POSIX) 6 | - Python 2 for the `test` script 7 | - `gcc` >= 4.8 8 | - `g++` >= 4.8 9 | - Java >= 1.7 10 | 11 | ## Usage 12 | 13 | Run a single test: 14 | 15 | ./test QuickSort.java 16 | 17 | or equivalently: 18 | 19 | ./test src/java/QuickSort.java 20 | 21 | This will build the program, choose and feed input data to it, and check if the output is as expected. 22 | 23 | Run all tests for a given language: 24 | 25 | ./test java 26 | 27 | or equivalently: 28 | 29 | ./test src/java 30 | 31 | Run all tests for all languages: 32 | 33 | ./test 34 | 35 | More info at (TODO implement help command): 36 | 37 | ./test --help 38 | -------------------------------------------------------------------------------- /undecidability.md: -------------------------------------------------------------------------------- 1 | # Undecidability 2 | 3 | 4 | 5 | It can be proved that for certain problems, there is no Turing machine (and therefore no algorithm) that solves all of their instances. 6 | 7 | Cool examples of such problems: 8 | 9 | - halting problem. First discovered example. Proof of undecidability via Cantor's diagonal method. 10 | 11 | There may however be Turing machines which solve specific families of instances, but there cannot be a finite number of such Turing machines that together solve all instances, otherwise we could just run them all in parallel until one of them halts. 12 | 13 | Those problems are called undecidable, and solving them in general is impossible with Turing machines.
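The diagonal argument behind the halting problem can be sketched in runnable form (illustrative example added here, not from the original text; `halts` stands for any purported total decider):

```python
def make_g(halts):
    """Given any purported halting decider, build a program g
    that does the opposite of what the decider predicts for it."""
    def g():
        if halts(g):
            while True:  # decider said "g halts": loop forever instead
                pass
        # decider said "g loops": return (halt) instead
    return g

# whatever a candidate decider answers about g, the answer is wrong:
for verdict in (True, False):
    halts = lambda program, v=verdict: v  # a (necessarily wrong) candidate
    g = make_g(halts)
    g_actually_halts = not halts(g)  # by construction of g
    assert halts(g) != g_actually_halts
```

No real `halts` can exist, because feeding it its own diagonal program forces a contradiction.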
14 | -------------------------------------------------------------------------------- /a-star.md: -------------------------------------------------------------------------------- 1 | # A star TODO 2 | 3 | # A* algorithm 4 | 5 | Does the same as Dijkstra, but assumes that extra knowledge is known about the graph. 6 | 7 | That extra knowledge is an estimate $h$ of the distance between the current node and the target, such as the straight-line or Manhattan distance. 8 | 9 | The algorithm then first explores nodes with the lowest: $current distance + h(next node)$ 10 | 11 | Good tutorial: 12 | 13 | - 14 | 15 | Let $h(x)$ be the estimate. 16 | 17 | - $h(x)$ is never larger than the actual distance to the goal (admissible): finding an optimal path is guaranteed. 18 | - $h(x) = 0$: same as Dijkstra 19 | - $h(x)$ is the exact distance to the destination: only the correct path is explored. 20 | -------------------------------------------------------------------------------- /data-gen/sum_array: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Generate a uniformly distributed array of integers.
5 | """ 6 | 7 | import os.path 8 | import random 9 | import sys 10 | 11 | minv = -2**16 12 | maxv = 2**16 - 1 13 | 14 | if len(sys.argv) > 1: 15 | n = int(sys.argv[1]) 16 | else: 17 | n = 1000 18 | 19 | path_base = os.path.basename(__file__) 20 | 21 | sum = 0 22 | with open(path_base + '.in', 'w') as f: 23 | f.write(str(n) + '\n') 24 | for i in xrange(n - 1): 25 | r = random.randint(minv, maxv) 26 | sum += r 27 | f.write(str(r) + ' ') 28 | r = random.randint(minv, maxv) 29 | sum += r 30 | f.write(str(r) + '\n') 31 | 32 | with open(path_base + '.out', 'w') as f: 33 | f.write(str(sum) + '\n') 34 | -------------------------------------------------------------------------------- /change-making.md: -------------------------------------------------------------------------------- 1 | # Change making 2 | 3 | 4 | 5 | Knapsack-like problem where: 6 | 7 | - each weight is fixed to 1 8 | - the goal must be reached exactly 9 | - infinite supply of items 10 | 11 | Even the feasibility problem is NP-complete. 12 | 13 | If the input set is , like the coin denominations of most countries, then the greedy algorithm works. This is why there cannot be a 3 euro coin: otherwise 3 + 2 + 1 = 6 > 5, and change making would be NP-complete. 14 | 15 | ## Implementations 16 | 17 | - [make_change.cpp](src/cpp/make_change.cpp) 18 | 19 | ## Bibliography 20 | 21 | - 22 | -------------------------------------------------------------------------------- /insertion-sort.md: -------------------------------------------------------------------------------- 1 | # Insertion sort 2 | 3 | 4 | 5 | In practice, used when the inputs are very small, e.g. < 10. Otherwise, a better worst case algorithm is chosen. 6 | 7 | Create a new list (possibly an in-place sublist), and insert elements into it so that it stays sorted all the way. 8 | 9 | The complexities depend on the underlying data structures. 10 | 11 | Arrays and linked lists: $O(N^2)$ time.
Although the run time intuitively looks like $N log(N)$, it is not because: 12 | 13 | - for arrays, you have to shift several elements to the right at each insertion 14 | - for lists, the insertion is cheap, but you need linear time to find elements 15 | 16 | However, if a more advanced data structure like a skip list is used, it is possible to have $N log(N)$. 17 | -------------------------------------------------------------------------------- /stack-overflow-data-dump/users_rep_view_dat.gnuplot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gnuplot 2 | 3 | set terminal png size 1024,1024 4 | set output "users_rep_view.png" 5 | 6 | #set terminal canvas mousing 7 | #set termoption enhanced 8 | #set output "users_rep_view.html" 9 | 10 | #set terminal wxt size 1024,1024 11 | 12 | set logscale x 13 | set logscale y 14 | set key noautotitle 15 | set xlabel "Reputation" 16 | set ylabel "Profile Views" 17 | set cblabel "User Id" 18 | plot "users_rep_view.dat" using 2:3:1 palette 19 | # TODO would allow interactive inspection with mouseover on HTML output, but makes graph generation too slow! 20 | # https://groups.google.com/forum/#!topic/comp.graphics.apps.gnuplot/qpL8aJIi9ZE 21 | #plot "users_rep_view.dat" \ 22 | # using 2:3:(sprintf("(%d, %s)", $1, stringcolumn(4))) \ 23 | # with labels hypertext point pt 7 24 | -------------------------------------------------------------------------------- /amortized-analysis.md: -------------------------------------------------------------------------------- 1 | # Amortized analysis 2 | 3 | Considers the average operation time on possible sequences of operations. 4 | 5 | Sources: 6 | 7 | 8 | 9 | [cormen09][] chapter 17 "Amortized Analysis". 10 | 11 | Famous examples: 12 | 13 | - array backed heap. Worst case insert and delete is linear as it may require a copy operation to increase capacity. 
14 | 15 | But those operations can only happen exponentially rarely if we only do additions, so the amortized cost is still constant. 16 | 17 | Note how the operation choice matters: if we allow both insertion and deletion, then a sequence of insertions / deletions right at the discontinuity could still have linear time per operation. 18 | 19 | - analysis, which is famously `O(1)` amortized, which is stronger than the `O(1)` average of binary heaps. 20 | -------------------------------------------------------------------------------- /src/cpp/interactive/cpu_bound_parallel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Dummy operation that uses a lot of CPU, but very little memory. 3 | 4 | Parallel version, which should be scalably faster than the serial control. 5 | 6 | TODO: find something that can be asserted. 7 | */ 8 | 9 | #include "common.hpp" 10 | #include "cpu_bound.hpp" 11 | 12 | int main() { 13 | unsigned int i; 14 | unsigned int nThreads = std::thread::hardware_concurrency(); 15 | std::thread *threads = new std::thread[nThreads]; 16 | std::vector<unsigned int> ios(nThreads); 17 | for (i = 0; i < nThreads; ++i) 18 | ios[i] = i; 19 | for (i = 0; i < nThreads; ++i) 20 | threads[i] = std::thread(cpu_bound, &ios[i], nIters); 21 | for (i = 0; i < nThreads; ++i) 22 | threads[i].join(); 23 | delete[] threads; 24 | for (i = 0; i < nThreads; ++i) 25 | std::cout << ios[i] << '\n'; 26 | } 27 | -------------------------------------------------------------------------------- /b-tree.md: -------------------------------------------------------------------------------- 1 | # B-tree TODO 2 | 3 | *Not* binary tree: . 4 | 5 | Like a binary tree with many values per node: 6 | 7 | ![btree](btree.png) 8 | 9 | Notable sub-cases: 10 | 11 | - 2-3 . Equivalent to AA-trees, which are somewhat like RB-trees (TODO understand exactly how) 12 | - 2-3-4 . Equivalent to RB-trees! TODO how. 13 | 14 | Same complexity as RB-tree, but slower in theory by a constant factor.
15 | 16 | Default data structure for MySQL InnoDB `INDEX`. 17 | 18 | In practice: way faster for accessing trees saved in slow storage like hard disks. 19 | Disadvantages: 20 | - to go down each level, you must traverse all values of a node 21 | - less memory efficient 22 | 23 | Advantage: 24 | 25 | - fewer nodes need to be retrieved. Huge practical gains here when reading data from slow media. 26 | -------------------------------------------------------------------------------- /src/java/StringSearchNaive.java: -------------------------------------------------------------------------------- 1 | import java.util.Arrays; 2 | 3 | import lib.Searcher; 4 | import lib.StringSearch; 5 | 6 | /** Naive string search implementation. */ 7 | public class StringSearchNaive implements Searcher { 8 | 9 | int[] pattern; 10 | 11 | public void preProcess(int[] pattern) { this.pattern = pattern; } 12 | 13 | public int search(int[] text) { 14 | int lastPatternI = this.pattern.length; 15 | int lastTextI = text.length - lastPatternI; 16 | outer: 17 | for (int i = 0; i <= lastTextI; i++) { 18 | for (int j = 0; j < lastPatternI; j++) { 19 | if (text[i + j] != this.pattern[j]) 20 | continue outer; 21 | } 22 | return i; 23 | } 24 | return -1; 25 | } 26 | 27 | public static void main(String[] args) throws Throwable { 28 | StringSearch.test(new StringSearchNaive()); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /stack-overflow-data-dump/users_rep_view_dat_to_html.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from signal import signal, SIGPIPE, SIG_DFL 4 | import sys 5 | 6 | dat_path = sys.argv[1] 7 | 8 | signal(SIGPIPE, SIG_DFL) 9 | sys.stdout.write('<table><tr><th>Id</th><th>Reputation</th><th>Views</th><th>DisplayName</th></tr>\n') 10 | with open(dat_path, 'r', encoding='utf-8') as f: 11 | next(f) 12 | for line in f: 13 | id, reputation, views, name = line[:-1].split(' ') 14 | sys.stdout.write( 15 | '<tr>' + 16 | '<td>' + id + '</td>' + 17 | '<td>' + reputation + '</td>' + 18 | '<td>' + views + '</td>' + 19 | '<td>' + name + '</td>' + 20 | '</tr>\n' 21
| ) 22 | sys.stdout.write('</table>
\n') 23 | -------------------------------------------------------------------------------- /src/java/lib/Sort.java: -------------------------------------------------------------------------------- 1 | package lib; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileReader; 5 | import java.util.Scanner; 6 | 7 | /** Shared sort input parsing. */ 8 | public class Sort { 9 | public static void test(String path, IntArrayConsumer consumer) throws Throwable { 10 | BufferedReader br = new BufferedReader(new FileReader(path)); 11 | String sizeS = br.readLine(); 12 | String inS = br.readLine(); 13 | int size = Integer.parseInt(sizeS.trim()); 14 | Scanner scanner; 15 | int i; 16 | int[] in = new int[size]; 17 | scanner = new Scanner(inS); 18 | i = 0; 19 | while (scanner.hasNextInt()) { 20 | in[i] = scanner.nextInt(); 21 | i++; 22 | } 23 | consumer.accept(in); 24 | for (i = 0; i < in.length - 1; i++) { 25 | System.out.print(in[i] + " "); 26 | } 27 | System.out.print(in[in.length - 1] + "\n"); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /stack-overflow-data-dump/users_xml_to_rep_view_dat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import xml.parsers.expat 5 | 6 | xml_path = sys.argv[1] 7 | 8 | def tag_open(tag, attrs): 9 | if tag == 'row': 10 | row = [] 11 | row.append(attrs['Id']) 12 | row.append(attrs['Reputation']) 13 | row.append(attrs['Views']) 14 | clean_display_name = [] 15 | bad_chars = {' ', '\n', '\r'} 16 | for c in attrs['DisplayName']: 17 | if c in bad_chars: 18 | clean_display_name.append('_') 19 | else: 20 | clean_display_name.append(c) 21 | clean_display_name = ''.join(clean_display_name) 22 | row.append(clean_display_name) 23 | sys.stdout.write((' '.join(row) + '\n')) 24 | 25 | print('Id Reputation Views DisplayName') 26 | parser = xml.parsers.expat.ParserCreate() 27 | parser.StartElementHandler = tag_open 
28 | with open(xml_path, 'br') as f: 29 | parser.ParseFile(f) 30 | -------------------------------------------------------------------------------- /bst.md: -------------------------------------------------------------------------------- 1 | # BST 2 | 3 | Binary search tree. 4 | 5 | ## Find next element 6 | 7 | Given one element, find the next one. 8 | 9 | - 10 | - 11 | 12 | Algorithm: 13 | 14 | - if there is a right node 15 | - go right once, then down left all the way 16 | - the next element must be on the right subtree, and the first thing there is left all the way 17 | - else 18 | - go up until you come up from the left or the root is reached 19 | - if the root is reached from the right, it's over 20 | - if you come up from the left, the parent must be the next thing to be visited 21 | - if you come up from the right, the parent is smaller and has already been visited. TODO clearer proof that this works. 22 | 23 | ## Delete element 24 | 25 | Hard: 26 | -------------------------------------------------------------------------------- /programming-languages.md: -------------------------------------------------------------------------------- 1 | # Programming languages 2 | 3 | interesting, no pointers. 4 | 5 | ## Algebraic data types 6 | 7 | TODO why is the concept useful? 8 | 9 | Why does C not have them through `struct` and `union`? 10 | 11 | 12 | 13 | ## Go 14 | 15 | Vs other garbage collected languages: 16 | 17 | - 18 | 19 | ## Scratch 20 | 21 | 22 | 23 | Educational, visual. Really good. 24 | 25 | File format for 2.0 is a zipped JSON: but apparently it does not look much like the visual representation. 26 | 27 | Hardcore projects: 28 | 29 | - Doom-like FPS 30 | -------------------------------------------------------------------------------- /dfa.md: -------------------------------------------------------------------------------- 1 | # DFA 2 | 3 | Deterministic finite automaton. 4 | 5 | 6 | 7 | Recognizes the same languages as regular grammars.
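As a small added example (not in the original dfa.md), a DFA over the alphabet {0, 1} recognizing the regular language of strings that end in "01":

```python
# state 0: start / no useful suffix seen
# state 1: current suffix ends in '0'
# state 2: current suffix ends in '01' (accepting)
TRANSITIONS = {
    (0, '0'): 1, (0, '1'): 0,
    (1, '0'): 1, (1, '1'): 2,
    (2, '0'): 1, (2, '1'): 0,
}
ACCEPTING = {2}

def dfa_accepts(s):
    state = 0
    for c in s:  # one transition per input symbol, no backtracking
        state = TRANSITIONS[(state, c)]
    return state in ACCEPTING

assert dfa_accepts('01')
assert dfa_accepts('1101')
assert not dfa_accepts('10')
assert not dfa_accepts('')
```

The same language is generated by the regular expression `(0|1)*01` and by a right-linear regular grammar.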
8 | 9 | ## Minimization 10 | 11 | It is possible to algorithmically minimize a DFA to an equivalent one with the smallest possible number of states. 12 | 13 | The minimum is unique up to renaming, so it is also a good canonical form. 14 | 15 | Hopcroft (1971) does it in $O(n log n)$. 16 | 17 | ## Non-deterministic 18 | 19 | Equivalent in power to deterministic, proved in 1959. 20 | 21 | Interestingly, the same is not the case for: 22 | 23 | - push down automata, in which deterministic ones are less powerful than non-deterministic ones 24 | - omega automata 25 | 26 | Deterministic Turing machines are also equivalent to NTMs, but the change alters the complexity of computations. 27 | 28 | ## Omega automaton 29 | 30 | 31 | 32 | Looks just like a finite automaton, but with one of several rules for what it means to accept an infinite input. 33 | 34 | Non-deterministic is strictly more powerful than deterministic in this case. 35 | -------------------------------------------------------------------------------- /ll-parser.md: -------------------------------------------------------------------------------- 1 | # LL parser 2 | 3 | Efficiently parses a strict subset of the context-free languages. 4 | 5 | Many popular languages are designed to be $LL(1)$. 6 | 7 | TODO find a list of popular languages that are $LL(1)$ and a list of those which are not. 8 | 9 | TODO understand, example. 10 | 11 | DPDAs are the related automata. 12 | 13 | ## LL(*) star 14 | 15 | Can look ahead by regular languages instead of finite numbers of characters. 16 | 17 | ## Ambiguity 18 | 19 | $LL(k)$ grammars are unambiguous. 20 | 21 | ## Recursive descent parser 22 | 23 | TODO 24 | 25 | ## Related languages 26 | 27 | Relationships: 28 | 29 | ### LR parser 30 | 31 | Cannot be ambiguous. 32 | 33 | $LL(k) \subsetneq LR(k)$, Rosenkrantz (1969). 34 | 35 | $LR(0) \subsetneq LR(1) = LR(n)$ 36 | 37 | Unambiguous. 38 | 39 | ### SSR 40 | 41 | ### SSL 42 | 43 | TODO 44 | 45 | ### LALR 46 | 47 | $LALR(k)$ is incomparable with $LL(k)$.
48 | 49 | TODO 50 | 51 | ### GLR 52 | 53 | 54 | 55 | TODO 56 | -------------------------------------------------------------------------------- /stack-overflow-data-dump/users_rep_view_dat_to_matplotlib_svg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ''' 4 | Produce one huge 2.2G SVG with clickable points. Chromium 73 can't 5 | handle it though, it just becomes too slow. Generation takes 30 mins 6 | as well, which is not cool. 7 | 8 | Bibliography: 9 | 10 | - https://stackoverflow.com/questions/12387585/python-matplotlib-svg-and-hyperlinks-in-text-labels 11 | - https://stackoverflow.com/questions/15417586/python-matlplotlib-add-hyperlink-to-text 12 | ''' 13 | 14 | import sys 15 | 16 | import numpy as np 17 | import matplotlib.pyplot as plt 18 | 19 | dat_path = sys.argv[1] 20 | 21 | reputations = [] 22 | profile_view_counts = [] 23 | urls = [] 24 | with open(dat_path, 'r', encoding='utf-8') as f: 25 | next(f) 26 | for line in f: 27 | line = line[:-1] 28 | id, reputation, profile_view_count, name = line.split(' ') 29 | reputations.append(int(reputation)) 30 | profile_view_counts.append(int(profile_view_count)) 31 | urls.append('https://stackoverflow.com/users/' + id) 32 | f = plt.figure() 33 | s = plt.scatter(reputations, profile_view_counts) 34 | s.set_urls(urls) 35 | f.canvas.print_figure('users_rep_view_dat_matplotlib.svg') 36 | -------------------------------------------------------------------------------- /ecdsa.md: -------------------------------------------------------------------------------- 1 | # ECDSA 2 | 3 | Elliptic Curve Digital Signature Algorithm. 4 | 5 | ## Requirements 6 | 7 | Knowledge of public key cryptography in general will be helpful. 8 | 9 | ## Sources 10 | 11 | - 12 | 13 | Shows the computations step by step. 14 | 15 | Does not talk much about the deeper theory behind the curve.
16 | 17 | ## Vs RSA 18 | 19 | RSA's integer factorization problem was proposed in 1979, but much progress has been made on attacking it. 20 | 21 | ECC was proposed in 1985 and virtually no progress has been made on attacking it. 22 | 23 | This is why new protocols that come out, like Bitcoin, tend to use ECC. 24 | 25 | TLS supports both RSA and ECC. As of 2014, RSA is much more popular because it came first. 26 | 27 | ## Asymmetry basis 28 | 29 | Just like RSA, ECC is based on an operation which no one knows how to invert efficiently: 30 | thus both are asymmetric cryptography. 31 | 32 | In the case of RSA it is multiplication of primes. 33 | For ECC it is raising a finite group element to a power: 34 | the inversion problem is called the 35 | 36 | 37 | Shor's quantum algorithm would be able to break both efficiently. 38 | 39 | ## Signing 40 | 41 | Like RSA, ECDSA can be used for both: 42 | 43 | - encryption 44 | - signature 45 | -------------------------------------------------------------------------------- /src/cpp/common.hpp: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H 2 | #define COMMON_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | /* 19 | Read file at `path` as an array. Its format should be as in `data/sort`. 20 | */ 21 | std::vector<int> parse_array(std::string path) { 22 | int i, size, value; 23 | std::ifstream infile(path); 24 | std::string inputS; 25 | std::string sizeS; 26 | std::vector<int> input; 27 | 28 | std::getline(infile, sizeS); 29 | size = std::stoi(sizeS); 30 | input.reserve(size); 31 | 32 | std::getline(infile, inputS); 33 | std::istringstream iss(inputS); 34 | for (int i = 0; i < size; ++i) { 35 | iss >> value; 36 | input.push_back(value); 37 | } 38 | return input; 39 | } 40 | 41 | /* 42 | Print the vector to stdout space separated.
43 | */ 44 | void print_array(std::vector output) { 45 | for (unsigned int i = 0; i < output.size() - 1; ++i) { 46 | std::cout << output[i] << ' '; 47 | } 48 | std::cout << output[output.size() - 1] << '\n'; 49 | } 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/java/QuickSort.java: -------------------------------------------------------------------------------- 1 | import lib.Sort; 2 | import lib.IntArrayConsumer; 3 | 4 | /** Naive quicksort recursive implementation. */ 5 | public class QuickSort implements IntArrayConsumer { 6 | 7 | public void accept(final int[] in) { 8 | acceptRecursive(in, 0, in.length - 1); 9 | } 10 | 11 | private static void acceptRecursive( 12 | final int[] in, 13 | final int leftI, 14 | final int rightI) { 15 | if (leftI < rightI) { 16 | int smallI = leftI; 17 | int bigI = leftI; 18 | final int pivot = in[rightI]; 19 | while (bigI < rightI) { 20 | final int big = in[bigI]; 21 | if (big < pivot) { 22 | int smallBuf = in[smallI]; 23 | in[smallI] = big; 24 | in[bigI] = smallBuf; 25 | smallI++; 26 | } 27 | bigI++; 28 | } 29 | in[rightI] = in[smallI]; 30 | in[smallI] = pivot; 31 | acceptRecursive(in, leftI, smallI - 1); 32 | acceptRecursive(in, smallI + 1, rightI); 33 | } 34 | } 35 | 36 | public static void main(String[] args) throws Throwable { 37 | Sort.test(args[0], new QuickSort()); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /rb-tree.md: -------------------------------------------------------------------------------- 1 | # RB-tree TODO 2 | 3 | # Red black tree 4 | 5 | Balanced binary search tree. 6 | 7 | Red black tree are binary search trees that support element query, insertion and deletion in $log(n)$ time. 8 | 9 | All that is needed is to store one extra bit per node: red or black. 10 | 11 | It must have the following properties: 12 | 13 | - A node is either red or black. 14 | 15 | - The root is black. 
16 | 17 | - All leaves (NIL) are black. 18 | 19 | - Both children of every red node are black. 20 | 21 | Children of a black node can have any color. 22 | 23 | - Every simple path from a given node to any of its descendant leaves 24 | contains the same number of black nodes. 25 | 26 | A leaf is not a node and a node is not a leaf in this context. Nodes contain the actual numbers that are being indexed, leafs don't contain any information, except indicating that there are no more nodes below. 27 | 28 | Leafs in this context are also called sentinels. 29 | 30 | Operations are $log(n)$ because the properties imply that the tree is balanced in the sense that the deepest leaf from a node can only be twice as deep as the shallowest one. 31 | 32 | The key to using the tree is keeping it balanced after insertion or deletion. 33 | 34 | Live Java applet demo: . Requires a bit too many clicks, but really cool. 35 | 36 | TODO visualize insert 37 | -------------------------------------------------------------------------------- /src/c/stack.c: -------------------------------------------------------------------------------- 1 | /* 2 | Linked list based stack. 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | size_t getline(char **lineptr, size_t *n, FILE *stream); 9 | 10 | typedef struct Stack Stack; 11 | struct Stack { 12 | Stack *next; 13 | int value; 14 | }; 15 | 16 | int Stack_push(Stack **this, int value) { 17 | Stack *new; 18 | new = malloc(sizeof(Stack)); 19 | if (new == NULL) return 1; 20 | new->value = value; 21 | new->next = *this; 22 | *this = new; 23 | return 0; 24 | } 25 | 26 | int Stack_pop(Stack **this, int *ret) { 27 | Stack *buffer; 28 | if (*this == NULL) return 1; 29 | *ret = (*this)->value; 30 | buffer = (*this)->next; 31 | free(*this); 32 | *this = buffer; 33 | return 0; 34 | } 35 | 36 | int main(void) { 37 | size_t maxLineLen = 1024; 38 | char *line = (char*)malloc(maxLineLen); 39 | Stack *s = NULL; 40 | /* 41 | TODO improve API. 
42 | 43 | while(getline(&line, &maxLineLen, stdin) != -1) { 44 | char *p = strtok(line, " "); 45 | while( p != NULL) { 46 | int d = atoi(p); 47 | Stack_push(&s, d); 48 | p = strtok(NULL, " "); 49 | } 50 | } 51 | while(s != NULL) { 52 | int ret; 53 | Stack_pop(&s, &ret); 54 | printf("%d ", ret); 55 | Stack_pop(&s, &ret); 56 | } 57 | */ 58 | } 59 | -------------------------------------------------------------------------------- /src/java/KnuthMorrisPrattAlgs4.java: -------------------------------------------------------------------------------- 1 | import lib.Searcher; 2 | import lib.StringSearch; 3 | 4 | /* 5 | Knuth-Morris-Pratt algorithm, initially taken from Sedgewick - Algorithms 4th edition 6 | http://algs4.cs.princeton.edu/53substring/KMPplus.java 7 | */ 8 | public class KnuthMorrisPrattAlgs4 implements Searcher { 9 | private int[] pattern; 10 | private int[] skip; 11 | 12 | public void preProcess(int[] pattern) { 13 | this.pattern = pattern; 14 | int M = pattern.length; 15 | skip = new int[M]; 16 | int j = -1; 17 | for (int i = 0; i < M; i++) { 18 | if (i == 0) 19 | skip[i] = -1; 20 | else if (pattern[i] != pattern[j]) 21 | skip[i] = j; 22 | else 23 | skip[i] = skip[j]; 24 | while (j >= 0 && pattern[i] != pattern[j]) { 25 | j = skip[j]; 26 | } 27 | j++; 28 | } 29 | } 30 | 31 | public int search(int[] text) { 32 | int M = pattern.length; 33 | int N = text.length; 34 | int i, j; 35 | for (i = 0, j = 0; i < N && j < M; i++) { 36 | while (j >= 0 && text[i] != pattern[j]) 37 | j = skip[j]; 38 | j++; 39 | } 40 | if (j == M) 41 | return i - M; 42 | return -1; 43 | } 44 | 45 | public static void main(String[] args) throws Throwable { 46 | StringSearch.test(new KnuthMorrisPrattAlgs4()); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /out-of-core.md: -------------------------------------------------------------------------------- 1 | # Out of core algorithms 2 | 3 | # IO algorithms 4 | 5 | # External memory 6 | 7 | 8 
| 9 | Sometimes algorithms must operate on data that is too large to fit in RAM and has to live on slower storage, e.g. hard disks. In those cases, it may be necessary to take that into consideration, since disk IO is about 1000× slower than RAM access. 10 | 11 | Certain algorithms are developed with that restriction in mind, e.g. the B-tree, which is less efficient than binary search trees for in-RAM computing, but much more efficient for out-of-core problems. 12 | 13 | There is no simple way of modeling the performance of out-of-core algorithms: we just have to give different weights to certain operations, and then solve complex numerical optimization problems. 14 | 15 | Sample problems: 16 | 17 | - read several different sized inputs from a file, e.g. one per line, in a way that each line must be completely loaded and processed one at a time. 18 | 19 | This could happen if you have for example a file with many inputs to be sorted like: 20 | 21 | 3 1 2 22 | 3 23 | 14 3 2 1 2 24 | 25 | The `tac` command line utility (`cat` reversed line-wise, in GNU Coreutils) is another example. While `cat` can be implemented trivially (you don't need to have the entire line to print it to stdout), `tac` must first read entire lines backwards before putting them on stdout. 26 | 27 | ## Sources 28 | 29 | - 30 | - 31 | -------------------------------------------------------------------------------- /trie.md: -------------------------------------------------------------------------------- 1 | # Trie 2 | 3 | # Prefix tree 4 | 5 | 6 | 7 | 8 | 9 | ## Vs other maps 10 | 11 | Advantages: 12 | 13 | - can search by prefix 14 | - easier to search for: 15 | - missing characters 16 | - exchanged characters 17 | 18 | Disadvantages: 19 | 20 | - I think tries take up more space. For each level, you have to store an array with ALL the possible numbers. 21 | 22 | - a large alphabet is less space efficient but faster (you go down fewer levels). 23 | 24 | An infinite alphabet is an array: O(1) worst case retrieval.
25 | 26 | - a single bit alphabet (2 paths per node) is equivalent to TODO: I think a BST, if we compress it into a radix tree. 27 | 28 | - a zero bit alphabet (1 path per node) is equivalent to TODO: I think a linked list, if we compress it into a radix tree. 29 | 30 | Bibliography: 31 | 32 | - 33 | 34 | ### Vs B-Tree 35 | 36 | B-trees also have a branching factor parameter similar to the alphabet size: 37 | 38 | - 39 | 40 | The major difference seems to be which kind of query you want to make: 41 | 42 | - "X is between Y and Z" queries? Then B-tree. 43 | - "X has prefix Y" queries? Trie. 44 | 45 | ## Patricia trie 46 | 47 | Some people differentiate it from the plain trie, others don't: 48 | 49 | - 50 | -------------------------------------------------------------------------------- /tree.md: -------------------------------------------------------------------------------- 1 | # Tree 2 | 3 | Trees are special cases of directed connected graphs that have the properties: 4 | 5 | - no loops 6 | - a root: there is a special node called the root. Picking a different root on the same underlying graph generates a different tree. 7 | 8 | The great advantage of trees is perhaps the ability to create balanced trees, which have $O(log(n))$ height. 9 | 10 | It also simplifies search procedures since you don't need to check if you have already visited some nodes, as there can be no loops. 11 | 12 | Trees have $E = V - 1$. 13 | 14 | ## BST 15 | 16 | ## Binary search tree 17 | 18 | Each node has up to 2 children. 19 | 20 | Not necessarily balanced, so the worst case bound for all operations is $O(n)$, although average times are $log(n)$. 21 | 22 | There are balanced search trees such as the RB-tree which actually have $O(log(n))$ worst case for all operations.
23 | 24 | The only complicated operation is delete, visualize it here: 25 | 26 | ## Array-backed binary tree 27 | 28 | 29 | 30 | Represents the tree as: 31 | 32 | 1 33 | 2 3 34 | 4 5 6 7 35 | 36 | On the array it becomes: 37 | 38 | 1 2 3 4 5 6 7 39 | 40 | Then: 41 | 42 | - `child[i][0] == array[2*i]` 43 | - `child[i][1] == array[2*i + 1]` 44 | - `parent[i] == array[i/2]` 45 | 46 | Upside: memory efficient. Don't store any pointers: only the raw data. 47 | 48 | Downside: BST operations like insert and rebalance are expensive as they require moving lots of array elements around. 49 | -------------------------------------------------------------------------------- /bubble-sort.md: -------------------------------------------------------------------------------- 1 | # Bubble sort 2 | 3 | Very slow. 4 | 5 | Very simple. Shortest code. 6 | 7 | Worst: $n^2$ time, $1$ space (in place). 8 | 9 | Average: $n^2$. 10 | 11 | For comparison: 12 | 13 | - quicksort. Worst: $n^2$ time (extremely rare in practice), $n$ space. 14 | 15 | ## Bubble visualization 16 | 17 | Start with: 18 | 19 | 20 | 4 2 1 3 21 | 22 | We compare two by two and exchange if needed: 23 | 24 | 4 2 1 3 25 | ^ ^ 26 | 27 | 4 is larger than 2, so exchange, giving: 28 | 29 | 2 4 1 3 30 | ^ ^ 31 | 32 | Move forward: 33 | 34 | 2 4 1 3 35 | ^ ^ 36 | 37 | 4 is larger than 1, so exchange, giving: 38 | 39 | 2 1 4 3 40 | ^ ^ 41 | 42 | Move forward: 43 | 44 | 2 1 4 3 45 | ^ ^ 46 | 47 | 4 is larger than 3, so exchange, giving: 48 | 49 | 2 1 3 4 50 | ^ ^ 51 | 52 | We reached the end. Restart: 53 | 54 | 2 1 3 4 55 | ^ ^ 56 | 57 | Then: 58 | 59 | 1 2 3 4 60 | ^ ^ 61 | 62 | 1 2 3 4 63 | ^ ^ 64 | 65 | 2 is smaller than 3, don't exchange. 66 | 67 | 1 2 3 4 68 | ^ ^ 69 | 70 | We reached the end, restart: 71 | 72 | 1 2 3 4 73 | ^ ^ 74 | 75 | 1 2 3 4 76 | ^ ^ 77 | 78 | 1 2 3 4 79 | ^ ^ 80 | 81 | We made no exchanges on the latest pass, so we are done. As we can see, the list is sorted.
82 | 83 | ## Correctness 84 | 85 | TODO 86 | 87 | ## Complexity 88 | 89 | For an input of length $n$, an upper bound for the worst case is $n^2$ because TODO. 90 | 91 | But then for each $n$ we can generate a case which takes $n^2$ TODO 92 | 93 | Therefore the worst case complexity must be $n^2$. 94 | -------------------------------------------------------------------------------- /src/c/compare_delimiter.c: -------------------------------------------------------------------------------- 1 | /* 2 | Compare two arrays up to the delimiter 0. 3 | 4 | Analogous to strcmp, or any other comparison where 5 | we do not know the length of both arrays. 6 | 7 | The delimiter is considered as the smallest possible element. 8 | So a shorter prefix is smaller than a longer string that includes it. 9 | */ 10 | 11 | #include <assert.h> 12 | #include <stdlib.h> 13 | 14 | int cmp(int *a, int *b, int delimiter) { 15 | int b_delim; 16 | while (1) { 17 | b_delim = (*b == delimiter); 18 | if (*a == delimiter) { 19 | if (b_delim) { 20 | return 0; 21 | } else { 22 | return -1; 23 | } 24 | } else if (b_delim) { 25 | return 1; 26 | } else { 27 | if (*a < *b) { 28 | return -1; 29 | } else if (*a > *b) { 30 | return 1; 31 | } 32 | } 33 | a++; 34 | b++; 35 | } 36 | } 37 | 38 | int main(void) { 39 | const int d = 0; 40 | typedef int t[]; 41 | 42 | /* Basic. */ 43 | assert(cmp((t){1, 2, 3, d}, (t){1, 2, 3, d}, d) == 0); 44 | assert(cmp((t){1, 2, 3, d}, (t){1, 2, 4, d}, d) < 0); 45 | assert(cmp((t){1, 2, 4, d}, (t){1, 2, 3, d}, d) > 0); 46 | 47 | /* Different lengths. */ 48 | assert(cmp((t){1, 2, d}, (t){1, 2, 3, d}, d) < 0); 49 | assert(cmp((t){1, 2, 3, d}, (t){1, 2, d}, d) > 0); 50 | 51 | /* Delimiter in the middle. */ 52 | assert(cmp((t){1, d, 2}, (t){1, d, 3}, d) == 0); 53 | 54 | /* Empty.
*/ 55 | assert(cmp((t){d}, (t){d}, d) == 0); 56 | 57 | return EXIT_SUCCESS; 58 | } 59 | -------------------------------------------------------------------------------- /src/cpp/quick_sort.cpp: -------------------------------------------------------------------------------- 1 | #include <algorithm> // swap 2 | #include <tuple> 3 | #include <utility> // pair 4 | 5 | #include "common.hpp" 6 | 7 | /** 8 | Sort the input vector via quick sort. 9 | 10 | Same interface as MergeSort. 11 | 12 | Worst case time complexity: $O(input.size() ^ 2)$. 13 | 14 | Average case time complexity: $O(input.size() * log(input.size()))$. 15 | 16 | Extra memory excluding input: $O(log(input.size()))$ on average for the explicit recursion stack, $O(input.size())$ worst case. 17 | 18 | # Implementation notes 19 | 20 | Quicksort is much easier to implement than mergesort. 21 | */ 22 | template <typename T> 23 | void quick_sort(std::vector<T>& input) { 24 | typedef typename std::vector<T>::size_type SizeType; 25 | SizeType left_begin, left, right, pivot; 26 | std::vector<std::pair<SizeType, SizeType>> recursion_stack{{0, input.size() - 1}}; 27 | while (!recursion_stack.empty()) { 28 | std::tie(left_begin, pivot) = recursion_stack.back(); 29 | recursion_stack.pop_back(); 30 | left = left_begin; 31 | right = left; 32 | while (right < pivot) { 33 | if (input[right] < input[pivot]) { 34 | std::swap(input[left], input[right]); 35 | left++; 36 | } 37 | right++; 38 | } 39 | std::swap(input[left], input[pivot]); 40 | if (left_begin + 1 < left) 41 | recursion_stack.push_back({left_begin, left - 1}); 42 | if (left + 1 < pivot) 43 | recursion_stack.push_back({left + 1, pivot}); 44 | } 45 | } 46 | 47 | int main(int argc, char **argv) { 48 | std::vector<int> input = parse_array(argv[1]); 49 | quick_sort(input); 50 | print_array(input); 51 | } 52 | -------------------------------------------------------------------------------- /src/java/QuickSortTail.java: -------------------------------------------------------------------------------- 1 | import lib.Sort; 2 | import lib.IntArrayConsumer; 3 | 4 | /** 5 | Quicksort with tail recursion optimization. 6 |

7 | Reduces call stack extra non-input memory worst case from N to log(N). 8 |

9 | For that to be possible, it always does the tail optimization on the longest side. 10 | */ 11 | public class QuickSortTail implements IntArrayConsumer { 12 | 13 | public void accept(final int[] in) { 14 | acceptRecursive(in, 0, in.length - 1); 15 | } 16 | 17 | private static void acceptRecursive( 18 | final int[] in, 19 | int leftI, 20 | int rightI) { 21 | while (leftI < rightI) { 22 | 23 | // Same as the non-optimized version. 24 | int smallI = leftI; 25 | int bigI = leftI; 26 | final int pivot = in[rightI]; 27 | while (bigI < rightI) { 28 | final int big = in[bigI]; 29 | if (big < pivot) { 30 | int smallBuf = in[smallI]; 31 | in[smallI] = big; 32 | in[bigI] = smallBuf; 33 | smallI++; 34 | } 35 | bigI++; 36 | } 37 | in[rightI] = in[smallI]; 38 | in[smallI] = pivot; 39 | 40 | // Tail optimize on the longer side. 41 | if (smallI - leftI > rightI - smallI) { 42 | acceptRecursive(in, smallI + 1, rightI); 43 | rightI = smallI - 1; 44 | } else { 45 | acceptRecursive(in, leftI, smallI - 1); 46 | leftI = smallI + 1; 47 | } 48 | } 49 | } 50 | 51 | public static void main(String[] args) throws Throwable { 52 | Sort.test(args[0], new QuickSortTail()); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /formal-language.md: -------------------------------------------------------------------------------- 1 | # Formal languages 2 | 3 | A language is a set of strings. 4 | 5 | A grammar for a language is a set of rules that produces exactly that language. 6 | It is not easy, given a grammar and a string, to find out how the grammar can generate the string, 7 | because at each step there are many possible actions. 8 | 9 | ## Chomsky hierarchy 10 | 11 | 12 | 13 | Famous hierarchy of certain languages that are strictly contained in each other. 
14 | 15 | The original hierarchy contains only: 16 | 17 | | Grammar | Automaton | Production rules | 18 | |------------------------|-------------------------------------------------|--------------------------------------------------| 19 | | Recursively enumerable | Turing machine | | 20 | | Context-sensitive | Linear-bounded non-deterministic Turing machine | $\alpha A \beta \rightarrow \alpha \gamma \beta$ | 21 | | Context-free | Non-deterministic pushdown automaton | $A \rightarrow \gamma$ | 22 | | Regular | Finite state automaton | $A \rightarrow a$ and $A \rightarrow aB$ | 23 | 24 | There are however many other well known languages in between those classes. 25 | 26 | - finite language: contains only a finite number of words. Strict subset of Regular. 27 | - [LL](https://en.wikipedia.org/wiki/LL_grammar), and the related LR, SLL, SLR. Useful subset of context-free. Related automaton: DPDA. 28 | 29 | ## Category of popular languages 30 | 31 | - C and C++ are ambiguous and cannot be parsed by $LR(1)$, but can be parsed by GLR: 32 | -------------------------------------------------------------------------------- /src/README.md: -------------------------------------------------------------------------------- 1 | # src 2 | 3 | Educational implementations of algorithms. 4 | 5 | For serious applications use real libraries like Boost. 6 | 7 | Multiple significantly different implementations are accepted, including in different languages and using different libraries. 8 | 9 | Add the `.off` extension to WIP files to prevent their compilation. 10 | 11 | ## Program interfaces 12 | 13 | ### Data on separate file 14 | 15 | The preferred program interface: 16 | 17 | - takes input from a data file given as a command line argument, e.g.
`./program /path/to/data/0.in` 18 | - outputs the output to stdout 19 | - outputs diagnostic messages to stderr 20 | - the exit status must be 0 on success 21 | 22 | The advantages of this method are that: 23 | 24 | - it is language agnostic 25 | - it factors out the error checking across all algorithms 26 | 27 | Although stdin input is more elegant as it does not require file IO, file input is more general as it allows for out-of-core considerations. This can be important for instance to implement [tac.c](https://github.com/cirosantilli/algorithm-cheat/blob/b09a181b686ebc33fb49a45b8264bbdde8508a9c/src/c/tac.c), since you cannot start from the end of stdin without reading everything to memory. 28 | 29 | File output may also be supported some day if we find cool optimizations that need it. 30 | 31 | ### Data with the algorithm or with the language 32 | 33 | If you have just created a program, it may be faster to put the data and assertions into: 34 | 35 | - the algorithm file directly 36 | - a shared file for the language 37 | 38 | and only return its output via the exit status, with `0` for success and non-zero for failure. 39 | 40 | But it has the following downsides: 41 | 42 | - you cannot share the data across languages 43 | - you must recompile on data changes 44 | 45 | So only use this if you are lazy and have only a single program that takes a given type of input.
46 | -------------------------------------------------------------------------------- /src/c/version_string.c: -------------------------------------------------------------------------------- 1 | /* 2 | http://stackoverflow.com/questions/15057010/comparing-version-numbers-in-c/15059401#15059401 3 | */ 4 | 5 | #include <assert.h> 6 | #include <stdlib.h> 7 | 8 | int versionCmp( char *pc1, char *pc2) 9 | { 10 | int result = 0; 11 | /* loop through each level of the version string */ 12 | while (result == 0) { 13 | /* extract leading version numbers */ 14 | char* tail1; 15 | char* tail2; 16 | unsigned long ver1 = strtoul( pc1, &tail1, 10 ); 17 | unsigned long ver2 = strtoul( pc2, &tail2, 10 ); 18 | /* if numbers differ, then set the result */ 19 | if (ver1 < ver2) 20 | result = -1; 21 | else if (ver1 > ver2) 22 | result = +1; 23 | else { 24 | /* if numbers are the same, go to next level */ 25 | pc1 = tail1; 26 | pc2 = tail2; 27 | /* if we reach the end of both, then they are identical */ 28 | if (*pc1 == '\0' && *pc2 == '\0') 29 | break; 30 | /* if we reach the end of one only, it is the smaller */ 31 | else if (*pc1 == '\0') 32 | result = -1; 33 | else if (*pc2 == '\0') 34 | result = +1; 35 | /* not at end ... so far they match so keep going */ 36 | else { 37 | pc1++; 38 | pc2++; 39 | } 40 | } 41 | } 42 | return result; 43 | } 44 | 45 | int main( void ) 46 | { 47 | assert(versionCmp("1.2.3" , "1.2.3" ) == 0); 48 | assert(versionCmp("1.2.3" , "1.2.4" ) < 0); 49 | assert(versionCmp("1.2.4" , "1.2.3" ) > 0); 50 | assert(versionCmp("10.2.4", "9.2.3" ) > 0); 51 | assert(versionCmp("9.2.4", "10.2.3") < 0); 52 | /* Leading 0 ignored. */ 53 | assert(versionCmp("01", "1") == 0); 54 | /* Any single non-digit delimiter is OK.
*/ 55 | assert(versionCmp("1a2", "1b2") == 0); 56 | return EXIT_SUCCESS; 57 | } 58 | -------------------------------------------------------------------------------- /src/cpp/hanoi.cpp: -------------------------------------------------------------------------------- 1 | #include <algorithm> // TODO why required? 2 | #include <cassert> 3 | #include <iostream> // cout, endl 4 | #include <tuple> 5 | #include <vector> 6 | #include <utility> // pair 7 | 8 | /** 9 | Solve the classic Hanoi problem starting from any of the 3 pegs `from` 10 | and ending at any of the 3 pegs `to` in the minimum amount of moves. 11 | */ 12 | template <typename T> 13 | void hanoi(int n_disks, int from, int to, 14 | std::vector<T>& output) { 15 | if (from != to) { 16 | if (n_disks == 1) { 17 | output.push_back(std::pair{from,to}); 18 | return; 19 | } else { 20 | int other = 3 - to - from; 21 | hanoi(n_disks - 1, from, other, output); 22 | output.push_back(std::pair{from,to}); 23 | hanoi(n_disks - 1, other, to, output); 24 | } 25 | } else { 26 | return; 27 | } 28 | } 29 | 30 | int main() { 31 | typedef std::vector<std::pair<int, int>> OutputType; 32 | typedef std::tuple<int, int, int, OutputType> IO; 33 | IO in_outs[]{ 34 | // One move. 35 | IO{1, 0, 1, {{0, 1}}}, 36 | IO{1, 0, 2, {{0, 2}}}, 37 | IO{1, 1, 2, {{1, 2}}}, 38 | IO{2, 0, 1, {{0, 2}, {0, 1}, {2, 1}}}, 39 | IO{2, 0, 2, {{0, 1}, {0, 2}, {1, 2}}}, 40 | IO{3, 0, 1, { 41 | {0, 1}, {0, 2}, {1, 2}, 42 | {0, 1}, {2, 0}, {2, 1}, 43 | {0, 1} 44 | }}, 45 | // Edge case: move to self.
46 | IO{1, 0, 0, {}}, 47 | IO{2, 0, 0, {}}, 48 | }; 49 | for (auto& in_out : in_outs) { 50 | OutputType output; 51 | auto& n_disks = std::get<0>(in_out); 52 | auto& from = std::get<1>(in_out); 53 | auto& to = std::get<2>(in_out); 54 | auto& expected_output = std::get<3>(in_out); 55 | hanoi(n_disks, from, to, output); 56 | assert(output == expected_output); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /hash-map.md: -------------------------------------------------------------------------------- 1 | # Hash map 2 | 3 | ## Hash map vs BST 4 | 5 | 6 | 7 | Hash map: 8 | 9 | - more efficient search / insert than trees: $O(1)$ amortized with a perfect hash function, $O(1)$ average with a uniform random hash function 10 | 11 | - worse worst case search / insert: $O(n)$ vs $O(log(n))$ 12 | 13 | The $O(n)$ search comes from a bad hash function with many conflicts. 14 | 15 | The $O(n)$ insert comes from the rehashes. 16 | 17 | - takes up more memory than the number of elements because it must have an array of entries, and it must usually be at least 50% larger than the actual number of entries. 18 | 19 | - no ordered traversal 20 | 21 | - harder to implement because a good hash function must be chosen 22 | 23 | ## Open addressing 24 | 25 | Good sources on open addressing: 26 | 27 | - 28 | - 29 | 30 | Open addressing vs. chaining: 31 | 32 | - Collision resolution. 33 | 34 | - Using an external data structure. 35 | 36 | - Using the hash table itself. 37 | 38 | - Memory waste: 39 | 40 | - Pointer size overhead per entry (storing list heads in the table). 41 | 42 | - No overhead. 43 | 44 | - Performance dependence on table's load factor 45 | 46 | - Directly proportional. 47 | 48 | - Proportional to $(loadFactor) / (1 - loadFactor)$. 49 | 50 | - Allows storing more items than the hash table size 51 | 52 | - Yes. 53 | 54 | - No. Moreover, it's recommended to keep the table's load factor below 0.7.
55 | 56 | - Hash function requirements 57 | 58 | - Uniform distribution. 59 | 60 | - Uniform distribution, should avoid clustering. 61 | 62 | - Handle removals 63 | 64 | - Removals are OK. 65 | 66 | - Removals clog the hash table with "DELETED" entries. 67 | 68 | - Implementation 69 | 70 | - Simple. 71 | 72 | - Correct implementation of an open addressing based hash table is quite tricky. 73 | -------------------------------------------------------------------------------- /recurrence-relations.md: -------------------------------------------------------------------------------- 1 | # Recurrence relations 2 | 3 | When the running time of an algorithm can be expressed in terms of the running time of the algorithm itself, but with a smaller input, we have a recurrence relation. 4 | 5 | It is often the case that recurrence relations are the easiest way to carry out the analysis. 6 | 7 | Mathematically, this means that the complexity can be expressed as: 8 | 9 | $$complexity(n) = complexity(n-1, n-2, ..., 0)$$ 10 | 11 | This means that $C(n)$ is a function of the earlier values of $C$. 12 | 13 | Recurrence relations are analogous to discrete differential equations. Solutions are known to most of the interesting cases you are likely to find, so if you manage to reduce your analysis to one, you are usually done. Because of this, finding a recurrence relation is often the easiest way of solving a problem. 14 | 15 | Recurrence relations come up often in algorithm analysis because the analysis of recursive algorithms naturally falls back to them. 16 | 17 | ## Master theorem 18 | 19 | Set of formulas popularized by Cormen's Introduction to Algorithms textbook. 20 | 21 | Those formulas are not super fundamental in a mathematical sense, but they are very useful in practical analysis. 22 | 23 | $$C(n) = a*C(n/b) + f(n)$$ 24 | 25 | Then in terms of $f$: 26 | 27 | 1. If $f(n) = O(n^{log_b(a) - e})$ for some constant $e > 0$, then $C(n) = Theta(n^{log_b(a)})$ 28 | 29 | 2.
If $f(n) = Theta(n^{log_b(a)})$, then $C(n) = Theta(n^{log_b(a)} * lg(n))$ 30 | 31 | 3. If $f(n) = Omega(n^{log_b(a) + e})$ for some constant $e > 0$, and if $a*f(n/b) <= c*f(n)$ for some $c < 1$ and all sufficiently large $n$, then $C(n) = Theta(f(n))$ 32 | 33 | There are however many cases which don't fall into the master theorem. 34 | 35 | ## Akra–Bazzi method 36 | 37 | 38 | 39 | Solution method to a large set of recurrence relations of the form shown on the wiki article. 40 | 41 | ## Solving recurrence relations 42 | 43 | In the rare case that you fall on a recurrence relation to which you don't have a known solution, you'll need to do some extra work. 44 | -------------------------------------------------------------------------------- /sort.md: -------------------------------------------------------------------------------- 1 | # Sorting algorithms 2 | 3 | To understand them, see how they work step by step on examples. 4 | 5 | ## Sources 6 | 7 | - 8 | 9 | Compare them all. 10 | 11 | - 12 | 13 | Dedicated comparison website. Pretty good. 14 | 15 | ## Algorithm comparison 16 | 17 | See this: 18 | 19 | The following aspects must be taken into account: 20 | 21 | - time 22 | - extra memory besides the input used 23 | - stability 24 | - potentially common inputs like sorted and repeated elements 25 | 26 | It can be proven that the best a general comparison based algorithm can do is $O(n log(n))$ worst case time. 27 | 28 | If extra information is known about the input, it is possible to reduce the worst case time to $O(n)$. For example, if the values are all integers between $0$ and $k$, and $k$ is $O(n)$, then counting sort has $O(n)$ worst case. 29 | 30 | There are many algorithms that sort in-place, thus achieving $O(1)$ extra memory (excluding the input itself) worst case. This is the case for quicksort and heapsort, but not for merge sort.
31 | 32 | There are algorithms that achieve both optimal time and space at the same time, such as heapsort. 33 | 34 | Another parameter to take into account is stability. Heapsort, which achieves both $n log(n)$ worst case time and $O(1)$ space, is not stable. 35 | 36 | TODO is there an algorithm that achieves $n log(n)$ time, $O(1)$ space *and* stability? 37 | 38 | In practice, considering cache performance and average cases, the following algorithms are very common and can all give good results: 39 | 40 | - quicksort 41 | - mergesort 42 | - heapsort 43 | 44 | The following are less common: 45 | 46 | - bubble sort. Very inefficient: used mostly for educational purposes 47 | 48 | ## Count 49 | 50 | ## Shuffle 51 | 52 | Pseudo inverse of sorting. 53 | 54 | ### Fisher Yates shuffle 55 | 56 | 57 | -------------------------------------------------------------------------------- /base64.md: -------------------------------------------------------------------------------- 1 | # base64 2 | 3 | Byte encoding method, and GNU Coreutils command line utility that implements it. Also possible with the POSIX 7 `uuencode` utility, which has a less convenient interface. 4 | 5 | Transforms binary data which may contain non printable bytes like ASCII 0 into data that contains only printable, non-space bytes. 6 | 7 | Advantage: makes it easier for humans to view and input the data. 8 | 9 | Disadvantage: data gets 33% larger on average: each output character represents only 64 values, thus 6 bits, so 4 characters represent 24 bits == 3 bytes, and so the 4/3 ratio. 10 | 11 | To understand see Wikipedia: 12 | 13 | `-d` to decode: 14 | 15 | assert [ "$(echo abc | base64 | base64 -d)" = 'abc' ] 16 | 17 | ## Why 64? 18 | 19 | There are at least 64 printable chars, but not 128. 20 | 21 | ## Why not use more than 64 characters?
22 | 23 | ## Base85 24 | 25 | There are more than 64 printable characters: 95 excluding space and line breaks, but 64 is a power of 2, which makes things easier. 26 | 27 | Standards exist which drop the power of 2 requirement. Base85 encodes 4 bytes in 5 characters. 85 is chosen as it is the smallest value that allows 4 bytes in 5 characters, because `85^5 > 256^4 > 84^5`. 28 | 29 | Base85 implementations are slower than Base64 and less common. Data gets 25% bigger instead of 33% as for Base64. 30 | 31 | ## Base58 32 | 33 | 34 | 35 | Base 64 with a few characters removed to make it easier for humans to read: 36 | 37 | - `I` (capital `i`) and `l` (lower case `L`) because they look alike 38 | - `O` (capital `o`) and `0` (zero) because they look alike 39 | - non alpha characters: `+` and `/` 40 | 41 | The actual order is not well specified, and more specific standards must be considered. 42 | 43 | E.g., Bitcoin and Ripple use different orders. 44 | 45 | ## Why not use hexadecimal? 46 | 47 | Even simpler for humans, but data gets much larger as the base is smaller. 48 | 49 | ## Implementations 50 | 51 | POSIX: 52 | 53 | - `uudecode` 54 | - `uuencode` 55 | -------------------------------------------------------------------------------- /src/cpp/map.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MAP_H 2 | #define MAP_H 3 | 4 | #include <utility> // pair 5 | 6 | /** 7 | Map abstract class. 8 | 9 | @tparam KEY the key type of the map 10 | @tparam VAL the value type of the map. 11 | @todo Add a key value iterator interface and implement str() in terms of it. 12 | */ 13 | template <typename KEY, typename VAL> 14 | class Map { 15 | public: 16 | /** 17 | Initialize map with key value pairs.
18 | 19 | Cannot be done from a constructor because it relies on the virtual method `add`. 20 | 21 | 22 | */ 23 | void init_pair(const KEY& key, const VAL& val) { this->add(key, val); } 24 | void init_initializer(std::initializer_list<std::pair<KEY, VAL>> pairs) { 25 | for (auto& pair : pairs) { 26 | this->add(pair); 27 | } 28 | } 29 | 30 | virtual bool add(const KEY& key, const VAL& val) = 0; 31 | 32 | /** 33 | Remove key value pair from map. 34 | 35 | @param[in] key key to search 36 | @return true iff the value was present 37 | */ 38 | virtual bool del(const KEY& key) = 0; 39 | 40 | // TODO Should be const, waiting for Bst find to become const. 41 | virtual bool find(const KEY& key, VAL& val) = 0; 42 | 43 | // TODO how to add those to the interface? Does not override because 44 | // derived class signature is different (Hash instead of Map). 45 | //virtual bool operator==(const Map& other) const = 0; 46 | //virtual bool operator!=(const Map& other) const = 0; 47 | virtual std::string str() const = 0; 48 | 49 | /** 50 | std::pair add based on add(key,val). 51 | */ 52 | bool add(const std::pair<KEY, VAL>& pair) { return add(pair.first, pair.second); } 53 | 54 | /** 55 | ostream << operator. Based on str. 56 | */ 57 | friend std::ostream& operator<<(std::ostream& os, const Map& rhs) { return os << rhs.str(); } 58 | }; 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /src/c/most_frequent.c: -------------------------------------------------------------------------------- 1 | /* 2 | Find the most frequent number in an array. 3 | 4 | The maximum value of each element is bounded and the count array fits into memory, 5 | so an array solution is possible. Otherwise, we'd need a map.
6 | */ 7 | 8 | #include "common.h" 9 | 10 | int solution0(int M, int A[], int N) { 11 | int *count = malloc((M + 1) * sizeof(int)); 12 | int i, *ip; 13 | int index = 0; 14 | int maxOccurence = 0; 15 | for (i = 0; i <= M; i++) 16 | count[i] = 0; 17 | for (i = 0; i < N; i++) { 18 | ip = &count[A[i]]; 19 | if (*ip == maxOccurence) { 20 | maxOccurence++; 21 | index = i; 22 | } 23 | (*ip)++; 24 | } 25 | free(count); 26 | return A[index]; 27 | } 28 | 29 | 30 | /* Weird solution that counts one specially. */ 31 | int solution1(int M, int A[], int N) { 32 | int *count = malloc((M + 1) * sizeof(int)); 33 | int i; 34 | for (i = 0; i <= M; i++) 35 | count[i] = 0; 36 | int maxOccurence = 1; 37 | int index = 0; 38 | for (i = 0; i < N; i++) { 39 | if (count[A[i]] > 0) { 40 | int tmp = count[A[i]] + 1; 41 | if (tmp > maxOccurence) { 42 | maxOccurence = tmp; 43 | index = i; 44 | } 45 | count[A[i]] = tmp; 46 | } else { 47 | count[A[i]] = 1; 48 | } 49 | } free(count); 50 | return A[index]; 51 | } 52 | 53 | typedef int t; 54 | 55 | void assert_solution( 56 | t *A, 57 | size_t N, 58 | t M, 59 | t *valid_outputs, 60 | size_t valid_outputs_size) { 61 | int output = solution0(M, A, N); 62 | size_t i; 63 | for (i = 0; i < valid_outputs_size; ++i) 64 | if (output == valid_outputs[i]) 65 | return; 66 | assert(false); 67 | } 68 | 69 | int main(void) { 70 | /* TODO loop over all implementations. */ 71 | 72 | assert_solution((t[]){1}, 1, 1, (t[]){1}, 1); 73 | 74 | /* Two possibilities. */ 75 | assert_solution((t[]){1, 2, 3, 3, 1, 3, 1}, 7, 3, (t[]){1, 3}, 2); 76 | 77 | /* Should fail. */ 78 | /*assert_solution((t[]){1, 2, 3, 3, 1, 3, 1}, 7, 3, (t[]){0, 2}, 2);*/ 79 | 80 | assert_solution((t[]){1, 2, 3, 3, 1, 3, 3}, 7, 3, (t[]){3}, 1); 81 | 82 | return EXIT_SUCCESS; 83 | } 84 | -------------------------------------------------------------------------------- /src/c/trie.c: -------------------------------------------------------------------------------- 1 | /* 2 | Trie set. 
3 | 4 | Unlike single struct BSTs, we can represent empty tries without the NULL pointer. 5 | 6 | TODO: 7 | 8 | - free 9 | - is_empty 10 | - find with prefix 11 | - typo correction 12 | */ 13 | 14 | #include <assert.h> 15 | #include <stdlib.h> 16 | 17 | #define ALPHABET_SIZE 8 18 | 19 | typedef unsigned int t; 20 | typedef t ts[]; 21 | 22 | typedef struct Trie Trie; 23 | struct Trie { 24 | Trie *children[ALPHABET_SIZE]; 25 | int is_word; 26 | }; 27 | 28 | void Trie_init(Trie **trie) { 29 | size_t i; 30 | *trie = malloc(sizeof(Trie)); 31 | (*trie)->is_word = 0; 32 | for (i = 0; i < ALPHABET_SIZE; ++i) 33 | (*trie)->children[i] = NULL; 34 | } 35 | 36 | int Trie_add(Trie *trie, t *word, size_t size) { 37 | size_t i; 38 | int present; 39 | Trie **next; 40 | for (i = 0; i < size; ++i) { 41 | next = &trie->children[word[i]]; 42 | if (*next == NULL) 43 | Trie_init(next); 44 | trie = *next; 45 | } 46 | present = trie->is_word; 47 | trie->is_word = 1; 48 | return present; 49 | } 50 | 51 | int Trie_find(Trie *trie, t *word, size_t size) { 52 | size_t i; 53 | for (i = 0; i < size; ++i) { 54 | trie = trie->children[word[i]]; 55 | if (trie == NULL) 56 | return 0; 57 | } 58 | if (trie->is_word) 59 | return 1; 60 | return 0; 61 | } 62 | 63 | int main(void) { 64 | Trie *trie = NULL; 65 | Trie_init(&trie); 66 | 67 | /* find on empty trie. */ 68 | assert(!Trie_find(trie, (ts){0}, 1)); 69 | 70 | /* Add and find length 1. */ 71 | assert(!Trie_add(trie, (ts){0}, 1)); 72 | assert(Trie_find(trie, (ts){0}, 1)); 73 | assert(!Trie_find(trie, (ts){1}, 1)); 74 | 75 | /* Add previously added.
*/ 76 | assert(Trie_add(trie, (ts){0}, 1)); 77 | 78 | /* Add and find length 3 */ 79 | { 80 | assert(Trie_find(trie, (ts){0}, 1)); 81 | assert(!Trie_find(trie, (ts){0, 1}, 2)); 82 | assert(!Trie_find(trie, (ts){0, 1, 2}, 3)); 83 | 84 | assert(!Trie_add(trie, (ts){0, 1, 2}, 3)); 85 | 86 | assert(Trie_find(trie, (ts){0}, 1)); 87 | assert(!Trie_find(trie, (ts){0, 1}, 2)); 88 | assert(Trie_find(trie, (ts){0, 1, 2}, 3)); 89 | } 90 | 91 | return EXIT_SUCCESS; 92 | } 93 | -------------------------------------------------------------------------------- /src/java/lib/StringSearch.java: -------------------------------------------------------------------------------- 1 | package lib; 2 | 3 | import java.util.Arrays; 4 | 5 | public class StringSearch { 6 | public static void test(Searcher searcher) { 7 | int[][] texts = { 8 | // 4-2-4 9 | {0, 1, 2, 3}, 10 | // 6-3-6 11 | {0, 1, 2, 3, 4, 5}, 12 | // no-match 13 | {0}, 14 | // two-matches 15 | {0, 1, 1, 0, 1, 1}, 16 | // 17-6-3 17 | {1, 2, 1, 0, 0, 1, 0, 2, 0, 0, 1, 0, 1, 0, 2, 0, 0}, 18 | 19 | // 18-6-3 20 | {2, 2, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1}, 21 | // no-match-18-6-3 22 | {2, 2, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1}, 23 | // full-match-2-2-2 24 | {0, 1}, 25 | // full-match-1-1-1 26 | {0}, 27 | // pattern-longer-than-text 28 | {0}, 29 | 30 | // From the wiki page: http://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm 31 | // 23-7-6 32 | {1, 2, 3, 0, 1, 2, 3, 4, 1, 2, 0, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 4, 5}, 33 | }; 34 | int[][] patterns = { 35 | {1, 2}, 36 | {2, 3, 4}, 37 | {1}, 38 | {1, 1}, 39 | {0, 1, 0, 1, 0, 2}, 40 | 41 | {0, 0, 1, 0, 0, 0, 1, 0}, 42 | {0, 0, 1, 0, 0, 0, 1, 1}, 43 | {0, 1}, 44 | {0}, 45 | {0, 1}, 46 | 47 | {1, 2, 3, 4, 1, 2, 4} 48 | }; 49 | int[] expected_outputs = { 50 | 1, 51 | 2, 52 | -1, 53 | 1, 54 | 9, 55 | 56 | 8, 57 | -1, 58 | 0, 59 | 0, 60 | -1, 61 | 62 | 15, 63 | }; 64 | for (int i = 0; i < texts.length; i++) { 65 | searcher.preProcess(patterns[i]); 
66 | int output = searcher.search(texts[i]); 67 | if (output != expected_outputs[i]) { 68 | System.err.println("Test id: " + i); 69 | System.err.println("Output: " + output); 70 | System.err.println("Expected output: " + expected_outputs[i]); 71 | System.exit(1); 72 | } 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /recursive-algorithms.md: -------------------------------------------------------------------------------- 1 | # Recursive algorithms 2 | 3 | One common algorithm design pattern is to use recursion, i.e., a function that calls itself, but each time with an input that is closer to the solution. 4 | 5 | Recursive implementations must have a *base case* after which recursion is not called anymore. Each call must bring the inputs closer to the base case. 6 | 7 | Many recursive algorithms can be transformed into a non-recursive version with call elimination, i.e. replacing recursive calls with a loop. The trade-offs are: 8 | 9 | - the recursive version is simpler to write 10 | - the non-recursive version is more memory and time efficient because it does not grow the call stack 11 | 12 | More precisely, it is exactly tail recursive algorithms that admit this simple call elimination. 13 | 14 | In functional programming languages there are no explicit loops, so the programmer cannot do call elimination by hand. However, many of those languages specify in their standard that call elimination is guaranteed to be done by the compiler. GCC is capable of doing TRO with the `-O2` flag. 15 | 16 | ## Tail call 17 | 18 | ## Tail recursive optimization 19 | 20 | ## TRO 21 | 22 | A tail call is a function call that happens just before a function returns. 23 | 24 | It does not need to be a call to the current function. But if it is, it is called a *recursive tail call*, and may be simpler to optimize.
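The difference can be sketched in Python (illustrative only: CPython itself never performs tail call optimization):

```python
def square(x):
    return x * x

def tail(x):
    # Tail call: square(...) is the last action and its result is returned
    # unchanged, so the current frame could be discarded before the call.
    return square(x + 1)

def not_tail(x):
    # Not a tail call: after square(x) returns we still have to add 1,
    # so the current frame must survive the call.
    return square(x) + 1

print(tail(2), not_tail(2))  # 9 5
```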
25 | 26 | Non-example of tail call: 27 | 28 | int factorial(int n) 29 | if n == 1 30 | return 1 31 | return factorial(n - 1) * n 32 | 33 | Not a tail call, because after the recursive call returns, the caller must still multiply the result by `n` before returning it. 34 | 35 | If we were worried about performance, we should try to convert it into a tail call version, which will make it easier for the compiler to optimize: 36 | 37 | int factorial(int n) 38 | return factorial-accum(n, 1) 39 | 40 | int factorial-accum(int n, int accum) 41 | if n == 1 42 | return accum 43 | else 44 | return factorial-accum(n - 1, n * accum) 45 | 46 | It is then trivial for a compiler to automatically convert it to: 47 | 48 | int factorial-accum(n, accum) 49 | TOP 50 | if n == 1 51 | return accum 52 | accum *= n 53 | n = n - 1 54 | goto TOP 55 | 56 | and the recursion is now gone! 57 | -------------------------------------------------------------------------------- /src/java/WellNestedOpenClose.java.off: -------------------------------------------------------------------------------- 1 | /* 2 | https://leetcode.com/problems/valid-parentheses/ 3 | */ 4 | 5 | import java.util.Deque; 6 | import java.util.LinkedList; 7 | 8 | public class WellNestedOpenClose { 9 | 10 | public static char close(char c) { 11 | switch (c) { 12 | case '(': 13 | return ')'; 14 | case '[': 15 | return ']'; 16 | case '{': 17 | return '}'; 18 | default: 19 | throw new IllegalArgumentException(String.valueOf(c)); 20 | } 21 | } 22 | 23 | public boolean isValid(String s) { 24 | Deque<Character> stack = new LinkedList<>(); 25 | int l = s.length(); 26 | for (int i = 0; i < l; i++) { 27 | char c = s.charAt(i); 28 | switch (c) { 29 | case '(': 30 | case '[': 31 | case '{': 32 | stack.push(c); 33 | break; 34 | default: 35 | if (stack.isEmpty() || c != close(stack.pop())) 36 | return false; 37 | break; 38 | } 39 | } 40 | if (!stack.isEmpty()) 41 | return false; 42 | return true; 43 | } 44 | 45 | /* 46 | By using a magic value for characters
that don't close, 47 | we can reuse it in the main loop. 48 | 49 | Another saner but less efficient possibility would be to return a character/boolean pair, 50 | where the boolean indicates if the char can be closed. 51 | */ 52 | public static char closeMagic(char c) { 53 | switch (c) { 54 | case '(': 55 | return ')'; 56 | case '[': 57 | return ']'; 58 | case '{': 59 | return '}'; 60 | } 61 | return 0; 62 | } 63 | 64 | public boolean isValidMagic(String s) { 65 | Deque<Character> stack = new LinkedList<>(); 66 | int l = s.length(); 67 | for (int i = 0; i < l; i++) { 68 | char c = s.charAt(i); 69 | if (closeMagic(c) != 0) { 70 | stack.push(c); 71 | } else { 72 | if (stack.isEmpty() || c != close(stack.pop())) 73 | return false; 74 | } 75 | } 76 | if (!stack.isEmpty()) 77 | return false; 78 | return true; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /probabilistic-turing-machine.md: -------------------------------------------------------------------------------- 1 | # Probabilistic Turing Machine 2 | 3 | Each transition happens with a given probability. 4 | 5 | PTMs are a generalization of the deterministic machine, since you can get a deterministic machine by setting all probabilities to either 0 or 1. 6 | 7 | ## Applications 8 | 9 | - Quantum Turing Machines are probabilistic and physically realisable. 10 | 11 | - Cryptography uses Probabilistic Turing Machines in its proofs, 12 | since key concepts like SHA inversion are only meaningful statistically. 13 | 14 | ## BPP 15 | 16 | 17 | 18 | Bounded-error Probabilistic Polynomial time. 19 | 20 | - runs polynomially for every input. In particular, this implies that it always halts on a correct or incorrect answer. 21 | 22 | - gives the correct answer with probability greater than a fixed number larger than 0.5.
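The 0.5 bound is enough in practice, because independent runs can be combined by majority vote. A quick simulation sketch (the noisy decider below is a hypothetical stand-in for a BPP machine, not a real one):

```python
import random
from collections import Counter

def majority(run, n):
    """Run a yes/no probabilistic procedure n times and return the most common answer."""
    return Counter(run() for _ in range(n)).most_common(1)[0][0]

random.seed(0)  # make the simulation reproducible

def noisy_decider():
    # Stand-in decider: the correct answer is True, returned with probability 0.6.
    return random.random() < 0.6

single_accuracy = sum(noisy_decider() for _ in range(1000)) / 1000
amplified_accuracy = sum(majority(noisy_decider, 101) for _ in range(1000)) / 1000
print(single_accuracy, amplified_accuracy)  # the amplified version is far closer to 1
```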
23 | 24 | Note that this is equivalent to saying it gives the right result at least $2/3$ of the time: 25 | you just have to create a new PTM that runs $n$ times and chooses the most common output. 26 | 27 | This implies that if you run it enough times, it tends with probability 1 to the right answer. 28 | 29 | P == BPP is open. 30 | PRIMES was a famous problem known for a long time to be in BPP, but a polynomial time algorithm (AKS) was found in 2002. 31 | A famous problem in BPP but not known to be in P is polynomial identity testing: 32 | test if two polynomials are equal. 33 | The naive method of expanding the polynomial is not polynomial because, e.g.: 34 | 35 | $$(x_0 + x_1)(x_1 + x_2) \ldots (x_{n-1} + x_n)$$ 36 | 37 | expands to $2^n$ terms, even though it is only $O(n)$ in initial size. 38 | 39 | ### Monte Carlo algorithms 40 | 41 | 42 | 43 | Algorithms like BPP but without the polynomial constraint: bounded running time, but the answer may be wrong with some probability. 44 | 45 | Contrast with Las Vegas algorithms. 46 | 47 | ## ZPP 48 | 49 | - It always runs in polynomial time. 50 | - It returns an answer YES, NO or DO NOT KNOW. 51 | - The answer is always either DO NOT KNOW or the correct answer. 52 | - It returns DO NOT KNOW with probability at most 1/2 (and the correct answer otherwise). 53 | 54 | I guess a common design pattern for this is to take a regular algorithm 55 | and decide when to cut off execution intelligently giving the DO NOT KNOW. 56 | 57 | ### Las Vegas algorithm 58 | 59 | 60 | 61 | Algorithms like ZPP but without the polynomial constraint: they never give a wrong answer, but their running time is random. 62 | 63 | Contrast with Monte Carlo algorithms. 64 | -------------------------------------------------------------------------------- /hash-function.md: -------------------------------------------------------------------------------- 1 | # Hash function 2 | 3 | Applications: 4 | 5 | - hash maps 6 | - cryptography 7 | 8 | ## Cryptographic hash function 9 | 10 | 11 | 12 | Used in cryptography as well as other applications, e.g. Git SHA to identify objects uniquely.
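The Git example can be made concrete: Git identifies a blob by the SHA-1 of a short header followed by the file content. A sketch:

```python
import hashlib

def git_blob_sha1(data: bytes) -> str:
    # Git hashes the header "blob <size>\0" followed by the raw content.
    header = b"blob %d\x00" % len(data)
    return hashlib.sha1(header + data).hexdigest()

# Same ID that `echo hello | git hash-object --stdin` prints.
print(git_blob_sha1(b"hello\n"))  # ce013625030ba8dba906f756967f9e9ca394464a
```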
13 | 14 | Desired properties: 15 | 16 | - it is easy to compute the hash value for any given message 17 | 18 | - it is infeasible to generate a message that has a given hash 19 | 20 | - it is infeasible to modify a message without changing the hash 21 | 22 | - it is infeasible to find two different messages with the same hash. 23 | 24 | Finding a collision is in general much easier than finding an input with a given hash because of the birthday problem: 25 | 26 | ### Measures of strength 27 | 28 | The collision strength of a function is determined by the smallest attack that generates a collision with certainty or high probability. 29 | 30 | For example, SHA-1 has 80 bits of collision resistance, so a naive brute force attack costs $2^{80}$. The first widely accepted break on SHA-1 was in 2005, which made it fall to $2^{69}$. This was later reduced to $2^{63}$, and there have been many later further claims of more efficient attacks, but some were withdrawn or under-verified. 31 | 32 | ## Merkle–Damgård construction 33 | 34 | 35 | 36 | Algorithms in that family can also hash an empty string: 37 | 38 | ### SHA-1 39 | 40 | 160 bits. 41 | 42 | SHA-1 is the most popular in 2014. Used in Git. 43 | 44 | Collision attacks were found in 2005, but they were too expensive. 45 | 46 | Some parts of the US government moved to SHA-2 in 2010 because of the weaknesses. 47 | 48 | SHA-1 collisions are predicted to become practical for organized crime by 2018: 49 | 50 | Google, Microsoft and Mozilla will remove SHA-1 for security in 2017 and use SHA-2 instead. 51 | 52 | The following is a for-fun prefix finder: 53 | 54 | It seems unproven that the all zero SHA-1 `0^160` has a preimage: 55 | 56 | ### SHA-2 57 | 58 | Family of 6 functions with different output lengths. 59 | 60 | ## Rolling hash function 61 | 62 | 63 | 64 | Major application: Rabin-Karp string search.
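The core trick of a polynomial rolling hash, as used by Rabin-Karp, is that sliding the window by one position is O(1): subtract the outgoing character's contribution, shift, and add the incoming one. A sketch (the base and modulus below are arbitrary illustrative choices):

```python
BASE, MOD = 256, 1_000_003

def hash_window(s: bytes) -> int:
    """Hash a window from scratch in O(len(s))."""
    h = 0
    for c in s:
        h = (h * BASE + c) % MOD
    return h

def roll(h: int, out: int, new: int, pow_top: int) -> int:
    """Slide the window one position in O(1): drop `out`, append `new`."""
    return ((h - out * pow_top) * BASE + new) % MOD

text, m = b"abracadabra", 4
pow_top = pow(BASE, m - 1, MOD)  # weight of the window's first character
h = hash_window(text[:m])
for i in range(1, len(text) - m + 1):
    h = roll(h, text[i - 1], text[i + m - 1], pow_top)
    assert h == hash_window(text[i:i + m])  # rolled hash matches recomputation
```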
65 | -------------------------------------------------------------------------------- /src/java/StringIsomorphism.java.off: -------------------------------------------------------------------------------- 1 | /* 2 | https://leetcode.com/problems/isomorphic-strings/ 3 | 4 | The perfect data structure to solve this is a bimap, which represents a bijection. 5 | 6 | There are no bimaps in the stdlib: http://stackoverflow.com/questions/9783020/bidirectional-map 7 | but it is easy to simulate them with two separate maps. 8 | */ 9 | 10 | import java.util.Map; 11 | import java.util.HashMap; 12 | 13 | /* 14 | Current solutions do not consider 4-byte codepoints. 15 | 16 | For 2-byte codepoints we could use an array as a map (64kB), 17 | but for 4-byte codepoints it would be too much. 18 | */ 19 | public class Solution { 20 | 21 | /* Double map solution. Duplicates the code, and is marginally more efficient because it does only a single loop. */ 22 | public boolean isIsomorphic(String s, String t) { 23 | int l = s.length(); 24 | if (l != t.length()) 25 | return false; 26 | Map<Character, Character> mapst = new HashMap<>(); 27 | Map<Character, Character> mapts = new HashMap<>(); 28 | for (int i = 0; i < l; i++) { 29 | char sc = s.charAt(i); 30 | char tc = t.charAt(i); 31 | Character sm = mapst.get(sc); 32 | Character tm = mapts.get(tc); 33 | if (sm == null) { 34 | mapst.put(sc, tc); 35 | } else { 36 | if (!sm.equals(tc)) 37 | return false; 38 | } 39 | if (tm == null) { 40 | mapts.put(tc, sc); 41 | } else { 42 | if (!tm.equals(sc)) 43 | return false; 44 | } 45 | } 46 | return true; 47 | } 48 | 49 | public boolean isIsomorphicOneSide(String s, String t) { 50 | Map<Character, Character> map = new HashMap<>(); 51 | int l = s.length(); 52 | for (int i = 0; i < l; i++) { 53 | char sc = s.charAt(i); 54 | char tc = t.charAt(i); 55 | Character mc = map.get(sc); 56 | if (mc == null) { 57 | map.put(sc, tc); 58 | } else { 59 | if (!mc.equals(tc)) 60 | return false; 61 | } 62 | } 63 | return true; 64 | } 65 | 66 | public boolean isIsomorphicTwoPasses(String s,
String t) { 67 | int l = s.length(); 68 | if (l != t.length()) 69 | return false; 70 | return isIsomorphicOneSide(s, t) && isIsomorphicOneSide(t, s); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/java/LinkedList.java.off: -------------------------------------------------------------------------------- 1 | /* 2 | https://leetcode.com/problems/reverse-linked-list/ 3 | 4 | Definition for singly-linked list. 5 | 6 | public class ListNode { 7 | int val; 8 | ListNode next; 9 | ListNode(int x) { val = x; } 10 | } 11 | */ 12 | public class LinkedList { 13 | public static ListNode reverseList(ListNode head) { 14 | if (head == null) 15 | return null; 16 | ListNode cur = head.next; 17 | ListNode prev = head; 18 | head.next = null; 19 | while (cur != null) { 20 | ListNode next = cur.next; 21 | cur.next = prev; 22 | prev = cur; 23 | cur = next; 24 | } 25 | return prev; 26 | } 27 | 28 | public ListNode removeAll(ListNode head, int val) { 29 | ListNode newHead = head; 30 | while (newHead != null && newHead.val == val) { 31 | newHead = newHead.next; 32 | } 33 | if (newHead != null) { 34 | ListNode prev = newHead; 35 | ListNode cur = newHead.next; 36 | while (cur != null) { 37 | ListNode next = cur.next; 38 | if (cur.val == val) { 39 | prev.next = next; 40 | } else { 41 | prev = cur; 42 | } 43 | cur = next; 44 | } 45 | } 46 | return newHead; 47 | } 48 | 49 | /* 50 | Remove all val nodes from the list. 51 | 52 | Do the first removal check in the same loop as the other checks, 53 | which is less efficient than pre-removing the heads in a separate loop. 
54 | */ 55 | public ListNode removeAllOneLoop(ListNode head, int val) { 56 | ListNode prev = null; 57 | ListNode cur = head; 58 | ListNode newHead = head; 59 | while (cur != null) { 60 | ListNode next = cur.next; 61 | if (cur.val == val) { 62 | if (prev == null) { 63 | newHead = next; 64 | } else { 65 | prev.next = next; 66 | } 67 | } else { 68 | prev = cur; 69 | } 70 | cur = next; 71 | } 72 | return newHead; 73 | } 74 | 75 | /* Super short, and super inefficient. */ 76 | public ListNode removeAllRecursive(ListNode head, int val) { 77 | if (head == null) 78 | return null; 79 | if (head.val == val) 80 | return removeAllRecursive(head.next, val); 81 | head.next = removeAllRecursive(head.next, val); 82 | return head; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /bibliography.md: -------------------------------------------------------------------------------- 1 | # Bibliography 2 | 3 | ## Other bibliographies 4 | 5 | - : huge list of free books and algorithmic problem sets 6 | 7 | ## Free 8 | 9 | - similar to this with more algorithms, but where are the tests? 10 | 11 | - , which has lots of GPL Java source. This kind soul has put the source up on GitHub: 12 | 13 | - . Links to tons of open source algorithm implementations that solve many problems. Each algorithm has a rating, and algorithms are all classified. 14 | 15 | - 16 | 17 | - 18 | 19 | ## Non-free 20 | 21 | - [Skiena - Algorithm Design Manual 2ed][skiena08] 22 | - [Cormen - Introduction to Algorithms 2ed][cormen09] 23 | 24 | ## Algorithmic problem sets 25 | 26 | My favorites are: 27 | 28 | - [TopCoder](http://www.topcoder.com/active-challenges/develop). No solutions. 6M registered users. Money prizes. Some company proposed problems have Non Disclosure Agreements. Timed submission contests. 29 | - [HackerRank](https://www.hackerrank.com/categories/fp/intro). No solutions, 3M Round A. Timed submission contests.
30 | - [Kaggle](https://www.kaggle.com/competitions). Data science focused. No solutions. Some problems have money prizes. 31 | - [Sphere Online Judge (SPOJ)](http://www.spoj.com/problems/classical/all/). No solutions. 200K users, 10000 problems. 32 | - [Project Euler](http://projecteuler.net/problems). 350K users, only ~450 problems. No solutions. Since 2001. Probably one of the oldest around, but did not evolve much. 33 | - [UVa](http://uva.onlinejudge.org/index.php?option=com_onlinejudge&Itemid=8&category=1). 100K registered users. No solutions. Slow website. 34 | - [CareerCup](http://www.careercup.com): Interview questions. 35 | 36 | Yearly contests: 37 | 38 | - [ACM International Collegiate Programming Contest](http://icpc.baylor.edu/). [Wiki](https://en.wikipedia.org/wiki/ACM_International_Collegiate_Programming_Contest). No solutions. Eligibility: less than five years of university education before the contest. Started in 1977. [World final problems](http://icpc.baylor.edu/worldfinals/problems). 39 | - [Google Code Jam](http://code.google.com/codejam/contests.html). Only a few solutions. 40 | - [ICFP](http://en.wikipedia.org/wiki/ICFP_Programming_Contest). One problem per year. Since 1998. 41 | 42 | [cormen09]: http://www.amazon.com/books/dp/0262033844 43 | [skiena08]: http://www.amazon.com/Algorithm-Design-Manual-Steven-Skiena/dp/1848000693 44 | -------------------------------------------------------------------------------- /turing-machine.md: -------------------------------------------------------------------------------- 1 | # Turing machine 2 | 3 | # Computation model 4 | 5 | Review of Turing machines and their variants. 6 | 7 | The Turing machine and its variants form the most popular model of computation, so understanding it is fundamental before you do anything else. 8 | 9 | Different models exist to represent different capabilities of real, or imaginary, hardware. 10 | 11 | ## Variants 12 | 13 | ### Classical model 14 | 15 | Classical model.
16 | 17 | ### Non-deterministic Turing Machine 18 | 19 | ### NTM 20 | 21 | Turing machine that has multiple possible transitions per input and state. 22 | 23 | It decides between those transitions either: 24 | 25 | - optimally through a magic oracle. 26 | - by following all paths at once. TODO: what is the correct output if multiple paths halt? 27 | 28 | ### RAM 29 | 30 | Random data access. Same computability class as Turing machine, but models currently existing memories better. 31 | 32 | TODO vs Turing machine with a simple example. 33 | 34 | ### Cache oblivious algorithm 35 | 36 | 37 | 38 | Models a cache but without explicitly knowing the cache size. 39 | 40 | ### PRAM 41 | 42 | ### Parallel random-access machine 43 | 44 | 45 | 46 | ## Input length vs value 47 | 48 | Keep in mind that big O analysis uses a Turing machine, so what matters is the *length* of the input, *not* its value. 49 | 50 | For example, deciding if a number $n$ is prime takes at most $\sqrt{n}$ steps (try dividing by each number smaller than $\sqrt{n}$), so one might think that deciding primeness is polynomial. 51 | 52 | However, $n$ is exponential in the number of digits of $n$! Appending a single digit 0 to the end of a binary number multiplies it by 2! 53 | 54 | Therefore, deciding primeness via trial division is not polynomial. 55 | 56 | The practical importance of this depends on the nature of the input: 57 | 58 | - if the input is human-generated, such as a prime used for cryptography, it is easy to use numbers with many more digits, so we have to consider the exponential aspect. 59 | 60 | - if however $n$ is a number that comes out in some natural model in which $n$ itself cannot be too large because it cannot simply double too quickly (say, the number of people on the planet), then the exponential bound is not very meaningful.
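The effect is easy to measure: count the divisions trial division performs. For primes (the worst case) the count grows like $\sqrt{n}$, i.e. exponentially in the number of digits. A sketch (1009 and 104729 are both prime):

```python
def trial_division_steps(n: int) -> int:
    # Count the trial divisions performed when testing n for primality.
    steps, d = 0, 2
    while d * d <= n:
        steps += 1
        if n % d == 0:
            break  # composite: a divisor was found
        d += 1
    return steps

small, large = trial_division_steps(1009), trial_division_steps(104729)
print(small, large)  # the larger prime costs roughly sqrt(n) steps
```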
61 | 62 | ### Strongly NP 63 | 64 | A problem is *strongly NP* if it remains NP-hard even when complexity is measured in the input values instead of the input lengths (equivalently, when the numbers are encoded in unary). 65 | 66 | Therefore, naive primality testing is not strongly NP, since if values were considered instead of input lengths, then it would be polynomial (it is *pseudo-polynomial*). 67 | 68 | Known strongly NP problems can be found here: 69 | -------------------------------------------------------------------------------- /hardware.md: -------------------------------------------------------------------------------- 1 | # Hardware 2 | 3 | ## Parallel vs serial connectors 4 | 5 | - serial: one bit at a time 6 | - parallel: multiple bits at a time 7 | 8 | Parallel ports used to be more common, but they have been displaced by serial technologies: 9 | 10 | - USB: Universal Serial Bus. Current method of choice for simple devices (mice, keyboards, flash storage) 11 | - SATA: Serial ATA. Current method for HD connection. 12 | - PCIe: connections directly on the motherboard 13 | 14 | When the term serial port is used nowadays it refers to serial interfaces 15 | similar to the very old 16 | 17 | ## Northbridge vs southbridge 18 | 19 | - Northbridge vs southbridge: 20 | - The 8080 pinout: 21 | 22 | ## USB 23 | 24 | Serial 25 | 26 | Has a class system. If a device fits into one of those classes, 27 | there is no need to install any driver since the interface is already specified. 28 | Sample classes: 29 | 30 | - mouse 31 | - keyboard 32 | - storage device (hd, flash) 33 | - network 34 | 35 | USB 1.0 has only 4 wires: 36 | 37 | - ground 38 | - power 39 | - signal pair (2 wires) 40 | 41 | However newer standards have more wires. 42 | 43 | Data transfers are made in a standard manner. 44 | Data is coded via a TODO scheme (if signal changes, 1, else 0). 45 | 46 | Input and output is done in chunks called URBs, USB request blocks. 47 | 48 | ## Bus 49 | 50 | Name for several parallel wires used to communicate information between hardware.
51 | 52 | Buses often have a clock of their own, lower than the CPU clock rate and dividing it, 53 | since hardware responds much more slowly than the CPU. 54 | 55 | ## PCI 56 | 57 | Standard replaced in 2007 by PCIe. 58 | 59 | Specifies cables and software interfaces for connected hardware. 60 | 61 | Was very prevalent. 62 | 63 | Bridges connect one bus to another to extend the number of available buses. 64 | 65 | Each PCI peripheral is identified by 3 numbers: 66 | 67 | - bus number 68 | - device number 69 | - function number 70 | 71 | Each device contains info on: 72 | 73 | - vendor id (standardized by the PCI special interest group) 74 | - device id given by each vendor for its devices 75 | - device class 76 | 77 | Using vendor id and device id, the system can determine the exact type of hardware. 78 | 79 | ### Linux 80 | 81 | On Linux, you can get info on PCI and PCI-like devices via `lspci`. 82 | 83 | The kernel also exports PCI information under `/sys/devices/pci0000:00` which you can `cat` away. 84 | 85 | ## CPU 86 | 87 | The CPU communicates with devices in the following way: 88 | 89 | - set the origin or destination address on the address bus 90 | - set the output data on the data bus 91 | - send control signals on the control bus specifying which operation should be carried out 92 | -------------------------------------------------------------------------------- /src/c/tac.c: -------------------------------------------------------------------------------- 1 | /** 2 | Reads fixed-size chunks from the file so as not to overflow RAM. 3 | 4 | If a line is larger than this chunk size, 5 | the program exits with `EXIT_FAILURE` and gives an error message.
6 | 7 | Mimics GNU tac 8.2, which requires that the file 8 | end in a newline to print the last line properly, e.g.: 9 | 10 | a b \n c d \n 11 | 12 | Gets printed as: 13 | 14 | cd 15 | ab 16 | 17 | But: 18 | 19 | a b \n c d 20 | 21 | Gets printed as: 22 | 23 | cdab 24 | */ 25 | 26 | #include "common.h" 27 | 28 | int main(int argc, char *argv[]) { 29 | FILE* fp; 30 | /* Maximum accepted line length is buf_size including the newline. */ 31 | enum Constexpr { buf_size = 8 }; 32 | char buffer[buf_size], buffer_leftover[buf_size]; 33 | size_t print_up_to, leftover_bytes, file_size, seek_end_offset, nbytes_read; 34 | size_t i; 35 | 36 | if (argc < 2) { 37 | fprintf(stderr, "Error: input path missing.\n"); 38 | exit(EXIT_FAILURE); 39 | } 40 | 41 | fp = fopen(argv[1], "r"); 42 | if (fp == NULL) { 43 | fprintf(stderr, "Error: input path not found: %s\n", argv[1]); 44 | exit(EXIT_FAILURE); 45 | } 46 | /* TODO check file exists. */ 47 | fseek(fp, 0, SEEK_END); 48 | file_size = ftell(fp); 49 | nbytes_read = buf_size; 50 | leftover_bytes = 0; 51 | /* Unsigned type loop: use two variables idiom. */ 52 | for (seek_end_offset = buf_size; seek_end_offset < file_size + buf_size; seek_end_offset += buf_size) { 53 | /* Last iteration. */ 54 | if (seek_end_offset > file_size) { 55 | nbytes_read = buf_size - (seek_end_offset - file_size); 56 | seek_end_offset = file_size; 57 | } 58 | fseek(fp, -seek_end_offset, SEEK_END); 59 | fread(buffer, sizeof(char), nbytes_read, fp); 60 | print_up_to = nbytes_read - 1; 61 | /* Unsigned type loop. Use wrap around idiom.
*/ 62 | /* TODO replace with strrchr */ 63 | for (i = nbytes_read - 1; i < nbytes_read; --i) { 64 | if (buffer[i] == '\n') { 65 | if (i < nbytes_read - 1) { 66 | fwrite(&buffer[i + 1], sizeof(char), print_up_to - i, stdout); 67 | } 68 | print_up_to = i; 69 | if (leftover_bytes > 0) { 70 | fwrite(buffer_leftover, sizeof(char), leftover_bytes, stdout); 71 | leftover_bytes = 0; 72 | } 73 | } 74 | } 75 | /* No newline in this chunk: the line it belongs to cannot fit in the buffer. */ if (buffer[print_up_to] != '\n' && (nbytes_read == buf_size || leftover_bytes > 0)) { 76 | fprintf(stderr, "Error: line longer than buffer size. Buffer size: %d\n", buf_size); 77 | exit(EXIT_FAILURE); 78 | } 79 | leftover_bytes = print_up_to + 1; 80 | /* memmove */ 81 | memcpy(buffer_leftover, buffer, leftover_bytes); 82 | } 83 | fwrite(buffer_leftover, sizeof(char), leftover_bytes, stdout); 84 | exit(EXIT_SUCCESS); 85 | } 86 | -------------------------------------------------------------------------------- /type-systems.md: -------------------------------------------------------------------------------- 1 | # Type systems 2 | 3 | General classifications of programming languages. 4 | 5 | ## Statically vs dynamically 6 | 7 | Statically: each name has a type. It is not possible for a name to refer to an object of two different types in the same program. 8 | 9 | Python is dynamically typed: 10 | 11 | x = 1 12 | x = "abc" 13 | 14 | since the same name `x` can refer to both integers and strings. 15 | 16 | C and Java are statically typed, since: 17 | 18 | int x = 1; 19 | x = "abc"; 20 | 21 | is an error. The name `x` cannot refer to both an integer and a string. 22 | 23 | ## Weakly vs strongly 24 | 25 | The definition of weakly typed and strongly typed is not very precise or agreed upon, so it is better to avoid using those terms. 26 | 27 | In general, weakly typed means that it is possible to do type conversions between unrelated types implicitly.
28 | 29 | For example, in Python: 30 | 31 | 1 + "1" 32 | 33 | is an error since Python does not implicitly convert between strings and integers, so Python is generally considered to be strongly typed. 34 | 35 | The above is however perfectly valid in Perl, and yields `2`, since Perl does such type conversions. Therefore, Perl is generally considered to be weakly typed. 36 | 37 | In order to do the above in Python, it would be necessary to make an explicit conversion: 38 | 39 | 1 + int("1") 40 | 41 | Some types however are considered to be of "related types" such as integers and floats, and in those cases languages that implicitly convert between them are still considered strongly typed. For example C is considered strongly typed, even if: 42 | 43 | int i = 1 + 1.0; 44 | 45 | does an implicit conversion between the `double` `1.0` and the `int` `1`. 46 | 47 | ## Manifest vs implicit 48 | 49 | Manifest typing means that it is necessary to explicitly give the type of each name. 50 | 51 | Implicit typing means that names can be given types based on what they are assigned to. Implicit typing is also known as type inference. 52 | 53 | For example, before C++11, C++ used to be strictly manifest typed since: 54 | 55 | int main(){ i = 0; } 56 | 57 | does not work. It is necessary to say that `i` is an `int` via: 58 | 59 | int main(){ int i = 0; } 60 | 61 | With C++11, C++ has gained type inference capabilities. For example, the keyword `auto` allows the following to work: 62 | 63 | int main(){ auto i = 0; } 64 | 65 | where the type of `i` is only determined by its immediate initialization to `0`, which is an integer. 66 | 67 | This concept only makes sense for statically typed languages, for which each name has a corresponding type that it can refer to, and it is how that type is determined that is specified by manifest or implicit typing. 68 | 69 | ## Bibliography 70 | 71 | - 72 | 73 | Good tutorial on static and weakly typed.
74 | -------------------------------------------------------------------------------- /src/cpp/merge_sort.cpp: -------------------------------------------------------------------------------- 1 | #include <algorithm> // copy 2 | #include <cmath> // pow, ceil 3 | #include <vector> 4 | 5 | #include "common.hpp" 6 | 7 | /** 8 | Sort the input vector via merge sort inline. 9 | 10 | Time complexity: $O(input.size() log input.size())$ 11 | 12 | Memory complexity excluding input: $O(input.size())$ 13 | 14 | @param[in,out] input The input vector to be sorted. It shall be modified to contain the output. 15 | @tparam COMPARABLE A type that supports operators `<` and `==`. 16 | */ 17 | template<typename COMPARABLE> 18 | void merge_sort(std::vector<COMPARABLE>& input) { 19 | typename std::vector<COMPARABLE>::size_type input_size, 20 | current_size, 21 | left0, 22 | right0, 23 | left1, 24 | right1, 25 | output_position, 26 | size_pow2; 27 | typename std::vector<COMPARABLE>::iterator output_begin; 28 | auto input_begin = input.begin(); 29 | input_size = input.size(); 30 | std::vector<COMPARABLE> output(input_size); 31 | if (input_size < 2) 32 | return; 33 | size_pow2 = std::pow(2, std::ceil(std::log2(input_size))); 34 | current_size = 1; 35 | output_begin = output.begin(); 36 | while (current_size <= size_pow2 / 2) { 37 | output_position = 0; 38 | while (output_position < input_size) { 39 | left0 = output_position; 40 | right0 = left0 + current_size; 41 | left1 = right0; 42 | right1 = right0 + current_size; 43 | // Make corrections in case the input size is not a power of 2. 44 | if (right0 > input_size) { 45 | right0 = input_size; 46 | // If left1 == right1, no data access is ever made on the right side. 47 | // This is what we want since the right side is completely out of range in this case.
48 | left1 = right1; 49 | } else if (right1 > input_size) { 50 | right1 = input_size; 51 | } 52 | while (true) { 53 | if (left0 == right0) { 54 | std::copy(input_begin + left1, input_begin + right1, output_begin + output_position); 55 | output_position += right1 - left1; 56 | break; 57 | } else if (left1 == right1) { 58 | std::copy(input_begin + left0, input_begin + right0, output_begin + output_position); 59 | output_position += right0 - left0; 60 | break; 61 | } 62 | if (input[left0] < input[left1]) { 63 | output[output_position] = input[left0]; 64 | left0++; 65 | } else { 66 | output[output_position] = input[left1]; 67 | left1++; 68 | } 69 | output_position++; 70 | } 71 | } 72 | input = output; 73 | current_size *= 2; 74 | } 75 | } 76 | 77 | int main(int argc, char **argv) { 78 | std::vector input = parse_array(argv[1]); 79 | merge_sort(input); 80 | print_array(input); 81 | } 82 | -------------------------------------------------------------------------------- /tac.md: -------------------------------------------------------------------------------- 1 | # Tac 2 | 3 | 4 | 5 | ## Discussion 6 | 7 | 8 | 9 | `cat` reversed line-wise. 
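A minimal in-memory sketch of the behavior (deliberately ignoring the memory constraints that make the real problem hard):

```python
import io

def tac_in_memory(infile, outfile):
    # Naive version: read all lines into RAM, then emit them in reverse order.
    outfile.writelines(reversed(infile.readlines()))

dst = io.StringIO()
tac_in_memory(io.StringIO("ab\ncd\n"), dst)
print(repr(dst.getvalue()))  # 'cd\nab\n'
```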
10 | 11 | The hard part is implementing this with out-of-core considerations: 12 | 13 | - RAM is finite, otherwise we could just read everything at once and it would be simple 14 | - disk access is extremely slow, otherwise we could just read one character at a time and put it into the buffer 15 | 16 | The first naive idea that comes to mind is: 17 | 18 | - read a chunk 19 | - go backwards until the last line 20 | - read a smaller chunk 21 | - the first time we print, print what was left from the previous 22 | - repeat 23 | 24 | But this simple strategy would not work because using the notation: 25 | 26 | - `n`: newline 27 | - `.`: any character 28 | - digit: first character of the nth line to be printed 29 | - `' '` (space): the character has already been printed 30 | 31 | the buffer would evolve as follows: 32 | 33 | Read 1 34 | 35 | | 3 . . . . n 2 . . n 1 n | 36 | 37 | | 3 . . . . n 2 . . n | 38 | 39 | | 3 . . . . n | 40 | 41 | Read 2 42 | 43 | | 3 . . . . n 4 n 3 . . . | 44 | 45 | | 4 n | 46 | 47 | Read 3 48 | 49 | | 4 n 5 . n 4 | 50 | 51 | | 5 . n | 52 | 53 | Read 4 54 | 55 | | 5 . n 5 | 56 | 57 | Note how at each step the read is reduced, and at the front there is a lot of wasted memory. 58 | 59 | The solution is to use a circular buffer: . 60 | 61 | There are two possible implementations: 62 | 63 | Double buffer implementation: 64 | 65 | - use two buffers of equal size 66 | - read into one of the two cyclically 67 | 68 | Circular buffer implementation: 69 | 70 | - read two chunks from disk every time 71 | 72 | Advantage of circular: uses half the RAM. 73 | 74 | Disadvantage: twice as many disk reads, which are *very* expensive. 75 | 76 | So in general the double buffer is better. 77 | 78 | With the double buffer technique, it would work as follows: 79 | 80 | Read 1 81 | 82 | | 3 . . . . n 2 . . n 1 n | 83 | | | 84 | 85 | | 3 . . . . n 2 . . n | 86 | | | 87 | 88 | | 3 . . . . n | 89 | | | 90 | 91 | Read 2 92 | 93 | | 3 . . . . n | 94 | | n 5 . n 4 . . n 3 . 
. . | 95 | 96 | | | 97 | | n 5 . n 4 . . n | 98 | 99 | | | 100 | | n 5 .n | 101 | 102 | | | 103 | | n | 104 | 105 | Read 3 106 | 107 | | 8 . . . . n 7 . . n 6 n | 108 | | n | 109 | 110 | | 8 . . . . n 7 . . n 6 n | 111 | | | 112 | 113 | | 8 . . . . n 7 . . n | 114 | | | 115 | 116 | | 8 . . . . n | 117 | | | 118 | 119 | And so on. 120 | -------------------------------------------------------------------------------- /src/cpp/map.cpp: -------------------------------------------------------------------------------- 1 | //#define DEBUG_OUTPUT 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "map.hpp" 13 | //#include "bst.hpp" 14 | #include "hash_map.hpp" 15 | 16 | int main() { 17 | 18 | // Choose the map type. 19 | typedef HashMap map_t; 20 | 21 | // TODO broken. When fixed, convert to polymorphism. 22 | //typedef Bst map_t; 23 | 24 | map_t mapOrig{ 25 | { 0, 1}, 26 | { 1, 2}, 27 | { 2, 3}, 28 | { 3, 4}, 29 | { 4, 5}, 30 | {-1, 0}, 31 | }; 32 | map_t map; 33 | map_t mapExpect; 34 | int val; 35 | 36 | for (int i = 0; i < 1; i++) { 37 | 38 | // add 39 | 40 | // BST test 41 | 42 | // create a bst with all possible deletion cases: 43 | // 44 | // - two children 45 | // - one child 46 | // - 0 children 47 | 48 | mapOrig.add( 2, 3); 49 | mapOrig.add( 1, 2); 50 | mapOrig.add( 3, 4); 51 | mapOrig.add( 4, 5); 52 | mapOrig.add(-1, 0); 53 | 54 | // << 55 | 56 | // TODO test with stringstream 57 | // std::cout << mapOrig << std::endl; 58 | 59 | // find 60 | 61 | map = mapOrig; 62 | assert(!map.find(-2, val)); 63 | assert(map.find(-1, val)); 64 | assert(val == 0); 65 | assert(map.find(0, val)); 66 | assert(val == 1); 67 | assert(map.find(1, val)); 68 | assert(val == 2); 69 | assert(map.find(2, val)); 70 | assert(val == 3); 71 | assert(map.find(3, val)); 72 | assert(val == 4); 73 | assert(map.find(4, val)); 74 | assert(val == 5); 75 | 76 | // == 77 | 78 | map = mapOrig; 79 | assert(map == mapOrig); 80 | 
map.add(5, 6); 81 | assert(map != mapOrig); 82 | 83 | 84 | // del 85 | 86 | // Two children. 87 | map.del(0); 88 | assert(!map.find(0, val)); 89 | 90 | // Leaf. 91 | map = mapOrig; 92 | map.del(1); 93 | assert(!map.find(1, val)); 94 | 95 | // One child. 96 | map = mapOrig; 97 | map.del(3); 98 | assert(!map.find(3, val)); 99 | 100 | // Hash map tests. 101 | 102 | // Add at powers of 2 the 0 hash so they clutter at hash 0. 103 | map = map_t(0, 1); 104 | map.add( 1, 2); 105 | map.add( 2, 3); 106 | map.add( 4, 5); 107 | map.add( 8, 9); 108 | map.add(16, 17); 109 | 110 | // find 111 | assert(map.find(8, val)); 112 | assert(val == 9); 113 | 114 | // del 115 | map.del(0); 116 | assert(!map.find(0, val)); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/java/WellNestedOpenClose.java: -------------------------------------------------------------------------------- 1 | /* 2 | https://leetcode.com/problems/valid-parentheses/ 3 | */ 4 | 5 | import java.util.Deque; 6 | import java.util.LinkedList; 7 | 8 | public class WellNestedOpenClose { 9 | 10 | public static char close(char c) { 11 | switch (c) { 12 | case '(': 13 | return ')'; 14 | case '[': 15 | return ']'; 16 | case '{': 17 | return '}'; 18 | default: 19 | throw new IllegalArgumentException(String.valueOf(c)); 20 | } 21 | } 22 | 23 | public static boolean isValid(String s) { 24 | Deque stack = new LinkedList<>(); 25 | int l = s.length(); 26 | for (int i = 0; i < l; i++) { 27 | char c = s.charAt(i); 28 | switch (c) { 29 | case '(': 30 | case '[': 31 | case '{': 32 | stack.push(c); 33 | break; 34 | default: 35 | if (stack.isEmpty() || c != close(stack.pop())) 36 | return false; 37 | break; 38 | } 39 | } 40 | if (!stack.isEmpty()) 41 | return false; 42 | return true; 43 | } 44 | 45 | /* 46 | By using a magic value for characters that don't close, 47 | we can reuse this on the main loop code. 
48 | 49 | Another saner but less efficient possibility would be to return character / boolean pair, 50 | where the boolean indicates if the char can be closed. 51 | */ 52 | public static char closeMagic(char c) { 53 | switch (c) { 54 | case '(': 55 | return ')'; 56 | case '[': 57 | return ']'; 58 | case '{': 59 | return '}'; 60 | } 61 | return 0; 62 | } 63 | 64 | public static boolean isValidMagic(String s) { 65 | Deque stack = new LinkedList<>(); 66 | int l = s.length(); 67 | for (int i = 0; i < l; i++) { 68 | char c = s.charAt(i); 69 | if (closeMagic(c) != 0) { 70 | stack.push(c); 71 | } else { 72 | if (stack.isEmpty() || c != closeMagic(stack.pop())) 73 | return false; 74 | } 75 | } 76 | if (!stack.isEmpty()) 77 | return false; 78 | return true; 79 | } 80 | 81 | public static void main(String[] args) { 82 | String stringsPass[] = { 83 | "", 84 | "()", 85 | "[]", 86 | "{}", 87 | "([])", 88 | "[()]", 89 | }; 90 | for (String s : stringsPass) { 91 | assert isValid(s); 92 | assert isValidMagic(s); 93 | } 94 | 95 | String stringsFail[] = { 96 | "(", 97 | ")", 98 | "(]", 99 | "([)]", 100 | }; 101 | for (String s : stringsFail) { 102 | assert !isValid(s); 103 | assert !isValidMagic(s); 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/java/BinaryTree.java: -------------------------------------------------------------------------------- 1 | /* 2 | Non-balanced binary tree: 3 | 4 | - without outer container: nodes only. 
5 | - no repetitions 6 | */ 7 | 8 | import java.util.ArrayList; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | 12 | public class BinaryTree { 13 | 14 | private Integer value; 15 | private BinaryTree left; 16 | private BinaryTree right; 17 | 18 | BinaryTree(Integer value) { 19 | this.value = value; 20 | this.left = null; 21 | this.right = null; 22 | } 23 | 24 | public boolean insert(Integer value) { 25 | if (value < this.value) { 26 | if (this.left == null) { 27 | this.left = new BinaryTree(value); 28 | return true; 29 | } else { 30 | return this.left.insert(value); 31 | } 32 | } else if (value > this.value) { 33 | if (this.right == null) { 34 | this.right = new BinaryTree(value); 35 | return true; 36 | } else { 37 | return this.right.insert(value); 38 | } 39 | } else { 40 | return false; 41 | } 42 | } 43 | 44 | public void inOrder(List output) { 45 | if (this.left != null) 46 | this.left.inOrder(output); 47 | output.add(this.value); 48 | if (this.right != null) 49 | this.right.inOrder(output); 50 | } 51 | 52 | public void preOrder(List output) { 53 | output.add(this.value); 54 | if (this.left != null) 55 | this.left.preOrder(output); 56 | if (this.right != null) 57 | this.right.preOrder(output); 58 | } 59 | 60 | public static void main(String[] args) { 61 | List expected, output; 62 | BinaryTree t = new BinaryTree(0); 63 | assert !t.insert(0); 64 | assert t.insert(-2); 65 | assert t.insert(-1); 66 | assert t.insert(-3); 67 | assert t.insert(2); 68 | assert t.insert(1); 69 | assert t.insert(3); 70 | 71 | /* Insert. 
*/ 72 | assert t.value == 0; 73 | assert t.left.value == -2; 74 | assert t.left.left.value == -3; 75 | assert t.left.right.value == -1; 76 | assert t.right.value == 2; 77 | assert t.right.left.value == 1; 78 | assert t.right.right.value == 3; 79 | 80 | /* inOrder */ 81 | expected = new ArrayList(); 82 | expected.add(-3); 83 | expected.add(-2); 84 | expected.add(-1); 85 | expected.add(0); 86 | expected.add(1); 87 | expected.add(2); 88 | expected.add(3); 89 | output = new ArrayList(); 90 | t.inOrder(output); 91 | assert output.equals(expected); 92 | 93 | /* preOrder */ 94 | expected = new ArrayList(); 95 | expected.add(0); 96 | expected.add(-2); 97 | expected.add(-3); 98 | expected.add(-1); 99 | expected.add(2); 100 | expected.add(1); 101 | expected.add(3); 102 | output = new ArrayList(); 103 | t.preOrder(output); 104 | assert output.equals(expected); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /context-free-grammar.md: -------------------------------------------------------------------------------- 1 | # Context-free grammar 2 | 3 | Related automaton: PDA. 4 | 5 | ## Application 6 | 7 | Sufficient for most programming languages, while regexes are not. 8 | 9 | But simpler than a context-sensitive grammar. 10 | 11 | Usually, programming languages use faster-to-parse subsets of CFGs, 12 | most notably deterministic context-free grammars, 13 | which parse in $O(n)$ instead of $O(n^3)$. 14 | 15 | ## Complexity 16 | 17 | CYK is the most widely used recognition algorithm and runs in $O(n^3)$. 18 | It performs well in practice, but algorithms with better asymptotic complexity are known.
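The cubic cost is visible directly in the algorithm's structure: a table over all $O(n^2)$ spans, each filled by trying $O(n)$ split points. A minimal CYK recognizer sketch in Python; the grammar used below is a hypothetical Chomsky-normal-form grammar for $a^n b^n$, not taken from the text:

```python
def cyk(word, unary, binary, start):
    """CYK recognizer for a grammar in Chomsky normal form.
    unary:  {terminal: set of nonterminals A with A -> terminal}
    binary: {(B, C): set of nonterminals A with A -> B C}"""
    n = len(word)
    if n == 0:
        return False  # the empty word needs a special S -> epsilon rule
    # table[(i, j)]: nonterminals deriving word[i:j]
    table = {(i, i + 1): set(unary.get(c, ())) for i, c in enumerate(word)}
    for length in range(2, n + 1):          # span length
        for i in range(n - length + 1):     # span start
            j = i + length
            cell = set()
            for k in range(i + 1, j):       # split point: the O(n^3) loop
                for b in table[(i, k)]:
                    for c in table[(k, j)]:
                        cell |= binary.get((b, c), set())
            table[(i, j)] = cell
    return start in table[(0, n)]

# Hypothetical CNF grammar for { a^n b^n : n >= 1 }:
# S -> A X | A B,  X -> S B,  A -> a,  B -> b
UNARY = {"a": {"A"}, "b": {"B"}}
BINARY = {("A", "X"): {"S"}, ("A", "B"): {"S"}, ("S", "B"): {"X"}}
```

The three nested position loops (length, start, split) give the $O(n^3)$ bound; the grammar-dependent inner work is a constant factor.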
19 | 20 | Parsing CFGs and multiplying 0/1 matrices are almost time-equivalent: 21 | 22 | - Valiant (1975) gives a method that, given a matrix multiplication algorithm, 23 | converts it into a parsing algorithm of the same complexity 24 | 25 | - somewhat conversely, Lee (2002) proved that any parsing algorithm in $O(n^{3-c})$ 26 | can be converted into a matrix multiplication algorithm of $O(n^{3-c/3})$ 27 | 28 | Therefore the optimal time is linked to matrix multiplication, 29 | which is still an open problem, but conjectured to have greatest 30 | lower bound 2, even though the best algorithms known are at around $O(n^{2.37})$ 31 | with huge constant terms. 32 | 33 | In practice however, CYK is still the most used algorithm as of 2014. 34 | 35 | ## Normal form 36 | 37 | ## Backus-Naur form 38 | 39 | 40 | 41 | Popular way to describe a CFG. 42 | 43 | TODO 44 | 45 | ### Extended Backus-Naur form 46 | 47 | 48 | 49 | More convenient than the BNF for practical languages, but equally powerful. 50 | 51 | Used by ANTLR. 52 | 53 | ## Ambiguity 54 | 55 | ## Inherently ambiguous languages 56 | 57 | Although some CFLs have both an ambiguous and an unambiguous representation, 58 | there are others which only have ambiguous representations. 59 | Such languages are called inherently ambiguous languages. 60 | 61 | Their existence was first proved by (1961). 62 | 63 | ## Undecidable problems 64 | 65 | There are lots of interesting ones: 66 | 67 | ### Universality 68 | 69 | Given a CFG, does it generate the language of all strings over the alphabet 70 | of terminal symbols used in its rules? 71 | 72 | Equivalence with one side fixed. 73 | 74 | ### Equivalence 75 | 76 | Given two CFGs, do they accept the same language? 77 | 78 | Decidable for regular expressions, and decidable for DPDAs. 79 | 80 | ### Language inclusion 81 | 82 | Given two CFGs, is one language included in the other? 83 | 84 | ### Inclusions on Chomsky hierarchy 85 | 86 | Given a CSG, is it a CFG?
87 | 88 | Given a CFG, does it generate a regular language? 89 | 90 | ### Ambiguity detection 91 | 92 | Given a CFG, is it ambiguous? 93 | 94 | ## Extended context-free grammar 95 | 96 | Grammar in which each right-hand side can be a regex. 97 | 98 | Same languages as context-free grammars, since regular expressions are contained in context-free grammars. 99 | 100 | Convenient because it represents well what most parsers do today: first a regex tokenization step, then parsing. 101 | 102 | ## Deterministic context-free grammar 103 | 104 | Same as the non-deterministic case, but recognized by a deterministic automaton (DPDA). 105 | -------------------------------------------------------------------------------- /src/java/Heap.java: -------------------------------------------------------------------------------- 1 | import java.io.PrintStream; 2 | import java.util.Arrays; 3 | 4 | public class Heap { 5 | 6 | public static int child1(int i) { 7 | return (2 * i) + 1; 8 | } 9 | 10 | public static int child2(int i) { 11 | return child1(i) + 1; 12 | } 13 | 14 | public static int lastParent(int[] arr) { 15 | return lastParent(arr.length); 16 | } 17 | 18 | /** Return the index for the last parent node. */ 19 | public static int lastParent(int length) { 20 | return (length / 2) - 1; 21 | } 22 | 23 | public static void heapify(int[] arr) { 24 | for (int i = lastParent(arr); i >= 0; i--) { 25 | maxHeapify(arr, i); 26 | } 27 | } 28 | 29 | public static int maxHeapifyStep(int[] arr, int i) { 30 | return maxHeapifyStep(arr, i, arr.length); 31 | } 32 | 33 | /** Make the heap property valid for a parent and its immediate children.
*/ 34 | public static int maxHeapifyStep(int[] arr, int i, int length) { 35 | int child1I = child1(i); 36 | int child2I = child2(i); 37 | int child1 = arr[child1I]; 38 | int child2; 39 | int swapWith; 40 | int swapWithI; 41 | int cur; 42 | if (child2I == length) { 43 | child2 = Integer.MIN_VALUE; 44 | } else { 45 | child2 = arr[child2I]; 46 | } 47 | cur = arr[i]; 48 | if (child1 < child2) { 49 | swapWith = child2; 50 | swapWithI = child2I; 51 | } else { 52 | swapWith = child1; 53 | swapWithI = child1I; 54 | } 55 | if (cur < swapWith) { 56 | arr[swapWithI] = cur; 57 | arr[i] = swapWith; 58 | } else { 59 | swapWithI = i; 60 | } 61 | return swapWithI; 62 | } 63 | 64 | public static void maxHeapify(int[] arr) { 65 | maxHeapify(arr, 0); 66 | } 67 | 68 | public static void maxHeapify(int[] arr, int i) { 69 | maxHeapify(arr, i, arr.length); 70 | } 71 | 72 | public static void maxHeapify(int[] arr, int i, int length) { 73 | int lastParent = lastParent(length); 74 | int lastI = -1; 75 | while ((i != lastI) && (i <= lastParent)) { 76 | lastI = i; 77 | i = maxHeapifyStep(arr, i, length); 78 | } 79 | } 80 | 81 | public static void main(String[] args) { 82 | // heapify. The ideal would be to have a `isHeap` method 83 | // since possible outputs are not unique. 
84 | { 85 | final int ins[][] = { 86 | {4, 5, 6, 2, 1, 3, 0, 7}, 87 | }; 88 | final int expected_outs[][] = { 89 | {7, 5, 6, 4, 1, 3, 0, 2}, 90 | }; 91 | for (int i = 0; i < ins.length; i++) { 92 | int out[] = ins[i].clone(); 93 | heapify(out); 94 | if (!Arrays.equals(out, expected_outs[i])) { 95 | System.err.println(Arrays.toString(ins[i])); 96 | System.err.println(Arrays.toString(expected_outs[i])); 97 | System.err.println(Arrays.toString(out)); 98 | System.exit(1); 99 | } 100 | } 101 | } 102 | } 103 | } 104 | 105 | -------------------------------------------------------------------------------- /string-algorithms.md: -------------------------------------------------------------------------------- 1 | # String algorithms 2 | 3 | Algorithms that operate on sequences of bytes. 4 | 5 | ## Terminology 6 | 7 | ### Subsequence 8 | 9 | ### Substring 10 | 11 | Substrings are contiguous, subsequences not necessarily. 12 | 13 | Substrings are therefore a particular case of subsequences. 14 | 15 | 16 | 17 | 18 | 19 | ## Data structures 20 | 21 | Data structures that are useful to solve string problems. 22 | 23 | ## Suffix tree 24 | 25 | 26 | 27 | String pre-processing that allows for efficient solution of other string problems, e.g.: 28 | 29 | - string search 30 | - find longest repeated substring 31 | - find longest common substring 32 | - find longest palindrome substring 33 | 34 | Concept introduced in 1973, but efficient algorithms for its generation are non-trivial. First linear time algorithm (thus optimal) created by Farach (1997). 35 | 36 | ### Banana example 37 | 38 | Take the input string `banana`. 39 | 40 | ### Trie 41 | 42 | 43 | 44 | TODO constructions, algorithms 45 | 46 | TODO: vs binary search tree? 47 | 48 | TODO: vs ?
DAFSA is strictly smaller, but you have to optimize the automaton: 49 | 50 | ## Famous problems 51 | 52 | ### Longest common substring 53 | 54 | 55 | 56 | ### Longest increasing subsequence 57 | 58 | 59 | 60 | ### String metrics 61 | 62 | ### String distances 63 | 64 | 65 | 66 | The length of the already implemented longest common subsequence result is a metric, although it is too local to be practically used as a metric. 67 | 68 | Subjective, as it is application-dependent: the only thing to optimize is to match "what humans think is sensible". 69 | 70 | Formal definitions set the basic operations, and what must be optimized. 71 | 72 | #### Levenshtein distance 73 | 74 | - 75 | 76 | The most famous string metric. Operations: single char addition, deletion and substitution. Dynamic programming algorithm. 77 | 78 | - 79 | 80 | ### Sequence alignment 81 | 82 | Sequence alignment algorithms seem to be string search algorithms with more complex metrics. Used in practice in bioinformatics. 83 | 84 | is a famous one. 85 | 86 | ### Longest repeated substring problem 87 | 88 | 89 | 90 | ### Shortest non-substring 91 | 92 | Of a single string: 93 | 94 | 95 | Of multiple strings: 96 | 97 | 98 | ### Approximate string matching 99 | 100 | 101 | 102 | Set of techniques to find similar strings according to some metric. 103 | -------------------------------------------------------------------------------- /heap.md: -------------------------------------------------------------------------------- 1 | # Heap 2 | 3 | Concept: 4 | 5 | Operations and complexities for each type: 6 | 7 | An efficient concrete data structure for implementing the priority queue abstract data type.
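As a concrete illustration of the priority-queue use, the operations map directly onto Python's array-backed binary heap module (`heapq`, standard library, not part of this repo's sources):

```python
import heapq

# A binary min-heap stored flat in a list: the children of index i live
# at 2*i + 1 and 2*i + 2, so no explicit pointers are needed.
items = [5, 1, 4, 2, 3]
heapq.heapify(items)             # heapify: O(n) bottom-up
smallest = heapq.heappop(items)  # delete-min: O(log n)
heapq.heappush(items, 0)         # insert: O(log n)
```

Note `heapq` is a min-heap; a max-heap is usually simulated by negating keys.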
8 | 9 | ## Operations 10 | 11 | - `find-max`: find the maximum item of a max-heap 12 | - `create-heap`: create an empty heap 13 | - `heapify`: create a heap out of a given array of elements 14 | - `delete-max`: remove the root node of a max-heap 15 | - `increase-key`: update a key within a max-heap 16 | - `insert`: add a new key to the heap 17 | - `merge`: join two heaps to form a valid new heap containing all the elements of both 18 | 19 | ## Types 20 | 21 | ### Binary heap 22 | 23 | 24 | 25 | The main operation that the `heap` does better than a BST is `find-max`. TODO vs a modified binary search tree that keeps track of the maximum and minimum on every operation that might change it? Advantages I've seen so far: 26 | 27 | - the heap can be implemented on top of an array, which saves memory, as you keep no pointers, only the raw data 28 | - heap insert is `O(1)` on average 29 | 30 | `increase-key` requires an extra index, either a map or an array: 31 | 32 | #### Binary heap vs binary tree 33 | 34 | 35 | 36 | ## Array implementation 37 | 38 | The advantage of doing so is that it uses less space than objects and pointers (no need for the pointers) 39 | 40 | It is possible to do so because: 41 | 42 | - the parent of $i$ is at $floor(i/2)$ (1-based indexing) 43 | 44 | - the children of $i$ are at $2i$ and $2i + 1$ 45 | 46 | - all operations come down to switching elements two by two. 47 | 48 | There is no operation which involves insertion at an arbitrary point of the array, which would require the costly operation of shifting all elements to the right once. 49 | 50 | ## Binary 51 | 52 | Simplest implementation. Good in practice. 53 | 54 | Operation explanation: 55 | 56 | For increase-key to work, a separate map of values to positions must be kept up to date after certain operations, and be accessible and kept up to date in time not larger than the corresponding operations.
57 | 58 | Since all operations that modify / access the map are $O(log(n))$ for the binary heap, a simple balanced binary tree will do (also $O(log(n))$). 59 | 60 | ## Pairing 61 | 62 | 63 | 64 | ## Fibonacci 65 | 66 | 1987. 67 | 68 | Best in practice for large enough problems. Theoretically good amortized times. 69 | 70 | The only operation that has $O(log(n))$ amortized time is deleting the root; the others are $O(1)$. 71 | 72 | ## Brodal 73 | 74 | 1996. 75 | 76 | 77 | 78 | Best theoretical asymptotic worst cases. Not useful in practice. 79 | -------------------------------------------------------------------------------- /src/cpp/make_change.cpp: -------------------------------------------------------------------------------- 1 | #include "common.hpp" 2 | 3 | void vector_sum(const std::vector<int>& v0, const std::vector<int>& v1, 4 | std::vector<int>& output) { 5 | output.resize(v0.size()); 6 | for (std::vector<int>::size_type i = 0; i < v0.size(); ++i) 7 | output[i] = v0[i] + v1[i]; 8 | } 9 | 10 | /** 11 | Solve the change making problem via dynamic programming. 12 | 13 | Worst case time complexity: $O(total^2 + total * coin_values.size())$: the split loop below does $\sum_{subtotal} subtotal/2 = O(total^2)$ constant-time iterations, plus one $O(coin_values.size())$ vector_sum per subtotal. 14 | 15 | Given a value `total`, choose the minimum amount of coins 16 | with one of the values inside `coin_values` that sums up to exactly `total`. 17 | 18 | @param[in] coin_values The value of each type of coin. Must be already ordered. 19 | All values must be positive. 20 | @param[in] total The total value that must be reached with the coins. 21 | Must be positive. 22 | @param[out] output The solution, that is, how many of each type of coin is needed to attain the total. 23 | 24 | If the total is not attainable, this container shall be empty.
25 | */ 26 | void make_change(const std::vector<int>& coin_values, int total, std::vector<int>& output) { 27 | std::vector<bool> possible(total + 1, false); 28 | std::vector<int> coin_counts(total + 1, std::numeric_limits<int>::max()); 29 | std::vector<std::vector<int>> solutions(total + 1, std::vector<int>(coin_values.size(), 0)); 30 | possible[0] = true; 31 | coin_counts[0] = 0; 32 | for (std::vector<int>::size_type i = 0; i < coin_values.size(); ++i) { 33 | int coin_value = coin_values[i]; 34 | possible[coin_value] = true; 35 | solutions[coin_value][i] = 1; 36 | coin_counts[coin_value] = 1; 37 | } 38 | for (int subtotal = 1; subtotal <= total; ++subtotal) { 39 | int min_coin_count = coin_counts[subtotal]; 40 | int best_first, best_second; 41 | bool cur_possible = false; 42 | for (int first = 0; first <= subtotal / 2; ++first) { 43 | int second = subtotal - first; 44 | if (possible[first] && possible[second]) { 45 | int new_coin_count = coin_counts[first] + coin_counts[second]; 46 | if (new_coin_count < min_coin_count) { 47 | best_first = first; 48 | best_second = second; 49 | min_coin_count = new_coin_count; 50 | cur_possible = true; 51 | } 52 | } 53 | } 54 | if (cur_possible) { 55 | possible[subtotal] = true; 56 | coin_counts[subtotal] = coin_counts[best_first] 57 | + coin_counts[best_second]; 58 | vector_sum(solutions[best_first], solutions[best_second], solutions[subtotal]); 59 | } 60 | } 61 | output = solutions[total]; 62 | } 63 | 64 | int main() { 65 | typedef std::tuple<std::vector<int>, 66 | int, 67 | std::vector<int>> InOut; 68 | 69 | InOut in_outs[]{ 70 | InOut{ 71 | {1, 3, 4}, 72 | 6, 73 | {0, 2, 0} 74 | }, 75 | InOut{ 76 | {1, 3, 4, 7, 11, 24}, 77 | 1731, 78 | {0, 1, 0, 0, 0, 72} 79 | }, 80 | }; 81 | for (auto& in_out : in_outs) { 82 | auto& coin_values = std::get<0>(in_out); 83 | auto& total = std::get<1>(in_out); 84 | auto& expected_output = std::get<2>(in_out); 85 | std::vector<int> output; 86 | make_change(coin_values, total, output); 87 | assert(output == expected_output); 88 | } 89 | } 90 |
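The program above builds each subtotal by splitting it into two smaller attainable subtotals. A more common formulation adds one coin at a time, which drops the quadratic-in-total split loop; a Python sketch (hypothetical helper, not part of the repo's sources), checked against the first test vector above:

```python
def make_change(coin_values, total):
    """Minimum-coin change via 1-D DP, O(total * len(coin_values)).
    Returns a per-coin-type count vector, or None when unattainable."""
    INF = float("inf")
    best = [0] + [INF] * total   # best[t]: fewest coins summing to t
    last = [None] * (total + 1)  # last[t]: coin index last used to reach t
    for t in range(1, total + 1):
        for i, value in enumerate(coin_values):
            if value <= t and best[t - value] + 1 < best[t]:
                best[t] = best[t - value] + 1
                last[t] = i
    if best[total] == INF:
        return None
    # Walk the parent pointers back to reconstruct the coin counts.
    counts = [0] * len(coin_values)
    t = total
    while t > 0:
        i = last[t]
        counts[i] += 1
        t -= coin_values[i]
    return counts
```

`make_change([1, 3, 4], 6)` reproduces the `{0, 2, 0}` solution of the C++ test data.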
-------------------------------------------------------------------------------- /dijkstra.md: -------------------------------------------------------------------------------- 1 | # Dijkstra 2 | 3 | 4 | 5 | Find the shortest path between two given nodes. 6 | 7 | Only works if all weights are non-negative. If negative weights are possible, consider using the [Bellman-Ford algorithm](http://en.wikipedia.org/wiki/Bellman%E2%80%93Ford_algorithm). 8 | 9 | ## Dijkstra complexity 10 | 11 | Time worst case: $O(|E| + |V| log |V|)$, so $O(|V|^2)$ on a dense graph. 12 | 13 | In the worst case we have a dense graph and we must visit all the nodes, giving: 14 | 15 | - `FOR_VERTEX`: for each vertex ($|V|$): 16 | 17 | - `MARK_VISITED`: mark the vertex as visited. 18 | 19 | - `FOR_NEIGHBOUR`: for each neighbour: $O(|E|/|V|)$ on average, $|V|$ for a dense graph. 20 | 21 | - `UPDATE_WEIGHT`: update the weights of that neighbour 22 | 23 | - `FIND_MIN`: find the minimum adjacent element to determine the next vertex 24 | 25 | Let's consider two data structure possibilities for storing node distances and whether each node was visited: 26 | 27 | - unordered array 28 | - min heap 29 | 30 | ### Unordered array 31 | 32 | Vertexes and their distances are stored in an array ordered by their index. 33 | 34 | - `MARK_VISITED` is done in $O(1)$: just set the corresponding array element visited field to true. 35 | - `UPDATE_WEIGHT` can be done in time $O(1)$ if the vertexes are on an unordered array. 36 | - `FIND_MIN` takes $O(|V|)$ since the elements are not ordered by distance 37 | 38 | Total time: 39 | 40 | $$ 41 | FOR_VERTEX * ( MARK_VISITED + ( FOR_NEIGHBOUR * UPDATE_WEIGHTS ) + FIND_MIN ) = 42 | V * ( 1 + ( V * 1 ) + V ) = 43 | V^2 44 | $$ 45 | 46 | operations. 47 | 48 | ### Min heap 49 | 50 | We order the vertexes on a min heap that takes the distance into account. 51 | 52 | The complexity will depend on the type of min heap used.
We consider here the two most common and efficient min heaps for practical data sizes: 53 | 54 | - binary heap 55 | - Fibonacci heap 56 | 57 | TODO get this right 58 | 59 | - `MARK_VISITED`: means that we have to remove the root element from the min heap: 60 | 61 | - Binary heap: `log` 62 | - Fibonacci heap: `n` worst case, `log` amortized 63 | 64 | - `UPDATE_WEIGHT`: this critical operation depends on the type of min heap used. 65 | 66 | - Binary heap: `log` 67 | - Fibonacci heap: `log` worst case, `1` amortized 68 | 69 | - `FIND_MIN`: always $O(1)$ since we are using heaps 70 | 71 | Therefore for the binary heap: 72 | 73 | $$ 74 | FOR_VERTEX * ( MARK_VISITED + ( FOR_NEIGHBOUR * UPDATE_WEIGHTS ) + FIND_MIN ) = 75 | V * ( log(V) + ( V * log(V) ) + 1 ) = 76 | V^2 log(V) 77 | $$ 78 | 79 | and for Fibonacci amortized time: 80 | 81 | $$ 82 | FOR_VERTEX * ( MARK_VISITED + ( FOR_NEIGHBOUR * UPDATE_WEIGHTS ) + FIND_MIN ) = 83 | V * ( log(V) + ( V * 1 ) + 1 ) = 84 | V^2 85 | $$ 86 | 87 | ## Choice of data structure 88 | 89 | The final choice of data structure will depend on the expected density of the graph: 90 | 91 | - if the graph is expected to be dense, use an unordered array, as it has the best worst-case time for that case, while a Fibonacci heap offers only the same bound in amortized time 92 | 93 | - if the graph is known to be sparse, $degree(v) <<< V/log(V)$, then the heap approach starts being better. 94 | 95 | - If amortized time can be taken into account and it is not clear if the graph is dense or not, the Fibonacci heap implementation is the best option, as it: 96 | 97 | - works as well as the unordered array implementation for dense graphs 98 | - works better if the graph is not dense (often the case).
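The binary-heap variant of the analysis above can be sketched with Python's `heapq`. Since `heapq` exposes no decrease-key, this sketch (an implementation choice of the sketch, not stated in the text) pushes a fresh entry on every improvement and lazily skips stale ones:

```python
import heapq

def dijkstra(graph, source):
    """graph: {node: [(neighbour, weight), ...]}, weights non-negative.
    Returns the map of shortest distances to nodes reachable from source."""
    dist = {source: 0}
    heap = [(0, source)]  # (distance, node); FIND_MIN is the heap root
    done = set()
    while heap:
        d, u = heapq.heappop(heap)  # MARK_VISITED: remove the root
        if u in done:
            continue  # stale entry: u was already finalized with smaller d
        done.add(u)
        for v, w in graph.get(u, ()):   # FOR_NEIGHBOUR
            nd = d + w
            if nd < dist.get(v, float("inf")):
                dist[v] = nd
                heapq.heappush(heap, (nd, v))  # UPDATE_WEIGHT: O(log) push
    return dist
```

Lazy deletion keeps the asymptotics of the binary-heap analysis (each edge causes at most one $O(log)$ push) at the cost of a heap that can hold $O(|E|)$ entries.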
99 | 100 | -------------------------------------------------------------------------------- /design-patterns.md: -------------------------------------------------------------------------------- 1 | # Design patterns 2 | 3 | - 4 | 5 | - 6 | 7 | - huge list, C++ 8 | 9 | - : site on both UML and design patterns 10 | 11 | - creation 12 | 13 | - abstract factory : select from an entire family of abstract classes (one per OS for example) 14 | 15 | - factory 16 | 17 | 18 | 19 | make specific derived classes based on runtime information 20 | 21 | ``static Base make_object(enum type)`` method that returns an instance of a given type 22 | 23 | application : factor out the selection from enum into lib 24 | 25 | - prototype 26 | 27 | abstract ``virtual Class* clone(){ return new Class(*this); }`` method 28 | 29 | TODO why use this? 30 | 31 | - singleton : self-evident. Application: hold configuration. 32 | 33 | - object pool 34 | 35 | 36 | 37 | instead of dynamically destroying/creating objects, ``acquire`` and ``release`` them for later use 38 | 39 | performance only 40 | 41 | - structural 42 | 43 | - adapter 44 | 45 | 46 | 47 | convert one class interface into another. 48 | 49 | new class contains old. 50 | 51 | - bridge TODO ?
52 | 53 | - composite 54 | 55 | 56 | 57 | treat leaves and inner nodes uniformly 58 | 59 | call on inner node propagates call down to all leaves 60 | 61 | - decorator 62 | 63 | lots of classes have a given ``do_it`` method 64 | 65 | pattern makes a linked list of those classes, calling do it on all of them 66 | when the first is called 67 | 68 | the linked list can receive any combination of nodes, and a call to 69 | the first calls all the elements 70 | 71 | - facade 72 | 73 | 74 | 75 | simple single interface for lots of classes 76 | 77 | - flyweight 78 | 79 | 80 | 81 | separates shared state from individual state 82 | 83 | memory performance only 84 | 85 | - private class data 86 | 87 | 88 | 89 | prevent attribute modification from ``class Main`` 90 | by putting them into ``class Data`` with getters 91 | 92 | - proxy 93 | 94 | 95 | 96 | a proxy interface is used instead of the real interface 97 | 98 | the proxy interface behaves differently from the real interface 99 | by doing additional actions 100 | 101 | - behaviour 102 | 103 | - strategy : abstract method to vary algorithm 104 | - template : same as strategy, but change only part of an algorithm 105 | 106 | - UML 107 | 108 | - class diagram 109 | 110 | - name 111 | 112 | - members 113 | 114 | relations (member that is a list of other objects): 115 | 116 | - bi 117 | - uni 118 | - association class 119 | - basic aggregation (car wheel) 120 | - composition (company department) 121 | - reflexive 122 | 123 | - methods 124 | 125 | - inheritance 126 | 127 | - interfaces 128 | 129 | - visibility 130 | 131 | - packages 132 | 133 | - object diagram 134 | 135 | - same as class, but with instances instead 136 | -------------------------------------------------------------------------------- /src/cpp/interactive/sum_array_parallel.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9
| #include 10 | 11 | typedef uint64_t DataType; 12 | 13 | // Single threaded array, iterator version. 14 | template 15 | typename T::value_type sum_array( 16 | typename T::const_iterator begin, 17 | typename T::const_iterator end 18 | ) { 19 | typename T::value_type sum = 0; 20 | while (begin != end) { 21 | sum += *begin; 22 | begin++; 23 | } 24 | return sum; 25 | } 26 | 27 | // Single threaded array sum, container version. 28 | template 29 | typename T::value_type sum_array(const T& array) { 30 | return sum_array( 31 | array.cbegin(), 32 | array.cend() 33 | ); 34 | } 35 | 36 | // Multi threaded array sum. 37 | template 38 | typename T::value_type sum_array_parallel( 39 | const T& array, 40 | unsigned int nthreads 41 | ) { 42 | auto size = array.size(); 43 | auto actual_nthreads = std::min( 44 | size, 45 | (decltype(array.size()))nthreads 46 | ); 47 | auto delta = size / actual_nthreads; 48 | auto it = array.cbegin(); 49 | typename T::value_type sum = 0; 50 | 51 | std::vector> futures; 52 | for (decltype(actual_nthreads) i = 0; i < actual_nthreads; ++i) { 53 | futures.push_back(std::async( 54 | std::launch::async, 55 | [it, delta]{return sum_array(it, it + delta);} 56 | )); 57 | #if 0 58 | // TODO why this doesn't this syntax work? I can't be bothered right now. 
59 | futures.push_back(std::async( 60 | std::launch::async, 61 | sum_array, 62 | it, 63 | it + delta 64 | )); 65 | #endif 66 | it += delta; 67 | } 68 | for (auto& future : futures) { 69 | sum += future.get(); 70 | } 71 | return sum + sum_array(it, array.cend()); 72 | } 73 | 74 | void print_result(unsigned int nthreads, std::chrono::duration dt) { 75 | std::cout 76 | << std::setprecision(4) 77 | << std::fixed 78 | << nthreads 79 | << " " 80 | << std::chrono::duration_cast>>(dt).count() 81 | << std::endl 82 | ; 83 | } 84 | 85 | int main(int argc, char **argv) { 86 | unsigned long long array_size; 87 | if (argc > 1) { 88 | array_size = std::strtoll(argv[1], NULL, 10); 89 | } else { 90 | array_size = 10; 91 | } 92 | 93 | // Initialize array with random numbers. 94 | std::vector array(array_size); 95 | std::mt19937_64 rng(std::random_device{}()); 96 | std::uniform_int_distribution dist( 97 | 0, 98 | std::numeric_limits::max() 99 | ); 100 | for (auto& i : array) { 101 | i = dist(rng); 102 | } 103 | 104 | // Single threaded sanity check. 105 | auto start = std::chrono::steady_clock::now(); 106 | auto serial_result = sum_array(array); 107 | auto end = std::chrono::steady_clock::now(); 108 | print_result(0, end - start); 109 | 110 | // Use different number of threads. 111 | auto max_nthreads = std::thread::hardware_concurrency() * 2; 112 | for (decltype(max_nthreads) nthreads = 1; nthreads <= max_nthreads; ++nthreads) { 113 | auto start = std::chrono::steady_clock::now(); 114 | auto result = sum_array_parallel(array, nthreads); 115 | auto end = std::chrono::steady_clock::now(); 116 | print_result(nthreads, end - start); 117 | // Sanity check that our implementation is correct. 118 | assert(result == serial_result); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/cpp/longest_common_subsequence.cpp: -------------------------------------------------------------------------------- 1 | #include // TODO why required? 
2 | #include <cassert> 3 | #include <iostream> // cout, endl 4 | #include <tuple> 5 | #include <vector> 6 | 7 | /** 8 | Calculate the [longest common subsequence](https://en.wikipedia.org/wiki/Longest_common_subsequence_problem) 9 | between two strings. 10 | 11 | The common subsequence does not need to be contiguous. 12 | 13 | @param[in] input0, input1 The input strings. 14 | @param[out] output The longest common subsequence found. 15 | 16 | In case there are multiple possible outputs, it is unspecified which one is returned. 17 | 18 | @tparam T The data type of the values of each string. Must implement `==`. 19 | */ 20 | template<typename T> 21 | void longest_common_subsequence( 22 | const std::vector<T>& input0, 23 | const std::vector<T>& input1, 24 | std::vector<typename std::vector<T>::size_type>& output) { 25 | typedef typename std::vector<T>::size_type SizeType; 26 | typedef char PreviousDirectionType; 27 | const PreviousDirectionType kUp = 0; 28 | const PreviousDirectionType kLeft = 1; 29 | const PreviousDirectionType kDiag = 2; 30 | SizeType input0_size = input0.size(); 31 | SizeType input1_size = input1.size(); 32 | std::vector<std::vector<PreviousDirectionType>> previous( 33 | input0_size + 1, std::vector<PreviousDirectionType>(input1_size + 1)); 34 | std::vector<SizeType> length_cur(input1_size + 1, 0); 35 | std::vector<SizeType> length_prev(input1_size + 1, 0); 36 | SizeType i, j; 37 | for (i = 1; i <= input0_size; ++i) { 38 | for (j = 1; j <= input1_size; ++j) { 39 | if (input0[i - 1] == input1[j - 1]) { 40 | previous[i][j] = kDiag; 41 | length_cur[j] = length_prev[j - 1] + 1; 42 | } else { 43 | if (length_cur[j - 1] < length_prev[j]) { 44 | previous[i][j] = kUp; 45 | length_cur[j] = length_prev[j]; 46 | } else { 47 | previous[i][j] = kLeft; 48 | length_cur[j] = length_cur[j - 1]; 49 | } 50 | } 51 | } 52 | length_prev = length_cur; 53 | } 54 | output = std::vector<typename std::vector<T>::size_type>(length_cur[input1_size]); 55 | i = input0_size; 56 | j = input1_size; 57 | auto it = output.rbegin(); 58 | while (it != output.rend()) { 59 | switch (previous[i][j]) { 60 | case kLeft: { 61 | --j; 62 | break; 63 | } 64 | case kUp: { 65 | --i; 66 | break; 67 | } 68 | case kDiag: { 69 | *it = input0[i - 1]; 70 |
++it; 71 | --i; 72 | --j; 73 | break; 74 | } 75 | } 76 | } 77 | } 78 | 79 | int main() { 80 | typedef int InputType; 81 | typedef std::vector<std::vector<InputType>::size_type> OutputType; 82 | typedef std::tuple<std::vector<InputType>, 83 | std::vector<InputType>, 84 | OutputType> IO; 85 | IO in_outs[]{ 86 | IO{ 87 | {0}, 88 | {0}, 89 | {0}, 90 | }, 91 | IO{ 92 | {0}, 93 | {1}, 94 | {}, 95 | }, 96 | IO{ 97 | {2, 0, 1}, 98 | {0, 2, 1, 0, 3}, 99 | {0, 1}, 100 | }, 101 | }; 102 | OutputType output; 103 | for (auto& in_out : in_outs) { 104 | auto& input0 = std::get<0>(in_out); 105 | auto& input1 = std::get<1>(in_out); 106 | auto& expected_output = std::get<2>(in_out); 107 | longest_common_subsequence(input0, input1, output); 108 | assert(output == expected_output); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/cpp/longest_increasing_subsequence.cpp: -------------------------------------------------------------------------------- 1 | #include "common.hpp" 2 | 3 | /** 4 | Computes the longest increasing subsequence of the given input. 5 | 6 | The found sequence does not need to be contiguous. For example, if the input is: 7 | 8 | 1 0 2 9 | 10 | Then the longest increasing subsequence is: 11 | 12 | 1 2 13 | 14 | Time complexity: $O(n * log(n))$, $n = input.size()$ 15 | 16 | Memory complexity: $O(n)$, $n = input.size()$ extra memory excluding the input. 17 | 18 | @param[in] input The input array in which the longest increasing subsequence must be found. 19 | @param[out] output The indexes of the longest increasing subsequence found. 20 | 21 | In case that there are multiple possible outputs, the algorithm will choose the one that ends in the smallest 22 | possible value. 23 | 24 | If there are still multiple solutions, the algorithm will choose the one that ends with the smallest index. 25 | 26 | @tparam T The data type of the values. 27 | 28 | # Implementation notes 29 | 30 | Based on the pseudo code found at 31 | on 28/09/2013.
The actual implementation is original. 32 | 33 | This can be classified as a dynamic programming algorithm, since it relies on the solutions of subproblems 34 | to solve the larger problem, and stores the sub solutions for later use. 35 | */ 36 | template 37 | void longest_increasing_subsequence(const std::vector& input, 38 | std::vector::size_type>& output) { 39 | typedef typename std::vector::size_type IndexType; 40 | std::vector M(input.size(), 0); // M[j] = index of the smallest possible last element of the longest subsequence of length j - 1. 41 | std::vector P(input.size(), 0); // P[i] = index of the predecessor of element `i`. If the element is the first then this value has no meaning and must never be used. 42 | IndexType i; 43 | T L, j; 44 | L = 1; // Length of the longest sequence so far. 45 | for (i = 1; i < input.size(); ++i) { 46 | auto it = std::lower_bound(M.begin(), M.begin() + L, i, 47 | [&input](const IndexType& a, const IndexType& b){ return input[a] < input[b]; } ); 48 | j = it - M.begin(); 49 | if (j == 0) { // This is the new best sequence of length one. 50 | M[0] = i; 51 | // We will never use its predecessor, so no need to touch P. 52 | // The length did not increase, so no need to touch L. 
53 | } else { 54 | P[i] = M[j - 1]; 55 | if ( j == L || input[i] < input[M[j]] ) { 56 | M[j] = i; 57 | L = std::max(L, j + 1); 58 | } 59 | } 60 | } 61 | output = std::vector<IndexType>(L); 62 | i = M[L - 1]; 63 | for (auto it = output.rbegin(); it != output.rend(); ++it) { 64 | *it = i; 65 | i = P[i]; 66 | } 67 | } 68 | 69 | int main() { 70 | typedef int InputType; 71 | typedef std::vector<std::vector<InputType>::size_type> OutputType; 72 | typedef std::tuple<std::vector<InputType>, OutputType> IO; 73 | IO in_outs[]{ 74 | IO{ 75 | {0}, 76 | {0}, 77 | }, 78 | IO{ 79 | {0, 1}, 80 | {0, 1}, 81 | }, 82 | IO{ 83 | {1, 0}, 84 | { 1}, 85 | }, 86 | IO{ 87 | {2, 0, 1}, 88 | { 1, 2}, 89 | }, 90 | IO{ 91 | {0, 2, 1}, 92 | {0, 2}, 93 | }, 94 | IO{ 95 | {1, -1, 2, 0, 1, 5, 5, 2, 3}, 96 | { 1, 3, 4, 7, 8}, 97 | }, 98 | IO{ 99 | {3, 2, 6, 4, 5, 1}, 100 | { 1, 3, 4 }, 101 | }, 102 | }; 103 | OutputType output; 104 | for (auto& in_out : in_outs) { 105 | auto& input = std::get<0>(in_out); 106 | auto& expected_output = std::get<1>(in_out); 107 | longest_increasing_subsequence(input, output); 108 | assert(output == expected_output); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /p-vs-np.md: -------------------------------------------------------------------------------- 1 | # P vs NP 2 | 3 | Problems can be divided into two categories according to how the time and space needed to solve them grow with the input size: 4 | 5 | - polynomial 6 | - non-polynomial: growth larger than any polynomial 7 | 8 | The division is interesting because non-polynomial problems are *MUCH* harder to solve than polynomial ones. 9 | 10 | Just consider the fact that on an exponential problem with $2^x$ complexity, increasing the problem size `x` by one *doubles* the time it takes to solve the problem. 11 | 12 | ## P 13 | 14 | Problem can be solved by a Turing machine in polynomial time. 15 | 16 | ## NP 17 | 18 | Contains P. 19 | 20 | Problem solution can be verified, but not found, by a Turing machine in polynomial time.
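To make the verification asymmetry concrete, here is a small Python sketch of a polynomial-time verifier for the boolean satisfiability problem (the DIMACS-style signed-integer encoding below is an illustrative choice): checking a candidate assignment is linear in the formula size, while no polynomial algorithm is known for finding one.

```python
def verify_sat(clauses, assignment):
    """Check a candidate assignment against a CNF formula.

    clauses: list of clauses; each clause is a list of ints, where k means
    "variable k is true" and -k means "variable k is false" (1-based).
    assignment: dict mapping variable number -> bool.

    Runs in time linear in the total number of literals.
    """
    return all(
        any(assignment[abs(lit)] == (lit > 0) for lit in clause)
        for clause in clauses
    )

# (x1 or not x2) and (x2 or x3)
clauses = [[1, -2], [2, 3]]
assert verify_sat(clauses, {1: True, 2: True, 3: False})
assert not verify_sat(clauses, {1: False, 2: False, 3: False})
```

Finding a satisfying assignment, by contrast, is only known to be doable by trying exponentially many candidates in the worst case.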
21 | 22 | For this reason, we want to believe that maybe they are easier to solve than other problems which we can't even check in polynomial time. 23 | 24 | However, up to now, many NP problems can only be solved in exponential time, even though we can check them quickly. 25 | 26 | Example of NP problem: boolean satisfiability problem. 27 | 28 | Non-example of NP problem: TSP. Even if we are given a solution, we cannot be sure that it is the optimum! This is often the case for optimization problems. 29 | 30 | ## NP-complete 31 | 32 | Contained in, but not equal to, NP. 33 | 34 | It can be proven that all NP problems can be reduced to certain NP problems (the NP-complete ones) in polynomial time. 35 | 36 | Not all NP problems however are NP-complete. 37 | 38 | ## Meaning of P vs NP 39 | 40 | If a single NP-complete problem is proven to be in P, then all NP problems are also polynomial, and so P = NP. 41 | 42 | This is what makes the P = NP question so interesting: if a single NP-complete problem is ever solved in P, every other NP problem can also be solved in P, and so P = NP. 43 | 44 | - if P = NP, every NP problem can be solved in polynomial time. 45 | 46 | Since endless human hours have been spent searching for P algorithms for many NP-complete problems, it is widely believed that being NP-complete means that a problem cannot be solved in polynomial time. 47 | 48 | - If P != NP, then we will be sure that all NP-complete problems really are hard, and that it is useless to search for P algorithms for them. 49 | 50 | Important practical problems that are known to be NP-complete: 51 | 52 | - knapsack problem (discrete linear programming) 53 | - traveling salesman problem (discrete linear programming) 54 | 55 | ## NP-hard 56 | 57 | Problems such that there is an NP-complete problem that reduces to them in polynomial time. 58 | 59 | That means that if an NP-hard problem can be solved in polynomial time, then an NP-complete problem can also be solved in polynomial time, and `P = NP`.
60 | 61 | All NP-complete problems are also NP-hard, but some NP-hard problems are not NP-complete. 62 | 63 | Implications of P = NP for NP-hard problems: 64 | 65 | - if P != NP, then no NP-hard problem can be solved in polynomial time. 66 | 67 | - if P == NP, then the NP-hard problems which are also NP-complete can be solved in polynomial time, but we know nothing about the NP-hard problems which are not NP-complete. 68 | 69 | ### Decision vs optimization 70 | 71 | It is very common to have optimization problems which are NP-hard, and for which the decision problem is NP-complete. 72 | 73 | For example, in TSP, the decision problems are: 74 | 75 | - is there a TSP path with cost at most 9? 76 | - is there a TSP path with cost at most 10? 77 | - ... 78 | 79 | all of which are known to be NP-complete. 80 | 81 | The optimization version of those problems is therefore immediately NP-hard, since if we knew that the optimum path has cost 5, we would immediately know that there is at least one path with cost at most 9 or 10 (the path with cost 5), but no path with cost 4 (otherwise that would be the optimum). 82 | 83 | ## Other similar questions 84 | 85 | Similar questions exist for many other related complexity classes. See: 86 | 87 | ![Complexity Space Inclusion](complexity-space-inclusion.svg) 88 | 89 | ### LSPACE 90 | 91 | 92 | 93 | Logarithmic space. 94 | 95 | ### NL 96 | 97 | 98 | 99 | Logarithmic space by non-deterministic Turing machine. 100 | -------------------------------------------------------------------------------- /src/cpp/knapsack01.cpp.off: -------------------------------------------------------------------------------- 1 | #include <algorithm> // TODO why required? 2 | #include <cassert> 3 | #include <iostream> // cout, endl 4 | #include <tuple> 5 | #include <vector> 6 | 7 | /** 8 | Solves the 0-1 knapsack problem via dynamic programming.
9 | 10 | Time complexity: $O(max_weight * weights.size())$ 11 | Memory complexity: $O(max_weight * weights.size())$ 12 | 13 | All inputs must contain only positive integer types. 14 | 15 | The input is expected to be normalized beforehand, that is, 16 | all values, weights and the maximum weight must have already been divided 17 | by the GCD of all of those values, or this algorithm will be very memory inefficient. 18 | 19 | @param[in] weights Weight of each item. 20 | @param[in] values Value of each item. 21 | @param[in] max_weight Maximum weight to be carried. 22 | @param[out] output Modified to contain one of the sets of element indexes that reaches the maximum value. 23 | 24 | If this container is ordered, no sorting shall be done on it, and the item order is unspecified. 25 | 26 | If more than one optimal solution exists, it is not specified which one shall be returned. 27 | 28 | @param[out] output_value The maximum value attained by the solution. 29 | 30 | @tparam WEIGHT data type of the weights 31 | @tparam VALUE data type of the values 32 | 33 | # Implementation notes 34 | 35 | TODO is it possible to not keep the 2D accepted_items, thus reducing memory to n instead of n^2?
36 | */ 37 | template<typename WEIGHT, typename VALUE> 38 | void knapsack01(const std::vector<WEIGHT>& weights, 39 | const std::vector<VALUE>& values, 40 | WEIGHT max_weight, 41 | std::vector<typename std::vector<WEIGHT>::size_type>& output, 42 | VALUE& output_value) { 43 | typename std::vector<WEIGHT>::size_type number_items = weights.size(); 44 | std::vector<VALUE> cur_line(max_weight + 1); 45 | std::vector<VALUE> last_line(max_weight + 1); 46 | std::vector<std::vector<bool>> accepted_items(number_items + 1, 47 | std::vector<bool>(max_weight + 1)); 48 | for (typename std::vector<WEIGHT>::size_type i = 1; i <= number_items; ++i ) { 49 | for (WEIGHT w = 1; w <= max_weight; ++w ) { 50 | if (weights[i - 1] <= w) { 51 | VALUE new_optimum_if_accept = values[i - 1] + last_line[w - weights[i - 1]]; 52 | if (new_optimum_if_accept > last_line[w]) { 53 | accepted_items[i][w] = true; 54 | cur_line[w] = new_optimum_if_accept; 55 | continue; 56 | } 57 | } 58 | cur_line[w] = last_line[w]; 59 | } 60 | last_line = cur_line; 61 | } 62 | output_value = cur_line[max_weight]; 63 | // Generate the solution. Item `i - 1` corresponds to table row `i`. 64 | for (typename std::vector<WEIGHT>::size_type i = number_items; i > 0; --i) { 65 | if (accepted_items[i][max_weight]) { 66 | output.push_back(i - 1); 67 | max_weight -= weights[i - 1]; 68 | } 69 | } 70 | } 71 | 72 | int main() { 73 | typedef std::tuple<std::vector<int>, 74 | std::vector<int>, 75 | int, 76 | std::vector<std::vector<int>::size_type> 77 | > InOut; 78 | InOut in_outs[]{ 79 | InOut{ 80 | {1, 2, 3 }, 81 | {6, 10, 12}, 82 | 5, 83 | {1, 2} 84 | }, 85 | InOut{ 86 | {1, 2, 3, 4 , 5 }, 87 | {3, 8, 7, 10, 14}, 88 | 10, 89 | {1, 2, 4} 90 | }, 91 | }; 92 | for (auto& in_out : in_outs) { 93 | auto& weights = std::get<0>(in_out); 94 | auto& values = std::get<1>(in_out); 95 | auto& max_weight = std::get<2>(in_out); 96 | auto& expected_output = std::get<3>(in_out); 97 | std::vector<std::vector<int>::size_type> output; 98 | int output_value; 99 | knapsack01(weights, values, max_weight, output, output_value); 100 | std::sort(output.begin(), output.end()); 101 | std::sort(expected_output.begin(), expected_output.end()); 102 | assert(output == expected_output); 103 | } 104 | }
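The TODO in the implementation notes above asks whether the 2D `accepted_items` table can be avoided. If only the optimal value is needed, and not the chosen item set, a single 1D row suffices; a Python sketch of that variant (illustrative, not part of the C++ sources):

```python
def knapsack01_value(weights, values, max_weight):
    """0-1 knapsack optimal value with O(max_weight) extra memory.

    Iterating the weights downwards ensures each item is used at most once,
    so the previous row and the current row can share one array.
    """
    best = [0] * (max_weight + 1)
    for wt, val in zip(weights, values):
        for w in range(max_weight, wt - 1, -1):
            best[w] = max(best[w], best[w - wt] + val)
    return best[max_weight]

# Same instances as the C++ tests: optimal values 22 and 29.
assert knapsack01_value([1, 2, 3], [6, 10, 12], 5) == 22
assert knapsack01_value([1, 2, 3, 4, 5], [3, 8, 7, 10, 14], 10) == 29
```

Reconstructing which items were chosen, however, needs the per-item decision table (or a second backward pass), which is why the C++ version keeps the 2D structure.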
105 | -------------------------------------------------------------------------------- /src/java/KnuthMorrisPratt.java: -------------------------------------------------------------------------------- 1 | import java.util.Arrays; 2 | import java.util.Collections; 3 | 4 | import lib.Searcher; 5 | import lib.StringSearch; 6 | 7 | public class KnuthMorrisPratt implements Searcher { 8 | private int[] pattern; 9 | 10 | /** 11 | Skip table generated at preprocessing time. 12 | 13 |

This version of the algorithm uses forward skips instead of backtracking distances, 14 | as that is more direct to view on the search. 15 | 16 |

The disadvantages are: 17 | 18 |

    19 |
  • the backtrack representation has lots of zeroes, 20 | so it is more direct to represent it with a sparse array. 21 | 22 |
  • the backtrack calculation is more elegant, as you can do `currentMatch = skip[currentMatch]` 23 |
24 | 25 |

but I'd rather have an elegant search loop than an elegant skip table. 26 | */ 27 | private int[] skip; 28 | 29 | // With an all ones skip array, we fall back to the naive search. 30 | public void preProcessNaive(int[] pattern) { 31 | final int patternLength = pattern.length; 32 | this.skip = new int[patternLength]; 33 | for (int i = 0; i < patternLength; i++) 34 | this.skip[i] = 1; 35 | } 36 | 37 | /** 38 | Naive pre-processor calculation that checks all suffixes. 39 |

40 | This has 3 nested loops because it does not reuse previously 41 | calculated information to calculate new table positions. 42 | */ 43 | public void preProcessNaiveSkip(int[] pattern) { 44 | this.pattern = pattern; 45 | final int patternLength = pattern.length; 46 | this.skip = new int[patternLength]; 47 | for (int i = 0; i < patternLength; i++) { 48 | int j; 49 | findprefix: 50 | for (j = 1; j < i; j++) { 51 | final int substringLength = i - j; 52 | for (int k = 0; k < substringLength; k++) { 53 | if (pattern[k] != pattern[j + k]) { 54 | continue findprefix; 55 | } 56 | } 57 | break; 58 | } 59 | this.skip[i] = j; 60 | } 61 | } 62 | 63 | /** 64 | Dynamic programming preprocessor version in O(n). 65 |

66 | Uses previously calculated skip table values to calculate new values. 67 | */ 68 | public void preProcess(int[] pattern) { 69 | this.pattern = pattern; 70 | final int patternLength = pattern.length; 71 | this.skip = new int[patternLength]; 72 | int i; 73 | // i == 0 is a special case: there is nothing to compare with. 74 | int currentMatch = 0; 75 | if (patternLength > 0) { 76 | this.skip[0] = 1; 77 | if (patternLength > 1) 78 | this.skip[1] = 1; 79 | } 80 | for (i = 2; i < patternLength; i++) { 81 | while (true) { 82 | if (pattern[currentMatch] == pattern[i - 1]) { 83 | currentMatch++; 84 | break; 85 | } 86 | if (currentMatch == 0) 87 | break; 88 | currentMatch -= this.skip[currentMatch]; 89 | } 90 | this.skip[i] = i - currentMatch; 91 | } 92 | } 93 | 94 | public int search(int[] text) { 95 | int lastPatternI = this.pattern.length; 96 | int lastTextI = text.length - lastPatternI; 97 | int j = 0; 98 | // Adding the skip is the only difference between KMP and the naive search. 99 | outer: 100 | for (int i = 0; i <= lastTextI; i += this.skip[j]) { 101 | // We must treat the case 0 specially. Even though the skip is 1, 102 | // j cannot go back by 1 or it would be -1. 103 | if (j > 0) 104 | j -= this.skip[j]; 105 | for (; j < lastPatternI; j++) { 106 | if (text[i + j] != this.pattern[j]) 107 | continue outer; 108 | } 109 | return i; 110 | } 111 | return -1; 112 | } 113 | 114 | public static void main(String[] args) throws Throwable { 115 | StringSearch.test(new KnuthMorrisPratt()); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/cpp/hanoi_generalized.cpp.off: -------------------------------------------------------------------------------- 1 | #include <algorithm> // TODO why required? 2 | #include <cassert> 3 | #include <iostream> // cout, endl 4 | #include <tuple> 5 | #include <vector> 6 | #include <utility> // pair 7 | 8 | /** 9 | Find a minimal path solution to generalizations of the towers of Hanoi problem.
10 | 11 | The generalizations to the classic puzzle include: 12 | 13 | - from any starting position to any ending position. 14 | 15 | The classical puzzle always starts from the position where all disks are on the first peg, 16 | and they must finish at either the second or third peg. 17 | 18 | This also solves the problem for any initial or final configuration in a minimal number of moves. 19 | 20 | For example, this could solve the problem if the initial position is: 21 | 22 | - smallest disk is on first peg 23 | - second smallest disk is on first peg 24 | 25 | and the desired final position is: 26 | 27 | - both disks on the third peg 28 | 29 | @param[in] initial_position, final_position Container that describes the initial and final positions. 30 | 31 | Positions are described as follows: the ith element of the vector contains the number of the peg 32 | on which the ith largest disk is located. 33 | 34 | For example, the following situation: 35 | 36 | - largest disk is on first peg 37 | - second largest disk is on second peg 38 | - third largest disk is on first peg 39 | 40 | can be represented as: 41 | 42 | {0, 1, 0} 43 | 44 | @param[in] n_pegs The total number of pegs. Currently only works for 3. 45 | 46 | @param[out] output The minimal sequence of moves that solves the puzzle. 47 | 48 | Each move is represented as a pair `{i, j}` meaning the top disk of the ith peg 49 | is to be moved to the top of the jth peg.
50 | */ 51 | template 52 | void HanoiAnyState(const std::vector& initial_position, 53 | const std::vector& final_position, 54 | const INPUT_TYPE n_pegs, 55 | std::vector::size_type,INPUT_TYPE>>& output) { 56 | if (n_pegs > 3) { 57 | throw "Not yet implemented."; 58 | } else { 59 | throw "Not yet implemented."; 60 | for (auto initial_position_it = initial_position.begin(), final_position_it = final_position.begin(); 61 | initial_position_it != initial_position.end(); 62 | ++initial_position_it, ++final_position_it) { 63 | auto largest_wrong_position = *initial_position_it; 64 | auto largest_correct_position = *final_position_it; 65 | if (largest_wrong_position != largest_correct_position) { 66 | auto other_peg = 3 - (largest_wrong_position + largest_correct_position); 67 | // TODO Critical steps missing here. 68 | //HanoiManyToOne(std::next(initial_position_it), other_peg, n_pegs, output); 69 | output.push_back({largest_wrong_position, largest_correct_position}); 70 | //HanoiOneToMany(other_peg, std::next(final_position_it), n_pegs, output); 71 | } 72 | } 73 | } 74 | } 75 | 76 | /* Generalized Hanoi. TODO not yet implemented. 
*/ 77 | int main() { 78 | typedef unsigned int InputType; 79 | typedef std::vector> OutputType; 80 | typedef std::tuple, 81 | std::vector, 82 | OutputType, 83 | > IO; 84 | IO in_outs[]{ 85 | { 86 | {0}, 87 | {1}, 88 | 3, 89 | { 90 | {0, 1} 91 | }, 92 | }, 93 | { 94 | {0, 0}, 95 | {1, 1}, 96 | 3, 97 | { 98 | {0, 2}, 99 | {0, 1}, 100 | {2, 1} 101 | }, 102 | }, 103 | }; 104 | OutputType output; 105 | for (auto& in_out : in_outs) { 106 | auto& initial_position = std::get<0>(in_out); 107 | auto& final_position = std::get<1>(in_out); 108 | auto& n_pegs = std::get<2>(in_out); 109 | auto& expected_output = std::get<3>(in_out); 110 | HanoiAnyState(initial_position, final_position, n_pegs, output); 111 | assert(output == expected_output); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/java/Tac.java: -------------------------------------------------------------------------------- 1 | import java.io.FileInputStream; 2 | import java.util.Arrays; 3 | 4 | /** 5 | * Implement the two sides strategy for tac. 6 | *

7 | * No memory moves are ever done. 8 | *

9 | * Disk read happens on the longer remaining buffer space. 10 | *

11 | * On a hard drive, it is likely faster to move the buffer around to reduce disk access. 12 | *

13 | * On SSD, it really depends, and the ideal thing would be 14 | * to check for the size of the chunk to move around. 15 | */ 16 | class Tac { 17 | 18 | private static final boolean DEBUG = false; 19 | private static final int DEFAULT_BUFLEN = 1024; 20 | private static final byte SEP = '\n'; 21 | private static long fileOff; 22 | private static boolean found; 23 | 24 | /** Read the file and search on the read segment. Also print older segments. */ 25 | private static int search ( 26 | byte[] b, 27 | final FileInputStream f, 28 | int width, 29 | final int left, 30 | int oldLeft, 31 | int oldWidth, 32 | int oldLeft2, 33 | int oldWidth2) throws Exception { 34 | width = (int)Math.min(fileOff, width); 35 | fileOff -= width; 36 | f.getChannel().position(fileOff); 37 | f.read(b, left, width); 38 | int lastNewline = left + width - 1; 39 | int i = lastNewline; 40 | found = false; 41 | while (i >= left) { 42 | if (b[i] == SEP) { 43 | // Print new segment. 44 | if (i + 1 < b.length) { 45 | System.out.write(b, i + 1, lastNewline - i); 46 | } 47 | // Print old segments. 48 | if (!found) { 49 | // TODO optimization: assume that the old ends in newline. 50 | // This allows reading one extra byte each time. 
51 | System.out.write(b, oldLeft, oldWidth); 52 | System.out.write(b, oldLeft2, oldWidth2); 53 | found = true; 54 | } 55 | lastNewline = i; 56 | } 57 | i--; 58 | } 59 | return lastNewline - left + 1; 60 | } 61 | 62 | public static void main(final String[] args) throws Throwable { 63 | 64 | // CLI 65 | String path = args[0]; 66 | int buflen; 67 | if (args.length > 1) { 68 | buflen = Integer.parseInt(args[1]); 69 | } else { 70 | buflen = DEFAULT_BUFLEN; 71 | } 72 | 73 | final byte[] b = new byte[buflen]; 74 | final FileInputStream f = new FileInputStream(path); 75 | final long fileSize = f.getChannel().size(); 76 | fileOff = fileSize; 77 | int oldLeft = 0; 78 | int oldWidth = 0; 79 | int searchLeft; 80 | int width; 81 | int searchLeft2; 82 | int width2; 83 | 84 | while(fileOff > 0) { 85 | 86 | // TODO optimization: 87 | // decide what is better: moving the entire old to the left, 88 | // or reading the disk one extra time. 89 | 90 | // Decide which side to read on and read it. 91 | int leftWidth = oldLeft; 92 | int rightWidth = buflen - (oldLeft + oldWidth); 93 | if (leftWidth > rightWidth) { 94 | searchLeft = 0; 95 | width = leftWidth; 96 | searchLeft2 = oldLeft + oldWidth; 97 | width2 = rightWidth; 98 | } else { 99 | searchLeft = oldLeft + oldWidth; 100 | width = rightWidth; 101 | searchLeft2 = 0; 102 | width2 = leftWidth; 103 | } 104 | 105 | // First side. 106 | oldWidth = search(b, f, width, searchLeft, oldLeft, oldWidth, 0, 0); 107 | if (found) { 108 | oldLeft = searchLeft; 109 | continue; 110 | } 111 | 112 | // Second side. 113 | oldWidth = search(b, f, width2, searchLeft2, oldLeft, oldWidth, searchLeft, width); 114 | if (found) { 115 | oldLeft = searchLeft2; 116 | continue; 117 | } 118 | 119 | System.err.println("Line longer than maximum buffer size present: " + buflen); 120 | System.exit(1); 121 | } 122 | // Last old.
123 | System.out.write(b, oldLeft, oldWidth); 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /quantum-computing.md: -------------------------------------------------------------------------------- 1 | # Quantum computing 2 | 3 | ## Sources 4 | 5 | Non-free: 6 | 7 | - Nielsen, Chuang - 2010 - Quantum Computation and Quantum Information 8 | 9 | 10 | 11 | ## General concepts 12 | 13 | Quantum computation is at least as fast as classical computation, 14 | and widely believed to be faster although that is unproven. 15 | 16 | Quantum computing does not solve undecidable problems for classic computing: 17 | it can only solve some specific classes of problems faster. 18 | 19 | The intuitive reason why quantum computers may be faster 20 | is that they can carry out multiple operations at once, 21 | since quantum systems like an electron calculate multiple trajectories at once probabilistically. 22 | 23 | The problem is that observation of the output destroys part of the output information, 24 | so clever techniques must be used to preserve some of the information 25 | gained with that simultaneity. 26 | 27 | Like classical computation, quantum computing can be split into two parts: 28 | 29 | - modeling the computation from a high level point of view 30 | - understanding how to implement the base computation steps physically 31 | 32 | Quantum computers are in their infancy and their success is not certain, 33 | but it is likely that the currently used model of quantum circuits will 34 | model well any type of quantum computer likely to be built. 35 | 36 | ## Models 37 | 38 | ### Quantum circuit 39 | 40 | 41 | 42 | Most common model used. 43 | 44 | 45 | 46 | Simulator: 47 | 48 | All quantum gates are reversible: given the output you can know the input. This is because gates are unitary operators, and every unitary matrix is invertible (its inverse is its conjugate transpose). 49 | As a consequence, the number of outputs is the same as the number of inputs. 50 | This is not the case for many of the classical gates like AND.
51 | There are however universal reversible classical gates. 52 | 53 | Quantum gates are represented by unitary matrices. 54 | A $k$ bit operation is represented by a $2^k \times 2^k$ matrix. 55 | 56 | Since quantum gates are always reversible, 57 | any quantum circuit can be seen as a gate and represented by a matrix. 58 | 59 | There exist universal quantum gates, e.g. the Deutsch gate. 60 | 61 | ### Quantum Turing Machine 62 | 63 | 64 | 65 | Equivalent to the Quantum Circuit but less commonly used. 66 | 67 | ## Complexity classes 68 | 69 | Two parameters have to be considered in terms of the input size: 70 | 71 | - time to solve 72 | - number of gates required, which may grow exponentially 73 | 74 | ### EQP 75 | 76 | 77 | 78 | Exact Quantum Polynomial. 79 | 80 | Quantum computer solves it polynomially with probability 1. 81 | 82 | Quantum analog of P. 83 | 84 | $P == EQP$ is open. 85 | 86 | ### BQP 87 | 88 | Bounded Quantum Polynomial. 89 | 90 | Quantum version of BPP. 91 | 92 | We know: 93 | 94 | - $BQP \subset PSPACE$. Equality open but believed false. 95 | 96 | Disproving it should be hard as it would imply $P \subset PSPACE$ which is widely studied. 97 | 98 | - $NP \subset BQP$ is open but believed to be false. 99 | 100 | - $P == BQP$ is open but believed to be false because of known quantum algorithms without known P. 101 | 102 | ### Vs probabilistic TM 103 | 104 | Quantum machines are probabilistic. 105 | 106 | There is a relation between BPP and QTM. TODO 107 | 108 | ## Quantum algorithms 109 | 110 | ### Shor's algorithm 111 | 112 | 113 | 114 | Integer factorization in $O(log(N)^3)$ time, using polynomially many gates. 115 | 116 | Great improvement over the best classic factorization algorithm known: 117 | 118 | which is sub exponential. 119 | 120 | Generates huge interest since it could break RSA, 121 | and many quantum computer prototypes have showcased it. 122 | 123 | ### Grover's algorithm 124 | 125 | 126 | 127 | Search in unsorted array with $O(N^{1/2})$.
128 | 129 | Proven to be the optimal quantum algorithm for this problem, while the optimal classic algorithm is $O(N)$. 130 | 131 | Probabilistic. 132 | 133 | This quantum algorithm only produces a polynomial improvement ($N^{1/2}$ instead of $N$) over the optimal classical algorithm, 134 | but there are others which may produce exponential improvements. 135 | 136 | Not too important in practice since search on a sorted set is $log(N)$. 137 | -------------------------------------------------------------------------------- /regular-expression.md: -------------------------------------------------------------------------------- 1 | # Regular expression 2 | 3 | Strings that describe regular languages. 4 | 5 | Can be formally constructed recursively as: 6 | 7 | - any terminal symbol is a regular expression 8 | - the empty string is a regular expression 9 | 10 | Then recursively, if `R` and `S` are regular expressions then the following are also: 11 | 12 | - alternative: `(R|S)` 13 | - concatenate: `RS` 14 | - Kleene star: `R*` 15 | 16 | This defines a new language, which can be efficiently parsed, 17 | and transformed into a DFA or a regular grammar. 18 | 19 | Regular expressions are as powerful as regular grammars. 20 | 21 | ## Extensions 22 | 23 | Practical regex implementations offer several extensions, some of which are reducible 24 | to formal regexes, and others which are not and increase the power of the language, 25 | while reducing theoretical performance. 26 | 27 | ### Predefined classes 28 | 29 | `.`, `\s`, etc. 30 | 31 | Obviously possible. 32 | 33 | ### Question mark ? and plus sign + 34 | 35 | `?` and `+` can be directly defined in terms of the base operations. 36 | 37 | ### Counted repetitions {n,m} 38 | 39 | Can be expanded with a huge number of `|`. 40 | 41 | Example: 42 | 43 | a{2,4} 44 | 45 | Equals: 46 | 47 | aa(|a(|a)) 48 | 49 | ### Lazy matching .*? 50 | 51 | E.g.: `.*?` or `.+?` in Perl.
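The greedy/lazy difference is easy to observe with capturing groups, for example with Python's `re` module (a small illustrative sketch):

```python
import re

s = '<a><b>'

# Greedy: .* grabs as much as possible, so the group spans both tags.
assert re.match(r'<(.*)>', s).group(1) == 'a><b'

# Lazy: .*? grabs as little as possible, stopping at the first '>'.
assert re.match(r'<(.*?)>', s).group(1) == 'a'
```

Both patterns accept the same set of strings; laziness only changes which of several possible matches (and group contents) is chosen.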
52 | 53 | This concept does not exist in formal regexes: laziness is only useful to remove ambiguity 54 | and predict the content of capturing groups, a concept which also does not exist in formal regexes. 55 | 56 | ### Backreferences 57 | 58 | Example: `(.*)\1`, matching `aa`, `abab`. 59 | 60 | A classic non-regular feature: the pumping lemma rules it out, and $\{ww \mid w \in \Sigma^*\}$ is not even context-free. 61 | 62 | ## Pumping lemma 63 | 64 | Application: regexes cannot do parenthesis matching, 65 | therefore they are a strict subset of context-free languages 66 | (a subset because you can express regular languages with context-free grammars). 67 | 68 | Also proves many other languages are not regular. 69 | 70 | ## Complexity 71 | 72 | ### DFA implementation 73 | 74 | DFAs have the same power as regular expressions. 75 | 76 | The DFA implementation has better time performance, 77 | but the number of states can grow exponentially with regex size, 78 | thus taking exponential memory. 79 | 80 | First, a preprocessing step transforms the regex into a DFA (compiles it). 81 | 82 | Once that is done, matching an input of length $n$ takes: 83 | 84 | - $O(n)$ time 85 | - $O(2^m)$ memory, where $m$ is the regex length 86 | 87 | Compilation first builds an equivalent NFA in $O(m)$ time (Thompson's construction). 88 | 89 | This can always be done because every regex has an equivalent NFA; 90 | the subset construction then turns the NFA into a DFA, with up to $2^m$ states. 91 | 92 | ### Lazy DFA 93 | 94 | Middle ground between the DFA and backtracking implementations. 95 | 96 | Build the DFA and discard states on the fly. 97 | 98 | $O(mn^2)$ time and polynomial space. 99 | 100 | ### Backtracking 101 | 102 | Exponential worst-case time.
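A minimal backtracking matcher for a toy regex subset (literals, `.`, and postfix `*` only), in the spirit of Pike's classic sketch; an illustration of the strategy, not any real engine's code:

```python
# Backtracking matcher: returns True if `pattern` matches a prefix of `text`.
def match(pattern, text):
    if not pattern:
        return True
    if pattern[1:2] == '*':
        # c*: try the rest of the pattern after consuming 0, 1, 2, ... copies of c.
        while True:
            if match(pattern[2:], text):
                return True
            if not (text and pattern[0] in ('.', text[0])):
                return False
            text = text[1:]
    if text and pattern[0] in ('.', text[0]):
        return match(pattern[1:], text[1:])
    return False
```

The `c*` case retries every possible split point, which is exactly where patterns like `a*a*a*b` against long runs of `a` blow up exponentially.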
103 | 104 | Often used in implementations because: 105 | 106 | - non-exponential space 107 | - easier to implement extra features such as group matching 108 | - in practice, most regular expressions are small 109 | 110 | ## Ambiguity 111 | 112 | A regular expression is ambiguous when there is more than one way to derive a single string by deciding: 113 | 114 | - which side of `|` to take 115 | - how many times to expand `a*` 116 | 117 | Ambiguity can always be removed for regular expressions. 118 | 119 | This is in contrast to context-free languages, where there are inherently ambiguous languages. 120 | 121 | Example: 122 | 123 | (a|ab)(ba|a) 124 | 125 | is ambiguous, since `aba` can be derived in two ways. 126 | 127 | The equivalent: 128 | 129 | a(a|b(a|ba)) 130 | 131 | recognizes the same language but is not ambiguous. 132 | 133 | TODO algorithm and complexity of resolving ambiguity? 134 | 135 | ## Regex equivalence 136 | 137 | For DFAs, possible in near-linear time by Hopcroft-Karp (1971). 138 | 139 | Naively in $O(n \log n)$ by minimizing (Hopcroft's algorithm), since the minimal DFA is unique. 140 | 141 | For regexes themselves, equivalence is PSPACE-complete, since the conversion to a DFA can blow up exponentially. Application: ensure that a simple regex does the same as a complicated one. 142 | 143 | ## Star height problem 144 | 145 | 146 | 147 | The *star height* is the nesting level of Kleene stars: 148 | 149 | - `(a|b)*`: star height 1 150 | - `(a|b*)*`: star height 2 151 | - `((a|b*)*a|b*)*`: star height 3 152 | 153 | Problems posed: 154 | 155 | - Is some fixed finite star height enough for all regexes? 156 | 1966: no, a counterexample family was given (Dejean and Schützenberger). 157 | 158 | - Compute the star height of a given regular expression. Much harder. 159 | First answer: Hashiguchi 1988, not in $ELEMENTARY$. 160 | Kirsten 2005: double exponential space.
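Once a regex has been parsed, its *syntactic* star height is a trivial recursion over the syntax tree; the hard problem above is finding the minimum star height over all equivalent regexes. A sketch using a hypothetical tuple encoding of the AST:

```python
# AST nodes: ('sym', c), ('cat', l, r), ('alt', l, r), ('star', e).
def star_height(node):
    kind = node[0]
    if kind == 'sym':
        return 0                                  # a terminal symbol has height 0
    if kind == 'star':
        return 1 + star_height(node[1])           # one more nesting level
    # 'cat' and 'alt' take the worst of their children
    return max(star_height(node[1]), star_height(node[2]))

# (a|b)* has star height 1; (a|b*)* has star height 2.
h1 = star_height(('star', ('alt', ('sym', 'a'), ('sym', 'b'))))
h2 = star_height(('star', ('alt', ('sym', 'a'), ('star', ('sym', 'b')))))
```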
161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MOVING to: https://cirosantilli.com/linux-kernel-module-cheat#algorithms 2 | 3 | It is early days, but this repo will be progressively split up and moved to either: 4 | 5 | * https://cirosantilli.com/linux-kernel-module-cheat#algorithms for runnable algorithms with input and output, because we can factor out some automation with that repo, notably being able to run on gem5 for reproducible timing experiments 6 | * https://github.com/cirosantilli/cirosantilli.github.io for the "theoretical stuff" that is less runnable 7 | 8 | # Algorithm cheat ![logo](logo.png) 9 | 10 | [![Build Status](https://travis-ci.org/cirosantilli/algorithm-cheat.svg?branch=master)](https://travis-ci.org/cirosantilli/algorithm-cheat) 11 | 12 | Algorithm tutorials and simple multi-language implementations with unit tests. Test case data in language-agnostic file formats. Usually updated when I'm grinding for job interviews :-) 13 | 14 | 1. Implementations 15 | 1. [Getting started](getting-started.md) 16 | 1. [src/](src/) 17 | 1. [data/](data/) 18 | 1. [test](test) 19 | 1. [Introduction](introduction.md) 20 | 1. [Beauty](beauty.md) 21 | 1. [Recursive algorithms](recursive-algorithms.md) 22 | 1. [Turing machine](turing-machine.md) 23 | 1. [Busy beaver](busy-beaver.md) 24 | 1. [Out of core algorithms](out-of-core.md) 25 | 1. [Undecidability](undecidability.md) 26 | 1. [P vs NP](p-vs-np.md) 27 | 1. Data structures 28 | 1. [Graph](graph.md) 29 | 1. [Tree](tree.md) 30 | 1. [Dijkstra](dijkstra.md) 31 | 1. [Map](map.md): [map.cpp](src/cpp/map.cpp) 32 | 1. [BST](bst.md): [bst.hpp](src/cpp/bst.hpp), [bst.c](src/c/bst.c) 33 | 1. [Hash map](hash-map.md): [hash_map.hpp](src/cpp/hash_map.hpp) 34 | 1. [Heap](heap.md): [Heap.java](src/java/Heap.java) 35 | 1. [Sorting algorithms](sort/) 36 | 1. 
[Quicksort](quicksort.md): [QuickSort.java](src/java/QuickSort.java), [QuickSortTail.java](src/java/QuickSortTail.java) 37 | 1. [Merge sort](merge-sort.md) 38 | 1. [Bubble sort](bubble-sort.md) 39 | 1. Parsing, formal languages and their automatons 40 | 1. [Formal language](formal-language.md) 41 | 1. [Context-free grammar](context-free-grammar.md) 42 | 1. [Regular grammar](regular-grammar.md) 43 | 1. [Regular language](regular-language.md) 44 | 1. [Programming languages](programming-languages.md) 45 | 1. [Type systems](type-systems.md) 46 | 1. String algorithms 47 | 1. [Longest common subsequence](longest-common-subsequence.md) 48 | 1. [Longest increasing subsequence](longest-increasing-subsequence.md) 49 | 1. [Maximum subarray](maximum-subarray.md) 50 | 1. [String search](string-search.md): [StringSearchNaive.java](src/java/StringSearchNaive.java), [KnuthMorrisPratt.java](src/java/KnuthMorrisPratt.java) 51 | 1. Version string parsing: [version_string.c](src/c/version_string.c) 52 | 1. [Cryptography](cryptography.md) 53 | 1. [base64](base64.md) 54 | 1. [ECDSA](ecdsa.md) 55 | 1. [GPG](gpg.md) 56 | 1. [md5sum](md5sum.md) 57 | 1. Linear programming 58 | 1. [Change making](change-making.md) 59 | 1. Out-of-core 60 | 1. [tac](tac.md): [tac.c](src/c/tac.c), [Tac.java](src/java/Tac.java) 61 | 1. Misc algorithms 62 | 1. [Hanoi tower](hanoi-tower.md) 63 | 1. [Hash function](hash-function.md) 64 | 1. [XOR-swap](xor-swap.md) 65 | 1. Numerical 66 | 1. [Matrix multiplication](matrix-multiplication.md) 67 | 1. [PDE](pde.md) 68 | 1. Misc 69 | 1. [Bitcoin](bitcoin.md) 70 | 1. [Decimal data type](decimal-data-type.md) 71 | 1. [Human factors](human-factors.md) 72 | 1. [Licenses](licenses.md) 73 | 1. [Quantum computing](quantum-computing.md) 74 | 1. [Security](security.md) 75 | 1. [Stack Overflow Data Dump](stack-overflow-data-dump/) 76 | 1. [Bibliography](bibliography.md) 77 | 1. [TODO](TODO.md) 78 | 79 | ## WIP 80 | 81 | 1. [Knapsack](knapsack.md) 82 | 1. Data structures 83 | 1. 
[Crit-bit tree](crit-bit-tree.md) 84 | 1. [Disjoint set](disjoint-set.md) 85 | 1. [Skip list](skip-list.md) 86 | 1. Map 87 | 1. [Set](set.md) 88 | 1. Prefix trees 89 | 1. [Trie](trie.md): [trie.c](src/c/trie.c) 90 | 1. [Radix tree](radix-tree.md) 91 | 1. [RB tree](rb-tree.md) 92 | 1. [B-tree](b-tree.md) 94 | 1. [B-plus-tree](b-plus-tree.md) 95 | 1. Graph 96 | 1. [Topological sort](topological-sort.md) 97 | 1. [A*](a-star.md) 98 | 1. [B*](b-star.md) 99 | 1. [Bellman-ford](bellman-ford.md) 100 | 1. Sorting algorithms 101 | 1. [Heap sort](heap-sort.md): [HeapSort.java](src/java/HeapSort.java) 102 | 1. [Insertion sort](insertion-sort.md) 103 | 1. Misc 104 | 1. [Error detection](error-detection.md) 105 | 1. Numerical 106 | 1. [Newton's method](newton-method.md): [newton.py](src/python/newton.py) 107 | -------------------------------------------------------------------------------- /src/cpp/hash_map.hpp: -------------------------------------------------------------------------------- 1 | #ifndef HASH_MAP_H 2 | #define HASH_MAP_H 3 | 4 | #include "map.hpp" 5 | 6 | template<typename KEY> 7 | size_t hash(KEY key, size_t container_size); 8 | 9 | template<> 10 | size_t hash(int key, size_t container_size){ 11 | return key % container_size; 12 | } 13 | 14 | /** 15 | Hash map.
16 | 17 | @tparam KEY the key type of the map 18 | @tparam VAL the value type of the map 19 | */ 20 | template<typename KEY, typename VAL> 21 | class HashMap : public Map<KEY, VAL> { 22 | typedef std::vector<std::vector<std::pair<KEY, VAL>>> map_t; 23 | public: 24 | using Map<KEY, VAL>::init_initializer; 25 | using Map<KEY, VAL>::add; 26 | HashMap(size_t keyCountInitial = HashMap::keyCountInitialDefault, 27 | float loadFactor = 0.7 28 | ): 29 | keyCount(keyCountInitial), 30 | loadFactor(loadFactor), 31 | map(map_t(keyCount)) 32 | {}; 33 | 34 | HashMap(const KEY& key, const VAL& val, 35 | size_t keyCountInitial = HashMap::keyCountInitialDefault, 36 | float loadFactor = 0.7 37 | ) : HashMap(keyCountInitial, loadFactor) 38 | { this->init_pair(key, val); } 39 | 40 | HashMap(std::initializer_list<std::pair<KEY, VAL>> pairs) : HashMap() { this->init_initializer(pairs); } 41 | 42 | virtual ~HashMap() {} 43 | 44 | bool add(const KEY& key, const VAL& val) { 45 | size_t h, newKeyCount, newSize; 46 | map_t oldMap; 47 | h = hash(key, map.size()); 48 | // If already present, return false. 49 | for (auto& pair : map[h]) 50 | if (std::get<0>(pair) == key) 51 | return false; 52 | newKeyCount = this->keyCount + 1; 53 | // Increase size if necessary and rehash everything 54 | if (newKeyCount >= this->map.size() * this->loadFactor) { 55 | oldMap = this->map; // TODO possible to avoid this costly copy?
56 | newSize = this->map.size() * this->increaseFactor; 57 | this->map = map_t(newSize); 58 | for (auto& list : oldMap) { 59 | for (auto& pair : list) { 60 | this->add(pair); 61 | } 62 | } 63 | h = hash(key, newSize); 64 | } 65 | //if (std::find_if( 66 | //map[h].begin(), 67 | //map[h].end(), 68 | //[&key](std::pair<KEY, VAL> pair){ return std::get<0>(pair) == key; } 69 | //) != map[h].end()) { 70 | //return false; 71 | //} 72 | this->map[h].push_back(std::pair<KEY, VAL>(key, val)); 73 | this->keyCount = newKeyCount; 74 | return true; 75 | } 76 | 77 | bool del(const KEY& key) { 78 | auto& list = map[ hash(key, map.size()) ]; 79 | //for (auto& pair : list) { 80 | //if (std::get<0>(pair) == key) 81 | //break; 82 | //} 83 | auto it = std::find_if( 84 | list.begin(), 85 | list.end(), 86 | [&key](const std::pair<KEY, VAL>& pair){ return std::get<0>(pair) == key; }); 87 | 88 | if (it != list.end()) { 89 | list.erase(it); 90 | return true; 91 | } 92 | return false; 93 | } 94 | 95 | bool find(const KEY& key, VAL& val) { 96 | size_t h; 97 | h = hash(key, map.size()); 98 | for (auto& pair : map[h]) { 99 | if (std::get<0>(pair) == key) { 100 | val = std::get<1>(pair); 101 | return true; 102 | } 103 | } 104 | return false; 105 | } 106 | 107 | /** 108 | Return a string representation of the map. 109 | */ 110 | std::string str() const { 111 | std::stringstream ss; 112 | for (size_t i = 0; i < this->map.size(); i++) { 113 | auto& list = this->map[i]; 114 | if (list.size() > (size_t)0) { 115 | ss << i << ": "; 116 | for (auto& pair : list) { 117 | ss << std::get<0>(pair) << ":" << std::get<1>(pair) << ", "; 118 | } 119 | ss << std::endl; 120 | } 121 | } 122 | return ss.str(); 123 | } 124 | 125 | bool operator==(const HashMap& other) const { return this->map == other.map; } 126 | bool operator!=(const HashMap& other) const { return !
(this->map == other.map); } 127 | 128 | private: 129 | const static size_t keyCountInitialDefault = 1; 130 | // By how much the map size will be multiplied if it needs to grow 131 | const static int increaseFactor = 2; 132 | size_t keyCount; 133 | float loadFactor; 134 | map_t map; 135 | }; 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /graph.md: -------------------------------------------------------------------------------- 1 | # Graph 2 | 3 | 4 | 5 | ## Properties 6 | 7 | ### Relations between E and V 8 | 9 | E = set of edges 10 | 11 | V = set of vertices 12 | 13 | $|E|$: number of edges 14 | 15 | $|V|$: number of vertices 16 | 17 | Important relations: 18 | 19 | - $0 \leq |E| \leq |V|^2$, and for connected graphs: $|V| - 1 \leq |E| \leq |V|^2$. Graphs with $|E|$ close to $|V|^2$ are said to be dense. 20 | 21 | - if $|E| > (|V| - 1)(|V| - 2)/2$ then the graph must be connected. 22 | 23 | - $\forall y \in V, |\{ xy \in E \mid x \in V \}| \leq |V| - 1$: for a fixed $y$, with $x$ free to vary, there are at most $|V| - 1$ incident edges (in a simple graph). 24 | 25 | ## Traversals 26 | 27 | ### DFS 28 | 29 | ### Depth first search 30 | 31 | DFS can be done either recursively or iteratively with an explicit stack. 32 | 33 | The only key difference is that the recursive version uses the system's call stack, while the non-recursive version uses an explicit stack. 34 | 35 | ### BFS 36 | 37 | ### Breadth first search 38 | 39 | BFS cannot be done recursively naturally, since the non-recursive implementation uses a queue, not a stack like DFS. 40 | 41 | 42 | 43 | ### DFS vs BFS 44 | 45 | Both are methods to search vertices in unordered graphs. 46 | 47 | Advantages of DFS: 48 | 49 | - if the searched solutions are guaranteed to be at the greatest depths (e.g. leaves of a tree) then DFS will certainly be faster.
50 | 51 | - if the graph is somehow balanced (nodes of maximum depth are at similar depths), then DFS will certainly use less memory than BFS, since BFS must store a FIFO of a whole level ($2^{depth}$ vertices on a balanced binary tree) while DFS stores at most the sequence of parents down to the maximum depth ($depth$ elements). 52 | 53 | Iterative deepening DFS may be used to reduce the memory usage of regular DFS. 54 | 55 | Advantages of BFS: 56 | 57 | - it is the obvious choice when looking for minimum distances, since you want to keep as close as possible to the start and not go too deep. 58 | 59 | - if the depth can be too long, or even infinite, it is simpler to search breadth first and limit the descent, or the process could take forever or too long. 60 | 61 | It also makes more sense to use heuristics with BFS, looking around the current position, and then deciding where to descend further. 62 | 63 | ### IDDFS 64 | 65 | ### Iterative deepening DFS 66 | 67 | 68 | 69 | Do depth-limited searches with increasing depth limit. 70 | 71 | The only possibility when `2^height` is too much for your RAM; it uses just `O(height)` memory for the backtracking. Downside: way slower. 72 | 73 | This also implies of course that the graph does not fit into memory, so you have to think about how to generate or fetch neighbors on demand. 74 | 75 | It is not feasible to keep track of the visited nodes because that would take up too much memory. Infinite loops are avoided by the depth limiting. 76 | 77 | ## Attributes 78 | 79 | Graph structures must be able to associate arbitrary attributes with each edge (node pair), and this is their major design concern. 80 | 81 | For example: 82 | 83 | - in a shortest path problem, the only edge attribute is its weight (length). 84 | 85 | - in a city, each street takes some time to cross, and has a different length. 86 | 87 | The two are not necessarily proportional since some streets have more traffic than others. 88 | 89 | Cover the maximum length within a fixed amount of time.
90 | 91 | In this problem, each edge must have 2 integers associated with it: length and time to cross. 92 | 93 | Node attributes are simpler to deal with, as they can just be stored in a single array of properties. 94 | 95 | Dynamic operations such as addition and removal of edges and nodes are also required, but are often less important, as many practical graph problems are based on static graphs. 96 | 97 | ## Implementations 98 | 99 | There are two major graph implementations: 100 | 101 | - adjacency list 102 | - adjacency matrix 103 | 104 | ### Adjacency list 105 | 106 | Represent nodes by integers. For each node, store what it points to. 107 | 108 | - directed implementation: array of destination-to-properties maps: 109 | 110 | g[origin0] = {dest00: properties00, dest01: properties01} 111 | g[origin1] = {dest10: properties10, dest11: properties11} 112 | ... 113 | 114 | Probably the best option in general, as it is `O(1)` average time to get edge properties from an origin / destination pair. 115 | 116 | - undirected: same as above, but store every node pair as `(smaller, larger)`, and convert all inputs `(larger, smaller)` to `(smaller, larger)`. 117 | 118 | ### Adjacency matrix 119 | 120 | Keep a matrix of edge properties. 121 | 122 | Advantage: `O(1)` worst-case time to access the properties of an edge from a node pair. 123 | 124 | Disadvantage: `O(n^2)` memory, regardless of graph density. 125 | 126 | This method is generally less useful because: 127 | 128 | - most graphs encountered in practice are very sparse, so the matrix would contain too many empty entries. 129 | - the adjacency list implementation can already reach `O(1)` average (though not worst-case) time. 130 | --------------------------------------------------------------------------------
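The directed adjacency-list layout can be sketched with nested dicts; the `length`/`time` attribute names come from the street example above and are otherwise arbitrary:

```python
# Adjacency list: node -> {destination: edge properties}.
g = {
    0: {1: {"length": 3, "time": 1}, 2: {"length": 1, "time": 4}},
    1: {2: {"length": 2, "time": 1}},
    2: {},
}

def edge_properties(graph, origin, dest):
    # O(1) average lookup of edge properties from an (origin, destination) pair.
    return graph[origin][dest]

def undirected_key(u, v):
    # For undirected graphs, normalize every pair to (smaller, larger).
    return (u, v) if u < v else (v, u)
```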