├── .gitignore ├── .htaccess ├── .htupdate ├── 2022-05-23-qp-api.pdf ├── 2022-05-23-qp-guts.pdf ├── Makefile ├── README.md ├── Tbl.c ├── Tbl.h ├── bench-cross.pl ├── bench-more.pl ├── bench-multi.pl ├── bench-reformat.pl ├── bench-table.css ├── bench.c ├── blog-2015-10-04.md ├── blog-2015-10-07.md ├── blog-2015-10-11.md ├── blog-2015-10-19.md ├── blog-2016-02-23.md ├── blog-2017-01-09.md ├── blog-2020-07-05.md ├── blog-2021-06-23.md ├── blog-2022-06-22.md ├── cb-debug.c ├── cb.c ├── cb.h ├── dns-debug.c ├── dns.c ├── dns.h ├── entities ├── fn-debug.c ├── fn.c ├── fn.h ├── fp-debug.c ├── fp.c ├── fp.h ├── getwords.pl ├── ht-debug.c ├── ht.c ├── ht.h ├── notes-bitstrings-prefixes.md ├── notes-concurrency.md ├── notes-dns.md ├── notes-generic-leaves.md ├── notes-jumbo.md ├── notes-love.md ├── notes-mistakes.md ├── notes-rib-compression.md ├── notes-todo.md ├── notes-write-buffer.md ├── popcount-test.c ├── qp-debug.c ├── qp.c ├── qp.h ├── rc-debug.c ├── rc.c ├── rc.h ├── siphash24.c ├── test-gen.pl ├── test-once.sh ├── test.c ├── test.pl ├── tinytocs.bib ├── tinytocs.cls ├── tinytocs.pdf ├── tinytocs.tex ├── wp-debug.c ├── wp.c └── wp.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.s 3 | *.html 4 | bench-?? 5 | test-?? 
6 | test-out-* 7 | popcount-test 8 | bind9 9 | top-1m* 10 | in-* 11 | -------------------------------------------------------------------------------- /.htaccess: -------------------------------------------------------------------------------- 1 | RedirectMatch ^/prog/qp/?$ https://dotat.at/prog/qp/README.html 2 | RedirectMatch ^/prog/qp/(.*)\.md https://dotat.at/prog/qp/$1.html 3 | 4 | Redirect /prog/qp/notes-dns2.html https://dotat.at/prog/qp/notes-dns.html 5 | 6 | RewriteEngine On 7 | RewriteCond %{HTTPS} off 8 | RewriteRule (.*) https://%{SERVER_NAME}%{REQUEST_URI} [R,L] 9 | -------------------------------------------------------------------------------- /.htupdate: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | cd public-html/prog/qp 4 | git pull --ff-only 5 | make html 6 | -------------------------------------------------------------------------------- /2022-05-23-qp-api.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fanf2/qp/90fc7de8d0b3c9c1aaa5cd9d7b1765b122fad2e5/2022-05-23-qp-api.pdf -------------------------------------------------------------------------------- /2022-05-23-qp-guts.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fanf2/qp/90fc7de8d0b3c9c1aaa5cd9d7b1765b122fad2e5/2022-05-23-qp-guts.pdf -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # You may need -mpopcnt to get the compiler to emit POPCNT instructions 2 | CFLAGS= -std=gnu99 -Wall -Wextra -g -O3 -march=native 3 | #CFLAGS= -std=gnu99 -Wall -Wextra -g -fsanitize=undefined -fsanitize=address 4 | 5 | # implementation codes 6 | #XY= cb qp qs qn fp fs fc wp ws rc # ht 7 | XY= qp fp fn dns 8 | 9 | TEST= $(addprefix ./test-,${XY}) 10 | BENCH= $(addprefix 
./bench-,${XY}) 11 | 12 | INPUT= in-b9 in-dns in-rdns in-usdw top-1m 13 | 14 | all: ${TEST} ${BENCH} ${INPUT} 15 | 16 | test: ${TEST} top-1m 17 | ./test-once.sh 10000 100000 top-1m ${XY} 18 | 19 | bench: ${BENCH} ${INPUT} 20 | ./bench-cross.pl 1000000 ${BENCH} -- ${INPUT} 21 | 22 | size: ${TEST} ${INPUT} 23 | for f in ${INPUT}; do \ 24 | sed 's/^/+/' <$$f >test-$$f; \ 25 | echo $$f; \ 26 | for p in ${TEST}; do \ 27 | $$p /dev/null; \ 28 | done; \ 29 | done 30 | 31 | clean: 32 | rm -f test-?? bench-?? *.o 33 | 34 | realclean: clean 35 | rm -f test-in test-out-?? 36 | 37 | bench-ht: bench.o Tbl.o ht.o siphash24.o 38 | ${CC} ${CFLAGS} -o $@ $^ 39 | 40 | test-ht: test.o Tbl.o ht.o ht-debug.o siphash24.o 41 | ${CC} ${CFLAGS} -o $@ $^ 42 | 43 | bench-%: bench.o Tbl.o %.o 44 | ${CC} ${CFLAGS} -o $@ $^ 45 | 46 | test-%: test.o Tbl.o %.o %-debug.o 47 | ${CC} ${CFLAGS} -o $@ $^ 48 | 49 | Tbl.o: Tbl.c Tbl.h 50 | test.o: test.c Tbl.h 51 | bench.o: bench.c Tbl.h 52 | siphash24.o: siphash24.c 53 | cb.o: cb.c cb.h Tbl.h 54 | qp.o: qp.c qp.h Tbl.h 55 | fp.o: fp.c fp.h Tbl.h 56 | fn.o: fn.c fn.h Tbl.h 57 | wp.o: wp.c wp.h Tbl.h 58 | rc.o: rc.c rc.h Tbl.h 59 | ht.o: ht.c ht.h Tbl.h 60 | dns.o: dns.c dns.h Tbl.h 61 | cb-debug.o: cb-debug.c cb.h Tbl.h 62 | qp-debug.o: qp-debug.c qp.h Tbl.h 63 | fp-debug.o: fp-debug.c fp.h Tbl.h 64 | fn-debug.o: fn-debug.c fn.h Tbl.h 65 | wp-debug.o: wp-debug.c wp.h Tbl.h 66 | rc-debug.o: rc-debug.c rc.h Tbl.h 67 | ht-debug.o: ht-debug.c ht.h Tbl.h 68 | dns-debug.o: dns-debug.c dns.h Tbl.h 69 | 70 | # no cache prefetch 71 | qc.o: qp.c qp.h Tbl.h 72 | ${CC} ${CFLAGS} -D__builtin_prefetch='(void)' -c -o qc.o $< 73 | 74 | # use SWAR 16 bit x 2 popcount 75 | qn.o: qp.c qp.h Tbl.h 76 | ${CC} ${CFLAGS} -DHAVE_NARROW_CPU -c -o qn.o $< 77 | 78 | # use hand coded 16 bit popcount 79 | qs.o: qp.c qp.h Tbl.h 80 | ${CC} ${CFLAGS} -DHAVE_SLOW_POPCOUNT -c -o qs.o $< 81 | 82 | # no cache prefetch 83 | fc.o: fp.c fp.h Tbl.h 84 | ${CC} ${CFLAGS} 
-D__builtin_prefetch='(void)' -c -o fc.o $< 85 | 86 | # use hand coded 32 bit popcount 87 | fs.o: fp.c fp.h Tbl.h 88 | ${CC} ${CFLAGS} -DHAVE_SLOW_POPCOUNT -c -o fs.o $< 89 | 90 | # use hand coded 64 bit popcount 91 | ws.o: wp.c wp.h Tbl.h 92 | ${CC} ${CFLAGS} -DHAVE_SLOW_POPCOUNT -c -o ws.o $< 93 | 94 | qn-debug.c: 95 | ln -s qp-debug.c qn-debug.c 96 | qs-debug.c: 97 | ln -s qp-debug.c qs-debug.c 98 | fs-debug.c: 99 | ln -s fp-debug.c fs-debug.c 100 | fc-debug.c: 101 | ln -s fp-debug.c fc-debug.c 102 | ws-debug.c: 103 | ln -s wp-debug.c ws-debug.c 104 | 105 | input: ${INPUT} 106 | 107 | in-usdw: 108 | ln -s /usr/share/dict/words in-usdw 109 | 110 | top-1m: top-1m.csv 111 | sed 's/^[0-9]*,//' <$< >$@ 112 | top-1m.csv: top-1m.csv.zip 113 | rm -f $@ 114 | unzip $< 115 | touch $@ 116 | top-1m.csv.zip: 117 | curl -O http://s3.amazonaws.com/alexa-static/top-1m.csv.zip 118 | 119 | in-rdns: in-dns 120 | rev in-dns >in-rdns 121 | 122 | in-dns: 123 | for z in cam.ac.uk private.cam.ac.uk \ 124 | eng.cam.ac.uk cl.cam.ac.uk \ 125 | maths.cam.ac.uk damtp.cam.ac.uk dpmms.cam.ac.uk; \ 126 | do dig axfr $$z @131.111.8.37; done |\ 127 | sed '/^;/d;s/[ ].*//' | uniq >in-dns 128 | 129 | in-b9: bind9 130 | find bind9/ -name '*.c' -o -name '*.h' | \ 131 | xargs ./getwords.pl >in-b9 132 | 133 | tex: 134 | pdflatex tinytocs.tex 135 | bibtex tinytocs 136 | pdflatex tinytocs.tex 137 | pdflatex tinytocs.tex 138 | sed '/\\abstract{/,/^}$$/!d;/\\abstract{/d;/^}$$/d' tinytocs.tex | wc -w 139 | sed '/\\tinybody{/,/}$$/!d;s/\\tinybody{//;s/}$$//;s/\\\\$$//' tinytocs.tex | wc -c 140 | 141 | bind9: 142 | git clone https://gitlab.isc.org/isc-projects/bind9.git 143 | 144 | html: 145 | for f in *.md; do markdown <$$f | ./entities >$${f%md}html; done 146 | 147 | upload: html 148 | git push chiark:public-git/qp.git 149 | git push git@github.com:fanf2/qp.git 150 | ssh chiark public-html/prog/qp/.htupdate 151 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | qp tries and crit-bit tries 2 | =========================== 3 | 4 | I have been working on radix trees / patricia tries / crit-bit tries 5 | with a larger fan-out per branch to reduce lookup costs without 6 | wasting memory. 7 | 8 | My best solution so far is the "qp trie", short for quelques-bits 9 | popcount patricia trie. (Nothing to do with cutie cupid dolls or 10 | Japanese mayonnaise!) A qp trie is like a crit-bit trie (aka patricia 11 | trie) except each branch is indexed by a few bits at a time instead of 12 | one bit. The array of sub-tries at a branch node is compressed using 13 | the popcount trick to omit unused branches, saving memory. When 14 | searching a qp trie, the child nodes are prefetched from memory while 15 | the CPU works out which child is next. 16 | 17 | The original version of qp tries used 4 bits at a time, so it was a 18 | quadbit popcount patricia trie. There is a faster (but slightly more 19 | complicated) version that uses 5 bits at a time, a quintuple-bit 20 | popcount patricia trie. There is a faster, smaller, and simpler 21 | DNS-optimized version that uses a byte at a time for standard 22 | hostnames. 23 | 24 | Based on a few benchmarks, qp tries have about 1/3 less memory 25 | overhead of crit-bit tries, 1.3 words vs 2 words of overhead per item; 26 | the average depth of a qp trie is about half that of a crit-bit trie; 27 | and the overall speed of qp tries is about 30% faster than crit-bit 28 | tries. The qp trie implementation is about 40% bigger. 29 | 30 | 31 | usage 32 | ----- 33 | 34 | Type `make test` or `make bench`. (You will need to use GNU make.) 35 | If you have a recent Intel CPU you might want to add `-mpopcnt` to 36 | the CFLAGS to get SSE4.2 POPCNT instructions. Other build options: 37 | 38 | * `HAVE_SLOW_POPCOUNT` 39 | compiles the code to use a hand-coded 16 bit `popcount()` 40 | instead of `__builtin_popcount()`. 
No need for this with 41 | recent clang/llvm; useful with older gcc. 42 | 43 | * `HAVE_NARROW_CPU` 44 | uses a 2 x 16 bit SIMD-within-a-register popcount instead of 45 | two separate 16 bit popcounts; might be useful on small CPUs 46 | but makes little difference on 64 bit Intel. 47 | 48 | The makefile builds {test,bench}-{qs,qn} with these options; they are 49 | otherwise the same as test-qp and bench-qp. 50 | 51 | 52 | caveats 53 | ------- 54 | 55 | Most of the code has only been tested on 64-bit little endian 56 | machines. It might work on 32-bit machines (provided the compiler 57 | supports 64 bit integers) and probably won't work on a big-endian 58 | machine. The "`fn`" (five-bit new) variation should be more portable 59 | since it avoids the mistakes of the earlier code. 60 | 61 | Key strings can be byte-aligned but values must be word-aligned; you 62 | can swap this restriction (e.g. if you want to map from strings to 63 | integers) by tweaking the struct layout and adjusting the check in 64 | Tset(). 65 | 66 | Keys are '\0' terminated C strings, which guarantees one key is not a 67 | prefix of another, so leaves and branches cannot occur at the same 68 | point. It should be possible to support arbitrary binary keys by being 69 | more clever about handling string termination. 70 | 71 | 72 | articles 73 | -------- 74 | 75 | Newest at the bottom, scroll down... 76 | 77 | * [QP TRIE HOME PAGE](https://dotat.at/prog/qp) 78 | 79 | * [2015-10-04](blog-2015-10-04.md) - 80 | qp tries: smaller and faster than crit-bit tries 81 | 82 | A blog article / announcement. 83 | 84 | * [2015-10-07](blog-2015-10-07.md) - 85 | crit-bit tries without allocation 86 | 87 | An unimplemented sketch of a neat way to use crit-bit tries. 
88 | 89 | * [2015-10-11](blog-2015-10-11.md) - 90 | prefetching tries 91 | 92 | * [2015-10-13](https://9vx.org/post/qp-tries/) - 93 | Devon O'Dell benchmarks qp tries against some alternatives 94 | 95 | * [2015-10-19](blog-2015-10-19.md) - 96 | never mind the quadbits, feel the width! 97 | 98 | Benchmarking wider-fanout versions of qp tries. 99 | 100 | * [2016-20-23](blog-2016-02-23.md) - 101 | How does a qp trie compare to a network routing trie? 102 | 103 | Reading some vaguely-related academic literature. 104 | 105 | * [2016-03-06](tinytocs.pdf) - 106 | [TinyToCS](http://tinytocs.org/) vol. 4 includes a paper on QP tries! 107 | 108 | Nicest comment from a reviewer: 109 | 110 | > The body of this paper is a masterpiece of economy: 111 | > results are presented very clearly and understandably. 112 | > The result here is simple, compact, and unambiguous, 113 | > which makes it perfect for TinyToCS. 114 | 115 | * 2016-11-21 - 116 | 117 | 118 | A greatly enhanced and properly engineered implementation of a 119 | qp trie is being incorporated into CZ.NIC Knot DNS, for better 120 | memory efficiency. 121 | 122 | * 2016-12-20 - 123 | 124 | 125 | Frank Denis's Rust version of qp tries 126 | 127 | * [2017-01-09](blog-2017-01-09.md) - 128 | qp trie news roundup 129 | 130 | * [2020-07-05](blog-2020-07-05.md) - 131 | A compelling idea: the genesis of my DNS-trie 132 | 133 | * 2020-07-20 - 134 | 135 | A fork of NSD that uses my DNS-trie code. It is significantly 136 | faster and much smaller than NSD's default radix tree. 
137 | 138 | - 139 | a thread about how to optimize a qp-trie for the DNS 140 | 141 | * [2021-06-23](blog-2021-06-23.md) - 142 | Page-based GC for qp-trie RCU 143 | 144 | Memory management to support multithreaded readers 145 | 146 | * [2022-05-23 qp guts](2022-05-23-qp-guts.pdf) + 147 | [2022-05-23 qp api](2022-05-23-qp-api.pdf) 148 | 149 | A couple of qp-trie presentations I gave 150 | at the 2022 isc.org all-hands meeting; 151 | the PDFs have the slides with speaker notes 152 | 153 | * [2022-06-22](blog-2022-06-22.md) - 154 | Compacting a qp-trie 155 | 156 | notes on several experiments 157 | 158 | 159 | 160 | thanks 161 | ------ 162 | 163 | Marek Vavrusa (CZ.NIC) and Devon O'Dell (Fastly) enthusiastically put 164 | this code to work and provided encouraging feedback. 165 | 166 | Vladimír Čunát incorporated qp tries into CZ.NIC Knot DNS, at the 167 | suggestion of Jan Včelák. 168 | 169 | Simon Tatham proved that parent pointers are not needed for embedded 170 | crit-bit tries. 171 | 172 | 173 | download 174 | -------- 175 | 176 | You can clone or browse the repository from: 177 | 178 | * git://dotat.at/qp.git 179 | * 180 | * 181 | * 182 | 183 | 184 | roadmap 185 | ------- 186 | 187 | * [Tbl.h][] [Tbl.c][] 188 | 189 | Abstract programming interface for tables with string keys and 190 | associated `void*` values. Intended to be shareable by multiple 191 | different implementations. 192 | 193 | * [qp.h][] [qp.c][] 194 | 195 | My original qp trie implementation. See qp.h for a longer 196 | description of where the data structure comes from. 197 | 198 | * [fp.h][] [fp.c][] 199 | 200 | 5-bit clone-and-hack variant of qp tries. 201 | 202 | * [fn.h][] [fn.c][] 203 | 204 | Newer version of 5-bit qp trie, which should be more portable. 205 | 206 | **This is the version that I recommend - faster and less memory overhead** 207 | 208 | * [dns.h][] [dns.c][] 209 | 210 | A qp-trie variant optimized for domain names. 
As well as the 211 | comments in [dns.h][] there are [some design notes](notes-dns.md). 212 | The DNS-trie implementation in this repository is heavily bodged 213 | to fit into my test / benchmark harness so it can be directly 214 | compared with the other qp-trie versions. 215 | 216 | * [wp.h][] [wp.c][] 217 | 218 | 6-bit clone-and-hack variant of qp tries. 219 | 220 | * [cb.h][] [cb.c][] 221 | 222 | My crit-bit trie implementation. See cb.h for a description of 223 | how it differs from DJB's crit-bit code. 224 | 225 | * [qp-debug.c][] [fp-debug.c][] [fn-debug.c][] [wp-debug.c][] [cb-debug.c][] 226 | 227 | Debug support code. 228 | 229 | * [bench.c][] [bench-multi.pl][] [bench-more.pl][] [bench-cross.pl][] 230 | 231 | Generic benchmark for Tbl.h implementations, and benchmark 232 | drivers for comparing different implementations. 233 | 234 | * [test.c][] [test.pl][] 235 | 236 | Generic test harness for the Tbl.h API, and a perl reference 237 | implementation for verifying correctness. 238 | 239 | * [test-gen.pl][] [test-once.sh][] 240 | 241 | Driver scripts for the test harness. 
242 | 243 | 244 | [Tbl.c]: https://github.com/fanf2/qp/blob/HEAD/Tbl.c 245 | [Tbl.h]: https://github.com/fanf2/qp/blob/HEAD/Tbl.h 246 | [cb-debug.c]: https://github.com/fanf2/qp/blob/HEAD/cb-debug.c 247 | [cb.c]: https://github.com/fanf2/qp/blob/HEAD/cb.c 248 | [cb.h]: https://github.com/fanf2/qp/blob/HEAD/cb.h 249 | [dns-debug.c]: https://github.com/fanf2/qp/blob/HEAD/dns-debug.c 250 | [dns.c]: https://github.com/fanf2/qp/blob/HEAD/dns.c 251 | [dns.h]: https://github.com/fanf2/qp/blob/HEAD/dns.h 252 | [qp-debug.c]: https://github.com/fanf2/qp/blob/HEAD/qp-debug.c 253 | [qp.c]: https://github.com/fanf2/qp/blob/HEAD/qp.c 254 | [qp.h]: https://github.com/fanf2/qp/blob/HEAD/qp.h 255 | [fp-debug.c]: https://github.com/fanf2/qp/blob/HEAD/fp-debug.c 256 | [fp.c]: https://github.com/fanf2/qp/blob/HEAD/fp.c 257 | [fp.h]: https://github.com/fanf2/qp/blob/HEAD/fp.h 258 | [fn-debug.c]: https://github.com/fanf2/qp/blob/HEAD/fn-debug.c 259 | [fn.c]: https://github.com/fanf2/qp/blob/HEAD/fn.c 260 | [fn.h]: https://github.com/fanf2/qp/blob/HEAD/fn.h 261 | [wp-debug.c]: https://github.com/fanf2/qp/blob/HEAD/wp-debug.c 262 | [wp.c]: https://github.com/fanf2/qp/blob/HEAD/wp.c 263 | [wp.h]: https://github.com/fanf2/qp/blob/HEAD/wp.h 264 | [test-gen.pl]: https://github.com/fanf2/qp/blob/HEAD/test-gen.pl 265 | [test-once.sh]: https://github.com/fanf2/qp/blob/HEAD/test-once.sh 266 | [test.c]: https://github.com/fanf2/qp/blob/HEAD/test.c 267 | [test.pl]: https://github.com/fanf2/qp/blob/HEAD/test.pl 268 | [bench-cross.pl]: https://github.com/fanf2/qp/blob/HEAD/bench-multi.pl 269 | [bench-more.pl]: https://github.com/fanf2/qp/blob/HEAD/bench-more.pl 270 | [bench-multi.pl]: https://github.com/fanf2/qp/blob/HEAD/bench-multi.pl 271 | [bench.c]: https://github.com/fanf2/qp/blob/HEAD/bench.c 272 | 273 | 274 | notes 275 | ----- 276 | 277 | * [bitstring keys and longest prefix search](notes-bitstrings-prefixes.md) 278 | * [generic leaf types](notes-generic-leaves.md) 279 | * [rib 
compression](notes-rib-compression.md) 280 | * [buffered writes and compressed spines](notes-write-buffer.md) 281 | * [jumbo branches](notes-jumbo.md) 282 | * [DNS names](notes-dns.md) 283 | * [concurrent cache updates](notes-concurrency.md) 284 | * [mistakes](notes-mistakes.md) 285 | * [todo](notes-todo.md) 286 | 287 | --------------------------------------------------------------------------- 288 | 289 | Written by Tony Finch ; 290 | You may do anything with this. It has no warranty. 291 | 292 | -------------------------------------------------------------------------------- /Tbl.c: -------------------------------------------------------------------------------- 1 | // Tbl.c: simpler wrappers for core table functions 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "Tbl.h" 12 | 13 | void * 14 | Tgetl(Tbl *tbl, const char *key, size_t len) { 15 | const char *rkey = NULL; 16 | void *rval = NULL; 17 | if(Tgetkv(tbl, key, len, &rkey, &rval)) 18 | return(rval); 19 | else 20 | return(NULL); 21 | } 22 | 23 | void * 24 | Tget(Tbl *tbl, const char *key) { 25 | return(Tgetl(tbl, key, strlen(key))); 26 | } 27 | 28 | Tbl * 29 | Tset(Tbl *tbl, const char *key, void *value) { 30 | return(Tsetl(tbl, key, strlen(key), value)); 31 | } 32 | 33 | Tbl * 34 | Tdell(Tbl *tbl, const char *key, size_t len) { 35 | const char *rkey = NULL; 36 | void *rval = NULL; 37 | return(Tdelkv(tbl, key, len, &rkey, &rval)); 38 | } 39 | 40 | Tbl * 41 | Tdel(Tbl *tbl, const char *key) { 42 | return(Tdell(tbl, key, strlen(key))); 43 | } 44 | 45 | bool 46 | Tnext(Tbl *tbl, const char **pkey, void **pvalue) { 47 | size_t len = *pkey == NULL ? 
0 : strlen(*pkey); 48 | return(Tnextl(tbl, pkey, &len, pvalue)); 49 | } 50 | 51 | const char * 52 | Tnxt(Tbl *tbl, const char *key) { 53 | void *value = NULL; 54 | Tnext(tbl, &key, &value); 55 | return(key); 56 | } 57 | -------------------------------------------------------------------------------- /Tbl.h: -------------------------------------------------------------------------------- 1 | // Tbl.h: an abstract API for tables with string keys. 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | #ifndef Tbl_h 8 | #define Tbl_h 9 | 10 | // A table is represented by a pointer to this incomplete struct type. 11 | // You initialize an empty table by setting the pointer to NULL. 12 | // 13 | typedef struct Tbl Tbl; 14 | 15 | // Get the value associated with a key. 16 | // Returns NULL if the key is not in the Table. 17 | // 18 | void *Tgetl(Tbl *tbl, const char *key, size_t klen); 19 | void *Tget(Tbl *tbl, const char *key); 20 | 21 | // Returns false if the key is not found, otherwise returns true and 22 | // sets *rkey and *rval to the table's key and value pointers. 23 | // 24 | bool Tgetkv(Tbl *tbl, const char *key, size_t klen, const char **rkey, void **rval); 25 | 26 | // Associate a key with a value in a table. Returns a new pointer to 27 | // the modified table. If there is an error it sets errno and returns 28 | // NULL. To delete a key, set its value to NULL. When the last key is 29 | // deleted, Tset() returns NULL without setting errno. The key and 30 | // value are borrowed not copied. 
31 | // 32 | // Errors: 33 | // EINVAL - value pointer is not word-aligned 34 | // ENOMEM - allocation failed 35 | // 36 | Tbl *Tsetl(Tbl *tbl, const char *key, size_t klen, void *value); 37 | Tbl *Tset(Tbl *tbl, const char *key, void *value); 38 | Tbl *Tdell(Tbl *tbl, const char *key, size_t klen); 39 | Tbl *Tdel(Tbl *tbl, const char *key); 40 | 41 | // Deletes an entry from the table as above, and sets *rkey and *rval 42 | // to the removed key and value pointers. 43 | // 44 | Tbl *Tdelkv(Tbl *tbl, const char *key, size_t klen, const char **rkey, void **rval); 45 | 46 | // Find the next item in the table. The p... arguments are in/out 47 | // parameters. To find the first key, pass *pkey=NULL and *pklen=0. 48 | // For subsequent keys, *pkey must be present in the table and is 49 | // updated to the lexicographically following key. Returns false or 50 | // NULL when there are no more keys. 51 | // 52 | bool Tnextl(Tbl *tbl, const char **pkey, size_t *pklen, void **pvalue); 53 | bool Tnext(Tbl *tbl, const char **pkey, void **pvalue); 54 | const char *Tnxt(Tbl *tbl, const char *key); 55 | 56 | // Debugging 57 | // 58 | void Tdump(Tbl *tbl); 59 | void Tsize(Tbl *tbl, const char **rtype, 60 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves); 61 | 62 | #endif // Tbl_h 63 | -------------------------------------------------------------------------------- /bench-cross.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use warnings; 4 | use strict; 5 | 6 | use MIME::Base64; 7 | 8 | sub maxlen { 9 | return (sort { $a <=> $b } map { length } @_)[-1]; 10 | } 11 | 12 | sub usage { 13 | die < ... -- ... 15 | EOF 16 | } 17 | 18 | usage if @ARGV < 4 or $ARGV[0] !~ m{^\d+$}; 19 | my $count = shift; 20 | 21 | my @prog; 22 | 23 | push @prog, shift while @ARGV and $ARGV[0] ne '--'; 24 | usage if '--' ne shift @ARGV; 25 | 26 | my %col; 27 | for my $i (0 .. 
$#prog) { 28 | $col{$prog[$i]} = sprintf "\e[3%dm", 1 + $i %7; 29 | } 30 | 31 | my @file = @ARGV; 32 | 33 | my $wp = maxlen @prog; 34 | my $wf = maxlen @file, "0.000"; 35 | my $waf = ($wf+1) * scalar @file; 36 | 37 | my %stats; 38 | 39 | open my $rnd, '<', '/dev/urandom' 40 | or die "open /dev/urandom: $!\n"; 41 | 42 | for (my $N = 1 ;; ++$N) { 43 | my $seed; 44 | sysread $rnd, $seed, 12; 45 | $seed = encode_base64 $seed, ""; 46 | 47 | for my $file (@file) { 48 | for my $prog (@prog) { 49 | print "$prog $seed $count $file\n"; 50 | for (qx{$prog $seed $count $file}) { 51 | if(m{^(\w+)... ([0-9.]+) s$}) { 52 | my $test = $1; 53 | my $time = $2; 54 | $stats{$test}{$prog}{$file}{this} = $time; 55 | $stats{$test}{$prog}{$file}{min} = $time 56 | if not defined $stats{$test}{$prog}{$file}{min} 57 | or $stats{$test}{$prog}{$file}{min} > $time; 58 | $stats{$test}{$prog}{$file}{tot} += $time; 59 | $stats{$test}{$prog}{$file}{tot2} += $time * $time; 60 | } 61 | } 62 | } 63 | } 64 | 65 | printf "%-*s ", $wp + $wf, "round $N"; 66 | printf " | %-31s", $_ for sort keys %stats; 67 | print "\n"; 68 | for my $file (@file) { 69 | for my $prog (@prog) { 70 | printf "%s%-*s %-*s", $col{$prog}, $wp, $prog, $wf, $file; 71 | for my $test (sort keys %stats) { 72 | my $mean = $stats{$test}{$prog}{$file}{tot} / $N; 73 | my $var = $stats{$test}{$prog}{$file}{tot2} / $N - $mean * $mean; 74 | printf " | \e[2m(%.3f)\e[22m %.3f < %.3f +/- %.3f", 75 | $stats{$test}{$prog}{$file}{this}, 76 | $stats{$test}{$prog}{$file}{min}, 77 | $mean, $var ** 0.5; 78 | 79 | } 80 | print "\e[0m\n"; 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /bench-more.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use warnings; 4 | use strict; 5 | 6 | use MIME::Base64; 7 | 8 | sub maxlen { 9 | return (sort { $a <=> $b } map { length } @_)[-1]; 10 | } 11 | 12 | sub usage { 13 | die < ... -- ... 
15 | EOF 16 | } 17 | 18 | usage if @ARGV < 4 or $ARGV[0] !~ m{^\d+$}; 19 | my $count = shift; 20 | 21 | my @prog; 22 | 23 | push @prog, shift while @ARGV and $ARGV[0] ne '--'; 24 | usage if '--' ne shift @ARGV; 25 | 26 | my @file = @ARGV; 27 | 28 | my $wp = maxlen @prog; 29 | my $wf = maxlen @file, "0.000"; 30 | my $waf = ($wf+1) * scalar @file; 31 | 32 | my %stats; 33 | 34 | open my $rnd, '<', '/dev/urandom' 35 | or die "open /dev/urandom: $!\n"; 36 | 37 | for (my $N = 1 ;; ++$N) { 38 | my $seed; 39 | sysread $rnd, $seed, 12; 40 | $seed = encode_base64 $seed, ""; 41 | 42 | for my $file (@file) { 43 | for my $prog (@prog) { 44 | print "$prog $seed $count $file\n"; 45 | for (qx{$prog $seed $count $file}) { 46 | if(m{^(\w+)... ([0-9.]+) s$}) { 47 | my $test = $1; 48 | my $time = $2; 49 | $stats{$test}{$prog}{$file}{this} = $time; 50 | $stats{$test}{$prog}{$file}{min} = $time 51 | if not defined $stats{$test}{$prog}{$file}{min} 52 | or $stats{$test}{$prog}{$file}{min} > $time; 53 | $stats{$test}{$prog}{$file}{tot} += $time; 54 | $stats{$test}{$prog}{$file}{tot2} += $time * $time; 55 | } 56 | } 57 | } 58 | } 59 | 60 | printf "round $N\n"; 61 | printf "%-*s ", $wp, ""; 62 | printf "| %-*s", $waf, $_ for sort keys %stats; 63 | print "\n"; 64 | printf "%-*s", $wp, ""; 65 | for (sort keys %stats) { 66 | printf " |"; 67 | printf " %*s", $wf, $_ for @file; 68 | } 69 | print "\n"; 70 | for my $prog (@prog) { 71 | printf "%-*s", $wp, $prog; 72 | for my $test (sort keys %stats) { 73 | printf " |"; 74 | for my $file (@file) { 75 | my $mean = $stats{$test}{$prog}{$file}{tot} / $N; 76 | printf " %*.3f", $wf, $mean; 77 | } 78 | } 79 | print "\n"; 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /bench-multi.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use warnings; 4 | use strict; 5 | 6 | use MIME::Base64; 7 | 8 | my @prog; 9 | 10 | push @prog, shift while 
@ARGV and $ARGV[0] ne '--'; 11 | if('--' ne shift @ARGV) { 12 | die <... -- 14 | EOF 15 | } 16 | 17 | my %stats; 18 | my $w = 0; 19 | for (@prog) { 20 | $w = length if $w < length; 21 | } 22 | 23 | open my $rnd, '<', '/dev/urandom' 24 | or die "open /dev/urandom: $!\n"; 25 | 26 | for (my $N = 1 ;; ++$N) { 27 | my $seed; 28 | sysread $rnd, $seed, 12; 29 | $seed = encode_base64 $seed, ""; 30 | 31 | for my $prog (@prog) { 32 | print "$prog $seed @ARGV\n"; 33 | for (qx{$prog $seed @ARGV}) { 34 | if(m{^(\w+)... ([0-9.]+) s$}) { 35 | my $test = $1; 36 | my $time = $2; 37 | $stats{$test}{$prog}{this} = $time; 38 | $stats{$test}{$prog}{min} = $time 39 | if not defined $stats{$test}{$prog}{min} 40 | or $stats{$test}{$prog}{min} > $time; 41 | $stats{$test}{$prog}{tot} += $time; 42 | $stats{$test}{$prog}{tot2} += $time * $time; 43 | } 44 | } 45 | } 46 | 47 | printf "round $N with @ARGV\n"; 48 | printf "%-*s", $w, ""; 49 | printf " | %-31s", $_ for sort keys %stats; 50 | print "\n"; 51 | for my $prog (@prog) { 52 | printf "%-*s", $w, $prog; 53 | for my $test (sort keys %stats) { 54 | my $mean = $stats{$test}{$prog}{tot} / $N; 55 | my $var = $stats{$test}{$prog}{tot2} / $N - $mean * $mean; 56 | printf " | %.3f : %.3f < %.3f +/- %.3f", 57 | $stats{$test}{$prog}{this}, 58 | $stats{$test}{$prog}{min}, 59 | $mean, $var ** 0.5; 60 | } 61 | print "\n"; 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /bench-reformat.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use warnings; 4 | use strict; 5 | 6 | my @test = <> =~ m{(?:[|]\s+(\w+)\s+)}g; 7 | #printf "tests: %s\n", join " ", @test; 8 | 9 | my @file = split m{\s+[|]\s+}, scalar <>; 10 | die "garbage at start of file list" 11 | unless "" eq shift @file; 12 | chomp $file[-1]; 13 | for my $file (@file) { 14 | die "mismatched file list: <$file> / <$file[-1]>" 15 | unless $file eq $file[-1]; 16 | } 17 | @file = split ' 
', $file[-1]; 18 | shift @file if $file[0] eq ""; 19 | #printf "files: %s\n", join " ", @file; 20 | 21 | my %stats; 22 | my @prog; 23 | 24 | while (<>) { 25 | s{^\s*(\S+)\s+[|]}{|} or die "missing progname"; 26 | my $prog = $1; 27 | $prog =~ s{^\./bench-}{}; 28 | push @prog, $prog; 29 | for my $test (@test) { 30 | s{^\s*[|]\s+}{} or die "missing separator"; 31 | for my $file (@file) { 32 | s{^\s*([0-9.]+)\s+}{} or die "missing number"; 33 | $stats{$test}{$file}{$prog} = $1; 34 | } 35 | } 36 | } 37 | 38 | my %min; 39 | for my $test (@test) { 40 | for my $file (@file) { 41 | my $min = 99; 42 | for my $prog (@prog) { 43 | if ($min > $stats{$test}{$file}{$prog}) { 44 | $min = $stats{$test}{$file}{$prog}; 45 | $min{$test}{$file} = $prog; 46 | } 47 | } 48 | } 49 | } 50 | 51 | print "\n"; 52 | print ""; 53 | print "" for @file; 54 | print "\n"; 55 | for my $test (@test) { 56 | print "\n"; 57 | my $tt = $test; 58 | for my $prog (@prog) { 59 | print ""; 60 | for my $file (@file) { 61 | if ($min{$test}{$file} eq $prog) { 62 | print ""; 63 | } else { 64 | print ""; 65 | } 66 | } 67 | if ($tt ne "") { 68 | print "\n"; 69 | $tt = ""; 70 | } else { 71 | print "\n"; 72 | } 73 | } 74 | } 75 | print "
$_
$prog$stats{$test}{$file}{$prog}$stats{$test}{$file}{$prog}$tt
\n"; 76 | -------------------------------------------------------------------------------- /bench-table.css: -------------------------------------------------------------------------------- 1 | th { 2 | font-weight: normal; 3 | text-align: right; 4 | width: 4em; 5 | } 6 | 7 | td { 8 | text-align: right; 9 | } 10 | 11 | tr.break { 12 | height: 0.5em; 13 | } 14 | 15 | .rightlabel { 16 | padding-left: 1em; 17 | text-align: left; 18 | } 19 | -------------------------------------------------------------------------------- /bench.c: -------------------------------------------------------------------------------- 1 | // bench.c: table benchmark. 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | #include "Tbl.h" 21 | 22 | static const char *progname; 23 | 24 | static void 25 | die(const char *cause) { 26 | fprintf(stderr, "%s: %s: %s\n", progname, cause, strerror(errno)); 27 | exit(1); 28 | } 29 | 30 | static void 31 | usage(void) { 32 | fprintf(stderr, 33 | "usage: %s \n" 34 | " The seed must be at least 12 characters.\n" 35 | , progname); 36 | exit(1); 37 | } 38 | 39 | static struct timeval tu; 40 | 41 | static void 42 | start(const char *s) { 43 | printf("%s... 
", s); 44 | gettimeofday(&tu, NULL); 45 | } 46 | 47 | static void 48 | done(void) { 49 | struct timeval tv; 50 | gettimeofday(&tv, NULL); 51 | tv.tv_sec -= tu.tv_sec; 52 | tv.tv_usec -= tu.tv_usec; 53 | if(tv.tv_usec < 0) { 54 | tv.tv_sec -= 1; 55 | tv.tv_usec += 1000000; 56 | } 57 | printf("%ld.%06ld s\n", 58 | (long)tv.tv_sec, (long)tv.tv_usec); 59 | } 60 | 61 | static int 62 | ssrandom(char *s) { 63 | // initialize random(3) from a string 64 | size_t len = strlen(s); 65 | if(len < 12) return(-1); 66 | unsigned seed = s[0] | s[1] << 8 | s[2] << 16 | s[3] << 24; 67 | initstate(seed, s+4, len-4); 68 | return(0); 69 | } 70 | 71 | int 72 | main(int argc, char *argv[]) { 73 | progname = argv[0]; 74 | if(argc != 4 || argv[1][0] == '-') usage(); 75 | if(ssrandom(argv[1]) < 0) usage(); 76 | size_t N = (size_t)atoi(argv[2]); 77 | 78 | int fd = open(argv[3], O_RDONLY); 79 | if(fd < 0) die("open"); 80 | struct stat st; 81 | if(fstat(fd, &st) < 0) die("stat"); 82 | size_t flen = (size_t)st.st_size; 83 | char *fbuf = malloc(flen + 1); 84 | if(fbuf == NULL) die("malloc"); 85 | if(read(fd, fbuf, flen) < 0) die("read"); 86 | close(fd); 87 | fbuf[flen] = '\0'; 88 | 89 | size_t lines = 0; 90 | for(char *p = fbuf; *p; p++) 91 | if(*p == '\n') 92 | ++lines; 93 | char **line = calloc(lines, sizeof(*line)); 94 | size_t l = 0; 95 | bool bol = true; 96 | for(char *p = fbuf; *p; p++) { 97 | if(bol) { 98 | line[l++] = p; 99 | bol = false; 100 | } 101 | if(*p == '\n') { 102 | *p = '\0'; 103 | bol = true; 104 | } 105 | } 106 | printf("- got %zu lines\n", lines); 107 | 108 | start("load"); 109 | Tbl *t = NULL; 110 | for(l = 0; l < lines; l++) 111 | t = Tset(t, line[l], main); 112 | done(); 113 | 114 | start("search"); 115 | l = 0; 116 | for(size_t i = 0; i < N; i++) 117 | if(Tget(t, line[random() % lines]) != NULL) 118 | ++l; 119 | assert(l == N); 120 | done(); 121 | 122 | start("mutate"); 123 | for(size_t i = 0; i < N; i++) 124 | t = Tset(t, line[random() % lines], 125 | random() % 2 ? 
main : NULL); 126 | done(); 127 | 128 | // ensure all keys present 129 | for(l = 0; l < lines; l++) 130 | t = Tset(t, line[l], main); 131 | start("free"); 132 | for(l = 0; l < lines; l++) 133 | t = Tset(t, line[l], NULL); 134 | assert(t == NULL); 135 | done(); 136 | 137 | return(0); 138 | } 139 | -------------------------------------------------------------------------------- /blog-2015-10-04.md: -------------------------------------------------------------------------------- 1 | qp tries: smaller and faster than crit-bit tries 2 | ================================================ 3 | 4 | tl;dr: I have developed a data structure called a "qp trie", based on 5 | the crit-bit trie. Some simple benchmarks say qp tries have about 1/3 6 | less memory overhead and are about 10% faster than crit-bit tries. 7 | 8 | "qp trie" is short for "quadbit popcount patricia trie". (Nothing to 9 | do with cutie cupid dolls or Japanese mayonnaise!) 10 | 11 | Get the code from . 12 | 13 | 14 | background 15 | ---------- 16 | 17 | Crit-bit tries are an elegant space-optimised variant of PATRICIA 18 | tries. Dan Bernstein has a well-known [description of crit-bit 19 | tries](https://cr.yp.to/critbit.html), and Adam Langley has nicely 20 | [annotated DJB's crit-bit 21 | implementation](https://github.com/agl/critbit). 22 | 23 | What struck me was crit-bit tries require quite a lot of indirections 24 | to perform a lookup. I wondered if it would be possible to test 25 | multiple bits at a branch point to reduce the depth of the trie, and 26 | make the size of the branch adapt to the trie's density to keep memory 27 | usage low. My initial attempt (over two years ago) was vaguely 28 | promising but too complicated, and I gave up on it. 
29 | 30 | A few weeks ago I read about Phil Bagwell's hash array mapped trie 31 | (HAMT) which he described in two papers, ["fast and space efficient 32 | trie 33 | searches"](http://infoscience.epfl.ch/record/64394/files/triesearches.pdf), 34 | and ["ideal hash 35 | trees"](http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf). 36 | The part that struck me was the 37 | [popcount](https://en.wikipedia.org/wiki/popcount) trick he uses to 38 | eliminate unused pointers in branch nodes. (This is also described in 39 | ["Hacker's Delight"](http://www.hackersdelight.org) by Hank Warren, in 40 | the "applications" subsection of chapter 5-1 "Counting 1 bits", which 41 | evidently did not strike me in the same way when I read it!) 42 | 43 | You can use popcount() to implement a sparse array of length *N* 44 | containing *M < N* members using bitmap of length *N* and a packed 45 | vector of *M* elements. A member *i* is present in the array if bit 46 | *i* is set, so *M == `popcount(bitmap)`*. The index of member *i* in 47 | the packed vector is the popcount of the bits preceding *i*. 48 | 49 | mask = 1 << i; 50 | if(bitmap & mask) 51 | member = vector[popcount(bitmap & mask-1)] 52 | 53 | 54 | qp tries 55 | -------- 56 | 57 | If we are increasing the fanout of crit-bit tries, how much should we 58 | increase it by, that is, how many bits should we test at once? In a 59 | HAMT the bitmap is a word, 32 or 64 bits, using 5 or 6 bits from the 60 | key at a time. But it's a bit fiddly to extract bit-fields from a 61 | string when they span bytes. 62 | 63 | So I decided to use a quadbit at a time (i.e. a nibble or half-byte) 64 | which implies a 16 bit popcount bitmap. We can use the other 48 bits 65 | of a 64 bit word to identify the index of the nibble that this branch 66 | is testing. A branch needs a second word to contain the pointer to the 67 | packed array of "twigs" (my silly term for sub-tries). 
68 | 69 | It is convenient for a branch to be two words, because that is the 70 | same as the space required for the key+value pair that you want to 71 | store at each leaf. So each slot in the array of twigs can contain 72 | either another branch or a leaf, and we can use a flag bit in the 73 | bottom of a pointer to tell them apart. 74 | 75 | Here's the qp trie containing the keys "foo", "bar", "baz". (Note 76 | there is only one possible trie for a given set of keys.) 77 | 78 | [ 0044 | 1 | twigs ] -> [ 0404 | 5 | twigs ] -> [ value | "bar" ] 79 | [ value | "foo" ] [ value | "baz" ] 80 | 81 | The root node is a branch. It is testing nibble 1 (the least 82 | significant half of byte 0), and it has twigs for nibbles containing 2 83 | ('b' == 0x6**2**) or 6 ('f' == 0x6**6**). (Note 1 << 2 == 0x0004 and 1 84 | << 6 == 0x0040.) 85 | 86 | The first twig is also a branch, testing nibble 5 (the least 87 | significant half of byte 2), and it has twigs for nibbles containing 2 88 | ('r' == 0x7**2**) or 10 ('z' == 0x7**a**). Its twigs are both leaves, 89 | for "bar" and "baz". (Pointers to the string keys are stored in the 90 | leaves - we don't copy the keys inline.) 91 | 92 | The other twig of the root branch is the leaf for "foo". 93 | 94 | If we add a key "hax" the trie will grow another twig on the root 95 | branch. 96 | 97 | [ 0144 | 1 | twigs ] -> [ 0404 | 5 | twigs ] -> [ value | "bar" ] 98 | [ value | "foo" ] [ value | "baz" ] 99 | [ value | "hax" ] 100 | 101 | This layout is very compact. In the worst case, where each branch has 102 | only two twigs, a qp trie has the same overhead as a crit-bit trie, 103 | two words (16 bytes) per leaf. In the best case, where each branch is 104 | full with 16 twigs, the overhead is one byte per leaf. 105 | 106 | When storing 236,000 keys from `/usr/share/dict/words` the overhead is 107 | 1.44 words per leaf, and when storing a vocabulary of 54,000 keys 108 | extracted from the BIND9 source, the overhead is 1.12 words per leaf. 
109 | 110 | For comparison, if you have a parsimonious hash table which stores 111 | just a hash code, key, and value pointer in each slot, and which has 112 | 90% occupancy, its overhead is 1.33 words per item. 113 | 114 | In the best case, a qp trie can be a quarter of the depth of a 115 | crit-bit trie. In practice it is about half the depth. For our example 116 | data sets, the average depth of a crit-bit trie is 26.5 branches, and 117 | a qp trie is 12.5 for `dict/words` or 11.1 for the BIND9 words. 118 | 119 | My benchmarks show qp tries are about 10% faster than crit-bit tries. 120 | However I do not have a machine with both a popcount instruction and a 121 | compiler that supports it; also, LLVM fails to optimise popcount for a 122 | 16 bit word size, and GCC compiles it as a subroutine call. So there's 123 | scope for improvement. 124 | 125 | 126 | crit-bit tries revisited 127 | ------------------------ 128 | 129 | DJB's published crit-bit trie code only stores a set of keys, without 130 | any associated values. It's possible to add support for associated 131 | values without increasing the overhead. 132 | 133 | In DJB's code, branch nodes have three words: a bit index, and two 134 | pointers to child nodes. Each child pointer has a flag in its least 135 | significant bit indicating whether it points to another branch, or 136 | points to a key string. 137 | 138 | [ branch ] -> [ 4 ] 139 | [ branch ] -> [ 5 ] 140 | [ "hax" ] [ branch ] -> [ 20 ] 141 | [ "foo" ] [ "bar" ] 142 | [ "baz" ] 143 | 144 | It is hard to add associated values to this structure without 145 | increasing its overhead. If you simply replace each string pointer 146 | with a pointer to a key+value pair, the overhead is 50% greater: three 147 | words per entry in addition to the key+value pointers. 148 | 149 | When I wanted to benchmark my qp trie implementation against crit-bit 150 | tries, I trimmed the qp trie code to make a crit-bit trie 151 | implementation. 
So my crit-bit implementation stores keys with 152 | associated values, but still has an overhead of only two words per 153 | item. 154 | 155 | [ 3 twigs ] -> [ 4 twigs ] -> [ 20 twigs ] -> [ val "bar" ] 156 | [ val "hax" ] [ val "foo" ] [ val "baz" ] 157 | 158 | Instead of viewing this as a trimmed-down qp trie, you can look at it 159 | as evolving from DJB's crit-bit tries. First, add two words to each 160 | node for the value pointers, which I have drawn by making the nodes 161 | wider: 162 | 163 | [ branch ] -> [ 4 ] 164 | [ x branch ] -> [ 5 ] 165 | [ val "hax" ] [ x branch ] -> [ 20 ] 166 | [ val "foo" ] [ val "bar" ] 167 | [ val "baz" ] 168 | 169 | The value pointers are empty (marked x) in branch nodes, which 170 | provides space to move the bit indexes up a level. One bit index from 171 | each child occupies each empty word. Moving the bit indexes takes 172 | away a word from every node, except for the root which becomes a word 173 | bigger. 174 | 175 | 176 | conclusion 177 | ---------- 178 | 179 | This code was pretty fun to write, and I'm reasonably pleased with the 180 | results. The debugging was easier than I feared: most of my mistakes 181 | were simple (e.g. using the wrong variable, failing to handle a 182 | trivial case, muddling up getline()s two length results) and `clang 183 | -fsanitize=address` was a mighty debugging tool. 184 | 185 | My only big logic error was in Tnext(); I thought it was easy to find 186 | the key lexicographically following some arbitrary string not in the 187 | trie, but it is not. (This isn't a binary search tree!) You can easily 188 | find the keys with a given prefix, if you know in advance the length 189 | of the prefix. But, with my broken code, if you searched for an 190 | arbitrary string you could end up in a subtrie which was not the 191 | subtrie with the longest matching prefix. So now, if you want to 192 | delete a key while iterating, you have to find the next key before 193 | deleting the previous one. 
194 | 195 | *finally...* 196 | 197 | I have this nice code, but I have no idea what practical use I might 198 | put it to! 199 | 200 | --------------------------------------------------------------------------- 201 | 202 | Written by Tony Finch ; 203 | You may do anything with this. It has no warranty. 204 | 205 | -------------------------------------------------------------------------------- /blog-2015-10-07.md: -------------------------------------------------------------------------------- 1 | crit-bit tries without allocation 2 | ================================= 3 | 4 | [Crit-bit tries](https://cr.yp.to/critbit.html) have fixed-size branch 5 | nodes and a constant overhead per leaf, which means they can be used 6 | as an embedded lookup structure. Embedded lookup structures do not 7 | need any extra memory allocation; it is enough to allocate the objects 8 | that are to be indexed by the lookup structure. 9 | 10 | An embedded lookup structure is a data structure in which the internal 11 | pointers used to search for an object (such as branch nodes) are 12 | embedded within the objects you are searching through. Each object can 13 | be a member of at most one of any particular kind of lookup structure, 14 | though an object can simultaneously be a member of several different 15 | kinds of lookup structure. 16 | 17 | The [BSD `` 18 | macros](https://svnweb.freebsd.org/base/head/sys/sys/queue.h?view=markup) 19 | are embedded linked lists. They are used frequently in the kernel, for 20 | instance in the network stack to chain [`mbuf` packet 21 | buffers](https://svnweb.freebsd.org/base/head/sys/sys/mbuf.h?view=markup#l178) 22 | together. Each mbuf can be a member of a list and a tailq. 
There is 23 | also a 24 | [``](https://cvsweb.openbsd.org/cgi-bin/cvsweb/src/sys/sys/tree.h?rev=HEAD&content-type=text/x-cvsweb-markup) 25 | which is used by [OpenSSH's privilege separation memory 26 | manager](https://cvsweb.openbsd.org/cgi-bin/cvsweb/src/usr.bin/ssh/monitor_mm.h?rev=HEAD&content-type=text/x-cvsweb-markup). Embedded red-black trees also appear in [jemalloc](https://github.com/jemalloc/jemalloc/blob/HEAD/include/jemalloc/internal/rb.h). 27 | 28 | 29 | embedded crit-bit branch node structure 30 | --------------------------------------- 31 | 32 | DJB's crit-bit branch nodes require three words: bit index, left 33 | child, and right child; embedded crit-bit branches are the same with 34 | an additional parent pointer. (However, see the addendum below in 35 | which Simon Tatham shows the parent pointer is not necessary.) 36 | 37 | struct branch { 38 | uint index; 39 | void *twig[2]; 40 | void **parent; 41 | }; 42 | 43 | The "twig" child pointers are tagged to indicate whether they point to 44 | a branch node or a leaf. The parent pointer normally points to the 45 | relevant child pointer inside the parent node; it can also point at 46 | the trie's root pointer, which means there has to be exactly one root 47 | pointer in a fixed place. 48 | 49 | (An aside about how I have been counting overhead: DJB does not 50 | include the leaf string pointer as part of the overhead of his 51 | crit-bit tries, and I have followed his lead by not counting the leaf 52 | key and value pointers in my crit-bit and qp tries. So by this logic, 53 | although an embedded branch adds four words to an object, it only 54 | counts as three words of overhead. Perhaps it would be more honest to 55 | count the total size of the data structure.) 56 | 57 | 58 | using embedded crit-bit tries 59 | ----------------------------- 60 | 61 | For most purposes, embedded crit-bit tries work the same as external 62 | crit-bit tries. 
63 | 64 | When searching for an object, there is a final check that the search 65 | key matches the leaf. This check needs to know where to find the 66 | search key inside the leaf object - it should not assume the key is at 67 | the start. 68 | 69 | When inserting a new object, you need to add a branch node to the 70 | trie. For external crit-bit tries this new branch is allocated; for 71 | embedded crit-bit tries you use the branch embedded in the new leaf 72 | object. 73 | 74 | 75 | deleting objects from embedded crit-bit tries 76 | --------------------------------------------- 77 | 78 | This is where the fun happens. There are four objects of interest: 79 | 80 | * The doomed leaf object to be deleted; 81 | 82 | * The victim branch object which needs to remain in the trie, although 83 | it is embedded in the doomed leaf object; 84 | 85 | * The parent branch object pointing at the leaf, which will be 86 | unlinked from the trie; 87 | 88 | * The bystander leaf object in which the parent branch is embedded, 89 | which remains in the trie. 90 | 91 | The plan is that after unlinking the parent branch from the trie, you 92 | rescue the victim branch from the doomed leaf object by moving it into 93 | the place vacated by the parent branch. You use the parent pointer in 94 | the victim branch to update the twig (or root) pointer to follow the 95 | move. 96 | 97 | Note that you need to beware of the case where the parent branch 98 | happens to be embedded in the doomed leaf object. 99 | 100 | 101 | exercise for the reader 102 | ----------------------- 103 | 104 | Are the parent pointers necessary? 105 | 106 | Is the movement of branches constrained enough that we will always 107 | encounter a leaf's embedded branch in the course of searching for that 108 | leaf? If so, we can eliminate the parent pointers and save a word of 109 | overhead. 
110 | 111 | 112 | conclusion 113 | ---------- 114 | 115 | I have not implemented this idea, but following [Simon Tatham's 116 | encouragement](http://fanf.livejournal.com/137283.html) I have written 117 | this description in the hope that it inspires someone else. 118 | 119 | 120 | addendum 121 | -------- 122 | 123 | In response to the "exercise for the reader", Simon Tatham says: 124 | 125 | > I think so, yes, and here is (hopefully) a proof. 126 | 127 | > We aim to show that the trie always satisfies the property that 128 | > every leaf's embedded branch object (if it exists at all – since we 129 | > need one fewer branch than we have leaves, there's always one leaf 130 | > whose embedded branch object is totally unused) is an ancestor of 131 | > that leaf. 132 | 133 | > Proof is by induction, of course. The base case is that a trie with 134 | > zero or one leaf obviously obeys the invariant. Now we have to show 135 | > that insertion and deletion each preserve it. 136 | 137 | > **Insertion.** We need one new branch object which will point at the new 138 | > leaf. Obviously we'll make this the branch object embedded in the 139 | > new leaf itself. (In principle we could instead find and use the 140 | > free one somewhere else in the tree, as mentioned above. But that 141 | > would be deliberately perverse, so let's not.) 142 | 143 | > So the new leaf's embedded branch object is indeed an ancestor of 144 | > the new leaf – specifically, the very closest ancestor. 145 | 146 | > And the new branch object gets inserted in the middle of some 147 | > existing link of the trie, from one branch object (or the root) to 148 | > another branch object (or a leaf). So the paths from the root to all 149 | > pre-existing leaves are either unchanged, or they get this new 150 | > branch object added in the middle of them. But insertion never 151 | > removes a branch object from any leaf's ancestry, so it cannot break 152 | > the invariant. 
153 | 154 | > **Deletion.** In your terminology: the parent branch object is the 155 | > closest ancestor of the doomed leaf object. The victim branch object 156 | > is currently embedded in the doomed leaf object, and therefore, by 157 | > the inductive hypothesis, it's currently an ancestor of doomed leaf 158 | > object. Hence, the victim branch object is also ancestor of the 159 | > parent branch object. (Unless they're the same object, as you point 160 | > out, but in that case we have nothing to prove anyway.) 161 | 162 | > Also by the inductive hypothesis, the parent branch object is an 163 | > ancestor of the bystander leaf object. (I.e. the bystander leaf is 164 | > some leaf that you can reach by following whichever of the parent 165 | > branch object's child pointers doesn't lead to the doomed leaf). 166 | 167 | > But if the victim branch object is an ancestor of the parent branch 168 | > object, and the parent branch object in turn is an ancestor of the 169 | > bystander leaf object, then it follows that the victim branch object 170 | > must be an ancestor of the bystander leaf object. So embedding the 171 | > victim branch object in the bystander leaf object surely cannot 172 | > break the invariant! 173 | 174 | > Caveat. “I have only proved it correct, not tried it.” I won't be 175 | > completely confident of this argument until I've seen it go through 176 | > a long random test run :-) 177 | 178 | And: 179 | 180 | > Actually there's a second easy special case, which you didn't warn 181 | > implementors to watch out for :-) Another possibility is that the 182 | > doomed leaf object might be the one whose embedded branch object 183 | > isn't used at all, in which case there's no victim branch object in 184 | > the first place and it would be a mistake to follow any pointers in 185 | > an effort to salvage it! 186 | 187 | In my original construction you would have to mark the unused branch 188 | in some way (e.g. NULL twigs). 
You always have to copy the victim 189 | branch on top of the removed parent branch, whether the victim is used 190 | or not; if the victim was unused the parent has to be marked as 191 | unused. 192 | 193 | But given your proof we will discover during the traversal that the 194 | victim branch is unused - we will not discover the victim's parent 195 | twig so we will not have a pointer to update! 196 | 197 | (Perhaps I need better terminology since I have both the parent twig 198 | of the victim branch, and the parent branch of the deleted leaf.) 199 | 200 | --------------------------------------------------------------------------- 201 | 202 | Written by Tony Finch ; 203 | You may do anything with this. It has no warranty. 204 | 205 | -------------------------------------------------------------------------------- /blog-2015-10-11.md: -------------------------------------------------------------------------------- 1 | Prefetching tries 2 | ================= 3 | 4 | The inner loop in [qp trie](https://dotat.at/prog/qp) lookups is roughly 5 | 6 | while(t->isbranch) { 7 | __builtin_prefetch(t->twigs); 8 | b = 1 << key[t->index]; // simplified 9 | if((t->bitmap & b) == 0) return(NULL); 10 | t = t->twigs + popcount(t->bitmap & b-1); 11 | } 12 | 13 | The efficiency of this loop depends on how quickly we can get from one 14 | indirection down the trie to the next. There is quite a lot of work in 15 | the loop, enough to slow it down significantly compared to the 16 | crit-bit search loop. Although qp tries are half the depth of crit-bit 17 | tries on average, they don't run twice as fast. The prefetch 18 | compensates in a big way: without it, qp tries are about 10% faster; 19 | with it they are about 30% faster. 
20 | 21 | I adjusted the code above to emphasize that in one iteration of the 22 | loop it accesses two locations: the key, which it is traversing 23 | linearly with small skips, so access is fast; and the tree node `t`, 24 | whose location jumps around all over the place, so access is slow. The 25 | body of the loop calculates the next location of `t`, but we know at 26 | the start that it is going to be some smallish offset from `t->twigs`, 27 | so the prefetch is very effective at overlapping calculation and 28 | memory latency. 29 | 30 | It was entirely accidental that prefetching works well for qp tries. I 31 | was trying not to waste space, so the thought process was roughly, a 32 | leaf has to be two words: 33 | 34 | struct Tleaf { const char *key; void *value; }; 35 | 36 | Leaves should be embedded in the twig array, to avoid a wasteful 37 | indirection, so branches have to be the same size as leaves. 38 | 39 | union Tnode { struct Tleaf leaf; struct Tbranch branch; }; 40 | 41 | A branch has to have a pointer to its twigs, so there is space in the 42 | other word for the metadata: bitmap, index, flags. (The limited space 43 | in one word is partly why qp tries test a nibble at a time.) Putting 44 | the metadata about the twigs next to the pointer to the twigs is the 45 | key thing that makes prefetching work. 46 | 47 | One of the inspirations of qp tries was [Phil Bagwell's hash array mapped 48 | tries](https://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf). 49 | HAMTs use the same popcount trick, but instead of using the PATRICIA 50 | method of skipping redundant branch nodes, they hash the key and use 51 | the hash as the trie index. The hashes should very rarely collide, so 52 | redundant branches should also be rare. Like qp tries, HAMTs put the 53 | twig metadata (just a bitmap in their case) next to the twig pointer, 54 | so they are friendly to prefetching. 
55 | 56 | So, if you are designing a tree structure, put the metadata for 57 | choosing which child is next adjacent to the node's pointer in its 58 | parent, not inside the node itself. That allows you to overlap the 59 | computation of choosing which child is next with the memory latency 60 | for fetching the child pointers. 61 | 62 | --------------------------------------------------------------------------- 63 | 64 | Written by Tony Finch ; 65 | You may do anything with this. It has no warranty. 66 | 67 | -------------------------------------------------------------------------------- /blog-2016-02-23.md: -------------------------------------------------------------------------------- 1 | How does a qp trie compare to a network routing trie? 2 | ===================================================== 3 | 4 | You might remember back in October I described how [my qp trie is a 5 | combination of djb's crit-bit tree and Bagwell's 6 | HAMT](https://dotat.at/prog/qp/blog-2015-10-04.html). The extra cherry 7 | on top of those ingredients was spotting that with the right layout in 8 | memory, [qp trie traversal gets a big boost from cache 9 | prefetching](https://dotat.at/prog/qp/blog-2015-10-11.html). 10 | 11 | There are a lot of papers about doing routing table lookup using a 12 | trie of some kind. So how do routing tries relate to qp tries? 13 | 14 | Rather than general-purpose data structures, the literature has a lot 15 | of application-specific tries designed just for routing. And rather 16 | than software, they are often tuned for implementation in hardware. 17 | 18 | There are a couple of examples below; the tl;dr is that they are more 19 | like the un-hashed variant of Bagwell's HAMT than like qp tries, since 20 | they don't have the PATRICIA / crit-bit trick of omitting nodes with 21 | one child.
22 | 23 | citations 24 | --------- 25 | 26 | In August 2015, a couple of months before my qp trie work, a paper 27 | titled [Poptrie: a compressed trie with population count for fast and 28 | scalable software IP routing table 29 | lookup](http://conferences.sigcomm.org/sigcomm/2015/pdf/papers/p57.pdf) 30 | was presented at the ACM SIGCOMM conference. 31 | 32 | The Poptrie paper cites a paper titled [Tree Bitmap: hardware/software 33 | IP lookups with incremental 34 | updates](http://dl.acm.org/citation.cfm?id=997160) published in April 35 | 2004 and sadly paywalled. 36 | 37 | [Getting less relevant, the Tree Bitmap paper frequently compares its 38 | data structure with the "Lulea" one described in [small forwarding 39 | tables for fast routing 40 | lookups](http://conferences.sigcomm.org/sigcomm/1997/papers/p192.pdf) 41 | (SIGCOMM 1997). That one is worth noting as an earlier non-HAMT-like 42 | structure.] 43 | 44 | why poptrie is more like a HAMT than qp trie 45 | -------------------------------------------- 46 | 47 | I found the poptrie paper *after* I thought up the qp trie, when 48 | trying to choose a name for it that wasn't already taken. Algorithm 1 49 | in that paper is very similar to the qp trie inner loop; the crit-bit 50 | difference is that in a poptrie the bit offset into the key has a 51 | fixed step per loop iteration (like a HAMT), whereas in a qp trie the 52 | offset is loaded from the node so it can skip ahead arbitrarily. 53 | 54 | Another HAMT similarity occurs in section 3.4 of the poptrie paper, 55 | which describes using a jumbo node at the root of the trie to reduce 56 | the number of indirections. I have not tried implementing a qp trie 57 | with this feature. 58 | 59 | string keys vs routing tables 60 | ----------------------------- 61 | 62 | The HAMT-like lack of PATRICIA-style skipping in a routing trie is one 63 | aspect of a pervasive structural difference due to the different kinds 64 | of data. 
65 | 66 | A qp trie stores a relatively sparse set of strings. In any trie most 67 | of the possible strings you might try to look up will not be found. 68 | Strings are variable length. And strings are prefix-free - a short 69 | string will not be a prefix of a longer one if you include its '\0' 70 | terminator. 71 | 72 | A routing trie stores a dense set of address prefixes. Every address 73 | you look up will produce an answer - the lack of a route is handled at 74 | a higher level. Address prefixes have a limited set of possible 75 | lengths. And routing tables are not prefix-free: you often have a 76 | route for a large address range (short prefix) with a more specific 77 | route for a smaller address range (longer prefix) that carves a chunk 78 | out of it. 79 | 80 | other significant differences 81 | ----------------------------- 82 | 83 | The sparse/dense string/route difference means that the qp trie inner 84 | loop has a three-way decision (fail to find anything; find a leaf; go 85 | down another branch) whereas a routing trie has a two-way decision 86 | (leaf or branch). 87 | 88 | The lack of prefix-freedom means a routing trie needs a different 89 | compression approach than a qp trie. Section 3.3 of the poptrie paper 90 | has details of their version; one consequence is that a poptrie node 91 | has two bitmaps and two child pointers for leaves and branches, 92 | whereas an HAMT node or a qp trie node has one bitmap and one pointer 93 | for both. 94 | 95 | I think poptries would benefit from prefetching like qp tries and 96 | HAMTs, but the poptrie paper doesn't mention it. However the Tree 97 | Bitmap paper *does* talk about doing bitmap and index calculations 98 | while waiting for a memory fetch. 99 | 100 | conclusion and further work 101 | --------------------------- 102 | 103 | Obviously this was nothing like a proper literature search so there 104 | are certainly other papers covering similar ground. 
I would be 105 | interested in pointers to other literature in this area! 106 | -------------------------------------------------------------------------------- /blog-2017-01-09.md: -------------------------------------------------------------------------------- 1 | QP trie news roundup 2 | ==================== 3 | 4 | Firstly, I have to say that it's totally awesome that I am writing 5 | this at all, and it's entirely due to the cool stuff done by people 6 | other than me. Yes! News about other people doing cool stuff with my 7 | half-baked ideas, how cool is that? 8 | 9 | 10 | CZ.NIC Knot DNS 11 | --------------- 12 | 13 | OK, DNS is approximately the ideal application for tries. It needs a 14 | data structure with key/value lookup and lexically ordered traversal. 15 | 16 | When qp tries were new, I got some very positive feedback from [Marek 17 | Vavrusa](https://twitter.com/vavrusam) who I think was at CZ.NIC at 18 | the time. As well as being the Czech DNS registry, they also develop 19 | their own very competitive DNS server software. Clearly the potential 20 | for a win there, but I didn't have time to push a side project to 21 | production quality, nor any expectation that anyone else would do the 22 | work. 23 | 24 | But, in November I got email from Vladimír Čunát telling me he had 25 | reimplemented qp tries to fix the portability bugs and missing features 26 | (such as prefix searches) in my qp trie code, and added it to [Knot 27 | DNS](https://www.knot-dns.cz). Knot was previously using a [HAT 28 | trie](https://en.wikipedia.org/wiki/HAT-trie). 29 | 30 | Vladimír said [qp tries could reduce total server RSS by more than 50% 31 | in a mass hosting test 32 | case](https://gitlab.labs.nic.cz/knot/knot-dns/-/merge_requests/574). 
33 | Although qp tries can be slower than HAT tries in some synthetic 34 | benchmarks (more indirections due to checking a nybble per node rather 35 | than a byte per node) this effect is too small to make a 36 | non-negligible difference to Knot. 37 | 38 | So, qp tries were a pretty good improvement. Thanks, Vladimír, 39 | for making such effective use of my ideas! 40 | 41 | (I've written some [notes on more memory-efficient DNS name lookups in 42 | qp tries](https://dotat.at/prog/qp/notes-dns.html) in case anyone wants 43 | to help make it even better...) 44 | 45 | 46 | Rust 47 | ---- 48 | 49 | Shortly before Christmas I spotted that [Frank 50 | Denis](https://twitter.com/jedisct1/) has [a qp trie implementation in 51 | Rust](https://github.com/jedisct1/rust-qptrie)! 52 | 53 | Sadly I'm still only appreciating Rust from a distance, but when I 54 | find some time to try it out properly, this will be top of my list of 55 | things to hack around with! 56 | 57 | I think qp tries are an interesting test case for Rust, because at the 58 | core of the data structure is a tightly packed two word `union` with 59 | type tags tucked into the low order bits of a pointer. It is dirty 60 | low-level C, but in principle it ought to work nicely as a Rust 61 | `enum`, provided Rust can be persuaded to make the same layout 62 | optimizations. In my head a qp trie is a parametric recursive 63 | algebraic data type, and I wish there were a programming language with 64 | which I could express that clearly. 65 | 66 | So, thanks, Frank, for giving me an extra incentive to try out Rust! 67 | Also, Frank's Twitter feed is ace, you should totally follow him. 68 | 69 | 70 | Time vs space 71 | ------------- 72 | 73 | Today I had a conversation on Twitter with 74 | [@tef](https://twitter.com/tef_ebooks) who has some really interesting 75 | ideas about possible improvements to qp tries. 
76 | 77 | One of the weaknesses of qp-tries, at least in my proof-of-concept 78 | implementation, is the allocator is called for every insert or delete. 79 | C's allocator is relatively heavyweight (compared to languages with 80 | tightly-coupled GCs) so it's not great to call it so frequently. 81 | 82 | ([Bagwell's HAMT 83 | paper](https://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf) 84 | was a major inspiration for qp tries, and he goes into some detail 85 | describing his custom allocator. It makes me feel like I'm slacking!) 86 | 87 | There's an important trade-off between small memory size and keeping 88 | some spare space to avoid `realloc()` calls. I have erred on the side of 89 | optimizing for simple allocator calls and small data structure size at 90 | the cost of greater allocator stress. 91 | 92 | @tef suggested adding extra space to each node for use as a write 93 | buffer, in a similar way to ["fractal tree" 94 | indexes](https://www.percona.com/files/presentations/percona-live/PLMCE2012/PLMCE2012-The_Right_Read_Optimization_is_Actually_Write_Optimization.pdf). 95 | As well as avoiding calls to `realloc()`, a write buffer could avoid 96 | `malloc()` calls for inserting new nodes. I was totally [nerd 97 | sniped](https://xkcd.com/356/) by his cool ideas! 98 | 99 | After some intensive thinking I worked out [a sketch of how write 100 | buffers might amortize allocation in qp 101 | tries](https://dotat.at/prog/qp/notes-write-buffer.html). I don't think 102 | it quite matches what tef had in mind, but it's definitely intriguing. 103 | It's *very* tempting to steal some time to turn the sketch into code, 104 | but I fear I need to focus more on things that are directly helpful to 105 | my colleagues... 106 | 107 | Anyway, thanks, tef, for the inspiring conversation! It also, 108 | tangentially, led me to write this item for my blog. 
109 | -------------------------------------------------------------------------------- /blog-2022-06-22.md: -------------------------------------------------------------------------------- 1 | Compacting a qp-trie 2 | ==================== 3 | 4 | My new job is [working on BIND for ISC](https://www.isc.org/), and my 5 | main project is to replace BIND's core red-black tree data structure 6 | with [my qp-trie](https://dotat.at/prog/qp/). 7 | 8 | 9 | previously 10 | ---------- 11 | 12 | In the summer of 2021 I wrote some notes on 13 | [page-based GC for qp-trie RCU][notes2021] 14 | which I then went on to implement in 15 | [my fork of NSD](https://dotat.at/cgi/git/nsd.git). 16 | 17 | [notes2021]: https://dotat.at/prog/qp/blog-2021-06-23.md 18 | 19 | Since the start of May 2022 I have ported the NSD version of my 20 | qp-trie to BIND, with several improvements: 21 | 22 | * multi-version concurrency, instead of just two versions, one for 23 | readers and one for the writer; 24 | 25 | * the rather sketchy locking has been completed; 26 | 27 | * two flavours of write transaction: minimum space for authoritative 28 | DNS; and minimum time for recursive caches; 29 | 30 | * rollback for failed transactions. 31 | 32 | The notes I wrote last summer turned into code very nicely: NSD proved 33 | to be a good place to try out the ideas. And more recently, I am 34 | pleased with how the code adapted to the more complicated demands of 35 | BIND. 36 | 37 | But there's one area that has been problematic: compaction. 38 | 39 | 40 | memory manager 41 | -------------- 42 | 43 | My qp-trie organizes its memory into a collection of "chunks", each of 44 | which is something like 12 KiB or 48 KiB. (I previously called them 45 | "pages" but they aren't the same size as hardware pages, and the 46 | authors of the [Garbage Collection Handbook](https://gchandbook.org/) 47 | say "chunk" is their preferred term.) 
48 | 49 | There is very little metadata: each chunk keeps a count of how much it 50 | has allocated, and another count of how much it has freed. Unlike most 51 | garbage collectors, the qp-trie code frees memory explicitly. This 52 | helps because the GC does not have to scan a chunk to find out how 53 | fragmented it is, and we can know in advance whether it is worth the 54 | effort to compact the trie. 55 | 56 | However, we don't know which nodes in a chunk are in use or not, 57 | without either scanning the chunk or traversing the trie. So this is 58 | the most expensive part of the memory manager. 59 | 60 | I have tried several compaction algorithms so far, and I am not sure I 61 | have found a good one yet... 62 | 63 | 64 | [version one][] 65 | --------------- 66 | 67 | [version one]: https://dotat.at/cgi/git/nsd.git/blob/refs/heads/fanf-cow:/qp-trie.c#l272 68 | 69 | The first compaction algorithm I implemented was basically what I 70 | described in [the notes I wrote before writing the code][notes2021]. 71 | Looking back at the code now, I can't see how it would have worked, 72 | which probably explains why I tried other algorithms. 73 | 74 | I remember one failure mode where the compactor often left behind a 75 | lot of new fragmentation (oops), so the GC would fire again soon after 76 | a compaction, and the double compaction would effectively copy the 77 | whole trie. 78 | 79 | 80 | [version two][] 81 | --------------- 82 | 83 | [version two]: https://dotat.at/cgi/git/nsd.git/blob/refs/heads/fanf-cjc:/qp-trie.c#l75 84 | 85 | Next I tried a simple semi-space garbage collector, using Cheney's 86 | exceptionally beautiful stackless copying algorithm. This simplified 87 | many things, because there was no longer any need for a chunk table, 88 | instead just a single allocation for the whole trie. 89 | 90 | But it seemed a bit heavy-handed to me to copy the whole thing when 91 | fragmentation is likely to affect only part of the trie. 
And 92 | semi-space collectors need a lot of unused space to work efficiently. 93 | 94 | 95 | [version three][] 96 | ----------------- 97 | 98 | [version three]: https://dotat.at/cgi/git/nsd.git/blob/refs/heads/fanf-gen:/qp-trie.c#l281 99 | 100 | So I went back to the chunk table, and tried to apply the generational 101 | hypothesis. This is a rule of thumb that says most allocations are 102 | short-lived. Many garbage collectors split their memory into 103 | "generations"; the youngest "nursery" generation is optimized for fast 104 | allocation, and the expectation is that when it fills up, most of the 105 | contents will already be garbage, so it will be cheap to evacuate the 106 | live data from the nursery to the next generation. 107 | 108 | I applied this idea to the qp-trie by guessing that most fragmentation 109 | would occur near the root of the trie. So, walk the trie recursively 110 | from the root, copying nodes compactly into a fresh chunk, and stop 111 | recursing whenever you reach a node that is in a full chunk. 112 | 113 | This works OK for copy-on-write transactions, which must copy the path 114 | from the root to any modified leaves, but it is _terrible_ for a 115 | single-threaded trie. All mutations only affect one node near the leaf 116 | that is being added or deleted, i.e. nodes near the root are mostly 117 | left alone. 118 | 119 | 120 | [version four][] 121 | ---------------- 122 | 123 | [version four]: https://gitlab.isc.org/isc-projects/bind9/-/commit/1d94dec683ff8f23d4533f7fb83625e7dbd525ee#65b4d67ce64e9195e41ac43d78af5156f9ebb779_0_540 124 | 125 | I thought until last week that version three was OK: I was dismayed to 126 | learn that this part of my code from last summer was weaker than I 127 | remembered. But, I had a rethink, and worked out that the generational 128 | hypothesis is false in my situation, and thought up a new algorithm. 
129 | 130 | This one takes advantage of the work I did to support transaction 131 | rollback, and more well-defined lifecycle management for the value 132 | objects hanging off the trie. As a result, BIND's qp-trie code can 133 | scan a chunk and know which parts of it are free or in use, without 134 | walking the trie. 135 | 136 | But there's a big proviso: it can only compact mutable chunks. This 137 | algorithm does not work for copy-on-write transactional modifications. 138 | It was OK as a stop-gap, but I knew it needed more work. 139 | 140 | 141 | [version five][] 142 | ---------------- 143 | 144 | [version five]: https://gitlab.isc.org/isc-projects/bind9/-/commit/ecc555e6ec763c4f8f2495864ec08749202fff1a#65b4d67ce64e9195e41ac43d78af5156f9ebb779_0_553 145 | 146 | Version three taught me that I need a way to find and compact 147 | fragmentation near the leaves of the trie. 148 | 149 | Version four suggested that an algorithm that aims not to increase 150 | fragmentation can work quite well in practice even if there are 151 | situations that can make it less effective. 152 | 153 | So I had another think and worked out a "bottom-up" algorithm: walk 154 | the trie, and copy/compactify any nodes we find in fragmented chunks. 155 | We might also need gratuitous copies of nodes on the paths from the 156 | root to the nodes that have been copied, if those nodes are in shared 157 | chunks. That is, we need to obey the copy-on-write rule. 158 | 159 | Today's algorithm turned out to be more effective at compaction than 160 | version four, and faster. It's also quite similar to version one, 161 | except less broken. (And I had fun writing an informal inductive proof 162 | to convince myself how it works!) 
163 | 164 | 165 | version N+1 166 | ----------- 167 | 168 | This compaction algorithm rework comes in the middle of my efforts to 169 | write some basic testing and validation code for BIND's qp-trie, since 170 | the testing I have done so far revealed compaction to be a problem. 171 | The next things to test are transactional writes to the trie, 172 | exercising the copy-on-write code, and then see if I can break it in 173 | multithreaded mode. 174 | 175 | Even if version five survives this testing, I am sure I will need to 176 | do more experiments and try out other algorithms, because this aspect 177 | of the qp-trie code still uses more CPU than I am happy with. But it 178 | might be possible to shuffle the work into a corner where it doesn't 179 | cause trouble? 180 | 181 | 182 | postscript 183 | ---------- 184 | 185 | I have enjoyed reading [Andy Wingo's recent blog posts about garbage 186 | collection](https://wingolog.org/tags/garbage%20collection) which 187 | inspired me to write about it too. 188 | -------------------------------------------------------------------------------- /cb-debug.c: -------------------------------------------------------------------------------- 1 | // cb-debug.c: crit-bit trie debug support 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 
5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "Tbl.h" 14 | #include "cb.h" 15 | 16 | static void 17 | dump_rec(Trie *t, int d) { 18 | if(isbranch(t)) { 19 | printf("Tdump%*s branch %p %zu\n", d, "", 20 | t, (size_t)t->branch.index); 21 | assert(t->branch.index >= d); 22 | printf("Tdump%*s twig 0\n", d, ""); 23 | dump_rec(twig(t, 0), t->branch.index + 1); 24 | printf("Tdump%*s twig 1\n", d, ""); 25 | dump_rec(twig(t, 1), t->branch.index + 1); 26 | } else { 27 | printf("Tdump%*s leaf %p\n", d, "", t); 28 | printf("Tdump%*s leaf key %p %s\n", d, "", 29 | t->leaf.key, t->leaf.key); 30 | printf("Tdump%*s leaf val %p\n", d, "", 31 | t->leaf.val); 32 | } 33 | } 34 | 35 | void 36 | Tdump(Tbl *tbl) { 37 | printf("Tdump root %p\n", tbl); 38 | if(tbl != NULL) 39 | dump_rec(&tbl->root, 0); 40 | } 41 | 42 | static void 43 | size_rec(Trie *t, uint d, 44 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 45 | *rsize += sizeof(*t); 46 | if(isbranch(t)) { 47 | *rbranches += 1; 48 | size_rec(twig(t, 0), d+1, rsize, rdepth, rbranches, rleaves); 49 | size_rec(twig(t, 1), d+1, rsize, rdepth, rbranches, rleaves); 50 | } else { 51 | *rleaves += 1; 52 | *rdepth += d; 53 | } 54 | } 55 | 56 | void 57 | Tsize(Tbl *tbl, const char **rtype, 58 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 59 | *rtype = "cb"; 60 | *rsize = *rdepth = *rbranches = *rleaves = 0; 61 | if(tbl != NULL) 62 | size_rec(&tbl->root, 0, rsize, rdepth, rbranches, rleaves); 63 | } 64 | -------------------------------------------------------------------------------- /cb.c: -------------------------------------------------------------------------------- 1 | // cb.c: tables implemented with crit-bit tries. 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 
5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "Tbl.h" 15 | #include "cb.h" 16 | 17 | bool 18 | Tgetkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 19 | if(tbl == NULL) 20 | return(false); 21 | Trie *t = &tbl->root; 22 | while(isbranch(t)) { 23 | __builtin_prefetch(t->branch.twigs); 24 | t = twig(t, twigoff(t, key, len)); 25 | } 26 | if(strcmp(key, t->leaf.key) != 0) 27 | return(false); 28 | *pkey = t->leaf.key; 29 | *pval = t->leaf.val; 30 | return(true); 31 | } 32 | 33 | static bool 34 | next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) { 35 | if(isbranch(t)) { 36 | // Recurse to find either this leaf (*pkey != NULL) 37 | // or the next one (*pkey == NULL). 38 | for(uint b = twigoff(t, *pkey, *plen); b <= 1; b++) 39 | if(next_rec(twig(t, b), pkey, plen, pval)) 40 | return(true); 41 | return(false); 42 | } 43 | // We have found the next leaf. 44 | if(*pkey == NULL) { 45 | *pkey = t->leaf.key; 46 | *plen = strlen(*pkey); 47 | *pval = t->leaf.val; 48 | return(true); 49 | } 50 | // We have found this leaf, so start looking for the next one. 51 | if(strcmp(*pkey, t->leaf.key) == 0) { 52 | *pkey = NULL; 53 | *plen = 0; 54 | return(false); 55 | } 56 | // No match. 
57 | return(false); 58 | } 59 | 60 | bool 61 | Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) { 62 | if(tbl == NULL) { 63 | *pkey = NULL; 64 | *plen = 0; 65 | return(NULL); 66 | } 67 | return(next_rec(&tbl->root, pkey, plen, pval)); 68 | } 69 | 70 | Tbl * 71 | Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 72 | if(tbl == NULL) 73 | return(NULL); 74 | Trie *t = &tbl->root, *p = NULL; 75 | uint b = 0; 76 | while(isbranch(t)) { 77 | __builtin_prefetch(t->branch.twigs); 78 | b = twigoff(t, key, len); 79 | p = t, t = twig(t, b); 80 | } 81 | if(strcmp(key, t->leaf.key) != 0) 82 | return(tbl); 83 | *pkey = t->leaf.key; 84 | *pval = t->leaf.val; 85 | if(p == NULL) { 86 | free(tbl); 87 | return(NULL); 88 | } 89 | // Move the other twig to the parent branch. 90 | t = p->branch.twigs; 91 | *p = *twig(p, !b); 92 | free(t); 93 | return(tbl); 94 | } 95 | 96 | Tbl * 97 | Tsetl(Tbl *tbl, const char *key, size_t len, void *val) { 98 | // Ensure flag bits are zero. 99 | if(((uint64_t)val & 3) != 0) { 100 | errno = EINVAL; 101 | return(NULL); 102 | } 103 | if(val == NULL) 104 | return(Tdell(tbl, key, len)); 105 | // First leaf in an empty tbl? 106 | if(tbl == NULL) { 107 | tbl = malloc(sizeof(*tbl)); 108 | if(tbl == NULL) return(NULL); 109 | tbl->root.leaf.key = key; 110 | tbl->root.leaf.val = val; 111 | return(tbl); 112 | } 113 | Trie *t = &tbl->root; 114 | // Find the most similar leaf node in the trie. We will compare 115 | // its key with our new key to find the first differing nibble, 116 | // which can be at a lower index than the point at which we 117 | // detect a difference. 118 | while(isbranch(t)) 119 | t = twig(t, twigoff(t, key, len)); 120 | // Do the keys differ, and if so, where? 121 | size_t i; 122 | for(i = 0; i <= len; i++) { 123 | if(key[i] != t->leaf.key[i]) 124 | goto newkey; 125 | } 126 | t->leaf.val = val; 127 | return(tbl); 128 | newkey:; // We have the byte index; what about the bit? 
129 | uint k1 = (byte)key[i], k2 = (byte)t->leaf.key[i]; 130 | uint b = (uint)__builtin_clz((k1 ^ k2) << 24 | 0x800000); 131 | i = 8 * i + b; 132 | b = k1 >> (7 - b) & 1; 133 | // Find where to insert a branch or grow an existing branch. 134 | t = &tbl->root; 135 | while(isbranch(t)) { 136 | __builtin_prefetch(t->branch.twigs); 137 | if(i < t->branch.index) 138 | goto newbranch; 139 | t = twig(t, twigoff(t, key, len)); 140 | } 141 | newbranch:; 142 | Trie *twigs = malloc(sizeof(Trie) * 2); 143 | if(twigs == NULL) return(NULL); 144 | Trie t1 = { .leaf = { .key = key, .val = val } }; 145 | Trie t2 = *t; // Save before overwriting. 146 | t->branch.twigs = twigs; 147 | t->branch.isbranch = 1; 148 | t->branch.index = i; 149 | *twig(t, b) = t1; 150 | *twig(t, !b) = t2; 151 | return(tbl); 152 | } 153 | -------------------------------------------------------------------------------- /cb.h: -------------------------------------------------------------------------------- 1 | // cb.h: tables implemented with crit-bit tries. 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | // See qp.h for introductory comments on tries. 8 | // 9 | // Dan Bernstein has a well-known description of crit-bit tries 10 | // http://cr.yp.to/critbit.html 11 | // Adam Langley has annotated DJB's crit-bit implementation 12 | // https://github.com/agl/critbit 13 | // 14 | // DJB's crit-bit tries only store a set of keys, without any 15 | // associated values. The branch nodes have three words: a bit index, 16 | // and two pointers to child nodes. Each child pointer has a flag in 17 | // its least significant bit indicating whether it points to another 18 | // branch, or points to a key string. 
19 | // 20 | // [ ptr B ] -> [ index ] 21 | // [ ptr L ] -> "leaf 0" 22 | // [ ptr B ] 23 | // \ 24 | // +-> [ index ] 25 | // [ ptr L ] -> "leaf 1" 26 | // [ ptr L ] -> "leaf 2" 27 | // 28 | // An important property of these tries is their low overhead, two 29 | // words per entry in addition to the key pointer itself. It is hard 30 | // to add associated values without increasing this overhead. If you 31 | // simply replace each string pointer with a pointer to a key+value 32 | // pair, the overhead is 50% greater: three words per entry in 33 | // addition to the key+value pointers. 34 | // 35 | // This crit-bit implementation uses a different layout. A branch node 36 | // contains a bit index and only one pointer. Its two children (called 37 | // "twigs") are allocated as a pair; the bit in the key selects which 38 | // twig in the pair is selected when traversing the trie. Now branch 39 | // nodes are two words, the same size as a key+value leaf node, so any 40 | // combination of leaves and branches packs nicely into a four-word 41 | // pair of twigs. The flag bit is put in the node (e.g. least 42 | // significant bit of the index is always set) rather than packing two 43 | // flag bits into the twigs pointer. 44 | // 45 | // [ index 1 twig ] -> [ value 0 key ] -> "leaf 0" 46 | // [ index 1 twig ] 47 | // \ 48 | // +-> [ value 0 key ] -> "leaf 1" 49 | // [ value 0 key ] -> "leaf 2" 50 | // 51 | // Another way of looking at this is we have added two words to each 52 | // node for the value pointers; these are empty in branch nodes, which 53 | // gives us space to move the bit indexes up a level, one bit index 54 | // from each child to occupy each empty word. Moving the bit indexes 55 | // takes away a word from every node, except for the root which 56 | // becomes a word bigger. 57 | // 58 | // This layout has two words of overhead per entry, in addition to the 59 | // key+value pointers. 
60 | // 61 | // I originally developed this layout for qp tries, then simplified 62 | // the qp code to produce this crit-bit implementation. 63 | 64 | typedef unsigned char byte; 65 | typedef unsigned int uint; 66 | 67 | typedef struct Tleaf { 68 | const char *key; 69 | void *val; 70 | } Tleaf; 71 | 72 | // XXX this currently assumes a 64 bit little endian machine 73 | typedef struct Tbranch { 74 | union Trie *twigs; 75 | uint64_t 76 | isbranch : 1, 77 | index : 63; 78 | } Tbranch; 79 | 80 | typedef union Trie { 81 | struct Tleaf leaf; 82 | struct Tbranch branch; 83 | } Trie; 84 | 85 | struct Tbl { 86 | union Trie root; 87 | }; 88 | 89 | // Test flags to determine type of this node. 90 | 91 | static inline bool 92 | isbranch(Trie *t) { 93 | return(t->branch.isbranch); 94 | } 95 | 96 | static inline uint 97 | twigoff(Trie *t, const char *key, size_t len) { 98 | uint64_t i = t->branch.index; 99 | if(i/8 >= len) return(0); 100 | return(key[i/8] >> (7 - i%8) & 1); 101 | } 102 | 103 | static inline Trie * 104 | twig(Trie *t, uint i) { 105 | return(&t->branch.twigs[i]); 106 | } 107 | -------------------------------------------------------------------------------- /dns-debug.c: -------------------------------------------------------------------------------- 1 | // dns-debug.c: DNS-trie debug support 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 
5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "Tbl.h" 14 | #include "dns.h" 15 | 16 | static void 17 | print_bit(Shift bit) { 18 | if(bit == SHIFT_0) printf("^0/"); 19 | if(bit == SHIFTa1) printf("^1a/"); 20 | if(bit == SHYPHEN) printf("-/"); 21 | if(bit == SHIFDOT) printf("./"); 22 | if(bit == SHSLASH) printf("//"); 23 | // if(bit == SHIFTb1) printf("^1b/"); 24 | if(SHIFT_DIGIT <= bit && bit <= TOP_DIGIT) 25 | printf("%c/", '0' + bit - SHIFT_DIGIT); 26 | if(bit == SHIFTc1) printf("^1c/"); 27 | if(bit == SHIFT_2) printf("^2/"); 28 | if(bit == UNDERBAR) printf("_/"); 29 | if(bit == BACKQUO) printf("`/"); 30 | if(SHIFT_LETTER <= bit && bit <= TOP_LETTER) 31 | printf("%c/", 'a' + bit - SHIFT_LETTER); 32 | if(bit == SHIFT_2) printf("^2/"); 33 | if(bit == SHIFT_3) printf("^3/"); 34 | if(bit == SHIFT_4) printf("^4/"); 35 | if(bit == SHIFT_5) printf("^5/"); 36 | if(bit == SHIFT_6) printf("^6/"); 37 | if(bit == SHIFT_7) printf("^7/"); 38 | printf("%d", bit - SHIFT_LOWER); 39 | } 40 | 41 | static void 42 | print_bitmap(Node *n) { 43 | char sep = '('; 44 | if(hastwig(n, SHIFT_NOBYTE)) { 45 | printf("(NO"); 46 | sep = ','; 47 | } 48 | for(byte bit = SHIFT_0; bit < SHIFT_OFFSET; bit++) { 49 | if(!hastwig(n, bit)) 50 | continue; 51 | putchar(sep); 52 | print_bit(bit); 53 | sep = ','; 54 | } 55 | printf(")\n"); 56 | } 57 | 58 | static void 59 | dump_rec(Node *n, int d) { 60 | if(isbranch(n)) { 61 | printf("Tdump%*s branch %p %zu %zu", d, "", n, 62 | (size_t)n->index & MASK_FLAGS, keyoff(n)); 63 | print_bitmap(n); 64 | int dd = (int)keyoff(n) * 2 + 2; 65 | assert(dd > d); 66 | for(Shift bit = SHIFT_NOBYTE; bit < SHIFT_OFFSET; bit++) { 67 | if(hastwig(n, bit)) { 68 | printf("Tdump%*s twig ", d, ""); 69 | print_bit(bit); 70 | putchar('\n'); 71 | dump_rec(twig(n, twigoff(n, bit)), dd); 72 | } 73 | } 74 | } else { 75 | printf("Tdump%*s leaf %p\n", d, "", n); 76 | printf("Tdump%*s leaf key %p %s\n", d, "", 77 | n->ptr, n->ptr); 78 | 
printf("Tdump%*s leaf val %zx\n", d, "", 79 | (size_t)n->index); 80 | } 81 | } 82 | 83 | void 84 | Tdump(Tbl *tbl) { 85 | printf("Tdump root %p\n", tbl); 86 | if(tbl != NULL) 87 | dump_rec(&tbl->root, 0); 88 | } 89 | 90 | static void 91 | size_rec(Node *n, size_t d, 92 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 93 | *rsize += sizeof(*n); 94 | if(isbranch(n)) { 95 | *rbranches += 1; 96 | for(Shift bit = SHIFT_NOBYTE; bit < SHIFT_OFFSET; bit++) { 97 | if(hastwig(n, bit)) 98 | size_rec(twig(n, twigoff(n, bit)), 99 | d+1, rsize, rdepth, rbranches, rleaves); 100 | } 101 | } else { 102 | *rleaves += 1; 103 | *rdepth += d; 104 | } 105 | } 106 | 107 | void 108 | Tsize(Tbl *tbl, const char **rtype, 109 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 110 | *rtype = "dns"; 111 | *rsize = *rdepth = *rbranches = *rleaves = 0; 112 | if(tbl != NULL) 113 | size_rec(&tbl->root, 0, rsize, rdepth, rbranches, rleaves); 114 | } 115 | -------------------------------------------------------------------------------- /entities: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use warnings; 4 | use strict; 5 | 6 | use open qw(:std :utf8); 7 | 8 | use HTML::Entities; 9 | 10 | undef $/; 11 | print encode_entities <>, q{^\s!-~}; 12 | -------------------------------------------------------------------------------- /fn-debug.c: -------------------------------------------------------------------------------- 1 | // fn-debug.c: fn trie debug support 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 
5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "Tbl.h" 14 | #include "fn.h" 15 | 16 | const char * 17 | dump_bitmap(Tbitmap w) { 18 | static char buf[32*3]; 19 | int size = (int)sizeof(buf), n = 0; 20 | n += snprintf(buf+n, size-n, "("); 21 | for(uint s = 0; s < 32; s++) { 22 | Tbitmap b = 1 << s; 23 | if(w & b) 24 | n += snprintf(buf+n, size-n, "%u,", s); 25 | } 26 | if(n > 1) 27 | buf[n-1] = ')'; 28 | return buf; 29 | } 30 | 31 | static void 32 | dump_rec(Trie *t, uint d) { 33 | Tindex i = t->index; 34 | if(Tindex_branch(i)) { 35 | printf("Tdump%*s branch %p %s %zu %d\n", d, "", (void*)t, 36 | dump_bitmap(Tindex_bitmap(i)), 37 | (size_t)Tindex_offset(i), Tindex_shift(i)); 38 | uint dd = 1 + Tindex_offset(i) * 8 + Tindex_shift(i); 39 | assert(dd > d); 40 | for(uint s = 0; s < 32; s++) { 41 | Tbitmap b = 1 << s; 42 | if(hastwig(i, b)) { 43 | printf("Tdump%*s twig %d\n", d, "", s); 44 | dump_rec(Tbranch_twigs(t) + twigoff(i, b), dd); 45 | } 46 | } 47 | } else { 48 | printf("Tdump%*s leaf %p\n", d, "", 49 | (void *)t); 50 | printf("Tdump%*s leaf key %p %s\n", d, "", 51 | (const void *)Tleaf_key(t), Tleaf_key(t)); 52 | printf("Tdump%*s leaf val %p\n", d, "", 53 | (void *)Tleaf_val(t)); 54 | } 55 | } 56 | 57 | void 58 | Tdump(Tbl *tbl) { 59 | printf("Tdump root %p\n", (void*)tbl); 60 | if(tbl != NULL) 61 | dump_rec(tbl, 0); 62 | } 63 | 64 | static void 65 | size_rec(Trie *t, uint d, 66 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 67 | *rsize += sizeof(*t); 68 | Tindex i = t->index; 69 | if(Tindex_branch(i)) { 70 | *rbranches += 1; 71 | for(uint s = 0; s < 32; s++) { 72 | Tbitmap b = 1U << s; 73 | if(hastwig(i, b)) 74 | size_rec(Tbranch_twigs(t) + twigoff(i, b), 75 | d+1, rsize, rdepth, rbranches, rleaves); 76 | } 77 | } else { 78 | *rleaves += 1; 79 | *rdepth += d; 80 | } 81 | } 82 | 83 | void 84 | Tsize(Tbl *tbl, const char **rtype, 85 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t 
*rleaves) { 86 | *rtype = "fn"; 87 | *rsize = *rdepth = *rbranches = *rleaves = 0; 88 | if(tbl != NULL) 89 | size_rec(tbl, 0, rsize, rdepth, rbranches, rleaves); 90 | } 91 | -------------------------------------------------------------------------------- /fn.c: -------------------------------------------------------------------------------- 1 | // fn.h: quintet bit popcount patricia tries, new version 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "Tbl.h" 15 | #include "fn.h" 16 | 17 | bool 18 | Tgetkv(Tbl *t, const char *key, size_t len, const char **pkey, void **pval) { 19 | if(t == NULL) 20 | return(false); 21 | while(isbranch(t)) { 22 | __builtin_prefetch(t->ptr); 23 | Tindex i = t->index; 24 | Tbitmap b = twigbit(i, key, len); 25 | if(!hastwig(i, b)) 26 | return(false); 27 | t = Tbranch_twigs(t) + twigoff(i, b); 28 | } 29 | if(strcmp(key, Tleaf_key(t)) != 0) 30 | return(false); 31 | *pkey = Tleaf_key(t); 32 | *pval = Tleaf_val(t); 33 | return(true); 34 | } 35 | 36 | static bool 37 | next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) { 38 | Tindex i = t->index; 39 | if(Tindex_branch(i)) { 40 | // Recurse to find either this leaf (*pkey != NULL) 41 | // or the next one (*pkey == NULL). 42 | Tbitmap b = twigbit(i, *pkey, *plen); 43 | uint s, m; TWIGOFFMAX(s, m, i, b); 44 | for(; s < m; s++) 45 | if(next_rec(Tbranch_twigs(t)+s, pkey, plen, pval)) 46 | return(true); 47 | return(false); 48 | } 49 | // We have found the next leaf. 50 | if(*pkey == NULL) { 51 | *pkey = Tleaf_key(t); 52 | *plen = strlen(*pkey); 53 | *pval = Tleaf_val(t); 54 | return(true); 55 | } 56 | // We have found this leaf, so start looking for the next one. 57 | if(strcmp(*pkey, Tleaf_key(t)) == 0) { 58 | *pkey = NULL; 59 | *plen = 0; 60 | return(false); 61 | } 62 | // No match. 
63 | return(false); 64 | } 65 | 66 | bool 67 | Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) { 68 | if(tbl == NULL) { 69 | *pkey = NULL; 70 | *plen = 0; 71 | return(NULL); 72 | } 73 | return(next_rec(tbl, pkey, plen, pval)); 74 | } 75 | 76 | Tbl * 77 | Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 78 | if(tbl == NULL) 79 | return(NULL); 80 | Trie *t = tbl, *p = NULL; 81 | Tindex i = 0; 82 | Tbitmap b = 0; 83 | while(isbranch(t)) { 84 | __builtin_prefetch(t->ptr); 85 | i = t->index; 86 | b = twigbit(i, key, len); 87 | if(!hastwig(i, b)) 88 | return(tbl); 89 | p = t; t = Tbranch_twigs(t) + twigoff(i, b); 90 | } 91 | if(strcmp(key, Tleaf_key(t)) != 0) 92 | return(tbl); 93 | *pkey = Tleaf_key(t); 94 | *pval = Tleaf_val(t); 95 | if(p == NULL) { 96 | free(tbl); 97 | return(NULL); 98 | } 99 | Trie *twigs = Tbranch_twigs(p); 100 | uint m = popcount(Tindex_bitmap(i)); 101 | assert(twigs <= t && t < twigs+m); 102 | if(m == 2) { 103 | // Move the other twig to the parent branch. 104 | *p = twigs[twigs == t]; 105 | free(twigs); 106 | return(tbl); 107 | } 108 | memmove(t, t+1, ((twigs + m) - (t + 1)) * sizeof(Trie)); 109 | p->index = Tbitmap_del(i, b); 110 | // We have now correctly removed the twig from the trie, so if 111 | // realloc() fails we can ignore it and continue to use the 112 | // slightly oversized twig array. 113 | twigs = realloc(twigs, sizeof(Trie) * (m - 1)); 114 | if(twigs != NULL) Tset_twigs(p, twigs); 115 | return(tbl); 116 | } 117 | 118 | Tbl * 119 | Tsetl(Tbl *tbl, const char *key, size_t len, void *val) { 120 | if(Tindex_branch((Tindex)val) || len > Tmaxlen) { 121 | errno = EINVAL; 122 | return(NULL); 123 | } 124 | if(val == NULL) 125 | return(Tdell(tbl, key, len)); 126 | // First leaf in an empty tbl? 
127 | if(tbl == NULL) { 128 | tbl = malloc(sizeof(*tbl)); 129 | if(tbl == NULL) return(NULL); 130 | Tset_key(tbl, key); 131 | Tset_val(tbl, val); 132 | return(tbl); 133 | } 134 | Trie *t = tbl; 135 | // Find the most similar leaf node in the trie. We will compare 136 | // its key with our new key to find the first differing nibble, 137 | // which can be at a lower index than the point at which we 138 | // detect a difference. 139 | while(isbranch(t)) { 140 | __builtin_prefetch(t->ptr); 141 | Tindex i = t->index; 142 | Tbitmap b = twigbit(i, key, len); 143 | // Even if our key is missing from this branch we need to 144 | // keep iterating down to a leaf. It doesn't matter which 145 | // twig we choose since the keys are all the same up to this 146 | // index. Note that blindly using twigoff(t, b) can cause 147 | // an out-of-bounds index if it equals twigmax(t). 148 | uint s = hastwig(i, b) ? twigoff(i, b) : 0; 149 | t = Tbranch_twigs(t) + s; 150 | } 151 | // Do the keys differ, and if so, where? 152 | uint off, xor, shf; 153 | const char *tkey = Tleaf_key(t); 154 | for(off = 0; off <= len; off++) { 155 | xor = (byte)key[off] ^ (byte)tkey[off]; 156 | if(xor != 0) goto newkey; 157 | } 158 | Tset_val(t, val); 159 | return(tbl); 160 | newkey:; // We have the branch's byte index; what is its chunk index? 161 | uint bit = off * 8 + (uint)__builtin_clz(xor) + 8 - sizeof(uint) * 8; 162 | uint qo = bit / 5; 163 | off = qo * 5 / 8; 164 | shf = qo * 5 % 8; 165 | // re-index keys with adjusted offset 166 | Tbitmap nb = 1U << knybble(key,off,shf); 167 | Tbitmap tb = 1U << knybble(tkey,off,shf); 168 | // Prepare the new leaf. 169 | Trie nt; 170 | Tset_key(&nt, key); 171 | Tset_val(&nt, val); 172 | // Find where to insert a branch or grow an existing branch. 
173 | t = tbl; 174 | Tindex i = 0; 175 | while(isbranch(t)) { 176 | __builtin_prefetch(t->ptr); 177 | i = t->index; 178 | if(off == Tindex_offset(i) && shf == Tindex_shift(i)) 179 | goto growbranch; 180 | if(off == Tindex_offset(i) && shf < Tindex_shift(i)) 181 | goto newbranch; 182 | if(off < Tindex_offset(i)) 183 | goto newbranch; 184 | Tbitmap b = twigbit(i, key, len); 185 | assert(hastwig(i, b)); 186 | t = Tbranch_twigs(t) + twigoff(i, b); 187 | } 188 | newbranch:; 189 | Trie *twigs = malloc(sizeof(Trie) * 2); 190 | if(twigs == NULL) return(NULL); 191 | i = Tindex_new(shf, off, nb | tb); 192 | twigs[twigoff(i, nb)] = nt; 193 | twigs[twigoff(i, tb)] = *t; 194 | Tset_twigs(t, twigs); 195 | Tset_index(t, i); 196 | return(tbl); 197 | growbranch:; 198 | assert(!hastwig(i, nb)); 199 | uint s, m; TWIGOFFMAX(s, m, i, nb); 200 | twigs = realloc(Tbranch_twigs(t), sizeof(Trie) * (m + 1)); 201 | if(twigs == NULL) return(NULL); 202 | memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s)); 203 | memmove(twigs+s, &nt, sizeof(Trie)); 204 | Tset_twigs(t, twigs); 205 | Tset_index(t, Tbitmap_add(i, nb)); 206 | return(tbl); 207 | } 208 | -------------------------------------------------------------------------------- /fn.h: -------------------------------------------------------------------------------- 1 | // fn.h: quintet bit popcount patricia tries, new version 2 | // 3 | // This version uses somewhat different terminology than older 4 | // variants. The location of a quintet in the key is now called its 5 | // "offset", and the whole word containing the offset, bitmap, and tag 6 | // bit is called the "index word" (by analogy with a database index). 7 | // The precise quintet location is represented as a byte offset and a 8 | // shift. Previously a flags field contained the isbranch tag and shift, 9 | // but these are now separate. 
10 | // 11 | // Instead of trying to use bit fields, this code uses accessor 12 | // functions to split up a pair of words into their constituent parts. 13 | // This should improve portability to machines with varying endianness 14 | // and/or word size. 15 | // 16 | // Written by Tony Finch 17 | // You may do anything with this. It has no warranty. 18 | // 19 | 20 | typedef unsigned char byte; 21 | typedef unsigned int uint; 22 | 23 | typedef uint32_t Tbitmap; 24 | typedef uint64_t Tindex; 25 | 26 | const char *dump_bitmap(Tbitmap w); 27 | 28 | static inline uint 29 | byte_me(char c) { 30 | return(c & 0xFF); 31 | } 32 | 33 | static inline uint 34 | word_up(const char *p) { 35 | uint w = byte_me(p[0]) << 8; 36 | if(w) w |= byte_me(p[1]); 37 | return(w); 38 | } 39 | 40 | #if defined(HAVE_SLOW_POPCOUNT) 41 | 42 | static inline uint 43 | popcount(Tbitmap w) { 44 | w -= (w >> 1) & 0x55555555; 45 | w = (w & 0x33333333) + ((w >> 2) & 0x33333333); 46 | w = (w + (w >> 4)) & 0x0F0F0F0F; 47 | w = (w * 0x01010101) >> 24; 48 | return(w); 49 | } 50 | 51 | #else 52 | 53 | static inline uint 54 | popcount(Tbitmap w) { 55 | return((uint)__builtin_popcount(w)); 56 | } 57 | 58 | #endif 59 | 60 | typedef struct Tbl { 61 | Tindex index; 62 | void *ptr; 63 | } Trie; 64 | 65 | // accessor functions, except for the index word 66 | 67 | #define Tset_field(cast, elem, type, field) \ 68 | static inline void \ 69 | Tset_##field(Trie *t, type field) { \ 70 | t->elem = cast field; \ 71 | } \ 72 | struct dummy 73 | 74 | Tset_field((void *), ptr, Trie *, twigs); 75 | Tset_field((Tindex), index, Tindex, index); 76 | Tset_field((void *)(uint64_t), ptr, const char *, key); 77 | Tset_field((Tindex), index, void *, val); 78 | 79 | static inline bool Tindex_branch(Tindex i); 80 | 81 | static inline bool isbranch(Trie *t) { 82 | return(Tindex_branch(t->index)); 83 | } 84 | 85 | #ifdef WITH_EXTRA_CHECKS 86 | #define Tbranch(t) assert(isbranch(t)) 87 | #define Tleaf(t) assert(!isbranch(t)) 88 | #else 89 
| #define Tbranch(t) 90 | #define Tleaf(t) 91 | #endif 92 | 93 | #define Tcheck_get(type, tag, field, expr) \ 94 | static inline type \ 95 | tag##_##field(Trie *t) { \ 96 | tag(t); \ 97 | return(expr); \ 98 | } \ 99 | struct dummy 100 | 101 | Tcheck_get(Trie *, Tbranch, twigs, t->ptr); 102 | Tcheck_get(const char *, Tleaf, key, t->ptr); 103 | Tcheck_get(void *, Tleaf, val, (void*)t->index); 104 | 105 | // index word layout 106 | 107 | #define Tix_width_branch 1 108 | #define Tix_width_shift 3 109 | #define Tix_width_offset 28 110 | #define Tix_width_bitmap 32 111 | 112 | #define Tix_base_branch 0 113 | #define Tix_base_shift (Tix_base_branch + Tix_width_branch) 114 | #define Tix_base_offset (Tix_base_shift + Tix_width_shift) 115 | #define Tix_base_bitmap (Tix_base_offset + Tix_width_offset) 116 | 117 | #define Tix_place(field) ((Tindex)(field) << Tix_base_##field) 118 | 119 | #define Tix_mask(field) ((1ULL << Tix_width_##field) - 1ULL) 120 | 121 | #define Tunmask(field,index) ((uint)(((index) >> Tix_base_##field) \ 122 | & Tix_mask(field))) 123 | 124 | #define Tmaxlen Tix_mask(offset) 125 | 126 | // index word accessor functions 127 | 128 | #define Tindex_get(type, field) \ 129 | static inline type \ 130 | Tindex_##field(Tindex i) { \ 131 | return(Tunmask(field, i)); \ 132 | } \ 133 | struct dummy 134 | 135 | Tindex_get(bool, branch); 136 | Tindex_get(uint, shift); 137 | Tindex_get(uint, offset); 138 | Tindex_get(Tbitmap, bitmap); 139 | 140 | static inline Tindex 141 | Tindex_new(uint shift, uint offset, Tbitmap bitmap) { 142 | uint branch = 1; 143 | return( Tix_place(branch) | 144 | Tix_place(shift) | 145 | Tix_place(offset) | 146 | Tix_place(bitmap) ); 147 | } 148 | 149 | static inline Tindex 150 | Tbitmap_add(Tindex i, Tbitmap bitmap) { 151 | return(i | Tix_place(bitmap)); 152 | } 153 | 154 | static inline Tindex 155 | Tbitmap_del(Tindex i, Tbitmap bitmap) { 156 | return(i & ~Tix_place(bitmap)); 157 | } 158 | 159 | // sanity checks! 
160 | 161 | #ifndef static_assert 162 | #define static_assert_cat(a,b) a##b 163 | #define static_assert_name(line) static_assert_cat(static_assert_,line) 164 | #define static_assert(must_be_true,message) \ 165 | static const void *static_assert_name(__LINE__) \ 166 | [must_be_true ? 2 : -1] = { \ 167 | message, \ 168 | &static_assert_name(__LINE__) } 169 | #endif 170 | 171 | static_assert(Tix_base_bitmap + Tix_width_bitmap == 64, 172 | "index fields must fill a 64 bit word"); 173 | 174 | static_assert(Tunmask(bitmap,0x1234567800000000ULL) == 0x12345678, 175 | "extracting the bitmap works"); 176 | 177 | static_assert(Tunmask(offset,0x0420ULL) == 0x42, 178 | "extracting the offset works"); 179 | 180 | static_assert(Tunmask(shift,0xFEDCBAULL) == 5, 181 | "extracting the shift works"); 182 | 183 | // ..key[o%5==0].. ..key[o%5==1].. ..key[o%5==2].. ..key[o%5==3].. ..key[o%5==4].. 184 | // | | | | | | 185 | // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 186 | // | | | | | | | | | 187 | // shift=0 shift=5 shift=2 shift=7 shift=4 shift=1 shift=6 shift=3 188 | 189 | static inline byte 190 | knybble(const char *key, uint off, uint shift) { 191 | uint word = word_up(key+off); 192 | uint right = 16 - 5 - shift; 193 | return((word >> right) & 0x1FU); 194 | } 195 | 196 | static inline byte 197 | nibble(Tindex i, const char *key, size_t len) { 198 | uint off = Tindex_offset(i); 199 | if(off >= len) return(0); 200 | else return(knybble(key, off, Tindex_shift(i))); 201 | } 202 | 203 | static inline Tbitmap 204 | twigbit(Tindex i, const char *key, size_t len) { 205 | return(1U << nibble(i, key, len)); 206 | } 207 | 208 | static inline bool 209 | hastwig(Tindex i, Tbitmap bit) { 210 | return(Tindex_bitmap(i) & bit); 211 | } 212 | 213 | static inline uint 214 | twigoff(Tindex i, Tbitmap bit) { 215 | return(popcount(Tindex_bitmap(i) & (bit-1))); 216 | } 217 | 218 | #define TWIGOFFMAX(off, max, i, b) do { \ 219 | off = twigoff(i, b); \ 220 | max = 
popcount(Tindex_bitmap(i)); \ 221 | } while(0) 222 | -------------------------------------------------------------------------------- /fp-debug.c: -------------------------------------------------------------------------------- 1 | // fp-debug.c: fp trie debug support 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "Tbl.h" 14 | #include "fp.h" 15 | 16 | const char * 17 | dump_bitmap(Tbitmap w) { 18 | static char buf[32*3]; 19 | uint n = 0; 20 | n += snprintf(buf+n, sizeof(buf)-n, "("); 21 | for(uint i = 0; i < 32; i++) { 22 | Tbitmap b = 1 << i; 23 | if(w & b) 24 | n += snprintf(buf+n, sizeof(buf)-n, "%u,", i); 25 | } 26 | if(n > 1) 27 | buf[n-1] = ')'; 28 | return buf; 29 | } 30 | 31 | static void 32 | dump_rec(Trie *t, int d) { 33 | if(isbranch(t)) { 34 | printf("Tdump%*s branch %p %s %zu %d\n", d, "", t, 35 | dump_bitmap(t->branch.bitmap), 36 | (size_t)t->branch.index, t->branch.flags); 37 | int dd = 2 + t->branch.index * 6 + t->branch.flags - 1; 38 | assert(dd > d); 39 | for(uint i = 0; i < 32; i++) { 40 | Tbitmap b = 1 << i; 41 | if(hastwig(t, b)) { 42 | printf("Tdump%*s twig %d\n", d, "", i); 43 | dump_rec(twig(t, twigoff(t, b)), dd); 44 | } 45 | } 46 | } else { 47 | printf("Tdump%*s leaf %p\n", d, "", t); 48 | printf("Tdump%*s leaf key %p %s\n", d, "", 49 | t->leaf.key, t->leaf.key); 50 | printf("Tdump%*s leaf val %p\n", d, "", 51 | t->leaf.val); 52 | } 53 | } 54 | 55 | void 56 | Tdump(Tbl *tbl) { 57 | printf("Tdump root %p\n", tbl); 58 | if(tbl != NULL) 59 | dump_rec(&tbl->root, 0); 60 | } 61 | 62 | static void 63 | size_rec(Trie *t, uint d, 64 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 65 | *rsize += sizeof(*t); 66 | if(isbranch(t)) { 67 | *rbranches += 1; 68 | for(uint i = 0; i < 32; i++) { 69 | Tbitmap b = 1U << i; 70 | if(hastwig(t, b)) 71 | size_rec(twig(t, twigoff(t, b)), 72 | d+1, 
rsize, rdepth, rbranches, rleaves); 73 | } 74 | } else { 75 | *rleaves += 1; 76 | *rdepth += d; 77 | } 78 | } 79 | 80 | void 81 | Tsize(Tbl *tbl, const char **rtype, 82 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 83 | *rtype = "fp"; 84 | *rsize = *rdepth = *rbranches = *rleaves = 0; 85 | if(tbl != NULL) 86 | size_rec(&tbl->root, 0, rsize, rdepth, rbranches, rleaves); 87 | } 88 | -------------------------------------------------------------------------------- /fp.c: -------------------------------------------------------------------------------- 1 | // qp.c: tables implemented with fivebit popcount patricia tries. 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "Tbl.h" 15 | #include "fp.h" 16 | 17 | bool 18 | Tgetkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 19 | if(tbl == NULL) 20 | return(false); 21 | Trie *t = &tbl->root; 22 | while(isbranch(t)) { 23 | __builtin_prefetch(t->branch.twigs); 24 | Tbitmap b = twigbit(t, key, len); 25 | if(!hastwig(t, b)) 26 | return(false); 27 | t = twig(t, twigoff(t, b)); 28 | } 29 | if(strcmp(key, t->leaf.key) != 0) 30 | return(false); 31 | *pkey = t->leaf.key; 32 | *pval = t->leaf.val; 33 | return(true); 34 | } 35 | 36 | static bool 37 | next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) { 38 | if(isbranch(t)) { 39 | // Recurse to find either this leaf (*pkey != NULL) 40 | // or the next one (*pkey == NULL). 41 | Tbitmap b = twigbit(t, *pkey, *plen); 42 | uint s, m; TWIGOFFMAX(s, m, t, b); 43 | for(; s < m; s++) 44 | if(next_rec(twig(t, s), pkey, plen, pval)) 45 | return(true); 46 | return(false); 47 | } 48 | // We have found the next leaf. 
49 | if(*pkey == NULL) { 50 | *pkey = t->leaf.key; 51 | *plen = strlen(*pkey); 52 | *pval = t->leaf.val; 53 | return(true); 54 | } 55 | // We have found this leaf, so start looking for the next one. 56 | if(strcmp(*pkey, t->leaf.key) == 0) { 57 | *pkey = NULL; 58 | *plen = 0; 59 | return(false); 60 | } 61 | // No match. 62 | return(false); 63 | } 64 | 65 | bool 66 | Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) { 67 | if(tbl == NULL) { 68 | *pkey = NULL; 69 | *plen = 0; 70 | return(false); 71 | } 72 | return(next_rec(&tbl->root, pkey, plen, pval)); 73 | } 74 | 75 | Tbl * 76 | Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 77 | if(tbl == NULL) 78 | return(NULL); 79 | Trie *t = &tbl->root, *p = NULL; 80 | Tbitmap b = 0; 81 | while(isbranch(t)) { 82 | __builtin_prefetch(t->branch.twigs); 83 | b = twigbit(t, key, len); 84 | if(!hastwig(t, b)) 85 | return(tbl); 86 | p = t; t = twig(t, twigoff(t, b)); 87 | } 88 | if(strcmp(key, t->leaf.key) != 0) 89 | return(tbl); 90 | *pkey = t->leaf.key; 91 | *pval = t->leaf.val; 92 | if(p == NULL) { 93 | free(tbl); 94 | return(NULL); 95 | } 96 | t = p; p = NULL; // Because t is the usual name 97 | uint s, m; TWIGOFFMAX(s, m, t, b); 98 | if(m == 2) { 99 | // Move the other twig to the parent branch. 100 | Trie *twigs = t->branch.twigs; 101 | *t = *twig(t, !s); 102 | free(twigs); 103 | return(tbl); 104 | } 105 | memmove(t->branch.twigs+s, t->branch.twigs+s+1, sizeof(Trie) * (m - s - 1)); 106 | t->branch.bitmap &= ~b; 107 | // We have now correctly removed the twig from the trie, so if 108 | // realloc() fails we can ignore it and continue to use the 109 | // slightly oversized twig array. 110 | Trie *twigs = realloc(t->branch.twigs, sizeof(Trie) * (m - 1)); 111 | if(twigs != NULL) t->branch.twigs = twigs; 112 | return(tbl); 113 | } 114 | 115 | Tbl * 116 | Tsetl(Tbl *tbl, const char *key, size_t len, void *val) { 117 | // Ensure flag bits are zero.
118 | if(((uint64_t)val & 1) != 0 || len > 0xFFFFFF) { 119 | errno = EINVAL; 120 | return(NULL); 121 | } 122 | if(val == NULL) 123 | return(Tdell(tbl, key, len)); 124 | // First leaf in an empty tbl? 125 | if(tbl == NULL) { 126 | tbl = malloc(sizeof(*tbl)); 127 | if(tbl == NULL) return(NULL); 128 | tbl->root.leaf.key = key; 129 | tbl->root.leaf.val = val; 130 | return(tbl); 131 | } 132 | Trie *t = &tbl->root; 133 | // Find the most similar leaf node in the trie. We will compare 134 | // its key with our new key to find the first differing nibble, 135 | // which can be at a lower index than the point at which we 136 | // detect a difference. 137 | while(isbranch(t)) { 138 | __builtin_prefetch(t->branch.twigs); 139 | Tbitmap b = twigbit(t, key, len); 140 | // Even if our key is missing from this branch we need to 141 | // keep iterating down to a leaf. It doesn't matter which 142 | // twig we choose since the keys are all the same up to this 143 | // index. Note that blindly using twigoff(t, b) can cause 144 | // an out-of-bounds index if it equals twigmax(t). 145 | uint i = hastwig(t, b) ? twigoff(t, b) : 0; 146 | t = twig(t, i); 147 | } 148 | // Do the keys differ, and if so, where? 149 | size_t i; 150 | uint f; 151 | for(i = 0; i <= len; i++) { 152 | f = (byte)key[i] ^ (byte)t->leaf.key[i]; 153 | if(f != 0) goto newkey; 154 | } 155 | t->leaf.val = val; 156 | return(tbl); 157 | newkey:; // We have the branch's byte index; what is its chunk index? 158 | size_t bit = i * 8 + __builtin_clz(f) + 8 - sizeof(uint) * 8; 159 | size_t qi = bit / 5; 160 | i = qi * 5 / 8; 161 | f = qi * 5 % 8 << 1 | 1; 162 | // re-index keys with adjusted i 163 | uint k1 = (byte)key[i] << 8; 164 | uint k2 = (byte)t->leaf.key[i] << 8; 165 | k1 |= (k1 ? (byte)key[i+1] : 0); 166 | k2 |= (k2 ? (byte)t->leaf.key[i+1] : 0); 167 | Tbitmap b1 = nibbit(k1, f); 168 | // Prepare the new leaf. 
169 | Trie t1 = { .leaf = { .key = key, .val = val } }; 170 | // Find where to insert a branch or grow an existing branch. 171 | t = &tbl->root; 172 | while(isbranch(t)) { 173 | __builtin_prefetch(t->branch.twigs); 174 | if(i == t->branch.index && f == t->branch.flags) 175 | goto growbranch; 176 | if(i == t->branch.index && f < t->branch.flags) 177 | goto newbranch; 178 | if(i < t->branch.index) 179 | goto newbranch; 180 | Tbitmap b = twigbit(t, key, len); 181 | assert(hastwig(t, b)); 182 | t = twig(t, twigoff(t, b)); 183 | } 184 | newbranch:; 185 | Trie *twigs = malloc(sizeof(Trie) * 2); 186 | if(twigs == NULL) return(NULL); 187 | Trie t2 = *t; // Save before overwriting. 188 | Tbitmap b2 = nibbit(k2, f); 189 | t->branch.twigs = twigs; 190 | t->branch.flags = f; 191 | t->branch.index = i; 192 | t->branch.bitmap = b1 | b2; 193 | *twig(t, twigoff(t, b1)) = t1; 194 | *twig(t, twigoff(t, b2)) = t2; 195 | return(tbl); 196 | growbranch:; 197 | assert(!hastwig(t, b1)); 198 | uint s, m; TWIGOFFMAX(s, m, t, b1); 199 | twigs = realloc(t->branch.twigs, sizeof(Trie) * (m + 1)); 200 | if(twigs == NULL) return(NULL); 201 | memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s)); 202 | memmove(twigs+s, &t1, sizeof(Trie)); 203 | t->branch.twigs = twigs; 204 | t->branch.bitmap |= b1; 205 | return(tbl); 206 | } 207 | -------------------------------------------------------------------------------- /fp.h: -------------------------------------------------------------------------------- 1 | // fp.h: tables implemented with fivebit popcount patricia tries. 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 
5 | // 6 | 7 | typedef unsigned char byte; 8 | typedef unsigned int uint; 9 | 10 | typedef uint32_t Tbitmap; 11 | 12 | const char *dump_bitmap(Tbitmap w); 13 | 14 | #if defined(HAVE_SLOW_POPCOUNT) 15 | 16 | static inline uint 17 | popcount(Tbitmap w) { 18 | w -= (w >> 1) & 0x55555555; 19 | w = (w & 0x33333333) + ((w >> 2) & 0x33333333); 20 | w = (w + (w >> 4)) & 0x0F0F0F0F; 21 | w = (w * 0x01010101) >> 24; 22 | return(w); 23 | } 24 | 25 | #else 26 | 27 | static inline uint 28 | popcount(Tbitmap w) { 29 | return((uint)__builtin_popcount(w)); 30 | } 31 | 32 | #endif 33 | 34 | typedef struct Tleaf { 35 | const char *key; 36 | void *val; 37 | } Tleaf; 38 | 39 | typedef struct Tbranch { 40 | union Trie *twigs; 41 | uint32_t flags : 4, 42 | index : 28; 43 | uint32_t bitmap; 44 | } Tbranch; 45 | 46 | typedef union Trie { 47 | struct Tleaf leaf; 48 | struct Tbranch branch; 49 | } Trie; 50 | 51 | struct Tbl { 52 | union Trie root; 53 | }; 54 | 55 | // Test flags to determine type of this node. 56 | 57 | static inline bool 58 | isbranch(Trie *t) { 59 | return(t->branch.flags & 1); 60 | } 61 | 62 | // ..key[i%5==0].. ..key[i%5==1].. ..key[i%5==2].. ..key[i%5==3].. ..key[i%5==4].. 
63 | // | | | | | | 64 | // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 65 | // | | | | | | | | | 66 | // shift=0 shift=5 shift=2 shift=7 shift=4 shift=1 shift=6 shift=3 67 | 68 | static inline Tbitmap 69 | nibbit(uint k, uint flags) { 70 | uint shift = 16 - 5 - (flags >> 1); 71 | return(1U << ((k >> shift) & 0x1FU)); 72 | } 73 | 74 | static inline Tbitmap 75 | twigbit(Trie *t, const char *key, size_t len) { 76 | uint64_t i = t->branch.index; 77 | if(i >= len) return(1); 78 | uint k = (byte)key[i] << 8; 79 | if(k) k |= (byte)key[i+1]; 80 | return(nibbit(k, t->branch.flags)); 81 | } 82 | 83 | static inline bool 84 | hastwig(Trie *t, Tbitmap bit) { 85 | return(t->branch.bitmap & bit); 86 | } 87 | 88 | static inline uint 89 | twigoff(Trie *t, Tbitmap b) { 90 | return(popcount(t->branch.bitmap & (b-1))); 91 | } 92 | 93 | static inline Trie * 94 | twig(Trie *t, uint i) { 95 | return(&t->branch.twigs[i]); 96 | } 97 | 98 | #define TWIGOFFMAX(off, max, t, b) do { \ 99 | off = twigoff(t, b); \ 100 | max = popcount(t->branch.bitmap); \ 101 | } while(0) 102 | -------------------------------------------------------------------------------- /getwords.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use warnings; 3 | use strict; 4 | my %a; 5 | while (<>) { 6 | $a{$_} = 1 for m{\b[A-Za-z0-9_]+\b}g; 7 | } 8 | print "$_\n" for keys %a; 9 | -------------------------------------------------------------------------------- /ht-debug.c: -------------------------------------------------------------------------------- 1 | // ht-debug.c: HAMT debug support 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 
5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "Tbl.h" 14 | #include "ht.h" 15 | 16 | static void 17 | dump_rec(Trie *t, int d) { 18 | if(isbranch(t)) { 19 | printf("Tdump%*s branch %p\n", d, "", t); 20 | for(uint i = 0; i < 64; i++) { 21 | uint64_t b = twigbit(i); 22 | if(hastwig(t, b)) { 23 | printf("Tdump%*s twig %u\n", d, "", i); 24 | dump_rec(twig(t, twigoff(t, b)), d+1); 25 | } 26 | } 27 | } else { 28 | printf("Tdump%*s leaf %p\n", d, "", t); 29 | printf("Tdump%*s leaf key %p %s\n", d, "", 30 | t->leaf.key, t->leaf.key); 31 | printf("Tdump%*s leaf val %p\n", d, "", 32 | t->leaf.val); 33 | } 34 | } 35 | 36 | void 37 | Tdump(Tbl *tbl) { 38 | printf("Tdump root %p\n", tbl); 39 | if(tbl != NULL) 40 | dump_rec(&tbl->root, 0); 41 | } 42 | 43 | static void 44 | size_rec(Trie *t, uint d, size_t *rsize, size_t *rdepth, size_t *rleaves) { 45 | *rsize += sizeof(*t); 46 | if(isbranch(t)) { 47 | for(uint i = 0; i < 64; i++) { 48 | uint64_t b = twigbit(i); 49 | if(hastwig(t, b)) 50 | size_rec(twig(t, twigoff(t, b)), d+1, 51 | rsize, rdepth, rleaves); 52 | } 53 | } else { 54 | *rdepth += d; 55 | *rleaves += 1; 56 | } 57 | } 58 | 59 | void 60 | Tsize(Tbl *tbl, const char **rtype, 61 | size_t *rsize, size_t *rdepth, size_t *rleaves) { 62 | *rtype = "ht"; 63 | *rsize = *rdepth = *rleaves = 0; 64 | if(tbl != NULL) 65 | size_rec(&tbl->root, 0, rsize, rdepth, rleaves); 66 | } 67 | -------------------------------------------------------------------------------- /ht.c: -------------------------------------------------------------------------------- 1 | // ht.c: tables implemented with hash array mapped tries 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty.
5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "Tbl.h" 15 | #include "ht.h" 16 | 17 | static inline uint64_t 18 | hash(const char *key, size_t len, uint depth) { 19 | uint64_t h, stir[2] = { depth, depth }; 20 | siphash((void*)&h, (const void *)key, len, (void*)stir); 21 | return(h); 22 | } 23 | 24 | bool 25 | Tgetkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 26 | if(tbl == NULL) 27 | return(false); 28 | Trie *t = &tbl->root; 29 | for(uint d1 = 0 ;; ++d1) { 30 | uint64_t h = hash(key, len, d1); 31 | for(uint d2 = lglgN; d2 < Hbits; d2 += lglgN, h >>= lglgN) { 32 | if(!isbranch(t)) 33 | goto leaf; 34 | uintptr_t b = twigbit(h); 35 | if(!hastwig(t, b)) 36 | return(false); 37 | t = twig(t, twigoff(t, b)); 38 | } 39 | } 40 | leaf: if(strcmp(key, t->leaf.key) != 0) 41 | return(false); 42 | *pkey = t->leaf.key; 43 | *pval = t->leaf.val; 44 | return(true); 45 | } 46 | 47 | static bool 48 | next_rec(Trie *t, const char **pkey, size_t *plen, void **pval, 49 | uint64_t h, uint d1, uint d2) { 50 | if(isbranch(t)) { 51 | if(d2 >= Hbits) { 52 | h = *pkey == NULL ? 0 : 53 | hash(*pkey, *plen, d1++); 54 | d2 = lglgN; 55 | } 56 | uintptr_t b = twigbit(h); 57 | uint s = twigoff(t, b); 58 | uint m = twigmax(t); 59 | for(; s < m; s++) 60 | if(next_rec(twig(t, s), pkey, plen, pval, 61 | h >> lglgN, d1, d2 + lglgN)) 62 | return(true); 63 | else 64 | h = 0; 65 | return(false); 66 | } 67 | // We have found the next leaf. 68 | if(*pkey == NULL) { 69 | *pkey = t->leaf.key; 70 | *plen = strlen(*pkey); 71 | *pval = t->leaf.val; 72 | return(true); 73 | } 74 | // We have found this leaf, so start looking for the next one. 75 | if(strcmp(*pkey, t->leaf.key) == 0) { 76 | *pkey = NULL; 77 | *plen = 0; 78 | return(false); 79 | } 80 | // No match. 
81 | return(false); 82 | } 83 | 84 | bool 85 | Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) { 86 | if(tbl == NULL) { 87 | *pkey = NULL; 88 | *plen = 0; 89 | return(false); 90 | } 91 | return(next_rec(&tbl->root, pkey, plen, pval, 0, 0, Hbits)); 92 | } 93 | 94 | Tbl * 95 | Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 96 | if(tbl == NULL) 97 | return(NULL); 98 | Trie *t = &tbl->root, *p = NULL; 99 | uint64_t h = 0; 100 | uintptr_t b = 0; 101 | for(uint d1 = 0 ;; ++d1) { 102 | h = hash(key, len, d1); 103 | for(uint d2 = lglgN; d2 < Hbits; d2 += lglgN, h >>= lglgN) { 104 | if(!isbranch(t)) 105 | goto leaf; 106 | b = twigbit(h); 107 | if(!hastwig(t, b)) 108 | return(tbl); 109 | p = t; t = twig(t, twigoff(t, b)); 110 | } 111 | } 112 | leaf: if(strcmp(key, t->leaf.key) != 0) 113 | return(tbl); 114 | *pkey = t->leaf.key; 115 | *pval = t->leaf.val; 116 | if(p == NULL) { 117 | free(tbl); 118 | return(NULL); 119 | } 120 | t = p; p = NULL; // Because t is the usual name 121 | uint s = twigoff(t, b), m = twigmax(t); 122 | if(m == 2) { 123 | // Move the other twig to the parent branch. 124 | // XXX: May need to unsplice deep hash collision here. 125 | Trie *twigs = twig(t, 0); 126 | *t = *twig(t, !s); 127 | free(twigs); 128 | return(tbl); 129 | } 130 | Trie *twigs = malloc(sizeof(Trie) * (m - 1)); 131 | if(twigs == NULL) return(NULL); 132 | memcpy(twigs, twig(t, 0), sizeof(Trie) * s); 133 | memcpy(twigs+s, twig(t, s+1), sizeof(Trie) * (m - s - 1)); 134 | free(twig(t, 0)); 135 | twigset(t, twigs); 136 | t->branch.map &= ~b; 137 | return(tbl); 138 | } 139 | 140 | Tbl * 141 | Tsetl(Tbl *tbl, const char *key, size_t len, void *val) { 142 | // Ensure flag bits are zero. 143 | if(((uintptr_t)val & 1) != 0) { 144 | errno = EINVAL; 145 | return(NULL); 146 | } 147 | if(val == NULL) 148 | return(Tdell(tbl, key, len)); 149 | // First leaf in an empty tbl?
150 | if(tbl == NULL) { 151 | tbl = malloc(sizeof(*tbl)); 152 | if(tbl == NULL) return(NULL); 153 | tbl->root.leaf.key = key; 154 | tbl->root.leaf.val = val; 155 | return(tbl); 156 | } 157 | Trie *t = &tbl->root; 158 | Trie t1 = { .leaf = { .key = key, .val = val } }; 159 | uint d1, d2; 160 | uintptr_t b1; 161 | for(d1 = 0 ;; ++d1) { 162 | uint64_t h = hash(key, len, d1); 163 | for(d2 = lglgN; d2 < Hbits; d2 += lglgN, h >>= lglgN) { 164 | b1 = twigbit(h); 165 | if(!isbranch(t)) 166 | goto leaf; 167 | if(!hastwig(t, b1)) 168 | goto growbranch; 169 | t = twig(t, twigoff(t, b1)); 170 | } 171 | } 172 | leaf: if(strcmp(key, t->leaf.key) != 0) 173 | goto newbranch; 174 | t->leaf.val = val; 175 | return(tbl); 176 | newbranch:; 177 | // XXX May need multiple levels of trie here. 178 | Trie *twigs = malloc(sizeof(Trie) * 2); 179 | if(twigs == NULL) return(NULL); 180 | Trie t2 = *t; // Save before overwriting. 181 | uint64_t h2 = hash(t->leaf.key, strlen(t->leaf.key), d1); 182 | uint64_t b2 = twigbit(h2 >>= d2 - lglgN); 183 | t->branch.map = b1 | b2; 184 | twigset(t, twigs); 185 | *twig(t, twigoff(t, b1)) = t1; 186 | *twig(t, twigoff(t, b2)) = t2; 187 | return(tbl); 188 | growbranch:; 189 | assert(!hastwig(t, b1)); 190 | uint s = twigoff(t, b1), m = twigmax(t); 191 | twigs = malloc(sizeof(Trie) * (m + 1)); 192 | if(twigs == NULL) return(NULL); 193 | memcpy(twigs, twig(t, 0), sizeof(Trie) * s); 194 | memcpy(twigs+s, &t1, sizeof(Trie)); 195 | memcpy(twigs+s+1, twig(t, s), sizeof(Trie) * (m - s)); 196 | free(twig(t, 0)); 197 | twigset(t, twigs); 198 | t->branch.map |= b1; 199 | return(tbl); 200 | } 201 | -------------------------------------------------------------------------------- /ht.h: -------------------------------------------------------------------------------- 1 | // ht.h: tables implemented with hash array mapped tries 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 
5 | // 6 | 7 | typedef unsigned char byte; 8 | typedef unsigned int uint; 9 | 10 | // Word size parameters. 11 | // lgN: number of bits in a word 12 | // lglgN: number of bits to index a word 13 | // 14 | // Branch maps are lgN bits wide (same as a pointer) 15 | // Hash values are consumed lglgN bits at a time 16 | // 17 | // Hash values are always 64 bits (defined by SipHash) 18 | 19 | #if UINTPTR_MAX == 0xFFFFFFFFFFFFFFFF 20 | 21 | #define lgN 64 22 | #define lglgN 6 23 | 24 | static inline uint 25 | popcount(uintptr_t w) { 26 | return((uint)__builtin_popcountll(w)); 27 | } 28 | 29 | #endif 30 | #if UINTPTR_MAX == 0xFFFFFFFF 31 | 32 | #define lgN 32 33 | #define lglgN 5 34 | 35 | static inline uint 36 | popcount(uintptr_t w) { 37 | return((uint)__builtin_popcount(w)); 38 | } 39 | 40 | #endif 41 | 42 | #define Hbits 64 43 | 44 | extern int 45 | siphash(uint8_t *out, const uint8_t *in, uint64_t inlen, const uint8_t *k); 46 | 47 | 48 | typedef struct Tleaf { 49 | const char *key; 50 | void *val; 51 | } Tleaf; 52 | 53 | typedef struct Tbranch { 54 | uintptr_t map; 55 | uintptr_t twigs; 56 | } Tbranch; 57 | 58 | typedef union Trie { 59 | struct Tleaf leaf; 60 | struct Tbranch branch; 61 | } Trie; 62 | 63 | struct Tbl { 64 | union Trie root; 65 | }; 66 | 67 | static inline bool 68 | isbranch(Trie *t) { 69 | return(t->branch.twigs & 1); 70 | } 71 | 72 | static inline uintptr_t 73 | twigbit(uint64_t h) { 74 | return((uintptr_t)1 << (h & (lgN-1))); 75 | } 76 | 77 | static inline bool 78 | hastwig(Trie *t, uintptr_t bit) { 79 | return(t->branch.map & bit); 80 | } 81 | 82 | static inline uint 83 | twigoff(Trie *t, uintptr_t bit) { 84 | return(popcount(t->branch.map & (bit - 1))); 85 | } 86 | 87 | static inline uint 88 | twigmax(Trie *t) { 89 | return(popcount(t->branch.map)); 90 | } 91 | 92 | static inline Trie * 93 | twig(Trie *t, uint i) { 94 | return((Trie*)(t->branch.twigs ^ 1) + i); 95 | } 96 | 97 | static inline void 98 | twigset(Trie *t, Trie *twigs) { 99 | 
t->branch.twigs = (uintptr_t)twigs | 1; 100 | } 101 | -------------------------------------------------------------------------------- /notes-bitstrings-prefixes.md: -------------------------------------------------------------------------------- 1 | Bitstring keys and longest prefix search in a [qp trie](https://dotat.at/prog/qp) 2 | ======================================================= 3 | 4 | _May 2017_ 5 | 6 | [Marek asked](https://twitter.com/vavrusam/status/867070309785980928) 7 | 8 | > any thoughts on making qptrie work for bitstrings with length not 9 | > aligned on nibble boundary? 10 | 11 | > I want to try it for v6 prefixes, a writeup would be appreciated! 12 | > I'm still not sure how to best store mask for the trailing bits 13 | 14 | 15 | Original qp trie 16 | ---------------- 17 | 18 | The existing implementation is based on nul-terminated strings, and it 19 | treats the nonexistent bytes after the end of the key as if they are 20 | zero. 21 | 22 | The qp trie lookup code fetches a nybble from the search key and turns 23 | it into a bitmap with one bit set. In the code there is the line 24 | 25 | if(offset >= len) return(1); 26 | 27 | which is the part that causes bytes after the end of the key to be 28 | treated as zero - 1 is the bitmap with bit zero set. 29 | 30 | In the 5 bit and 6 bit variants, it is common for the quintet / sextet 31 | nybbles to be unaligned wrt the end of the key, but that's not a 32 | problem - the bits in the nybble past the end of the key are filled 33 | with zeroes from the nul terminator. 34 | 35 | 36 | Double bitmap qp trie 37 | --------------------- 38 | 39 | The (as yet unimplemented) [double bitmap qp trie 40 | layout](notes-generic-leaves.md) allows generic leaf types 41 | embedded in the data structure, and allows arbitrary binary keys. 42 | 43 | What happens when the end of a key is unaligned wrt the 5 bit or 6 bit 44 | nybbles? 
45 | 46 | For now, I'll just consider byte string keys - I'll generalise to 47 | bitstrings later. 48 | 49 | In this setting the equivalent to the line quoted above should be 50 | 51 | if(offset >= len) return(0); 52 | 53 | That is, an empty bitmap, signifying that there is nothing present in 54 | the key at this offset. When this bitmap is tested against the bitmaps 55 | in a branch, no match will be found - correctly, because we have 56 | descended into a part of the trie where all the keys are longer than 57 | the search key and therefore can't possibly match. 58 | 59 | (This tweak would also be valid in the existing implementations, since 60 | a key in the trie has to diverge from its neighbours no later than its 61 | end, so if our offset is after the end of the search key, the key 62 | can't be present since we should already have found it.) 63 | 64 | But what happens at the end of a key, when the last byte isn't aligned 65 | with the nybbles? Some of the bits in the nybble are logically not 66 | present, but we can't represent that, at least not in a way that 67 | produces an unambiguous bitmap. 68 | 69 | The solution is to observe that there can be at most one byte boundary 70 | in a nybble, so a key that ends in this nybble can't collide in the 71 | leaf bitmap with a longer key. 72 | 73 | This means we can fill in the missing bits in the nybble that fall 74 | after the end of the key with zero, and we won't get confused with a 75 | longer key which happens to have zero bits at that point. 76 | 77 | 78 | Bitstring keys 79 | -------------- 80 | 81 | OK so far. But if we allow keys to be arbitrary bit strings, then 82 | multiple keys can end inside the same nybble, and we could have 83 | multiple keys trying to occupy the same bit in the leaf bitmap. 
84 | 85 | However, notice that Marek changed the problem that we are trying to 86 | solve: he asked about IP address prefix matching, which is a search 87 | for the longest key in the trie that matches a prefix of the search 88 | key, not a search for an exact match. 89 | 90 | There is a lot of literature for IP address prefix matching, and some 91 | of it describes data structures that are very similar to a qp trie. [I 92 | have previously reviewed a few papers on this 93 | topic](blog-2016-02-23.md) - a poptrie is probably better than a qp 94 | trie for this application. 95 | 96 | But anyway, how would I go about doing this in a qp style? 97 | 98 | 99 | Example 100 | ------- 101 | 102 | I'll write bit strings in binary with the big end on the left, and use 103 | 5 bit serch keys. 104 | 105 | We'll have a shorter prefix, 01/2, with a more specific route for the 106 | subnet 0101/4, and a third route to 101/3. 107 | 108 | So keys 01010 and 01011 (ten and eleven) match the longer prefix. 109 | 110 | Keys 01000 and 01001 match the shorter prefix, as do keys 01100 up to 111 | 01111 (eight, nine, twelve - fifteen). 112 | 113 | Keys 10100 up to 10111 (twenty - twenty-three) match the third prefix. 114 | 115 | Other keys do not match. 116 | 117 | Using S for the short prefix, M for the more specific route, T for the 118 | third prefix, and 0 for no match, the result for each of the 32 119 | possible search keys is: 120 | 121 | 00000000 SSMMSSSS 0000TTTT 00000000 122 | 123 | Letters correspond to bits set in the leaf bitmap. The leaf array then 124 | looks like 125 | 126 | SSMMSSSSTTTT 127 | 128 | Unlike an exact-match qp trie, there are multiple entries for the same 129 | leaf when it can match multiple search keys. This is a bit redundant. 130 | It implies that the leaves probably need to be represented as pointers 131 | rather than being embedded in the trie. Perhaps these overheads are 132 | acceptable. 
133 | 134 | 135 | Longest-prefix search 136 | --------------------- 137 | 138 | The search algorithm's bitmap handling is the same as an original qp 139 | trie. The differences are in key matching and exit conditions. 140 | 141 | When searching, we need to keep track of the longest match found so 142 | far, which obviously starts off NULL. 143 | 144 | If there is a leaf at a node, compare the search key with the leaf's 145 | prefix. If they don't match, we have found a subtrie where our search 146 | key cannot match, so return the longest match found so far. If they do 147 | match, this leaf is now our longest match. Keep going. 148 | 149 | Next check for a branch; if there is a branch, continue down the trie, 150 | or if not, return the longest match found so far. 151 | 152 | 153 | Exact match search 154 | ------------------ 155 | 156 | To search for a specific prefix in the trie (rather than searching for 157 | a match for an IP address), it may be necessary to scan the leaf array 158 | to skip over longer prefixes. 159 | 160 | Start with the element in the leaf array identified by zero-padding 161 | the prefix up to the next nybble boundary. If it is for a longer 162 | prefix, skip the number of leaves determined by the difference in 163 | prefix lengths (e.g. 2 longer -> skip 4). 164 | 165 | This search can fail if you are looking for a shorter prefix that is 166 | completely covered by longer prefixes. 167 | 168 | 169 | --------------------------------------------------------------------------- 170 | 171 | Written by Tony Finch ; 172 | You may do anything with this. It has no warranty. 
173 | 174 | -------------------------------------------------------------------------------- /notes-concurrency.md: -------------------------------------------------------------------------------- 1 | Concurrent updates to [qp tries](https://dotat.at/prog/qp) 2 | ================================ 3 | 4 | [Knot DNS uses qp tries](https://gitlab.labs.nic.cz/knot/knot-dns/-/tree/master/src%2Fcontrib%2Fqp-trie) 5 | and a couple of years ago I added 6 | [support for concurrent updates](https://fanf.dreamwidth.org/127488.html). 7 | It supports lock-free multiple reader / single writer updates in RCU 8 | (read/copy/update) style. 9 | 10 | It is designed to fit the way authoritative DNS servers work, where 11 | updates to a zone happen one at a time, not too frequently, and 12 | should not affect queries. 13 | 14 | The cache in recursive servers is more difficult. When there is a 15 | cache miss, the server needs to resolve the query, and there can be a 16 | lot of resolver jobs in progress that may need to add multiple names 17 | to the trie (e.g. for zones and nameservers they discovered while 18 | resolving the query). And the server needs to clean expired records 19 | from the cache. 20 | 21 | It is a much more concurrent setting than an authoritative server. 22 | 23 | RCU for qp-trie caches 24 | ---------------------- 25 | 26 | It would be nice if resolver jobs could update the cache without 27 | having to pass all updates to the single writer that is able to update 28 | the qp trie, and without having to wait for a full RCU epoch. 29 | 30 | It might be worth using a second data structure that's optimized for 31 | concurrent writes, rather than for storing a lot of data with high 32 | read speed. (A concurrent hash map of some kind, perhaps.) 33 | 34 | The idea is that when there is a cache miss on the main qp trie, check 35 | the hash map, and if that lookup also fails, start a resolver job. 36 | Resolver jobs only update the hashmap. 
37 | 38 | Periodically a cache cleaning job runs, which folds the contents of 39 | the hashmap into the qp trie, and deletes expired records or records 40 | that need to be purged to remain within memory limits. This cleaning 41 | job uses the copy-on-write concurrent qp trie update code. 42 | 43 | One question I'm not sure about is how much the leaf objects - the DNS 44 | records structures that are application data from the qp trie point of 45 | view - might need to be updated after they are created, and whether it 46 | matters if they are updated after being moved from the hashmap to the 47 | main trie. For example, 48 | 49 | * When a resolver job is in progress, there needs to be a 50 | place-holder so that multiple concurrent queries for the same 51 | records can wait for one resolver to finish. 52 | 53 | * When a cache hit happens on a name that is soon to expire, a 54 | resolver job should be started to refresh it early. So an existing 55 | cache entry doubles as a resolver place-holder. 56 | 57 | * When a cache entry has expired and serve-stale is in effect, the 58 | server will need to keep it around while attempts continue to 59 | resolve it. 60 | 61 | The qp trie is about name lookups, but in the DNS a a name can have 62 | multiple RRsets with different TTLs, so the per-name cache structure 63 | is relatively complicated and needs to support concurrent updates. 64 | Since that is the case, it is probably OK if the per-name record is 65 | moved from the hashmap to the qp trie by the cleaner, even if resolver 66 | jobs are in progress. 67 | 68 | --------------------------------------------------------------------------- 69 | 70 | Written by Tony Finch ; 71 | You may do anything with this. It has no warranty. 
72 | 73 | -------------------------------------------------------------------------------- /notes-generic-leaves.md: -------------------------------------------------------------------------------- 1 | Generic leaf types in a [qp trie](https://dotat.at/prog/qp) 2 | ================================= 3 | 4 | _May 2017_ 5 | 6 | 7 | The original qp trie is based around a two-word "twig" object, which 8 | can be either a leaf (a key+value pair of pointers) or a branch (an 9 | index word and a pointer). 10 | 11 | When I benchmarked memory overhead, I took the key+value pair as a 12 | sunk cost. But it's common (especially in the C++ world) to want to 13 | embed the key and value rather than reference them indirectly. 14 | Similarly, DJB's crit-bit trie has single-word leaves that just point 15 | to the key; if you want to store a key+value pair, you need to embed 16 | the key in the value so that you can find the value given the key. 17 | 18 | These efficiency tricks don't work in a qp trie because the layout of 19 | leaves is tied to the layout of branches. Can we decouple them, to 20 | make the layout of leaves more flexible and efficient? 21 | 22 | 23 | Old: one array, one bitmap 24 | -------------------------- 25 | 26 | The original branch layout consists of: 27 | 28 | * an index word, which contains the offset into the key of the 29 | branch's controlling nybble, and a bitmap indicating which child 30 | nodes are present; 31 | 32 | * a pointer to an array of child "twigs", each of which can be either 33 | a leaf or a branch. 34 | 35 | 36 | New: two arrays, two bitmaps 37 | ---------------------------- 38 | 39 | The new layout segregates child nodes into separate arrays of branches 40 | and leaves. Each array has its own bitmap, and the bitmaps must have 41 | an empty intersection. 42 | 43 | In effect, the tag bits inside twigs (the flags field that was used to 44 | distinguish between leaves and branches) have been moved up into the 45 | index word. 
46 | 47 | As before, each element in the branch array consists of an index word 48 | and a pointer. The child's two arrays are placed consecutively in 49 | memory at the target of the pointer, so only one pointer is needed. 50 | 51 | The type of elements of the leaf array can be entirely under the 52 | control of the user. 53 | 54 | 55 | Making space 56 | ------------ 57 | 58 | We need to find space for this second bitmap. 59 | 60 | In a 4-bit qp trie, we can steal 16 bits from the nybble offset, so a 61 | 64 bit index word contains two 16 bit fields for bitmaps, and a 32 bit 62 | nybble offset. 63 | 64 | In a 5-bit qp trie, there isn't space in a 64 bit word for all three 65 | fields, so we have to spill into another word. 66 | 67 | With the old layout, a 6-bit qp trie was not an attractive option 68 | since it wastes a word per leaf, but that is no longer a problem with 69 | this new layout. 70 | 71 | The following table shows how branches can fit reasonably nicely on 72 | the two common word sizes and the three sensible nybble sizes. We want 73 | to keep a branch object to a whole number of words so an array of 74 | branches can be packed tightly. 75 | 76 | nybble 77 | size word size 32 64 78 | 79 | 4 bit pointer 32 64 80 | offset 31+1 31+1 81 | bitmaps 16 x 2 16 x 2 82 | 83 | words 3 2 84 | 85 | 5 bit pointer 32 64 86 | offset 29+3 61+3 87 | bitmaps 32 x 2 32 x 2 88 | 89 | words 4 3 90 | 91 | 6 bit pointer 32 64 92 | offset 30+2 62+2 93 | bitmaps 64 x 2 64 x 2 94 | 95 | words 6 4 96 | 97 | It's possible to reduce the size of branches by reducing the size of 98 | the offset field (the pointer and bitmap sizes are fixed) but to get 99 | the benefit of smaller offsets we would need to reorganize the branch 100 | array into separate arrays so that small offsets can be packed 101 | tightly. However this is likely to make array indexing more expensive. 
102 | 103 | 104 | Concatenated nodes 105 | ------------------ 106 | 107 | This new layout works with concatenated branch nodes. There is no 108 | longer any need for a branch nybble field. If there is a single bit 109 | set in the branch bitmap, the branch array just contains one offset 110 | and a pair of bitmaps, and instead of a pointer, the child branch's 111 | arrays follow consecutively in memory. 112 | 113 | 114 | Binary keys and prefix agnosticism 115 | ---------------------------------- 116 | 117 | Two observations: 118 | 119 | * To support binary keys as described at the end of the [notes on rib 120 | compression](notes-rib-compression.md), the leaf bitmap needs an 121 | extra bit. This is annoying with wide fanouts, because the bitmaps 122 | no longer fit in a word. 123 | 124 | * The two bitmaps are somewhat redundant: zero in both means no nodes 125 | with this prefix; a one and a zero means either a leaf or a branch; 126 | but two ones doesn't have an assigned meaning. 127 | 128 | Having both a leaf and a branch at the same point in the trie implies 129 | that we have relaxed the requirement for prefix-freedom. This 130 | relaxation also means we no longer have a problem with binary keys, so 131 | we don't need the extra valueless bit in the leaf bitmap. 132 | 133 | When a child has bits set in both bitmaps, this means that the the 134 | leaf key is longer than the offset of this nybble, but shorter than 135 | the offsets of all children in the branch. In other words, a leaf is 136 | pushed down the tree as far as possible. 137 | 138 | When searching, if there is a leaf at a node, compare keys. If they 139 | match, you have succeeded. If the leaf is not a prefix of the search 140 | key we have found a subtrie where we cannot match, so quit. Else check 141 | for a branch; if there is a branch, continue down the trie, or if not, 142 | the search key is not in the trie, so quit. 
143 | 144 | 145 | Portability and genericity 146 | -------------------------- 147 | 148 | The new layout is overall a lot more type-safe, since different types 149 | of object are placed in different parts of memory, rather than being 150 | distinguished by tag bits. 151 | 152 | This greatly reduces portability problems due to type punning between 153 | the index word and a pointer - things like endianness and word size 154 | mismatches can mess up the placement of the tag bit. 155 | 156 | The lack of coupling allows leaf type to be completely generic, and 157 | the genericity could be straightforwardly extended to key comparisons 158 | and fetching nybbles. 159 | 160 | The main requirement on leaves is that they can be moved around 161 | freely, when arrays are resized to insert or delete child nodes. 162 | 163 | Overall, this new layout should be a lot more friendly to C++ and Rust. 164 | 165 | 166 | Caveats 167 | ------- 168 | 169 | The risk of completely user-defined leaf types that embed both key and 170 | value is that the user must take care not to alter the key, otherwise 171 | they will corrupt the trie. I don't know of any way to get the 172 | compiler to help enforce this constraint, and also allow in-place 173 | mutation of the value part. 174 | 175 | It's also mildly awkward from the syntax point of view. When the key 176 | and value are the same object, a sugary 177 | 178 | trie[key] = value; 179 | 180 | syntax doesn't work. Instead it has to be more like 181 | 182 | trie.insert(leaf); 183 | 184 | 185 | --------------------------------------------------------------------------- 186 | 187 | Written by Tony Finch ; 188 | You may do anything with this. It has no warranty. 
189 | 190 | -------------------------------------------------------------------------------- /notes-jumbo.md: -------------------------------------------------------------------------------- 1 | jumbo branches and qp tries 2 | =========================== 3 | 4 | There is a possibility of adding support for jumbo branches to qp 5 | tries. Jumbo branches would have more than 16 twigs (sub-tries). 6 | 7 | The flag bits can currently take values 0,1,2, so 3 is available to 8 | mark a jumbo branch; as suggested by the flag meanings it would test 9 | a whole byte at a time instead of one nibble at a time. Perhaps the 10 | bitmap field could be used to choose from multiple branch types, in 11 | the style of adaptive radix trees. https://github.com/armon/libart 12 | 13 | The key question is how to decide when to coalesce two layers of qp 14 | trie into a jumbo branch. A simple option is to coalesce when the 15 | upper nibble passes some density threshold. This would work OK for 16 | almost-binary keys. However for common ASCII keys, the upper nibble 17 | will usually have four or maybe five possible values. In this case 18 | the upper nibble alone does not provide a clear signal of the density 19 | of the byte, so there is too much risk of wasting time trying to find 20 | one. 21 | 22 | Perhaps it would be reasonable to sacrifice lexicographic ordering by 23 | testing lower nibbles before upper nibbles. Then it is quite likely 24 | that a dense byte will fill or nearly fill its first branch. But if it 25 | is OK to sacrifice lexicographic ordering, we might as well use a HAMT 26 | instead. 
27 | -------------------------------------------------------------------------------- /notes-love.md: -------------------------------------------------------------------------------- 1 | Some comments from people with nice things to say about qp tries 2 | ---------------------------------------------------------------- 3 | 4 | 5 | Marek Vavrusa 6 | 7 | 8 | 9 | > I use both crit-bit tries and HAT tries, happy to try it against 10 | > them on DNS-like data (though the iteration code looks slow). 11 | 12 | 13 | 14 | > The @fanf qp-tries are ~20% faster and consume 9% less memory 15 | > (rigged with mempool allocator) than crit-bits in my use case. 16 | 17 | 18 | 19 | > I think it's going to consume less memory for most folks with stdlib 20 | > allocator, as it's 2x shallower trie and 2x less alloc calls! 21 | 22 | 23 | 24 | > though both gcc/clang generate popcntl with -msse4.2, beats even HAT 25 | > tries in this test. 26 | 27 | 28 | 29 | > Really enjoyed toying with @fanf's qp tries today. It's been a while 30 | > working on this sort of stuff... 31 | 32 | (prefetching) 33 | 34 | > pretty consistent 7% speed bump on my simple benchmark. This is 35 | > shaping up nicely! 36 | 37 | 38 | 39 | > crude bench on /usr..dict/words sample (almost no common pref), 40 | > hopscotch htable (85% fill) vs qp: 2.19x faster, 12% less mem 41 | 42 | 43 | 44 | > that means qp tries are still incredible, same ballpark perf for 45 | > such degenerate use case and still can do range/prefix scan! 46 | 47 | 48 | Justin Mason 49 | 50 | 51 | 52 | > Interesting new data structure from Tony Finch. 53 | 54 | (prefetching) 55 | 56 | > this is awesome. every time I've tried using tries, the memory 57 | > access patterns vs cache killed its performance 58 | 59 | 60 | 61 | > haha, nothing worse than when dumb brute force over an 62 | > integer-indexed array wins ;) 63 | 64 | 65 | 66 | Devon H. O'Dell 67 | 68 | 69 | 70 | > Been enjoying your critbit / trie articles. 
Just ran my own 71 | > benchmarks on qp and it is *very* nice. Will be using soon. 72 | 73 | 74 | 75 | > We've a compelling use case for crit-bit, and I was going to replace 76 | > it with a specialized rbt (which performed better), but qp > * 77 | 78 | (embedded crit-bit) 79 | 80 | > Oh, also, the cb tree we use is a parentless embedded tree; source 81 | > at https://github.com/glk/critbit (it's a bit of an eyesore though) 82 | 83 | 84 | 85 | > The "qp trie" by @fanf is amazing and you should use it. 86 | > Preliminary synthetic benchmarks against some alternatives: 87 | > https://9vx.org/post/qp-tries/ 88 | -------------------------------------------------------------------------------- /notes-mistakes.md: -------------------------------------------------------------------------------- 1 | Mistakes were made in my [qp trie](https://dotat.at/prog/qp) 2 | ================================== 3 | 4 | I addressed many of these mistakes when I refactored the qp-trie 5 | implementation in Knot DNS to support concurrent access, but I have 6 | not done so for this experimental implementation. (The code here is 7 | public domain; Knot DNS is GPL.) 8 | 9 | 10 | Terminology 11 | ----------- 12 | 13 | I originally used "index" to refer to the position of a nybble in a 14 | key, but that didn't leave me with a good word to describe the word 15 | that summarizes a node. Using "offset" for the position of a nybble 16 | allows me to use "index" for the word as a whole, which I like better 17 | because it's like a miniature database index, where a twigs vector 18 | is like a miniature database table. 
19 | 20 | * word 21 | 22 | either a pointer or an index word, typically 64 bits 23 | 24 | * index word 25 | 26 | contains metadata about a twig vector, including key offset and bitmap 27 | 28 | * key offset 29 | 30 | identifies the nybble within a key that is checked against the index 31 | 32 | * nybble 33 | 34 | originally a string of 4 bits but now 5 bits from the key 35 | 36 | * node 37 | 38 | either a leaf or a branch 39 | 40 | * leaf 41 | 42 | a pair of a key and a value 43 | 44 | * branch 45 | 46 | a pair of an index word and a pointer to twigs 47 | 48 | * twigs 49 | 50 | a vector of nodes 51 | 52 | 53 | Unions and bit fields 54 | --------------------- 55 | 56 | A disastrous choice for portability. 57 | 58 | It is _much_ better to define the index word as a large-enough integer 59 | type, and use macros or inline functions to extract or update fields 60 | within it. 61 | 62 | This makes it trivial to ensure that the tag bits appear in the least 63 | significant bits of the word, without endianness issues. 64 | 65 | In a leaf the index word is not an index but instead is a pointer to a 66 | key or a value (depending on which guarantees word alignment), and 67 | it's just as easy to cast the integer to a pointer as it is to access 68 | a field of a union. 69 | 70 | The large-enough integer type is at least `uint64_t`, though it needs 71 | to be `uintptr_t` if the platform's pointers are bigger than that (for 72 | example, CHERI capabilities). 73 | 74 | 75 | --------------------------------------------------------------------------- 76 | 77 | Written by Tony Finch ; 78 | You may do anything with this. It has no warranty. 
79 | 80 | -------------------------------------------------------------------------------- /notes-rib-compression.md: -------------------------------------------------------------------------------- 1 | Notes on [qp trie](https://dotat.at/prog/qp) rib compression 2 | ============================================================ 3 | 4 | Since January I have been thinking on and off about the details of 5 | what started as @tef's write buffer suggestion. It has taken a long 6 | time to distill down to something reasonably simple... 7 | 8 | 9 | A third kind of compression 10 | --------------------------- 11 | 12 | There are two kinds of compression in the original version of qp tries: 13 | 14 | * Spine compression, from Morrison's PATRICIA tries, in which 15 | sequences of branches with one child each are omitted, and instead 16 | of key indexes being implicit in the tree depth, each node is 17 | annotated with an explicit key index. 18 | 19 | * Branch compression, using Bagwell's HAMT popcount trick, in which 20 | null pointers to missing child nodes are omitted, and there is a 21 | bitmap indicating which child nodes are present. 22 | 23 | The new idea will add a third kind of compression, which I will call 24 | "rib compression", in which a branch that has leaves for all its 25 | children except for one child branch, is concatenated with its child 26 | branch to save a pointer indirection. 27 | 28 | (The term "rib compression" was inspired by "spine compression". The 29 | idea is that a linear sequence of nodes with leaves sprouting off to 30 | the sides is a bit like a rib cage.) 31 | 32 | 33 | Rib branches 34 | ------------ 35 | 36 | A "rib" branch needs to identify which nybble values have leaf 37 | children, and which nybble value is the branch child. We want to move 38 | the branch out of the twig array, so that we aren't wasting a word on 39 | the unused branch pointer. 
This implies that it should not be in the 40 | bitmap; instead there needs to be a field containing the nybble value 41 | of the branch child. 42 | 43 | (Actually, it's probably possible to omit this extra field, since we 44 | can recover its value from a stored child key when updating the trie. 45 | But the disadvantage is that we lose the ability to stop early when 46 | looking for a missing key, and we need some other way to identify rib 47 | branches.) 48 | 49 | The single branch child of a rib branch is concatenated onto its 50 | parent. Instead of following a pointer, the child appears 51 | consecutively in memory after its parent's twig array. 52 | 53 | 54 | Indirect and concatenated branches 55 | ---------------------------------- 56 | 57 | In an original qp trie, all branches have an indirect layout. They 58 | have two parts: 59 | 60 | * a twig containing an index word and a pointer; 61 | 62 | * an array of child twigs. 63 | 64 | With rib compression, the child branch of a rib branch has a 65 | concatenated layout: 66 | 67 | * a bare index word, consecutively followed by 68 | 69 | * an array of child twigs. 70 | 71 | 72 | Trunks 73 | ------ 74 | 75 | We'll call a consecutive sequence of concatenated branches a "trunk". 76 | A trunk corresponds to a single allocation. 77 | 78 | Every branch in a trunk except for the last must be a rib branch, i.e. 79 | must have exactly one branch child, the rest being leaves. 80 | 81 | The first branch in a trunk has an indirect layout - the pointer is 82 | how we find the trunk. All the rest are concatenated. 83 | 84 | A branch (of any shape) is concatenated to its parent exactly when its 85 | parent is a rib branch. 
86 | 87 | 88 | Index word layout 89 | ----------------- 90 | 91 | On a little-endian machine the layout of an index word for a 92 | quintuple-bit qp trie with rib compression is: 93 | 94 | uint64_t tag : 1, 95 | shift : 3, 96 | offset : 23, 97 | branch : 5, 98 | bitmap : 32; 99 | 100 | For a quadruple-bit qp trie it is: 101 | 102 | uint64_t tag : 1, 103 | shift : 1, 104 | offset : 42, 105 | branch : 4, 106 | bitmap : 16; 107 | 108 | The branch field is only used in the index word of a rib branch, and 109 | it refers to the child node concatenated after the rib node. In this 110 | case the bit corresponding to the branch field is clear in the bitmap. 111 | 112 | To distinguish a rib branch from another kind of branch from looking 113 | at its index, the concatenated child branch field should correspond 114 | to one of the bits set in the bitmap. 115 | 116 | When looking up a key in the qp trie, the lookup code first checks the 117 | bitmap for a match (in which case the child is found in the twig 118 | array), then checks the branch field for a match (in which case the 119 | child is concatenated), otherwise the key is missing. 120 | 121 | 122 | Binary keys 123 | ----------- 124 | 125 | The original qp trie only supports nul-terminated C string keys, and 126 | treats all bytes after the end of the key as zero. 127 | 128 | I have considered a hack to make it work for arbitrary binary keys, by 129 | treating the first byte after the end to be one and subsequent bytes 130 | zero. This works for relaxing the prefix-freedom requirement, but it 131 | breaks lexical ordering: a shorter string should be lexically before 132 | any longer string of which it is a prefix, but with the hack a longer 133 | string that continues with zero bytes will be before the shorter 134 | string. 135 | 136 | In the Knot DNS qp trie implementation, there is an extra bit in the 137 | bitmap to indicate a non-octet, which sorts before a zero octet. 
This 138 | is probably a better solution than my hack. 139 | 140 | But does the Knot not bit work for rib compression? 141 | 142 | There is a potential problem in the rib index word where there is a 143 | copy of the child nybble corresponding to the following branch. There 144 | isn't space here to represent a non-octet value (without stealing yet 145 | another bit). 146 | 147 | But any time a non-octet appears in an index, it must be a leaf, so in 148 | a rib it will appear in the bitmap of leaves not the child branch 149 | slot. So the problem never occurs. 150 | 151 | The reason a non-octet must be a leaf is that all the subsequent 152 | octets in this key must also be non-octets, and there is only one key 153 | like this. 154 | 155 | This logic works for keys which are a single binary string, but it 156 | doesn't work for keys like DNS names which are sequences of binary 157 | strings separated by non-octets. In this setting it seems reasonable 158 | to promote troublesome ribs to branches, when they have non-octet 159 | following branches. 160 | 161 | 162 | --------------------------------------------------------------------------- 163 | 164 | Written by Tony Finch ; 165 | You may do anything with this. It has no warranty. 
166 | 167 | -------------------------------------------------------------------------------- /notes-todo.md: -------------------------------------------------------------------------------- 1 | * fix union / bit-field portability mistake 2 | 3 | * public-domain version of concurrent qp-trie 4 | 5 | * DNS-trie 6 | 7 | * finish HAMT implementation 8 | 9 | * revise API to add Tsetkv() which returns the key pointer and 10 | previous value pointer from the table 11 | 12 | * implement embedded crit-bit tries 13 | 14 | * benchmark against other data structures 15 | * adaptive radix trie 16 | * HAT-trie 17 | * hash tables 18 | -------------------------------------------------------------------------------- /popcount-test.c: -------------------------------------------------------------------------------- 1 | // popcount-test.c: test popcount16() and popcount16x2() 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "qp.h" 13 | 14 | int 15 | main(void) { 16 | srandomdev(); 17 | Trie tt = {{0,0}}, *t = &tt; 18 | for(;;) { 19 | long r = random(); 20 | uint b16 = t->branch.bitmap = r & 0xFFFF; 21 | uint b = (r >> 16) & 0xF; 22 | uint pc = popcount(b16); 23 | uint po = popcount(b16 & (b-1)); 24 | uint off = twigoff(t, b); 25 | uint s, m; TWIGOFFMAX(s, m, t, b); 26 | if(pc != m || po != s || po != off) { 27 | printf("%04x b=%d pc=%d po=%d off=%d s=%d m=%d\n", 28 | b16, b, pc, po, off, s, m); 29 | assert(pc == m); 30 | assert(po == s); 31 | assert(po == off); 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /qp-debug.c: -------------------------------------------------------------------------------- 1 | // qp-debug.c: qp trie debug support 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 
5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "Tbl.h" 14 | #include "qp.h" 15 | 16 | static void 17 | dump_rec(Trie *t, int d) { 18 | if(isbranch(t)) { 19 | printf("Tdump%*s branch %p %zu %d\n", d, "", t, 20 | (size_t)t->branch.index, t->branch.flags); 21 | int dd = 2 + t->branch.index * 4 + (t->branch.flags - 1) * 2; 22 | assert(dd > d); 23 | for(uint i = 0; i < 16; i++) { 24 | uint b = 1 << i; 25 | if(hastwig(t, b)) { 26 | printf("Tdump%*s twig %d\n", d, "", i); 27 | dump_rec(twig(t, twigoff(t, b)), dd); 28 | } 29 | } 30 | } else { 31 | printf("Tdump%*s leaf %p\n", d, "", t); 32 | printf("Tdump%*s leaf key %p %s\n", d, "", 33 | t->leaf.key, t->leaf.key); 34 | printf("Tdump%*s leaf val %p\n", d, "", 35 | t->leaf.val); 36 | } 37 | } 38 | 39 | void 40 | Tdump(Tbl *tbl) { 41 | printf("Tdump root %p\n", tbl); 42 | if(tbl != NULL) 43 | dump_rec(&tbl->root, 0); 44 | } 45 | 46 | static void 47 | size_rec(Trie *t, uint d, 48 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 49 | *rsize += sizeof(*t); 50 | if(isbranch(t)) { 51 | *rbranches += 1; 52 | for(uint i = 0; i < 16; i++) { 53 | Tbitmap b = 1 << i; 54 | if(hastwig(t, b)) 55 | size_rec(twig(t, twigoff(t, b)), 56 | d+1, rsize, rdepth, rbranches, rleaves); 57 | } 58 | } else { 59 | *rleaves += 1; 60 | *rdepth += d; 61 | } 62 | } 63 | 64 | void 65 | Tsize(Tbl *tbl, const char **rtype, 66 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 67 | *rtype = "qp"; 68 | *rsize = *rdepth = *rbranches = *rleaves = 0; 69 | if(tbl != NULL) 70 | size_rec(&tbl->root, 0, rsize, rdepth, rbranches, rleaves); 71 | } 72 | -------------------------------------------------------------------------------- /qp.c: -------------------------------------------------------------------------------- 1 | // qp.c: tables implemented with quadbit popcount patricia tries. 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 
5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "Tbl.h" 15 | #include "qp.h" 16 | 17 | bool 18 | Tgetkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 19 | if(tbl == NULL) 20 | return(false); 21 | Trie *t = &tbl->root; 22 | while(isbranch(t)) { 23 | __builtin_prefetch(t->branch.twigs); 24 | Tbitmap b = twigbit(t, key, len); 25 | if(!hastwig(t, b)) 26 | return(false); 27 | t = twig(t, twigoff(t, b)); 28 | } 29 | if(strcmp(key, t->leaf.key) != 0) 30 | return(false); 31 | *pkey = t->leaf.key; 32 | *pval = t->leaf.val; 33 | return(true); 34 | } 35 | 36 | static bool 37 | next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) { 38 | if(isbranch(t)) { 39 | // Recurse to find either this leaf (*pkey != NULL) 40 | // or the next one (*pkey == NULL). 41 | Tbitmap b = twigbit(t, *pkey, *plen); 42 | uint s, m; TWIGOFFMAX(s, m, t, b); 43 | for(; s < m; s++) 44 | if(next_rec(twig(t, s), pkey, plen, pval)) 45 | return(true); 46 | return(false); 47 | } 48 | // We have found the next leaf. 49 | if(*pkey == NULL) { 50 | *pkey = t->leaf.key; 51 | *plen = strlen(*pkey); 52 | *pval = t->leaf.val; 53 | return(true); 54 | } 55 | // We have found this leaf, so start looking for the next one. 56 | if(strcmp(*pkey, t->leaf.key) == 0) { 57 | *pkey = NULL; 58 | *plen = 0; 59 | return(false); 60 | } 61 | // No match. 
62 | return(false); 63 | } 64 | 65 | bool 66 | Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) { 67 | if(tbl == NULL) { 68 | *pkey = NULL; 69 | *plen = 0; 70 | return(false); 71 | } 72 | return(next_rec(&tbl->root, pkey, plen, pval)); 73 | } 74 | 75 | Tbl * 76 | Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 77 | if(tbl == NULL) 78 | return(NULL); 79 | Trie *t = &tbl->root, *p = NULL; 80 | Tbitmap b = 0; 81 | while(isbranch(t)) { 82 | __builtin_prefetch(t->branch.twigs); 83 | b = twigbit(t, key, len); 84 | if(!hastwig(t, b)) 85 | return(tbl); 86 | p = t; t = twig(t, twigoff(t, b)); 87 | } 88 | if(strcmp(key, t->leaf.key) != 0) 89 | return(tbl); 90 | *pkey = t->leaf.key; 91 | *pval = t->leaf.val; 92 | if(p == NULL) { 93 | free(tbl); 94 | return(NULL); 95 | } 96 | t = p; p = NULL; // Because t is the usual name 97 | uint s, m; TWIGOFFMAX(s, m, t, b); 98 | if(m == 2) { 99 | // Move the other twig to the parent branch. 100 | Trie *twigs = t->branch.twigs; 101 | *t = *twig(t, !s); 102 | free(twigs); 103 | return(tbl); 104 | } 105 | memmove(t->branch.twigs+s, t->branch.twigs+s+1, sizeof(Trie) * (m - s - 1)); 106 | t->branch.bitmap &= ~b; 107 | // We have now correctly removed the twig from the trie, so if 108 | // realloc() fails we can ignore it and continue to use the 109 | // slightly oversized twig array. 110 | Trie *twigs = realloc(t->branch.twigs, sizeof(Trie) * (m - 1)); 111 | if(twigs != NULL) t->branch.twigs = twigs; 112 | return(tbl); 113 | } 114 | 115 | Tbl * 116 | Tsetl(Tbl *tbl, const char *key, size_t len, void *val) { 117 | // Ensure flag bits are zero. 118 | if(((uint64_t)val & 3) != 0) { 119 | errno = EINVAL; 120 | return(NULL); 121 | } 122 | if(val == NULL) 123 | return(Tdell(tbl, key, len)); 124 | // First leaf in an empty tbl?
125 | if(tbl == NULL) { 126 | tbl = malloc(sizeof(*tbl)); 127 | if(tbl == NULL) return(NULL); 128 | tbl->root.leaf.key = key; 129 | tbl->root.leaf.val = val; 130 | return(tbl); 131 | } 132 | Trie *t = &tbl->root; 133 | // Find the most similar leaf node in the trie. We will compare 134 | // its key with our new key to find the first differing nibble, 135 | // which can be at a lower index than the point at which we 136 | // detect a difference. 137 | while(isbranch(t)) { 138 | __builtin_prefetch(t->branch.twigs); 139 | Tbitmap b = twigbit(t, key, len); 140 | // Even if our key is missing from this branch we need to 141 | // keep iterating down to a leaf. It doesn't matter which 142 | // twig we choose since the keys are all the same up to this 143 | // index. Note that blindly using twigoff(t, b) can cause 144 | // an out-of-bounds index if it equals twigmax(t). 145 | uint i = hastwig(t, b) ? twigoff(t, b) : 0; 146 | t = twig(t, i); 147 | } 148 | // Do the keys differ, and if so, where? 149 | size_t i; 150 | for(i = 0; i <= len; i++) { 151 | if(key[i] != t->leaf.key[i]) 152 | goto newkey; 153 | } 154 | t->leaf.val = val; 155 | return(tbl); 156 | newkey:; // We have the branch's index; what are its flags? 157 | byte k1 = (byte)key[i], k2 = (byte)t->leaf.key[i]; 158 | uint f = k1 ^ k2; 159 | f = (f & 0xf0) ? 1 : 2; 160 | // Prepare the new leaf. 161 | Tbitmap b1 = nibbit(k1, f); 162 | Trie t1 = { .leaf = { .key = key, .val = val } }; 163 | // Find where to insert a branch or grow an existing branch. 
164 | t = &tbl->root; 165 | while(isbranch(t)) { 166 | __builtin_prefetch(t->branch.twigs); 167 | if(i == t->branch.index && f == t->branch.flags) 168 | goto growbranch; 169 | if(i == t->branch.index && f < t->branch.flags) 170 | goto newbranch; 171 | if(i < t->branch.index) 172 | goto newbranch; 173 | Tbitmap b = twigbit(t, key, len); 174 | assert(hastwig(t, b)); 175 | t = twig(t, twigoff(t, b)); 176 | } 177 | newbranch:; 178 | Trie *twigs = malloc(sizeof(Trie) * 2); 179 | if(twigs == NULL) return(NULL); 180 | Trie t2 = *t; // Save before overwriting. 181 | Tbitmap b2 = nibbit(k2, f); 182 | t->branch.twigs = twigs; 183 | t->branch.flags = f; 184 | t->branch.index = i; 185 | t->branch.bitmap = b1 | b2; 186 | *twig(t, twigoff(t, b1)) = t1; 187 | *twig(t, twigoff(t, b2)) = t2; 188 | return(tbl); 189 | growbranch:; 190 | assert(!hastwig(t, b1)); 191 | uint s, m; TWIGOFFMAX(s, m, t, b1); 192 | twigs = realloc(t->branch.twigs, sizeof(Trie) * (m + 1)); 193 | if(twigs == NULL) return(NULL); 194 | memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s)); 195 | memmove(twigs+s, &t1, sizeof(Trie)); 196 | t->branch.twigs = twigs; 197 | t->branch.bitmap |= b1; 198 | return(tbl); 199 | } 200 | -------------------------------------------------------------------------------- /qp.h: -------------------------------------------------------------------------------- 1 | // qp.h: tables implemented with quadbit popcount patricia tries. 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | // In a trie, keys are divided into digits depending on some radix 8 | // e.g. base 2 for binary tries, base 256 for byte-indexed tries. 9 | // When searching the trie, successive digits in the key, from most to 10 | // least significant, are used to select branches from successive 11 | // nodes in the trie, like: 12 | // for(i = 0; isbranch(node); i++) node = node->branch[key[i]]; 13 | // All of the keys in a subtrie have identical prefixes. 
Tries do not 14 | // need to store keys since they are implicit in the structure. 15 | // 16 | // A patricia trie or crit-bit trie is a binary trie which omits nodes that 17 | // have only one child. Nodes are annotated with the index of the bit that 18 | // is used to select the branch; indexes always increase as you go further 19 | // into the trie. Each leaf has a copy of its key so that when you find a 20 | // leaf you can verify that the untested bits match. 21 | // 22 | // The popcount() function counts the number of bits that are set in 23 | // a word. It's also known as the Hamming weight; Knuth calls it 24 | // "sideways add". https://en.wikipedia.org/wiki/popcount 25 | // 26 | // You can use popcount() to implement a sparse array of length N 27 | // containing M < N members using bitmap of length N and a packed 28 | // vector of M elements. A member i is present in the array if bit 29 | // i is set, so M == popcount(bitmap). The index of member i in 30 | // the packed vector is the popcount of the bits preceding i. 31 | // mask = 1 << i; 32 | // if(bitmap & mask) 33 | // member = vector[popcount(bitmap & mask-1)] 34 | // 35 | // See "Hacker's Delight" by Hank Warren, section 5-1 "Counting 1 36 | // bits", subsection "applications". http://www.hackersdelight.org 37 | // 38 | // Phil Bagwell's hashed array-mapped tries (HAMT) use popcount for 39 | // compact trie nodes. String keys are hashed, and the hash is used 40 | // as the index to the trie, with radix 2^32 or 2^64. 41 | // http://infoscience.epfl.ch/record/64394/files/triesearches.pdf 42 | // http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf 43 | // 44 | // A qp trie uses its keys a quadbit (or nibble or half-byte) at a 45 | // time. It is a radix 2^4 patricia trie, so each node can have between 46 | // 2 and 16 children. It uses a 16 bit word to mark which children are 47 | // present and popcount to index them. 
The aim is to improve on crit-bit 48 | // tries by reducing memory usage and the number of indirections 49 | // required to look up a key. 50 | // 51 | // The worst case for a qp trie is when each branch has 2 children; 52 | // then it is the same shape as a crit-bit trie. In this case there 53 | // are n-1 internal branch nodes of two words each, so it is equally 54 | // efficient as a crit-bit trie. If the key space is denser then 55 | // branches have more children but the same overhead, so the memory 56 | // usage is less. For maximally dense tries the overhead is: 57 | // 58 | // key length (bytes) n 59 | // number of leaves 256^n 60 | // crit-bit branches 256^n - 1 61 | // qp branches 1 + 16^(n*2-1) == 1 + 256^n / 16 62 | // crit-bit depth n * 8 63 | // qp depth n * 2 64 | // 65 | // In practice, qp averages about 3.3 words per leaf vs. crit-bit's 4 66 | // words per leaf, and qp has about half the depth. 67 | 68 | typedef unsigned char byte; 69 | typedef unsigned int uint; 70 | 71 | typedef uint Tbitmap; 72 | 73 | #if defined(HAVE_NARROW_CPU) || defined(HAVE_SLOW_POPCOUNT) 74 | 75 | // NOTE: 16 bits only 76 | 77 | static inline uint 78 | popcount(Tbitmap w) { 79 | w -= (w >> 1) & 0x5555; 80 | w = (w & 0x3333) + ((w >> 2) & 0x3333); 81 | w = (w + (w >> 4)) & 0x0F0F; 82 | w = (w + (w >> 8)) & 0x00FF; 83 | return(w); 84 | } 85 | 86 | #else 87 | 88 | static inline uint 89 | popcount(Tbitmap w) { 90 | return((uint)__builtin_popcount(w)); 91 | } 92 | 93 | #endif 94 | 95 | // Parallel popcount of the top and bottom 16 bits in a 32 bit word. This 96 | // is probably only a win if your CPU is short of registers and/or integer 97 | // units. NOTE: The caller needs to extract the results by masking with 98 | // 0x00FF0000 and 0x000000FF for the top and bottom halves. 
99 | 100 | static inline uint 101 | popcount16x2(uint w) { 102 | w -= (w >> 1) & 0x55555555; 103 | w = (w & 0x33333333) + ((w >> 2) & 0x33333333); 104 | w = (w + (w >> 4)) & 0x0F0F0F0F; 105 | w = w + (w >> 8); 106 | return(w); 107 | } 108 | 109 | // A trie node is two words on 64 bit machines, or three on 32 bit 110 | // machines. A node can be a leaf or a branch. In a leaf, the value 111 | // pointer must be word-aligned to allow for the tag bits. 112 | 113 | typedef struct Tleaf { 114 | const char *key; 115 | void *val; 116 | } Tleaf; 117 | 118 | // Branch nodes are distinguished from leaf nodes using a couple 119 | // of flag bits which act as a dynamic type tag. They can be: 120 | // 121 | // 0 -> node is a leaf 122 | // 1 -> node is a branch, testing upper nibble 123 | // 2 -> node is a branch, testing lower nibble 124 | // 125 | // A branch node is laid out so that the flag bits correspond to the 126 | // least significant bits bits of one of the leaf node pointers. In a 127 | // leaf node, that pointer must be word-aligned so that its flag bits 128 | // are zero. We have chosen to place this restriction on the value 129 | // pointer. 130 | // 131 | // A branch contains the index of the byte that it tests. The combined 132 | // value index << 2 | flags increases along the key in big-endian 133 | // lexicographic order, and increases as you go deeper into the trie. 134 | // All the keys below a branch are identical up to the nibble 135 | // identified by the branch. 136 | // 137 | // A branch has a bitmap of which subtries ("twigs") are present. The 138 | // flags, index, and bitmap are packed into one word. The other word 139 | // is a pointer to an array of trie nodes, one for each twig that is 140 | // present. 141 | 142 | // XXX We hope that the compiler will not punish us for abusing unions. 143 | 144 | // XXX This currently assumes a 64 bit little endian machine. 
145 | // On a 32 bit machine we could perhaps fit a branch in to two words 146 | // without restricting the key length by making the index relative 147 | // instead of absolute. If the gap between nodes is larger than a 16 148 | // bit offset allows, we can insert a stepping-stone branch with only 149 | // one twig. This would make the code a bit more complicated... 150 | 151 | typedef struct Tbranch { 152 | union Trie *twigs; 153 | uint64_t 154 | flags : 2, 155 | index : 46, 156 | bitmap : 16; 157 | } Tbranch; 158 | 159 | typedef union Trie { 160 | struct Tleaf leaf; 161 | struct Tbranch branch; 162 | } Trie; 163 | 164 | struct Tbl { 165 | union Trie root; 166 | }; 167 | 168 | // Test flags to determine type of this node. 169 | 170 | static inline bool 171 | isbranch(Trie *t) { 172 | return(t->branch.flags != 0); 173 | } 174 | 175 | // Make a bitmask for testing a branch bitmap. 176 | // 177 | // mask: 178 | // 1 -> 0xffff -> 0xfff0 -> 0xf0 179 | // 2 -> 0x0000 -> 0x000f -> 0x0f 180 | // 181 | // shift: 182 | // 1 -> 1 -> 4 183 | // 2 -> 0 -> 0 184 | 185 | static inline Tbitmap 186 | nibbit(byte k, uint flags) { 187 | uint mask = ((flags - 2) ^ 0x0f) & 0xff; 188 | uint shift = (2 - flags) << 2; 189 | return(1 << ((k & mask) >> shift)); 190 | } 191 | 192 | // Extract a nibble from a key and turn it into a bitmask. 
193 | 194 | static inline Tbitmap 195 | twigbit(Trie *t, const char *key, size_t len) { 196 | uint64_t i = t->branch.index; 197 | if(i >= len) return(1); 198 | return(nibbit((byte)key[i], t->branch.flags)); 199 | } 200 | 201 | static inline bool 202 | hastwig(Trie *t, Tbitmap bit) { 203 | return(t->branch.bitmap & bit); 204 | } 205 | 206 | static inline uint 207 | twigoff(Trie *t, Tbitmap b) { 208 | return(popcount(t->branch.bitmap & (b-1))); 209 | } 210 | 211 | static inline Trie * 212 | twig(Trie *t, uint i) { 213 | return(&t->branch.twigs[i]); 214 | } 215 | 216 | #ifdef HAVE_NARROW_CPU 217 | 218 | #define TWIGOFFMAX(off, max, t, b) do { \ 219 | Tbitmap bitmap = t->branch.bitmap; \ 220 | uint word = (bitmap << 16) | (bitmap & (b-1)); \ 221 | uint counts = popcount16x2(word); \ 222 | off = counts & 0xFF; \ 223 | max = (counts >> 16) & 0xFF; \ 224 | } while(0) 225 | 226 | #else 227 | 228 | #define TWIGOFFMAX(off, max, t, b) do { \ 229 | off = twigoff(t, b); \ 230 | max = popcount(t->branch.bitmap); \ 231 | } while(0) 232 | 233 | #endif 234 | -------------------------------------------------------------------------------- /rc-debug.c: -------------------------------------------------------------------------------- 1 | // rc-debug.c: rc trie debug support 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 
5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "Tbl.h" 14 | #include "rc.h" 15 | 16 | const char * 17 | dump_bitmap(Tbitmap w) { 18 | static char buf[32*3]; 19 | int size = (int)sizeof(buf), n = 0; 20 | n += snprintf(buf+n, size-n, "("); 21 | for(uint s = 0; s < 32; s++) { 22 | Tbitmap b = 1 << s; 23 | if(w & b) 24 | n += snprintf(buf+n, size-n, "%u,", s); 25 | } 26 | if(n > 1) 27 | buf[n-1] = ')'; 28 | return buf; 29 | } 30 | 31 | static void 32 | dump_rec(Trie *t, uint d) { 33 | Tindex i = t->index; 34 | if(Tindex_branch(i)) { 35 | printf("Tdump%*s branch %p %s %zu %d\n", d, "", (void*)t, 36 | dump_bitmap(Tindex_bitmap(i)), 37 | (size_t)Tindex_offset(i), Tindex_shift(i)); 38 | uint dd = 1 + Tindex_offset(i) * 8 + Tindex_shift(i); 39 | assert(dd > d); 40 | for(uint s = 0; s < 32; s++) { 41 | Tbitmap b = 1 << s; 42 | if(hastwig(i, b)) { 43 | printf("Tdump%*s twig %d\n", d, "", s); 44 | dump_rec(Tbranch_twigs(t) + twigoff(i, b), dd); 45 | } 46 | } 47 | } else { 48 | printf("Tdump%*s leaf %p\n", d, "", 49 | (void *)t); 50 | printf("Tdump%*s leaf key %p %s\n", d, "", 51 | (const void *)Tleaf_key(t), Tleaf_key(t)); 52 | printf("Tdump%*s leaf val %p\n", d, "", 53 | (void *)Tleaf_val(t)); 54 | } 55 | } 56 | 57 | void 58 | Tdump(Tbl *tbl) { 59 | printf("Tdump root %p\n", (void*)tbl); 60 | if(tbl != NULL) 61 | dump_rec(tbl, 0); 62 | } 63 | 64 | static void 65 | size_rec(Trie *t, uint d, 66 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 67 | *rsize += sizeof(*t); 68 | Tindex i = t->index; 69 | if(Tindex_branch(i)) { 70 | *rbranches += 1; 71 | for(uint s = 0; s < 32; s++) { 72 | Tbitmap b = 1U << s; 73 | if(hastwig(i, b)) 74 | size_rec(Tbranch_twigs(t) + twigoff(i, b), 75 | d+1, rsize, rdepth, rbranches, rleaves); 76 | } 77 | } else { 78 | *rleaves += 1; 79 | *rdepth += d; 80 | } 81 | } 82 | 83 | void 84 | Tsize(Tbl *tbl, const char **rtype, 85 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t 
*rleaves) { 86 | *rtype = "rc"; 87 | *rsize = *rdepth = *rbranches = *rleaves = 0; 88 | if(tbl != NULL) 89 | size_rec(tbl, 0, rsize, rdepth, rbranches, rleaves); 90 | } 91 | -------------------------------------------------------------------------------- /rc.c: -------------------------------------------------------------------------------- 1 | // rc.h: quintet bit popcount patricia tries, with rib compression 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "Tbl.h" 15 | #include "rc.h" 16 | 17 | bool 18 | Tgetkv(Tbl *t, const char *key, size_t len, const char **pkey, void **pval) { 19 | if(t == NULL) 20 | return(false); 21 | while(isbranch(t)) { 22 | // step into trunk 23 | Trie *twigs = t->ptr; 24 | __builtin_prefetch(twigs); 25 | Tindex i = t->index; 26 | t = NULL; 27 | for(;;) { 28 | // examine this branch 29 | byte n = nibble(i, key, len); 30 | Tbitmap b = 1U << n; 31 | if(hastwig(i, b)) { 32 | t = twigs + twigoff(i, b); 33 | break; // to outer loop 34 | } else if(Tindex_concat(i) == n) { 35 | uint max = popcount(Tindex_bitmap(i)); 36 | // step along trunk 37 | Tindex *ip = (void*)(twigs + max); i = *ip; 38 | twigs = (void*)(ip+1); 39 | assert(Tindex_branch(i)); 40 | continue; // inner loop 41 | } else { 42 | return(false); 43 | } 44 | } 45 | } 46 | if(strcmp(key, Tleaf_key(t)) != 0) 47 | return(false); 48 | *pkey = Tleaf_key(t); 49 | *pval = Tleaf_val(t); 50 | return(true); 51 | } 52 | 53 | static bool 54 | next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) { 55 | Tindex i = t->index; 56 | if(Tindex_branch(i)) { 57 | // Recurse to find either this leaf (*pkey != NULL) 58 | // or the next one (*pkey == NULL). 
59 | Tbitmap b = 1U << nibble(i, *pkey, *plen); 60 | uint s, m; TWIGOFFMAX(s, m, i, b); 61 | for(; s < m; s++) 62 | if(next_rec(Tbranch_twigs(t)+s, pkey, plen, pval)) 63 | return(true); 64 | return(false); 65 | } 66 | // We have found the next leaf. 67 | if(*pkey == NULL) { 68 | *pkey = Tleaf_key(t); 69 | *plen = strlen(*pkey); 70 | *pval = Tleaf_val(t); 71 | return(true); 72 | } 73 | // We have found this leaf, so start looking for the next one. 74 | if(strcmp(*pkey, Tleaf_key(t)) == 0) { 75 | *pkey = NULL; 76 | *plen = 0; 77 | return(false); 78 | } 79 | // No match. 80 | return(false); 81 | } 82 | 83 | bool 84 | Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) { 85 | if(tbl == NULL) { 86 | *pkey = NULL; 87 | *plen = 0; 88 | return(false); 89 | } 90 | return(next_rec(tbl, pkey, plen, pval)); 91 | } 92 | 93 | static size_t 94 | trunksize(Trie *t) { 95 | size_t s = 0; 96 | Tindex i = t->index; 97 | Trie *twigs = t->ptr; 98 | for(;;) { 99 | assert(Tindex_branch(i)); 100 | uint max = popcount(Tindex_bitmap(i)); 101 | s += max * sizeof(Trie); 102 | if(trunkend(i)) 103 | return(s); 104 | Tindex *ip = (void*)(twigs + max); 105 | s += sizeof(Tindex); 106 | i = *ip++; 107 | twigs = (void*)ip; 108 | } 109 | } 110 | 111 | static void * 112 | mdelete(void *vbase, size_t size, void *vsplit, size_t loss) { 113 | byte *base = vbase, *split = vsplit; 114 | assert(size > loss); 115 | assert(base <= split); 116 | assert(split < base + size); 117 | assert(split + loss <= base + size); 118 | size_t suffix = (base + size) - (split + loss); 119 | memmove(split, split + loss, suffix); 120 | // If realloc() fails, continue to use the oversized allocation. 121 | void *maybe = realloc(base, size - loss); 122 | return(maybe ?
maybe : base); 123 | } 124 | 125 | static void * 126 | mtrimsert(void *vbase, size_t size, size_t trim, void *vsplit, 127 | void *vinsert, size_t gain) { 128 | byte *base = vbase, *split = vsplit, *insert = vinsert; 129 | assert(size > trim); 130 | assert(base <= split); 131 | assert(split < base + size); 132 | size -= trim; 133 | size_t prefix = split - base; 134 | size_t suffix = (base + size) - split; 135 | void *maybe = realloc(base, size + gain); 136 | if(maybe != NULL) 137 | base = maybe; 138 | else if(gain > trim) 139 | return(NULL); 140 | // If realloc() fails, continue to use the old allocation 141 | // provided we will not gain more than we trimmed. 142 | split = base + prefix; 143 | memmove(split + gain, split, suffix); 144 | memmove(split, insert, gain); 145 | return(base); 146 | } 147 | 148 | static void * 149 | minsert(void *base, size_t size, void *split, void *insert, size_t gain) { 150 | return(mtrimsert(base, size, 0, split, insert, gain)); 151 | } 152 | 153 | Tbl * 154 | Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 155 | if(tbl == NULL) 156 | return(NULL); 157 | // parent and grandparent twigs of the current trunk 158 | Trie *p = NULL, *gp = NULL; 159 | // i abbreviates *ip; b is always wrt i 160 | Tindex *ip, i; 161 | Tbitmap b; 162 | // i and twigs start off as elements of p 163 | // then bump along the trunk together 164 | Trie *twigs; 165 | // previous ip 166 | Tindex *pip = NULL; 167 | // t is a twig of the current branch we might delete or follow 168 | Trie *t = tbl; 169 | while(isbranch(t)) { 170 | // step into next trunk 171 | gp = p; p = t; t = NULL; 172 | pip = ip; ip = &p->index; i = *ip; 173 | twigs = p->ptr; 174 | __builtin_prefetch(twigs); 175 | for(;;) { 176 | // examine this branch 177 | byte n = nibble(i, key, len); 178 | b = 1U << n; 179 | if(hastwig(i, b)) { 180 | t = twigs + twigoff(i, b); 181 | break; // to outer loop 182 | } else if(Tindex_concat(i) == n) { 183 | uint max = 
popcount(Tindex_bitmap(i)); 184 | // step along trunk 185 | pip = ip; ip = (void*)(twigs + max); i = *ip; 186 | twigs = (void*)(ip+1); 187 | assert(Tindex_branch(i)); 188 | continue; // inner loop 189 | } else { 190 | return(tbl); 191 | } 192 | } 193 | } 194 | if(strcmp(key, Tleaf_key(t)) != 0) 195 | return(tbl); 196 | *pkey = Tleaf_key(t); 197 | *pval = Tleaf_val(t); 198 | if(p == NULL) { 199 | free(tbl); 200 | return(NULL); 201 | } 202 | Trie *trunk = Tbranch_twigs(p); 203 | uint s = trunksize(p); 204 | assert(trunk <= t && t < trunk+s); 205 | uint m = popcount(Tindex_bitmap(i)); 206 | if(m == 1) { 207 | // Our twig is the last in a rib branch, so we need 208 | // to remove the whole branch from the trunk. The 209 | // following index word, for the concatenated branch, 210 | // needs to be moved to where our index word was. 211 | *ip = *(Tindex *)(t + 1); 212 | // If our branch was indirect, this will update the pointer 213 | // properly; if our branch was concatenated we do not have a 214 | // pointer to update. 215 | Tset_twigs(p, mdelete(trunk, s, t, 216 | sizeof(Trie) + sizeof(Tindex))); 217 | return(tbl); 218 | } 219 | if(m == 2 && trunkend(i)) { 220 | // We need to move the other twig into its parent. 221 | // There should be two leaves in this situation, but if 222 | // realloc() fails when we want to concatenate two trunks, we 223 | // will be unable to keep the trie as tight as we want. So we 224 | // need to allow for trunks that end with one branch. 225 | if(ip != &p->index) { 226 | // We were concatenated, so we need to shift the 227 | // other twig into the preceding twig array. This 228 | // will normally convert the preceding twig array 229 | // into all twigs. The abnormal case should be 230 | // vanishingly rare, so don't worry about it. 231 | void *end = (byte *)trunk + s; 232 | Trie save = t[t + 1 == end ? -1 : +1]; 233 | // Update preceding index. 
234 | i = *pip; 235 | b = 1U << Tindex_concat(i); 236 | t = (Trie *)(pip + 1) + twigoff(i, b); 237 | *pip = Tbitmap_add(i, b); 238 | Tset_twigs(p, mtrimsert(trunk, s, 239 | sizeof(Tindex) + sizeof(Trie) * 2, 240 | t, &save, sizeof(Trie))); 241 | return(tbl); 242 | } 243 | // We were an indirect branch, so p is the parent. 244 | *p = t[t == trunk ? +1 : -1]; 245 | free(trunk); 246 | // We probably changed a branch into a leaf, which means 247 | // there might be only one other branch in the twig array 248 | // containing p, in which case we need to concatenate the 249 | // other branch's trunk onto its parent. We need to recover 250 | // the location of the twig array so we can scan it. 251 | i = *pip; 252 | b = 1U << nibble(i, key, len); 253 | twigs = p - twigoff(i, b); 254 | m = popcount(Tindex_bitmap(i)); 255 | uint n = 0; 256 | for(t = twigs; t < twigs + m; t++) 257 | if(isbranch(t)) 258 | p = t, n++; 259 | if(n != 1) 260 | return(tbl); 261 | // Now we need to concatenate p's trunk onto gp's trunk. 262 | uint gs = trunksize(gp); 263 | s = trunksize(p); 264 | Trie save = *p; 265 | trunk = Tbranch_twigs(gp); 266 | size_t split = (byte*)p - (byte*)trunk; 267 | byte *gt = realloc(trunk, gs + s 268 | - sizeof(Trie) + sizeof(Tindex)); 269 | if(gt == NULL) { 270 | // Well, we can't concatenate them this time, 271 | // maybe there will be another opportunity. 272 | return(tbl); 273 | } 274 | // Delete p from the twigs 275 | memmove(gt + split, gt + split + sizeof(Trie), 276 | gs - split - sizeof(Trie)); 277 | // Was probably moved by realloc() 278 | pip = (Tindex*)( gt + ((byte*)pip - (byte*)trunk) ); 279 | // The index must say p is a concatenated branch 280 | n = nibble(i, key, len); 281 | b = 1U << n; 282 | *pip = Tindex_new(Tindex_shift(i), Tindex_offset(i), 283 | n, Tindex_bitmap(i) & ~b); 284 | // Place concatenated index word 285 | gs -= sizeof(Trie); 286 | ip = (Tindex*)(gt + gs); 287 | *ip = save.index; 288 | // Concatenate! 
289 | memmove(ip+1, save.ptr, s); 290 | free(save.ptr); 291 | return(tbl); 292 | } 293 | // Usual case 294 | *ip = Tbitmap_del(i, b); 295 | Tset_twigs(p, mdelete(trunk, s, t, sizeof(Trie))); 296 | return(tbl); 297 | } 298 | 299 | Tbl * 300 | Tsetl(Tbl *tbl, const char *key, size_t len, void *val) { 301 | if(Tindex_branch((Tindex)val) || len > Tmaxlen) { 302 | errno = EINVAL; 303 | return(NULL); 304 | } 305 | if(val == NULL) 306 | return(Tdell(tbl, key, len)); 307 | // First leaf in an empty tbl? 308 | if(tbl == NULL) { 309 | tbl = malloc(sizeof(*tbl)); 310 | if(tbl == NULL) return(NULL); 311 | Tset_key(tbl, key); 312 | Tset_val(tbl, val); 313 | return(tbl); 314 | } 315 | Trie *t = tbl; 316 | // Find the most similar leaf node in the trie. We will compare 317 | // its key with our new key to find the first differing nibble, 318 | // which can be at a lower index than the point at which we 319 | // detect a difference. 320 | while(isbranch(t)) { 321 | // step into trunk 322 | Trie *twigs = t->ptr; 323 | __builtin_prefetch(twigs); 324 | Tindex i = t->index; 325 | t = NULL; 326 | for(;;) { 327 | // examine this branch 328 | byte n = nibble(i, key, len); 329 | Tbitmap b = 1U << n; 330 | if(hastwig(i, b)) { 331 | t = twigs + twigoff(i, b); 332 | break; // to outer loop 333 | } else if(Tindex_concat(i) == n) { 334 | uint max = popcount(Tindex_bitmap(i)); 335 | // step along trunk 336 | Tindex *ip = (void*)(twigs + max); i = *ip; 337 | twigs = (void*)(ip+1); 338 | assert(Tindex_branch(i)); 339 | continue; // inner loop 340 | } else { 341 | // Even if our key is missing from this branch 342 | // we need to keep iterating down to a leaf. It 343 | // doesn't matter which twig we choose since the 344 | // keys are all the same up to this index. 345 | t = twigs; 346 | break; // to outer loop 347 | } 348 | } 349 | } 350 | // Do the keys differ, and if so, where? 
351 | uint off, xor, shf; 352 | const char *tkey = Tleaf_key(t); 353 | for(off = 0; off <= len; off++) { 354 | xor = (byte)key[off] ^ (byte)tkey[off]; 355 | if(xor != 0) goto newkey; 356 | } 357 | Tset_val(t, val); 358 | return(tbl); 359 | newkey:; // We have the branch's byte index; what is its chunk index? 360 | uint bit = off * 8 + (uint)__builtin_clz(xor) + 8 - sizeof(uint) * 8; 361 | uint qo = bit / 5; 362 | off = qo * 5 / 8; 363 | shf = qo * 5 % 8; 364 | // re-index keys with adjusted offset 365 | Tbitmap nb = 1U << knybble(key,off,shf); 366 | Tbitmap tb = 1U << knybble(tkey,off,shf); 367 | // Prepare the new leaf. 368 | Trie nt; 369 | Tset_key(&nt, key); 370 | Tset_val(&nt, val); 371 | // Find where to insert a branch or grow an existing branch. 372 | t = tbl; 373 | Tindex i = 0; 374 | while(isbranch(t)) { 375 | // step into trunk 376 | Trie *twigs = t->ptr; 377 | __builtin_prefetch(twigs); 378 | i = t->index; 379 | t = NULL; 380 | for(;;) { 381 | // examine this branch 382 | if(off == Tindex_offset(i) && shf == Tindex_shift(i)) 383 | goto growbranch; 384 | if(off == Tindex_offset(i) && shf < Tindex_shift(i)) 385 | goto newbranch; 386 | if(off < Tindex_offset(i)) 387 | goto newbranch; 388 | byte n = nibble(i, key, len); 389 | Tbitmap b = 1U << n; 390 | if(hastwig(i, b)) { 391 | t = twigs + twigoff(i, b); 392 | break; // to outer loop 393 | } else if(Tindex_concat(i) == n) { 394 | uint max = popcount(Tindex_bitmap(i)); 395 | // step along trunk 396 | Tindex *ip = (void*)(twigs + max); i = *ip; 397 | twigs = (void*)(ip+1); 398 | assert(Tindex_branch(i)); 399 | continue; // inner loop 400 | } else { 401 | assert(false); 402 | } 403 | } 404 | } 405 | newbranch:; 406 | Trie *twigs = malloc(sizeof(Trie) * 2); 407 | if(twigs == NULL) return(NULL); 408 | i = Tindex_new(shf, off, nb | tb); 409 | twigs[twigoff(i, nb)] = nt; 410 | twigs[twigoff(i, tb)] = *t; 411 | Tset_twigs(t, twigs); 412 | Tset_index(t, i); 413 | return(tbl); 414 | growbranch:; 415 | 
assert(!hastwig(i, nb)); 416 | uint s, m; TWIGOFFMAX(s, m, i, nb); 417 | twigs = realloc(Tbranch_twigs(t), sizeof(Trie) * (m + 1)); 418 | if(twigs == NULL) return(NULL); 419 | memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s)); 420 | memmove(twigs+s, &nt, sizeof(Trie)); 421 | Tset_twigs(t, twigs); 422 | Tset_index(t, Tbitmap_add(i, nb)); 423 | return(tbl); 424 | } 425 | -------------------------------------------------------------------------------- /rc.h: -------------------------------------------------------------------------------- 1 | // rc.h: quintet bit popcount patricia tries, with rib compression 2 | // 3 | // Derived from the "fn" five-bit new variant - see the comments at 4 | // the top of fn.h for notes on terminology. 5 | // 6 | // See notes-rib-compression for an overview. 7 | // 8 | // Written by Tony Finch 9 | // You may do anything with this. It has no warranty. 10 | // 11 | 12 | typedef unsigned char byte; 13 | typedef unsigned int uint; 14 | 15 | typedef uint32_t Tbitmap; 16 | typedef uint64_t Tindex; 17 | 18 | const char *dump_bitmap(Tbitmap w); 19 | 20 | static inline uint 21 | byte_me(char c) { 22 | return(c & 0xFF); 23 | } 24 | 25 | static inline uint 26 | word_up(const char *p) { 27 | uint w = byte_me(p[0]) << 8; 28 | if(w) w |= byte_me(p[1]); 29 | return(w); 30 | } 31 | 32 | #if defined(HAVE_SLOW_POPCOUNT) 33 | 34 | static inline uint 35 | popcount(Tbitmap w) { 36 | w -= (w >> 1) & 0x55555555; 37 | w = (w & 0x33333333) + ((w >> 2) & 0x33333333); 38 | w = (w + (w >> 4)) & 0x0F0F0F0F; 39 | w = (w * 0x01010101) >> 24; 40 | return(w); 41 | } 42 | 43 | #else 44 | 45 | static inline uint 46 | popcount(Tbitmap w) { 47 | return((uint)__builtin_popcount(w)); 48 | } 49 | 50 | #endif 51 | 52 | typedef struct Tbl { 53 | Tindex index; 54 | void *ptr; 55 | } Trie; 56 | 57 | // accessor functions, except for the index word 58 | 59 | #define Tset_field(cast, elem, type, field) \ 60 | static inline void \ 61 | Tset_##field(Trie *t, type field) { \ 62 | 
t->elem = cast field; \ 63 | } \ 64 | struct dummy 65 | 66 | Tset_field((void *), ptr, Trie *, twigs); 67 | Tset_field((Tindex), index, Tindex, index); 68 | Tset_field((void *)(uint64_t), ptr, const char *, key); 69 | Tset_field((Tindex), index, void *, val); 70 | 71 | static inline bool Tindex_branch(Tindex i); 72 | 73 | static inline bool isbranch(Trie *t) { 74 | return(Tindex_branch(t->index)); 75 | } 76 | 77 | #ifdef WITH_EXTRA_CHECKS 78 | #define Tbranch(t) assert(isbranch(t)) 79 | #define Tleaf(t) assert(!isbranch(t)) 80 | #else 81 | #define Tbranch(t) 82 | #define Tleaf(t) 83 | #endif 84 | 85 | #define Tcheck_get(type, tag, field, expr) \ 86 | static inline type \ 87 | tag##_##field(Trie *t) { \ 88 | tag(t); \ 89 | return(expr); \ 90 | } \ 91 | struct dummy 92 | 93 | Tcheck_get(Trie *, Tbranch, twigs, t->ptr); 94 | Tcheck_get(const char *, Tleaf, key, t->ptr); 95 | Tcheck_get(void *, Tleaf, val, (void*)t->index); 96 | 97 | // index word layout 98 | 99 | #define Tix_width_branch 1 100 | #define Tix_width_shift 3 101 | #define Tix_width_offset 23 102 | #define Tix_width_concat 5 103 | #define Tix_width_bitmap 32 104 | 105 | #define Tix_base_branch 0 106 | #define Tix_base_shift (Tix_base_branch + Tix_width_branch) 107 | #define Tix_base_offset (Tix_base_shift + Tix_width_shift) 108 | #define Tix_base_concat (Tix_base_offset + Tix_width_offset) 109 | #define Tix_base_bitmap (Tix_base_concat + Tix_width_concat) 110 | 111 | #define Tix_place(field) ((Tindex)(field) << Tix_base_##field) 112 | 113 | #define Tix_mask(field) ((1ULL << Tix_width_##field) - 1ULL) 114 | 115 | #define Tunmask(field,index) ((uint)(((index) >> Tix_base_##field) \ 116 | & Tix_mask(field))) 117 | 118 | #define Tmaxlen Tix_mask(offset) 119 | 120 | // index word accessor functions 121 | 122 | #define Tindex_get(type, field) \ 123 | static inline type \ 124 | Tindex_##field(Tindex i) { \ 125 | return(Tunmask(field, i)); \ 126 | } \ 127 | struct dummy 128 | 129 | Tindex_get(bool, branch); 130 | 
// Instantiate the Tindex_get accessor template (defined above) for each
// field packed into the 64-bit index word.
Tindex_get(uint, shift);
Tindex_get(uint, offset);
Tindex_get(byte, concat);
Tindex_get(Tbitmap, bitmap);

// Build a branch index word from its component fields.  The branch flag
// is always set to 1 here, so a word produced by Tindex_new() always
// satisfies Tindex_branch().
//
// NOTE(review): rc.c's Tsetl calls this as Tindex_new(shf, off, nb | tb)
// with only three arguments — the concat field is missing there.  One of
// the two is out of date (rc is work in progress); confirm which
// signature is intended.
static inline Tindex
Tindex_new(uint shift, uint offset, uint concat, Tbitmap bitmap) {
	uint branch = 1;
	return( Tix_place(branch) |
		Tix_place(shift) |
		Tix_place(offset) |
		Tix_place(concat) |
		Tix_place(bitmap) );
}

// Return a copy of index word i with the given bitmap bits set.
static inline Tindex
Tbitmap_add(Tindex i, Tbitmap bitmap) {
	return(i | Tix_place(bitmap));
}

// Return a copy of index word i with the given bitmap bits cleared.
static inline Tindex
Tbitmap_del(Tindex i, Tbitmap bitmap) {
	return(i & ~Tix_place(bitmap));
}

// sanity checks!

// Fallback static_assert for pre-C11 compilers: a false condition
// produces a negative array size, which is a compile-time error.
#ifndef static_assert
#define static_assert_cat(a,b) a##b
#define static_assert_name(line) static_assert_cat(static_assert_,line)
#define static_assert(must_be_true,message) \
	static const void *static_assert_name(__LINE__) \
		[must_be_true ? 2 : -1] = { \
			message, \
			&static_assert_name(__LINE__) }
#endif

static_assert(Tix_base_bitmap + Tix_width_bitmap == 64,
	"index fields must fill a 64 bit word");

static_assert(Tunmask(bitmap,0x1234567800000000ULL) == 0x12345678,
	"extracting the bitmap works");

static_assert(Tunmask(offset,0x0420ULL) == 0x42,
	"extracting the offset works");

static_assert(Tunmask(shift,0xFEDCBAULL) == 5,
	"extracting the shift works");

// Diagram of how successive 5-bit chunks are aligned within key bytes:
// ..key[o%5==0].. ..key[o%5==1].. ..key[o%5==2].. ..key[o%5==3].. ..key[o%5==4]..
180 | // | | | | | | 181 | // 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 182 | // | | | | | | | | | 183 | // shift=0 shift=5 shift=2 shift=7 shift=4 shift=1 shift=6 shift=3 184 | 185 | static inline byte 186 | knybble(const char *key, uint off, uint shift) { 187 | uint word = word_up(key+off); 188 | uint right = 16 - 5 - shift; 189 | return((word >> right) & 0x1FU); 190 | } 191 | 192 | static inline byte 193 | nibble(Tindex i, const char *key, size_t len) { 194 | uint off = Tindex_offset(i); 195 | if(off >= len) return(0); 196 | else return(knybble(key, off, Tindex_shift(i))); 197 | } 198 | 199 | static inline bool 200 | hastwig(Tindex i, Tbitmap bit) { 201 | return(Tindex_bitmap(i) & bit); 202 | } 203 | 204 | static inline bool 205 | trunkend(Tindex i) { 206 | Tbitmap b = 1U << Tindex_concat(i); 207 | return(hastwig(i, b)); 208 | } 209 | 210 | static inline uint 211 | twigoff(Tindex i, Tbitmap bit) { 212 | return(popcount(Tindex_bitmap(i) & (bit-1))); 213 | } 214 | 215 | #define TWIGOFFMAX(off, max, i, b) do { \ 216 | off = twigoff(i, b); \ 217 | max = popcount(Tindex_bitmap(i)); \ 218 | } while(0) 219 | -------------------------------------------------------------------------------- /siphash24.c: -------------------------------------------------------------------------------- 1 | /* 2 | SipHash reference C implementation 3 | 4 | Copyright (c) 2012-2014 Jean-Philippe Aumasson 5 | Copyright (c) 2012-2014 Daniel J. Bernstein 6 | 7 | To the extent possible under law, the author(s) have dedicated all copyright 8 | and related and neighboring rights to this software to the public domain 9 | worldwide. This software is distributed without any warranty. 10 | 11 | You should have received a copy of the CC0 Public Domain Dedication along with 12 | this software. If not, see . 
13 | */ 14 | #include 15 | #include 16 | #include 17 | 18 | /* default: SipHash-2-4 */ 19 | #define cROUNDS 2 20 | #define dROUNDS 4 21 | 22 | #define ROTL(x,b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) ) 23 | 24 | #define U32TO8_LE(p, v) \ 25 | (p)[0] = (uint8_t)((v) ); (p)[1] = (uint8_t)((v) >> 8); \ 26 | (p)[2] = (uint8_t)((v) >> 16); (p)[3] = (uint8_t)((v) >> 24); 27 | 28 | #define U64TO8_LE(p, v) \ 29 | U32TO8_LE((p), (uint32_t)((v) )); \ 30 | U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); 31 | 32 | #define U8TO64_LE(p) \ 33 | (((uint64_t)((p)[0]) ) | \ 34 | ((uint64_t)((p)[1]) << 8) | \ 35 | ((uint64_t)((p)[2]) << 16) | \ 36 | ((uint64_t)((p)[3]) << 24) | \ 37 | ((uint64_t)((p)[4]) << 32) | \ 38 | ((uint64_t)((p)[5]) << 40) | \ 39 | ((uint64_t)((p)[6]) << 48) | \ 40 | ((uint64_t)((p)[7]) << 56)) 41 | 42 | #define SIPROUND \ 43 | do { \ 44 | v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \ 45 | v2 += v3; v3=ROTL(v3,16); v3 ^= v2; \ 46 | v0 += v3; v3=ROTL(v3,21); v3 ^= v0; \ 47 | v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \ 48 | } while(0) 49 | 50 | #ifdef DEBUG 51 | #define TRACE \ 52 | do { \ 53 | printf( "(%3d) v0 %08x %08x\n", \ 54 | ( int )inlen, ( uint32_t )( v0 >> 32 ), ( uint32_t )v0 ); \ 55 | printf( "(%3d) v1 %08x %08x\n", \ 56 | ( int )inlen, ( uint32_t )( v1 >> 32 ), ( uint32_t )v1 ); \ 57 | printf( "(%3d) v2 %08x %08x\n", \ 58 | ( int )inlen, ( uint32_t )( v2 >> 32 ), ( uint32_t )v2 ); \ 59 | printf( "(%3d) v3 %08x %08x\n", \ 60 | ( int )inlen, ( uint32_t )( v3 >> 32 ), ( uint32_t )v3 ); \ 61 | } while(0) 62 | #else 63 | #define TRACE 64 | #endif 65 | 66 | int siphash( uint8_t *out, const uint8_t *in, uint64_t inlen, const uint8_t *k ) 67 | { 68 | /* "somepseudorandomlygeneratedbytes" */ 69 | uint64_t v0 = 0x736f6d6570736575ULL; 70 | uint64_t v1 = 0x646f72616e646f6dULL; 71 | uint64_t v2 = 0x6c7967656e657261ULL; 72 | uint64_t v3 = 0x7465646279746573ULL; 73 | uint64_t b; 74 | uint64_t k0 = U8TO64_LE( k ); 75 | uint64_t k1 = U8TO64_LE( 
k + 8 ); 76 | uint64_t m; 77 | int i; 78 | const uint8_t *end = in + inlen - ( inlen % sizeof( uint64_t ) ); 79 | const int left = inlen & 7; 80 | b = ( ( uint64_t )inlen ) << 56; 81 | v3 ^= k1; 82 | v2 ^= k0; 83 | v1 ^= k1; 84 | v0 ^= k0; 85 | 86 | #ifdef DOUBLE 87 | v1 ^= 0xee; 88 | #endif 89 | 90 | for ( ; in != end; in += 8 ) 91 | { 92 | m = U8TO64_LE( in ); 93 | v3 ^= m; 94 | 95 | TRACE; 96 | for( i=0; i 9 | Read the and randomly pick lines from it, 10 | then emit lines of input for Tbl-test.{c,pl}. 11 | EOF 12 | } 13 | 14 | my $i = shift; 15 | my $o = shift; 16 | 17 | my @p = qw( - + * * * * ); 18 | my @i = <>; 19 | my @a; 20 | 21 | push @a, splice @i, (int rand @i), 1 while $i--; 22 | print $p[int rand @p], $a[int rand @a] while $o--; 23 | -------------------------------------------------------------------------------- /test-once.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | if [ ! -f test-in ] 4 | then printf 1>&2 "generating..." 5 | ./test-gen.pl $1 $2 $3 >test-in 6 | printf 1>&2 "done\n" 7 | fi 8 | shift 3 9 | time ./test.pl test-out-pl 10 | for i in "$@" 11 | do time ./test-$i test-out-$i 12 | done 13 | for i in "$@" 14 | do cmp test-out-pl test-out-$i 15 | done 16 | rm -f test-in test-out-?? 17 | -------------------------------------------------------------------------------- /test.c: -------------------------------------------------------------------------------- 1 | // test.c: test table implementations. 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 
5 | // 6 | 7 | #define _WITH_GETLINE 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "Tbl.h" 17 | 18 | static const char *progname; 19 | static bool debug = false; 20 | 21 | static void 22 | die(const char *cause) { 23 | fprintf(stderr, "%s: %s: %s\n", progname, cause, strerror(errno)); 24 | exit(1); 25 | } 26 | 27 | static void 28 | usage(void) { 29 | fprintf(stderr, 30 | "usage: %s [input]\n" 31 | " The input is a series of lines starting with a + or a - to add\n" 32 | " or delete a key from the table. The rest of the line is the key.\n" 33 | , progname); 34 | exit(1); 35 | } 36 | 37 | static void 38 | trace(Tbl *t, int s, const char *key) { 39 | if(debug) { 40 | printf("%c%s\n", s, key); 41 | Tdump(t); 42 | } 43 | } 44 | 45 | int 46 | main(int argc, char *argv[]) { 47 | progname = argv[0]; 48 | if(argc > 1 && strcmp(argv[1], "-d") == 0) { 49 | debug = true; 50 | argv++; 51 | argc--; 52 | } 53 | if(argc > 2) 54 | usage(); 55 | if(argc == 2) { 56 | if(argv[1][0] == '-') 57 | usage(); 58 | if(freopen(argv[1], "r", stdin) == NULL) 59 | die("open"); 60 | } 61 | Tbl *t = NULL; 62 | for (;;) { 63 | char *key = NULL; 64 | size_t len = 0; 65 | int s = getchar(); 66 | if(s < 0) break; 67 | ssize_t n = getline(&key, &len, stdin); 68 | if(n < 0) break; 69 | else len = (size_t)n; 70 | if(len > 0 && key[len-1] == '\n') 71 | key[--len] = '\0'; 72 | switch(s) { 73 | default: 74 | usage(); 75 | case('*'): 76 | if(Tget(t, key)) 77 | putchar('*'); 78 | else 79 | putchar('='); 80 | continue; 81 | case('+'): 82 | errno = 0; 83 | void *val = Tget(t, key); 84 | t = Tsetl(t, key, len, val == NULL ? 
key : val); 85 | if(t == NULL) 86 | die("Tbl"); 87 | if(!val) 88 | trace(t, s, key); 89 | else 90 | free(key); 91 | continue; 92 | case('-'): 93 | errno = 0; 94 | const char *rkey = NULL; 95 | void *rval = NULL; 96 | t = Tdelkv(t, key, len, &rkey, &rval); 97 | if(t == NULL && errno != 0) 98 | die("Tbl"); 99 | if(rkey) 100 | trace(t, s, key); 101 | free(key); 102 | free(rkey); 103 | continue; 104 | } 105 | } 106 | putchar('\n'); 107 | if(ferror(stdin)) 108 | die("read"); 109 | size_t size, depth, branches, leaves; 110 | const char *type; 111 | Tsize(t, &type, &size, &depth, &branches, &leaves); 112 | size_t overhead = size / sizeof(void*) - 2 * leaves; 113 | fprintf(stderr, "SIZE %s leaves=%zu branches=%zu overhead=%.2f depth=%.2f\n", 114 | type, leaves, branches, 115 | (double)overhead / leaves, 116 | (double)depth / leaves); 117 | const char *key = NULL; 118 | void *val = NULL, *prev = NULL; 119 | while(Tnext(t, &key, &val)) { 120 | assert(key == val); 121 | puts(key); 122 | if(prev) { 123 | t = Tdel(t, prev); 124 | trace(t, '!', prev); 125 | free(prev); 126 | } 127 | prev = val; 128 | } 129 | if(prev) { 130 | t = Tdel(t, prev); 131 | free(prev); 132 | } 133 | return(0); 134 | } 135 | -------------------------------------------------------------------------------- /test.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # like Tbl-test.c but written in perl to verify correctness 4 | 5 | use warnings; 6 | use strict; 7 | 8 | my %t; 9 | 10 | while(<>) { 11 | m{^([-+*])(.*)$}s or die "bad input line"; 12 | delete $t{$2} if $1 eq '-'; 13 | $t{$2} = 1 if $1 eq '+'; 14 | print $t{$2} ? "*" : "=" if $1 eq '*'; 15 | } 16 | print "\n"; 17 | print for sort keys %t; 18 | -------------------------------------------------------------------------------- /tinytocs.bib: -------------------------------------------------------------------------------- 1 | @article{patricia, 2 | author = {Donald R. 
Morrison}, 3 | title = {PATRICIA -- Practical Algorithm To Retrieve Information Coded in Alphanumeric}, 4 | journal = {J. ACM}, 5 | issue_date = {Oct. 1968}, 6 | volume = {15}, 7 | number = {4}, 8 | month = oct, 9 | year = {1968}, 10 | issn = {0004-5411}, 11 | pages = {514--534}, 12 | numpages = {21}, 13 | url = {http://doi.acm.org/10.1145/321479.321481}, 14 | doi = {10.1145/321479.321481}, 15 | acmid = {321481}, 16 | publisher = {ACM}, 17 | address = {New York, NY, USA}, 18 | } 19 | 20 | @techreport{bagwell, 21 | author = {Phil Bagwell}, 22 | title = {Ideal Hash Trees}, 23 | institution = {\'{E}cole polytechnique f\'{e}d\'{e}rale de Lausanne EPFL}, 24 | type = {LAMP-REPORT}, 25 | number = {2001-001} 26 | } 27 | 28 | @misc{djb, 29 | author = {Daniel J. Bernstein}, 30 | title = {Crit-bit trees}, 31 | note = {\url{http://cr.yp.to/critbit.html}}, 32 | year = 2004, 33 | } 34 | 35 | @misc{agl, 36 | author = {Adam Langley}, 37 | title = {Crit-bit trees}, 38 | note = {\url{https://github.com/agl/critbit}}, 39 | year = 2008, 40 | month = Sep, 41 | } 42 | 43 | @misc{qp, 44 | author = {Tony Finch}, 45 | title = {{QP} tries}, 46 | note = {\url{http://dotat.at/prog/qp/}}, 47 | year = 2015, 48 | month = Oct, 49 | } 50 | -------------------------------------------------------------------------------- /tinytocs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fanf2/qp/90fc7de8d0b3c9c1aaa5cd9d7b1765b122fad2e5/tinytocs.pdf -------------------------------------------------------------------------------- /tinytocs.tex: -------------------------------------------------------------------------------- 1 | \documentclass{tinytocs} 2 | 3 | \usepackage{url} 4 | 5 | \title{QP tries are smaller and faster\\ 6 | than crit-bit trees} 7 | 8 | \author{Tony Finch\\ 9 | \affaddr{University of Cambridge}\\ 10 | \email{dot@dotat.at}} 11 | 12 | \begin{document} 13 | 14 | \maketitle 15 | 16 | \abstract{ 17 | 18 | A trie data 
structure stores an ordered set of keys; the branching 19 | structure of a trie depends on the lexical properties of its keys 20 | independent of the order of insertion. Compact implementations of 21 | PATRICIA binary tries called crit-bit trees \cite{djb} have just two 22 | words of overhead per item stored. 23 | 24 | A hash array mapped trie (HAMT) \cite{bagwell} has wide fan-out, 25 | indexing each tree node using several hashed key bits; each node is 26 | compressed using the population count of a bitmap to omit NULL child 27 | pointers. Bagwell sketches an un-hashed pure trie variant of HAMT in 28 | section 5 but doesn't eliminate redundant single-child nodes like 29 | crit-bit trees. 30 | 31 | Our contribution, QP tries \cite{qp}, are similar to crit-bit trees 32 | but test 5 bits per indirection instead of 1, using the HAMT bitmap 33 | POPCNT trick to keep overhead to at most two 64 bit words per item. 34 | QP tries prefetch the child pointer array while calculating which 35 | child is next; this reduces indirection latency and increases 36 | performance by about 5\%. QP tries have variable-sized nodes, so 37 | stress memory allocation more than crit-bit trees, but are usually 38 | much cheaper in other respects. 39 | 40 | We created similar implementations of QP tries and crit-bit trees, 41 | and benchmarked them using lists of: English words; identifiers in 42 | the BIND9 source code; domain names from a university; Alexa top 43 | million domain names. We measured average: trie depth; space 44 | overhead per item; mutation and search time.
45 | 46 | } 47 | 48 | \tinybody{Typical QP trie\\ 49 | depth is 0.35-0.40\\ 50 | space is 0.5-0.6\\ 51 | time is 0.6-0.8\\ 52 | of equivalent crit-bit tree.} 53 | 54 | \bibliographystyle{abbrv} 55 | \bibliography{tinytocs} 56 | 57 | \end{document} 58 | -------------------------------------------------------------------------------- /wp-debug.c: -------------------------------------------------------------------------------- 1 | // wp-debug.c: wp trie debug support 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "Tbl.h" 14 | #include "wp.h" 15 | 16 | const char * 17 | dump_bitmap(Tbitmap w) { 18 | static char buf[64*3]; 19 | uint n = 0; 20 | n += snprintf(buf+n, sizeof(buf)-n, "("); 21 | for(uint i = 0; i < 64; i++) { 22 | Tbitmap b = 1ULL << i; 23 | if(w & b) 24 | n += snprintf(buf+n, sizeof(buf)-n, "%u,", i); 25 | } 26 | if(n > 1) 27 | buf[n-1] = ')'; 28 | return buf; 29 | } 30 | 31 | static void 32 | dump_rec(Trie *t, int d) { 33 | if(isbranch(t)) { 34 | printf("Tdump%*s branch %p %s %zu %d\n", d, "", t, 35 | dump_bitmap(t->branch.bitmap), 36 | (size_t)t->branch.index, t->branch.flags); 37 | int dd = 2 + t->branch.index * 6 + t->branch.flags - 1; 38 | assert(dd > d); 39 | for(uint i = 0; i < 64; i++) { 40 | Tbitmap b = 1ULL << i; 41 | if(hastwig(t, b)) { 42 | printf("Tdump%*s twig %d\n", d, "", i); 43 | dump_rec(twig(t, twigoff(t, b)), dd); 44 | } 45 | } 46 | } else { 47 | printf("Tdump%*s leaf %p\n", d, "", t); 48 | printf("Tdump%*s leaf key %p %s\n", d, "", 49 | t->leaf.key, t->leaf.key); 50 | printf("Tdump%*s leaf val %p\n", d, "", 51 | t->leaf.val); 52 | } 53 | } 54 | 55 | void 56 | Tdump(Tbl *tbl) { 57 | printf("Tdump root %p\n", tbl); 58 | if(tbl != NULL) 59 | dump_rec(&tbl->root, 0); 60 | } 61 | 62 | static void 63 | size_rec(Trie *t, uint d, 64 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 
65 | *rsize += sizeof(*t); 66 | if(isbranch(t)) { 67 | *rbranches += 1; 68 | for(uint i = 0; i < 64; i++) { 69 | Tbitmap b = 1ULL << i; 70 | if(hastwig(t, b)) 71 | size_rec(twig(t, twigoff(t, b)), 72 | d+1, rsize, rdepth, rbranches, rleaves); 73 | } 74 | } else { 75 | *rleaves += 1; 76 | *rdepth += d; 77 | } 78 | } 79 | 80 | void 81 | Tsize(Tbl *tbl, const char **rtype, 82 | size_t *rsize, size_t *rdepth, size_t *rbranches, size_t *rleaves) { 83 | *rtype = "wp"; 84 | *rsize = *rdepth = *rbranches = *rleaves = 0; 85 | if(tbl != NULL) 86 | size_rec(&tbl->root, 0, rsize, rdepth, rbranches, rleaves); 87 | } 88 | -------------------------------------------------------------------------------- /wp.c: -------------------------------------------------------------------------------- 1 | // qp.c: tables implemented with word-wide popcount patricia tries. 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "Tbl.h" 15 | #include "wp.h" 16 | 17 | bool 18 | Tgetkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 19 | if(tbl == NULL) 20 | return(false); 21 | Trie *t = &tbl->root; 22 | while(isbranch(t)) { 23 | __builtin_prefetch(t->branch.twigs); 24 | Tbitmap b = twigbit(t, key, len); 25 | if(!hastwig(t, b)) 26 | return(false); 27 | t = twig(t, twigoff(t, b)); 28 | } 29 | if(strcmp(key, t->leaf.key) != 0) 30 | return(false); 31 | *pkey = t->leaf.key; 32 | *pval = t->leaf.val; 33 | return(true); 34 | } 35 | 36 | static bool 37 | next_rec(Trie *t, const char **pkey, size_t *plen, void **pval) { 38 | if(isbranch(t)) { 39 | // Recurse to find either this leaf (*pkey != NULL) 40 | // or the next one (*pkey == NULL). 
41 | Tbitmap b = twigbit(t, *pkey, *plen); 42 | uint s, m; TWIGOFFMAX(s, m, t, b); 43 | for(; s < m; s++) 44 | if(next_rec(twig(t, s), pkey, plen, pval)) 45 | return(true); 46 | return(false); 47 | } 48 | // We have found the next leaf. 49 | if(*pkey == NULL) { 50 | *pkey = t->leaf.key; 51 | *plen = strlen(*pkey); 52 | *pval = t->leaf.val; 53 | return(true); 54 | } 55 | // We have found this leaf, so start looking for the next one. 56 | if(strcmp(*pkey, t->leaf.key) == 0) { 57 | *pkey = NULL; 58 | *plen = 0; 59 | return(false); 60 | } 61 | // No match. 62 | return(false); 63 | } 64 | 65 | bool 66 | Tnextl(Tbl *tbl, const char **pkey, size_t *plen, void **pval) { 67 | if(tbl == NULL) { 68 | *pkey = NULL; 69 | *plen = 0; 70 | return(NULL); 71 | } 72 | return(next_rec(&tbl->root, pkey, plen, pval)); 73 | } 74 | 75 | Tbl * 76 | Tdelkv(Tbl *tbl, const char *key, size_t len, const char **pkey, void **pval) { 77 | if(tbl == NULL) 78 | return(NULL); 79 | Trie *t = &tbl->root, *p = NULL; 80 | Tbitmap b = 0; 81 | while(isbranch(t)) { 82 | __builtin_prefetch(t->branch.twigs); 83 | b = twigbit(t, key, len); 84 | if(!hastwig(t, b)) 85 | return(tbl); 86 | p = t; t = twig(t, twigoff(t, b)); 87 | } 88 | if(strcmp(key, t->leaf.key) != 0) 89 | return(tbl); 90 | *pkey = t->leaf.key; 91 | *pval = t->leaf.val; 92 | if(p == NULL) { 93 | free(tbl); 94 | return(NULL); 95 | } 96 | t = p; p = NULL; // Becuase t is the usual name 97 | uint s, m; TWIGOFFMAX(s, m, t, b); 98 | if(m == 2) { 99 | // Move the other twig to the parent branch. 100 | Trie *twigs = t->branch.twigs; 101 | *t = *twig(t, !s); 102 | free(twigs); 103 | return(tbl); 104 | } 105 | memmove(t->branch.twigs+s, t->branch.twigs+s+1, sizeof(Trie) * (m - s - 1)); 106 | t->branch.bitmap &= ~b; 107 | // We have now correctly removed the twig from the trie, so if 108 | // realloc() fails we can ignore it and continue to use the 109 | // slightly oversized twig array. 
110 | Trie *twigs = realloc(t->branch.twigs, sizeof(Trie) * (m - 1)); 111 | if(twigs != NULL) t->branch.twigs = twigs; 112 | return(tbl); 113 | } 114 | 115 | Tbl * 116 | Tsetl(Tbl *tbl, const char *key, size_t len, void *val) { 117 | if(val == NULL) 118 | return(Tdell(tbl, key, len)); 119 | // First leaf in an empty tbl? 120 | if(tbl == NULL) { 121 | tbl = malloc(sizeof(*tbl)); 122 | if(tbl == NULL) return(NULL); 123 | tbl->root.leaf.key = key; 124 | tbl->root.leaf.val = val; 125 | tbl->root.leaf.wasted = 0; 126 | return(tbl); 127 | } 128 | Trie *t = &tbl->root; 129 | // Find the most similar leaf node in the trie. We will compare 130 | // its key with our new key to find the first differing nibble, 131 | // which can be at a lower index than the point at which we 132 | // detect a difference. 133 | while(isbranch(t)) { 134 | __builtin_prefetch(t->branch.twigs); 135 | Tbitmap b = twigbit(t, key, len); 136 | // Even if our key is missing from this branch we need to 137 | // keep iterating down to a leaf. It doesn't matter which 138 | // twig we choose since the keys are all the same up to this 139 | // index. Note that blindly using twigoff(t, b) can cause 140 | // an out-of-bounds index if it equals twigmax(t). 141 | uint i = hastwig(t, b) ? twigoff(t, b) : 0; 142 | t = twig(t, i); 143 | } 144 | // Do the keys differ, and if so, where? 145 | size_t i; 146 | uint f; 147 | for(i = 0; i <= len; i++) { 148 | f = (byte)key[i] ^ (byte)t->leaf.key[i]; 149 | if(f != 0) goto newkey; 150 | } 151 | t->leaf.val = val; 152 | return(tbl); 153 | newkey:; // We have the branch's index; what are its flags? 154 | // Sometimes the first differing bits are in the low-order part 155 | // of a 6-bit chunk which overlaps two bytes. In these cases we 156 | // have to step back a byte so that the index points to the 157 | // first byte that overlaps the first differing 6-bit chunk. 158 | // See the diagram in wp.h ... This can probably be faster? 
159 | // Also, flags = shift | isbranch; and isbranch == 1. 160 | switch(i % 3) { 161 | case(0): f = (f & 0xFC) ? 1 : 7; break; 162 | case(1): f = (f & 0xF0) ? (i -= 1), 7 : 5; break; 163 | case(2): f = (f & 0xC0) ? (i -= 1), 5 : 3; break; 164 | } 165 | // re-index keys with adjusted i 166 | uint k1 = (byte)key[i] << 8; 167 | uint k2 = (byte)t->leaf.key[i] << 8; 168 | k1 |= (k1 ? (byte)key[i+1] : 0); 169 | k2 |= (k2 ? (byte)t->leaf.key[i+1] : 0); 170 | Tbitmap b1 = nibbit(k1, f); 171 | // Prepare the new leaf. 172 | Trie t1 = { .leaf = { .key = key, .val = val, .wasted = 0 } }; 173 | // Find where to insert a branch or grow an existing branch. 174 | t = &tbl->root; 175 | while(isbranch(t)) { 176 | __builtin_prefetch(t->branch.twigs); 177 | if(i == t->branch.index && f == t->branch.flags) 178 | goto growbranch; 179 | if(i == t->branch.index && f < t->branch.flags) 180 | goto newbranch; 181 | if(i < t->branch.index) 182 | goto newbranch; 183 | Tbitmap b = twigbit(t, key, len); 184 | assert(hastwig(t, b)); 185 | t = twig(t, twigoff(t, b)); 186 | } 187 | newbranch:; 188 | Trie *twigs = malloc(sizeof(Trie) * 2); 189 | if(twigs == NULL) return(NULL); 190 | Trie t2 = *t; // Save before overwriting. 
191 | Tbitmap b2 = nibbit(k2, f); 192 | t->branch.twigs = twigs; 193 | t->branch.flags = f; 194 | t->branch.index = i; 195 | t->branch.bitmap = b1 | b2; 196 | *twig(t, twigoff(t, b1)) = t1; 197 | *twig(t, twigoff(t, b2)) = t2; 198 | return(tbl); 199 | growbranch:; 200 | assert(!hastwig(t, b1)); 201 | uint s, m; TWIGOFFMAX(s, m, t, b1); 202 | twigs = realloc(t->branch.twigs, sizeof(Trie) * (m + 1)); 203 | if(twigs == NULL) return(NULL); 204 | memmove(twigs+s+1, twigs+s, sizeof(Trie) * (m - s)); 205 | memmove(twigs+s, &t1, sizeof(Trie)); 206 | t->branch.twigs = twigs; 207 | t->branch.bitmap |= b1; 208 | return(tbl); 209 | } 210 | -------------------------------------------------------------------------------- /wp.h: -------------------------------------------------------------------------------- 1 | // wp.h: word-wide popcount patricia tries 2 | // 3 | // Written by Tony Finch 4 | // You may do anything with this. It has no warranty. 5 | // 6 | 7 | // See qp.h for introductory comments about tries. 8 | // 9 | // The wp trie code is a straightforward clone-and-hack of the qp trie 10 | // code. The difference is that the key is used 6 bits at a time 11 | // instead of 4 bits, so the bitmap is 2^6 == 64 bits wide instead of 12 | // 2^4 == 16 bits wide. Trie nodes are three words instead of two words. 13 | // 14 | // These bigger nodes mean that (currently) space is wasted in the 15 | // leaf nodes. 
// It's possible to make better use of space by embedding key+value
// structures in the trie - see notes-generic-leaves.md

typedef unsigned char byte;
typedef unsigned int uint;

// One bit per possible 6-bit chunk value, so the bitmap is 64 bits wide.
typedef uint64_t Tbitmap;

const char *dump_bitmap(Tbitmap w);

#if defined(HAVE_SLOW_POPCOUNT)

// Portable SWAR popcount for targets without a fast POPCNT instruction.
static inline uint
popcount(Tbitmap w) {
	const uint64_t two_bit = 0x5555555555555555;
	const uint64_t four_bit = 0x3333333333333333;
	const uint64_t eight_bit = 0x0F0F0F0F0F0F0F0F;
	const uint64_t ones = 0x0101010101010101;
	w -= (w >> 1) & two_bit;
	w = (w & four_bit) + ((w >> 2) & four_bit);
	w = (w + (w >> 4)) & eight_bit;
	w = (w * ones) >> 56;
	return(w);
}

#else

// Let the compiler emit the native 64-bit population count.
static inline uint
popcount(Tbitmap w) {
	return((uint)__builtin_popcountll(w));
}

#endif

typedef struct Tleaf {
	const char *key;
	void *val;
	uint64_t wasted;
} Tleaf;

// flags & 1 == isbranch
// flags & 6 == shift

typedef struct Tbranch {
	union Trie *twigs;
	Tbitmap bitmap;
	uint64_t flags : 3,
		index : 61;
} Tbranch;

typedef union Trie {
	struct Tleaf leaf;
	struct Tbranch branch;
} Trie;

struct Tbl {
	union Trie root;
};

// A node is a branch exactly when the low flag bit is set; leaves keep
// this bit clear because their first word is a pointer.
static inline bool
isbranch(Trie *t) {
	return(t->branch.flags & 1);
}

// We need to extract 6 bits from the key, 2^6 == 64
//
// Diagram of possible alignments of 6 bits relative to bytes.
// Bits are numbered little-endian from 0, like in a register.
// Key indexes and shifts are numbered big-endian, so that they
// increase as we go along the key from left to right.
//
// 6-bit chunks never overlap, so they always have a fixed alignment
// relative to groups of three bytes, as illustrated below. We only
// need to care about this alignment when we are working out the
// position of the critical 6-bit chunk of a new key. At other times
// what matters is that a 6-bit chunk occupies part of at most two
// bytes, so the shift tells us how to pull the relevant bits out of
// those two bytes.
//
// ..key[i%3==0].. ..key[i%3==1].. ..key[i%3==2..
// |             |               |             | bytes
// 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
// |           |           |           |         | 6bits
// ..shift=0..   ..shift=6..  ..shift=4.. ..shift=2..

// Turn a 16-bit window of the key (k) into a one-bit-set bitmap for the
// 6-bit chunk selected by the shift encoded in the flags.
static inline Tbitmap
nibbit(uint k, uint flags) {
	uint down = 16U - 6U - (flags & 6U);
	uint chunk = (k >> down) & 0x3FU;
	return((Tbitmap)1 << chunk);
}

// Extract the bitmap bit for this branch's chunk of the key.  Past the
// end of the key we behave as if the key were zero-padded, so short
// keys consistently map to bit 0.
static inline Tbitmap
twigbit(Trie *t, const char *key, size_t len) {
	uint64_t off = t->branch.index;
	if(off >= len)
		return(1ULL);
	uint window = (uint)(byte)key[off] << 8;
	if(off + 1 < len)
		window |= (byte)key[off + 1];
	return(nibbit(window, t->branch.flags));
}

// Does this branch have a child for the given bitmap bit?
static inline bool
hastwig(Trie *t, Tbitmap bit) {
	return(t->branch.bitmap & bit);
}

// Rank of the given bit within the bitmap == index into the twig array.
static inline uint
twigoff(Trie *t, Tbitmap b) {
	return(popcount(t->branch.bitmap & (b - 1ULL)));
}

// The i'th child of this branch.
static inline Trie *
twig(Trie *t, uint i) {
	return(&t->branch.twigs[i]);
}

// Compute both the twig offset for bit b and the total twig count.
#define TWIGOFFMAX(off, max, t, b) do {			\
		off = twigoff(t, b);			\
		max = popcount(t->branch.bitmap);	\
	} while(0)