├── .gitignore ├── Documentation ├── performance.png ├── threadmill.png ├── hash-table-design.aux ├── threadmill.svg ├── long-threadmill.svg └── hash-table-design.tex ├── Code ├── package.lisp ├── x86-64 │ ├── VOPs │ │ ├── avx2-broadcastb.lisp │ │ ├── define-boring-vop.lisp │ │ ├── bsf.lisp │ │ ├── cas-bytes.lisp │ │ ├── sse2-vops.lisp │ │ └── avx2-vops.lisp │ ├── avx2-metadata.lisp │ └── sse-metadata.lisp ├── 42nd-at-threadmill.asd ├── diagnostics.lisp ├── hash-table-defstruct.lisp ├── counter.lisp ├── storage-vector.lisp ├── resize.lisp └── hash-table.lisp ├── LICENSE ├── Examples └── phony-redis.lisp └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | Documentation/auto/* 2 | *.log 3 | *.out 4 | *.pdf 5 | *.fasl 6 | *~ -------------------------------------------------------------------------------- /Documentation/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/telekons/42nd-at-threadmill/HEAD/Documentation/performance.png -------------------------------------------------------------------------------- /Documentation/threadmill.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/telekons/42nd-at-threadmill/HEAD/Documentation/threadmill.png -------------------------------------------------------------------------------- /Code/package.lisp: -------------------------------------------------------------------------------- 1 | (defpackage :threadmill 2 | (:use :cl) 3 | (:shadow #:make-hash-table 4 | #:hash-table #:hash-table-p #:hash-table-test 5 | #:hash-table-rehash-threshold 6 | #:hash-table-count #:hash-table-size 7 | #:gethash #:remhash #:clrhash #:maphash) 8 | (:export #:make-hash-table #:hash-table #:hash-table-p 9 | #:hash-table-test #:hash-table-count #:hash-table-size 10 | #:gethash #:remhash #:clrhash #:maphash #:modhash)) 11 | -------------------------------------------------------------------------------- /Code/x86-64/VOPs/avx2-broadcastb.lisp: -------------------------------------------------------------------------------- 1 | (in-package :sb-vm) 2 | 3 | (defknown threadmill::%avx2-broadcast ((unsigned-byte 8)) 4 | (sb-ext:simd-pack integer) 5 | (foldable movable flushable)) 6 | 7 | (threadmill::define-boring-vop threadmill::%avx2-broadcast 8 | ((byte unsigned-num :scs (unsigned-reg))) 9 | (broadcasted simd-pack-int :scs (int-sse-reg)) 10 | (inst movq broadcasted byte) 11 | (inst vpbroadcastb broadcasted broadcasted)) 12 | 13 | (in-package :threadmill) 14 | 15 | (sb-alien:define-alien-variable avx2-supported sb-alien:int) 16 | -------------------------------------------------------------------------------- /Code/x86-64/VOPs/define-boring-vop.lisp: -------------------------------------------------------------------------------- 1 | (in-package :threadmill) 2 | 3 | (defmacro define-boring-vop (name args result &body generator) 4 | `(progn 5 | (sb-vm::define-vop (,name) 6 | (:translate ,name) 7 | (:policy :fast-safe) 8 | (:args ,@(loop for (name nil . rest) in args 9 | collect (cons name rest))) 10 | (:arg-types ,@(mapcar #'second args)) 11 | (:results (,(first result) ,@(rest (rest result)))) 12 | (:result-types ,(second result)) 13 | (:generator 0 ,@generator)) 14 | (defun ,name ,(mapcar #'first args) 15 | (,name ,@(mapcar #'first args))))) 16 | -------------------------------------------------------------------------------- /Code/x86-64/VOPs/bsf.lisp: -------------------------------------------------------------------------------- 1 | (in-package :sb-vm) 2 | 3 | (defknown threadmill::bsf ((unsigned-byte 64)) 4 | (unsigned-byte 64) 5 | (foldable movable flushable) 6 | :overwrite-fndb-silently t) 7 | 8 | (threadmill::define-boring-vop threadmill::bsf 9 | ((value unsigned-num :scs (unsigned-reg))) 10 | (scan unsigned-num :scs (unsigned-reg)) 11 | (inst bsf scan value)) 12 | 13 | (in-package :threadmill) 14 | 15 | (declaim (inline bsf/16)) 16 | (defun bsf/16 (x) 17 | (declare ((unsigned-byte 16) x)) 18 | (sb-ext:truly-the (mod 16) (bsf x))) 19 | 20 | (declaim (inline bsf/32)) 21 | (defun bsf/32 (x) 22 | (declare ((unsigned-byte 32) x)) 23 | (sb-ext:truly-the (mod 32) (bsf x))) 24 | -------------------------------------------------------------------------------- /Code/42nd-at-threadmill.asd: -------------------------------------------------------------------------------- 1 | (asdf:defsystem :42nd-at-threadmill 2 | :depends-on (:atomics :bordeaux-threads) 3 | :serial t 4 | :components ((:file "package") 5 | (:module "x86-64" 6 | :components 7 | ((:module "VOPs" 8 | :components 9 | ((:file "define-boring-vop") 10 | (:file "avx2-broadcastb" 11 | :if-feature (:not :threadmill-avx2)) 12 | (:file "sse2-vops" 13 | :if-feature (:not :threadmill-avx2)) 14 | (:file "avx2-vops" 15 | :if-feature :threadmill-avx2) 16 | (:file "bsf") 17 | (:file "cas-bytes"))) 18 | (:file "sse-metadata" 19 | :if-feature (:not :threadmill-avx2)) 20 | (:file "avx2-metadata" 21 | :if-feature :threadmill-avx2))) 22 | (:file "counter") 23 | (:file "storage-vector") 24 | (:file "hash-table-defstruct") 25 | (:file "hash-table") 26 | (:file "resize"))) 27 | -------------------------------------------------------------------------------- /Code/diagnostics.lisp: -------------------------------------------------------------------------------- 1 | (in-package :threadmill) 2 | 3 | (defun group-sizes (table) 4 | (let* ((storage (hash-table-storage table)) 5 | (metadata (metadata-table storage)) 6 | (histogram (make-array (1+ +metadata-entries-per-group+) 7 | :element-type '(unsigned-byte 64) 8 | :initial-element 0))) 9 | (loop for group-start below (length metadata) 10 | by +metadata-entries-per-group+ 11 | for count = (count +empty-metadata+ metadata 12 | :start group-start 13 | :end (+ group-start +metadata-entries-per-group+) 14 | :test #'/=) 15 | do (incf (aref histogram count))) 16 | histogram)) 17 | 18 | (defun h2-counts (table) 19 | (let* ((storage (hash-table-storage table)) 20 | (metadata (metadata-table storage)) 21 | (histogram (make-array 128 22 | :element-type '(unsigned-byte 64) 23 | :initial-element 0))) 24 | (loop for value across metadata 25 | when (< value #x80) 26 | do (incf (aref histogram value))) 27 | histogram)) 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2021 Hayley Patton 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 14 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 15 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 16 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 17 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 18 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 19 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | -------------------------------------------------------------------------------- /Code/hash-table-defstruct.lisp: -------------------------------------------------------------------------------- 1 | (in-package :threadmill) 2 | 3 | (defstruct (hash-table (:constructor %make-hash-table)) 4 | (hash (error "no hash function") 5 | :type (function * fixnum) 6 | :read-only t) 7 | (test (error "no test function") 8 | :type function 9 | :read-only t) 10 | (storage (error "no storage") 11 | :type simple-vector) 12 | (rehash-threshold 0.5s0 13 | :type single-float 14 | :read-only t)) 15 | 16 | (defun make-hash-table (&key (test #'eql) (size 64) (hash-function #'sxhash) 17 | (rehash-threshold 0.5) 18 | &allow-other-keys) 19 | (assert (plusp size)) 20 | (assert (plusp rehash-threshold)) 21 | ;; Plan to store SIZE entries with the given rehash threshold. 22 | (let* ((size (round size rehash-threshold)) 23 | (hash-function (alexandria:ensure-function hash-function)) 24 | (test-function (alexandria:ensure-function test)) 25 | (storage (make-storage-vector (nearest-allowed-size size)))) 26 | (%make-hash-table :hash hash-function 27 | :test test-function 28 | :storage storage 29 | :rehash-threshold (float rehash-threshold 0.0s0)))) 30 | 31 | (defmethod print-object ((table hash-table) stream) 32 | (print-unreadable-object (table stream :type t :identity t) 33 | (format stream "~s ~s load ~d/~d" 34 | :test 35 | (hash-table-test table) 36 | (hash-table-count table) 37 | (hash-table-size table)))) 38 | -------------------------------------------------------------------------------- /Code/counter.lisp: -------------------------------------------------------------------------------- 1 | (in-package :threadmill) 2 | 3 | ;;; Cliff Click uses a counter which self-resizes, not unlike the hash table 4 | ;;; itself, but that's kinda messy. 5 | (defconstant +counter-slots+ 64) 6 | (deftype counter () 7 | `(simple-array sb-ext:word (,+counter-slots+))) 8 | (deftype counter-offset () 9 | `(mod ,+counter-slots+)) 10 | 11 | (defun make-counter () 12 | (make-array +counter-slots+ 13 | :initial-element 0 14 | :element-type 'sb-ext:word)) 15 | 16 | (defvar *counter-offset* (random +counter-slots+)) 17 | (declaim (sb-ext:always-bound *counter-offset*) 18 | (counter-offset *counter-offset*)) 19 | 20 | (push (cons '*counter-offset* '(random +counter-slots+)) 21 | bt:*default-special-bindings*) 22 | 23 | (declaim (inline change-counter increment-counter decrement-counter)) 24 | (defun change-counter (counter Δ) 25 | (declare (counter counter) 26 | (optimize (speed 3) (safety 0))) 27 | (atomics:atomic-incf (aref counter *counter-offset*) Δ)) 28 | 29 | (defun increment-counter (counter) 30 | (change-counter counter 1)) 31 | (defun decrement-counter (counter) 32 | (change-counter counter -1)) 33 | (defun counter-value (counter) 34 | (declare (counter counter)) 35 | (let ((sum 0)) 36 | (loop for value across counter 37 | do (setf sum (ldb (byte 64 0) (+ sum value)))) 38 | ;; If the sum has the MSB set, we either have a very, very large 39 | ;; table (Ed.: we don't) or the count is negative due to timing 40 | ;; weirdness. Just say the value is 0 in that case. 41 | (if (logtest (ash 1 63) sum) 42 | 0 43 | sum))) 44 | -------------------------------------------------------------------------------- /Code/x86-64/VOPs/cas-bytes.lisp: -------------------------------------------------------------------------------- 1 | (in-package :sb-vm) 2 | 3 | (sb-c:defknown threadmill::%cas-byte 4 | ((simple-array (unsigned-byte 8) 1) (unsigned-byte 62) (unsigned-byte 8) (unsigned-byte 8)) 5 | (unsigned-byte 8) 6 | () 7 | :overwrite-fndb-silently t) 8 | 9 | (define-vop (threadmill::%cas-byte) 10 | (:translate threadmill::%cas-byte) 11 | (:policy :fast-safe) 12 | (:args (object :scs (descriptor-reg) :to :eval) 13 | (index :scs (unsigned-reg) :to :eval) 14 | (old-value :scs (unsigned-reg)) 15 | (new-value :scs (unsigned-reg))) 16 | (:arg-types simple-array-unsigned-byte-8 unsigned-num unsigned-num unsigned-num) 17 | (:temporary (:sc unsigned-reg :offset rax-offset 18 | :from (:argument 1) :to :result :target value) 19 | rax) 20 | (:results (value :scs (unsigned-reg))) 21 | (:result-types unsigned-num) 22 | (:generator 5 23 | (move rax old-value) 24 | (inst cmpxchg :lock :byte 25 | (ea (- (* vector-data-offset n-word-bytes) 26 | other-pointer-lowtag) 27 | object index 1) 28 | new-value) 29 | (move value rax))) 30 | 31 | (in-package :threadmill) 32 | 33 | (defun %cas-byte (byte-vector index old new) 34 | (%cas-byte byte-vector index old new)) 35 | 36 | (declaim (inline cas-byte vector-cas-pair)) 37 | (defun cas-byte (byte-vector index old new) 38 | (declare ((simple-array (unsigned-byte 8) 1) byte-vector) 39 | ((unsigned-byte 8) old new) 40 | (optimize (speed 3) (safety 1))) 41 | (%cas-byte byte-vector 42 | (sb-kernel:check-bound byte-vector (length byte-vector) index) 43 | old new)) 44 | -------------------------------------------------------------------------------- /Documentation/hash-table-design.aux: -------------------------------------------------------------------------------- 1 | \relax 2 | \providecommand\hyper@newdestlabel[2]{} 3 | \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} 4 | \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined 5 | \global\let\oldcontentsline\contentsline 6 | \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} 7 | \global\let\oldnewlabel\newlabel 8 | \gdef\newlabel#1#2{\newlabelxx{#1}#2} 9 | \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} 10 | \AtEndDocument{\ifx\hyper@anchor\@undefined 11 | \let\contentsline\oldcontentsline 12 | \let\newlabel\oldnewlabel 13 | \fi} 14 | \fi} 15 | \global\let\hyper@last\relax 16 | \gdef\HyperFirstAtBeginDocument#1{#1} 17 | \providecommand\HyField@AuxAddToFields[1]{} 18 | \providecommand\HyField@AuxAddToCoFields[2]{} 19 | \@writefile{toc}{\contentsline {section}{\numberline {1}A quick rundown of Click's hash table}{1}{section.1}\protected@file@percent } 20 | \@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Finding a value}{2}{subsection.1.1}\protected@file@percent } 21 | \@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Associating a key with a value}{2}{subsection.1.2}\protected@file@percent } 22 | \@writefile{toc}{\contentsline {subsection}{\numberline {1.3}Resizing}{3}{subsection.1.3}\protected@file@percent } 23 | \@writefile{toc}{\contentsline {section}{\numberline {2}How to introduce a metadata table}{3}{section.2}\protected@file@percent } 24 | \@writefile{toc}{\contentsline {subsection}{\numberline {2.1}The effects of stale metadata}{4}{subsection.2.1}\protected@file@percent } 25 | \@writefile{toc}{\contentsline {subsection}{\numberline {2.2}What do we gain?}{4}{subsection.2.2}\protected@file@percent } 26 | \@writefile{toc}{\contentsline {section}{\numberline {3}Maybe a good idea: Using one ``prime'' bit}{5}{section.3}\protected@file@percent } 27 | \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}What happens if we test after updating?}{5}{subsection.3.1}\protected@file@percent } 28 | \@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Or maybe just simplify the prime test?}{6}{subsection.3.2}\protected@file@percent } 29 | -------------------------------------------------------------------------------- /Examples/phony-redis.lisp: -------------------------------------------------------------------------------- 1 | ;;; According to a presentation by an Amazon Web Services engineer, 2 | ;;; one cannot design an in-memory database using a garbage collected 3 | ;;; language implementation, because it would be too slow due to 4 | ;;; collection. He then goes to use a hash table with just one lock, 5 | ;;; copies the data to be stored just 'cause, finds performance similar 6 | ;;; to Redis, and calls it a day. 7 | 8 | ;;; Using structure sharing and a concurrent hash table, we can go 9 | ;;; much faster. Perhaps a magnitude or two faster - not that I'm 10 | ;;; really taking the problem seriously, by avoiding network 11 | ;;; serialisation, and even inter-thread mailboxes; but this should 12 | ;;; show that tasteful use of concurrent data structures makes things 13 | ;;; go fast. 14 | 15 | (defpackage :phony-redis 16 | (:use :cl) 17 | (:export #:make-server #:connect-to-server 18 | #:find-value #:close-connection)) 19 | (in-package :phony-redis) 20 | 21 | (defmacro specialize (string body) 22 | "Convince the compiler to generate a fast path for simple strings." 23 | `(if (typep ,string '(simple-array character 1)) 24 | ,body 25 | ,body)) 26 | 27 | (defun djb (string) 28 | (declare (string string) 29 | (optimize speed)) 30 | (let ((hash 5381)) 31 | (declare ((and unsigned-byte fixnum) hash)) 32 | (specialize 33 | string 34 | (dotimes (n (min 6 (length string))) 35 | (setf hash 36 | (logand most-positive-fixnum 37 | (logxor (* hash 33) 38 | (char-code (schar string n))))))) 39 | hash)) 40 | 41 | (defun make-server () 42 | (threadmill:make-hash-table 43 | :test #'equal 44 | :hash-function #'djb 45 | :size 512)) 46 | 47 | (defun connect-to-server (server) 48 | server) 49 | 50 | (defun find-value (connection name) 51 | (threadmill:gethash name connection)) 52 | 53 | (defun (setf find-value) (value connection name) 54 | (setf (threadmill:gethash name connection) 55 | value)) 56 | 57 | (defun close-connection (connection) 58 | (declare (ignore connection)) 59 | (values)) 60 | -------------------------------------------------------------------------------- /Code/x86-64/VOPs/sse2-vops.lisp: -------------------------------------------------------------------------------- 1 | (in-package :sb-vm) 2 | 3 | (defknown threadmill::%sse2-load 4 | ((simple-array (unsigned-byte 8) (*)) (unsigned-byte 64)) 5 | (simd-pack integer) 6 | (foldable flushable) 7 | :overwrite-fndb-silently t) 8 | 9 | (threadmill::define-boring-vop threadmill::%sse2-load 10 | ((vector simple-array-unsigned-byte-8 :scs (descriptor-reg)) 11 | (index unsigned-num :scs (unsigned-reg))) 12 | (bytes simd-pack-int :scs (int-sse-reg)) 13 | ;; SBCL vectors are aligned to 16 bytes. 14 | (inst movdqa bytes 15 | (ea (- (* vector-data-offset n-word-bytes) 16 | other-pointer-lowtag) 17 | vector index 1))) 18 | (defknown threadmill::%sse2-broadcast-byte 19 | ((unsigned-byte 8)) 20 | (simd-pack integer) 21 | (foldable movable flushable) 22 | :overwrite-fndb-silently t) 23 | 24 | (threadmill::define-boring-vop threadmill::%sse2-broadcast-byte 25 | ((byte unsigned-num :scs (unsigned-reg))) 26 | (bytes simd-pack-int :scs (int-sse-reg)) 27 | (inst movd bytes byte) ; xxxxxxxxxxxxxxxB 28 | (inst punpcklbw bytes bytes) ; xxxxxxxxxxxxxxBB 29 | (inst punpcklbw bytes bytes) ; xxxxxxxxxxxxBBBB 30 | (inst pshufd bytes bytes #4r0000)) ; BBBBBBBBBBBBBBBB 31 | 32 | (defknown threadmill::%sse2-movemask 33 | ((simd-pack integer)) 34 | (unsigned-byte 16) 35 | (foldable movable flushable) 36 | :overwrite-fndb-silently t) 37 | 38 | (threadmill::define-boring-vop threadmill::%sse2-movemask 39 | ((bytes simd-pack-int :scs (int-sse-reg))) 40 | (mask unsigned-num :scs (unsigned-reg)) 41 | (inst pmovmskb mask bytes)) 42 | 43 | (defknown threadmill::%sse2= 44 | ((simd-pack integer) (simd-pack integer)) 45 | (simd-pack integer) 46 | (foldable movable flushable) 47 | :overwrite-fndb-silently t) 48 | 49 | (threadmill::define-boring-vop threadmill::%sse2= 50 | ((a simd-pack-int :scs (int-sse-reg) :target result) 51 | (b simd-pack-int :scs (int-sse-reg))) 52 | (result simd-pack-int :scs (int-sse-reg)) 53 | (unless (location= a result) 54 | (inst movdqa result a)) 55 | (inst pcmpeqb result b)) 56 | -------------------------------------------------------------------------------- /Code/storage-vector.lisp: -------------------------------------------------------------------------------- 1 | (in-package :threadmill) 2 | 3 | ;;; The "storage vector" for a hash table contains four slots before the 4 | ;;; keys and values: 5 | ;;; 1. The metadata vector 6 | ;;; 2. The new storage vector we're copying to (or NIL) 7 | ;;; 3. How many elements we have finished copying (used when resizing) 8 | ;;; 4. How many elements we are going to copy (used when resizing) 9 | ;;; Then the rest of the table just contains a key, then a value, and so on. 10 | 11 | ;;; We define macros so that we can SETF and CAS the position, without 12 | ;;; having to write setters and (unportable) CAS-ers. 13 | (defconstant +words-before-values+ 8) 14 | (macrolet ((def (name offset) 15 | `(defmacro ,name (storage-vector) 16 | `(svref ,storage-vector ,,offset)))) 17 | (def %metadata-table 0) 18 | (def new-vector 1) 19 | (def finished-copying 2) 20 | (def going-to-copy 3) 21 | (def table-count 4) 22 | (def table-slot-count 5) 23 | (def creation-time 6) 24 | (def allocating-new-p 7)) 25 | 26 | (declaim (inline metadata-table)) 27 | (defun metadata-table (storage) 28 | (sb-ext:truly-the metadata-vector 29 | (%metadata-table storage))) 30 | 31 | (macrolet ((def (name offset) 32 | `(defmacro ,name (storage-vector n) 33 | `(svref ,storage-vector 34 | (+ +words-before-values+ ,,offset (* ,n 2)))))) 35 | (def key 0) 36 | (def value 1)) 37 | 38 | (defun make-storage-vector (size) 39 | (let ((storage (make-array (+ (* size 2) +words-before-values+) 40 | :initial-element +empty+))) 41 | (setf (%metadata-table storage) (make-metadata-vector size) 42 | (new-vector storage) nil 43 | (finished-copying storage) 0 44 | (going-to-copy storage) 0 45 | (table-count storage) (make-counter) 46 | (table-slot-count storage) (make-counter) 47 | (creation-time storage) (get-internal-real-time) 48 | (allocating-new-p storage) nil) 49 | storage)) 50 | 51 | (defun nearest-allowed-size (size) 52 | (max +metadata-entries-per-group+ 53 | ;; This form returns the next power of 2 above SIZE. 54 | (expt 2 (integer-length (1- size))))) 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A fast concurrent hash table. 2 | 3 | *42nd At Threadmill* is a nearly lock-free\* hash table based on Cliff 4 | Click's NonBlockingHashMap, and Abseil's `flat_hash_map`. We use the 5 | general layout of the former, and the fast metadata-based probing 6 | trick of the latter. 7 | 8 | We are aware of the table being very, very picky with hash functions 9 | (Abseil's table is like that too, but I don't think it's *that* bad), 10 | so you might want to hold off using this table in production still. 11 | 12 | See [A Fast Wait-Free Hash 13 | Table](https://www.youtube.com/watch?v=WYXgtXWejRM) and [Matt 14 | Kulukundis's "Designing a Fast, Efficient, Cache-friendly Hash Table, 15 | Step by Step" 16 | presentation](https://www.youtube.com/watch?v=ncHmEUmJZf4) for an 17 | introduction to both tables. 18 | 19 | We use SSE2 intrinsics for fast probing, and optionally use AVX2 for 20 | faster byte broadcasting. This library requires a post-2.0.5 version of 21 | SBCL, so that we can use some instructions introduced to the assembler 22 | around then. 23 | 24 | \*Okay, we effectively lock to resize but it won't deadlock and it appears 25 | to be faster than using Click's lock-free resizing technique; so you tell me 26 | if that is an acceptable trade-off. 27 | 28 | ## Pictures of a benchmark 29 | 30 | ![](Documentation/performance.png) 31 | 32 | ## Differences from Click's table 33 | 34 | We replace copied values with a single `+copied+` marker instead of 35 | an instance of a `Prime` class. This change generates less garbage 36 | when copying, and leads to slightly faster barrier code; though our table 37 | is not wait-free. (Truth be told, we're only barely getting a grip on Click's 38 | resize logic now.) 39 | 40 | We also removed the state, having removes transition 41 | back to ; a superficial change which doesn't appear to affect 42 | anything. 43 | 44 | ## Differences from Kulukundis's table 45 | 46 | As Click requires us to pin keys to entries, we don't ever use tombstone 47 | metadata. The metadata for a dead entry remains in the metadata table, 48 | as we need to be able to find the right key entry to reuse quickly. 49 | 50 | We have a somewhat improved load factor, but it is not as extreme as 51 | Kulukundis's demonstration. Kulukundis could approach a load factor of 52 | 87.5%, but we find that 50% is the best tradeoff between space and 53 | throughput with our table. However, we still have improved over 54 | Click's table -- Click doubles the table size with 25% live keys, and 55 | quadruples with 50% live, in order to "avoid endless 56 | reprobing". Faster probing lets us get away with just doubling at 50%. 57 | 58 | ## Previous work 59 | 60 | This concurrent hash table is based off the 61 | [NonBlockingHashMap](https://github.com/boundary/high-scale-lib/blob/master/src/main/java/org/cliffc/high_scale_lib/NonBlockingHashMap.java), 62 | and its Common Lisp port in 63 | [Luckless](https://github.com/Shinmera/luckless). It is also based off the 64 | [linear probing hash table implementation in SICL](https://github.com/robert-strandh/SICL/tree/master/Code/Hash-tables/Linear-probing), 65 | as well as [its SIMD fork](https://github.com/no-defun-allowed/simd-sicl-hash-table). 66 | -------------------------------------------------------------------------------- /Code/x86-64/VOPs/avx2-vops.lisp: -------------------------------------------------------------------------------- 1 | (in-package :sb-x86-64-asm) 2 | 3 | ;;; Patch around l. 870 of src/compiler/x86-64/avx2-insts.lisp 4 | (macrolet ((def (name prefix opcode &key reg-only) 5 | `(define-instruction ,name (segment dst src &optional (src-size :oword)) 6 | ,@(avx2-inst-printer-list 'reg-ymm/mem prefix opcode) 7 | (:emitter 8 | (aver (gpr-p dst)) 9 | ,(when reg-only 10 | `(aver (xmm-register-p src))) 11 | (let ((dst-size (operand-size dst))) 12 | (aver (or (eq dst-size :qword) (eq dst-size :dword))) 13 | (emit-avx2-inst segment src dst ,prefix ,opcode 14 | :w (ecase dst-size 15 | (:qword 1) 16 | (:dword 0)) 17 | :l (ecase src-size 18 | (:oword 0) 19 | (:hword 1)))))))) 20 | (def vcvtsd2si #xf2 #x2d) 21 | (def vcvtss2si #xf3 #x2d) 22 | (def vcvttsd2si #xf2 #x2c) 23 | (def vcvttss2si #xf3 #x2c) 24 | (def vmovmskpd #x66 #x50 :reg-only t) 25 | (def vmovmskps nil #x50 :reg-only t) 26 | (def vpmovmskb #x66 #xd7 :reg-only t)) 27 | 28 | (in-package :sb-vm) 29 | 30 | (defknown threadmill::%avx2-load 31 | ((simple-array (unsigned-byte 8) (*)) (unsigned-byte 64)) 32 | (simd-pack-256 integer) 33 | (foldable flushable) 34 | :overwrite-fndb-silently t) 35 | 36 | (threadmill::define-boring-vop threadmill::%avx2-load 37 | ((vector simple-array-unsigned-byte-8 :scs (descriptor-reg)) 38 | (index unsigned-num :scs (unsigned-reg))) 39 | (bytes simd-pack-256-int :scs (int-avx2-reg)) 40 | (inst vmovdqu bytes 41 | (ea (- (* vector-data-offset n-word-bytes) 42 | other-pointer-lowtag) 43 | vector index 1))) 44 | 45 | (defknown threadmill::%avx2-movemask 46 | ((simd-pack-256 integer)) 47 | (unsigned-byte 32) 48 | (foldable movable flushable) 49 | :overwrite-fndb-silently t) 50 | 51 | (threadmill::define-boring-vop threadmill::%avx2-movemask 52 | ((pack simd-pack-256-int :scs (int-avx2-reg))) 53 | (mask unsigned-num :scs (unsigned-reg)) 54 | (inst vpmovmskb mask pack :hword)) 55 | 56 | (defknown threadmill::%avx2-broadcast/256 ((unsigned-byte 8)) 57 | (simd-pack-256 integer) 58 | (foldable movable flushable) 59 | :overwrite-fndb-silently t) 60 | 61 | (threadmill::define-boring-vop threadmill::%avx2-broadcast/256 62 | ((byte unsigned-num :scs (unsigned-reg))) 63 | (broadcasted simd-pack-256-int :scs (int-avx2-reg)) 64 | (inst movq broadcasted byte) 65 | (inst vpbroadcastb broadcasted broadcasted)) 66 | 67 | (defknown threadmill::%avx2= ((simd-pack-256 integer) (simd-pack-256 integer)) 68 | (simd-pack-256 integer) 69 | (foldable movable flushable) 70 | :overwrite-fndb-silently t) 71 | 72 | (threadmill::define-boring-vop threadmill::%avx2= 73 | ((pack1 simd-pack-256-int :scs (int-avx2-reg)) 74 | (pack2 simd-pack-256-int :scs (int-avx2-reg))) 75 | (equals simd-pack-256-int :scs (int-avx2-reg)) 76 | (inst vpcmpeqb equals pack1 pack2)) 77 | -------------------------------------------------------------------------------- /Code/x86-64/avx2-metadata.lisp: -------------------------------------------------------------------------------- 1 | (in-package :threadmill) 2 | 3 | (defconstant +metadata-entries-per-group+ 32 4 | "The number of metadata entries we store per group.") 5 | 6 | (deftype metadata-group () 7 | `(sb-ext:simd-pack-256 integer)) 8 | (deftype metadata-vector () 9 | `(simple-array (unsigned-byte 8) 1)) 10 | (deftype metadata-index () 11 | `(mod ,(floor most-positive-fixnum +metadata-entries-per-group+))) 12 | (deftype vector-index () 13 | `(and fixnum unsigned-byte)) 14 | 15 | (defconstant +empty-metadata+ #x80 16 | "The metadata byte stored for an empty entry.") 17 | 18 | (declaim (inline bytes matches-p writable mask-h2 match-union)) 19 | 20 | (defun mask-h2 (h2) 21 | "Mask off part of the H2 hash, for use as metadata." 22 | (declare ((unsigned-byte 8) h2)) 23 | (logand #x7f h2)) 24 | 25 | (defun bytes (byte group) 26 | "Return matches for a byte in a metadata group." 27 | (declare ((unsigned-byte 8) byte)) 28 | (%avx2-movemask 29 | (%avx2= (%avx2-broadcast/256 byte) group))) 30 | 31 | (defun writable (group) 32 | "Return matches for metadata bytes we can put new mappings in." 33 | ;; movemask tests the high bit of each byte, and we want to test the 34 | ;; high bit, so we have nothing else to do. Magic! 35 | (%avx2-movemask group)) 36 | 37 | (defun match-union (m1 m2) 38 | (logior m1 m2)) 39 | 40 | (declaim (inline call-with-matches matches-p)) 41 | (defun call-with-matches (bit-mask continuation) 42 | (declare (function continuation) 43 | (optimize (speed 3) (safety 0)) 44 | ((unsigned-byte 32) bit-mask)) 45 | (loop until (zerop bit-mask) 46 | do (funcall continuation (bsf/32 bit-mask)) 47 | (setf bit-mask (logand bit-mask (1- bit-mask))))) 48 | 49 | (defmacro do-matches ((position bit-mask) &body body) 50 | "Evaluate BODY with POSITION bound to every match in the provided BIT-MASK." 51 | (let ((continuation (gensym "CONTINUATION"))) 52 | `(flet ((,continuation (,position) 53 | (declare ((mod ,+metadata-entries-per-group+) ,position)) 54 | ,@body)) 55 | (declare (inline ,continuation) 56 | (dynamic-extent #',continuation)) 57 | (call-with-matches ,bit-mask #',continuation)))) 58 | 59 | (defun matches-p (bit-mask) 60 | "Are there any matches in BIT-MASK?" 61 | (plusp bit-mask)) 62 | 63 | (defun make-metadata-vector (size) 64 | "Create a metadata vector for a hash table of a given size, with all elements initialized to +EMPTY-METADATA+." 65 | (let ((vector (make-array size 66 | :element-type '(unsigned-byte 8) 67 | :initial-element +empty-metadata+))) 68 | vector)) 69 | 70 | (declaim (inline metadata-group metadata-groups 71 | cas-metadata metadata (setf metadata))) 72 | 73 | (defun metadata-group (vector position) 74 | "Retrieve the Nth metadata group from a vector. 75 | Note that N has a length of an element." 76 | (declare (metadata-vector vector) 77 | (vector-index position) 78 | (optimize (speed 3) (safety 0))) 79 | ;; Why won't SSE:AREF-PI work? 80 | (%avx2-load vector position)) 81 | 82 | (defun metadata-groups (metadata) 83 | (floor (length metadata) +metadata-entries-per-group+)) 84 | 85 | (defun metadata (vector position) 86 | (declare (vector-index position)) 87 | (aref vector position)) 88 | 89 | (defun cas-metadata (vector position old-value new-value) 90 | (= old-value 91 | (cas-byte vector position old-value new-value))) 92 | 93 | (defun (setf metadata) (new-value vector position) 94 | (declare (vector-index position)) 95 | (setf (aref vector position) new-value)) 96 | 97 | (defmacro atomic-setf (place value) 98 | `(prog1 99 | (setf ,place ,value) 100 | (sb-vm:%memory-barrier))) 101 | -------------------------------------------------------------------------------- /Documentation/threadmill.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 41 | 47 | 48 | 50 | 51 | 53 | image/svg+xml 54 | 56 | 57 | 58 | 59 | 60 | 64 | 67 | 71 | 75 | 79 | 84 | 88 | 93 | 97 | 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /Code/x86-64/sse-metadata.lisp: -------------------------------------------------------------------------------- 1 | (in-package :threadmill) 2 | 3 | (defconstant +metadata-entries-per-group+ 16 4 | "The number of metadata entries we store per group.") 5 | 6 | (deftype metadata-group () 7 | `(sb-ext:simd-pack (unsigned-byte 8))) 8 | (deftype metadata-vector () 9 | `(simple-array (unsigned-byte 8) 1)) 10 | (deftype metadata-index () 11 | `(mod ,(floor most-positive-fixnum +metadata-entries-per-group+))) 12 | (deftype vector-index () 13 | `(and fixnum unsigned-byte)) 14 | 15 | (defconstant +empty-metadata+ #x80 16 | "The metadata byte stored for an empty entry.") 17 | 18 | (declaim (inline bytes matches-p writable mask-h2 match-union)) 19 | 20 | (defun mask-h2 (h2) 21 | "Mask off part of the H2 hash, for use as metadata." 22 | (declare ((unsigned-byte 8) h2)) 23 | (logand #x7f h2)) 24 | 25 | (if (= 1 avx2-supported) 26 | (defun bytes (byte group) 27 | "Return matches for a byte in a metadata group." 28 | (declare ((unsigned-byte 8) byte)) 29 | (%sse2-movemask 30 | (%sse2= (%avx2-broadcast byte) group))) 31 | (defun bytes (byte group) 32 | "Return matches for a byte in a metadata group." 33 | (declare ((unsigned-byte 8) byte)) 34 | (%sse2-movemask 35 | (%sse2= (%sse2-broadcast-byte byte) group)))) 36 | 37 | (defun writable (group) 38 | "Return matches for metadata bytes we can put new mappings in." 39 | ;; movemask tests the high bit of each byte, and we want to test the 40 | ;; high bit, so we have nothing else to do. Magic! 41 | (%sse2-movemask group)) 42 | 43 | (defun match-union (m1 m2) 44 | (logior m1 m2)) 45 | 46 | (declaim (inline call-with-matches matches-p)) 47 | (defun call-with-matches (bit-mask continuation) 48 | (declare (function continuation) 49 | (optimize (speed 3) (safety 0)) 50 | ((unsigned-byte 16) bit-mask)) 51 | (loop until (zerop bit-mask) 52 | do (funcall continuation (bsf/16 bit-mask)) 53 | (setf bit-mask (logand bit-mask (1- bit-mask))))) 54 | 55 | (defmacro do-matches ((position bit-mask) &body body) 56 | "Evaluate BODY with POSITION bound to every match in the provided BIT-MASK." 57 | (let ((continuation (gensym "CONTINUATION"))) 58 | `(flet ((,continuation (,position) 59 | (declare ((mod ,+metadata-entries-per-group+) ,position)) 60 | ,@body)) 61 | (declare (inline ,continuation) 62 | (dynamic-extent #',continuation)) 63 | (call-with-matches ,bit-mask #',continuation)))) 64 | 65 | (defun matches-p (bit-mask) 66 | "Are there any matches in BIT-MASK?" 67 | (plusp bit-mask)) 68 | 69 | (defun make-metadata-vector (size) 70 | "Create a metadata vector for a hash table of a given size, with all elements initialized to +EMPTY-METADATA+." 71 | (let ((vector (make-array size 72 | :element-type '(unsigned-byte 8) 73 | :initial-element +empty-metadata+))) 74 | vector)) 75 | 76 | (declaim (inline metadata-group metadata-groups 77 | cas-metadata metadata (setf metadata))) 78 | 79 | (defun metadata-group (vector position) 80 | "Retrieve the Nth metadata group from a vector. 81 | Note that N has a length of an element." 82 | (declare (metadata-vector vector) 83 | (vector-index position) 84 | (optimize (speed 3) (safety 0))) 85 | (%sse2-load vector position)) 86 | 87 | (defun metadata-groups (metadata) 88 | (floor (length metadata) +metadata-entries-per-group+)) 89 | 90 | (defun metadata (vector position) 91 | (declare (vector-index position)) 92 | (aref vector position)) 93 | 94 | (defun cas-metadata (vector position old-value new-value) 95 | (= old-value 96 | (cas-byte vector position old-value new-value))) 97 | 98 | (defun (setf metadata) (new-value vector position) 99 | (declare (vector-index position)) 100 | (setf (aref vector position) new-value)) 101 | 102 | (defmacro atomic-setf (place value) 103 | `(prog1 104 | (setf ,place ,value) 105 | (sb-vm:%memory-barrier))) 106 | -------------------------------------------------------------------------------- /Documentation/long-threadmill.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 41 | 47 | 48 | 50 | 51 | 53 | image/svg+xml 54 | 56 | 57 | 58 | 59 | 60 | 64 | 67 | 69 | 73 | 77 | 81 | 82 | 87 | 91 | 96 | 100 | 104 | 109 | 114 | 117 | 121 | 125 | 129 | 130 | 133 | 137 | 141 | 145 | 146 | 149 | 153 | 157 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /Code/resize.lisp: -------------------------------------------------------------------------------- 1 | (in-package :threadmill) 2 | 3 | (defun too-new-p (hash-table) 4 | "Was the last resize performed recently?" 5 | ;; If the last resize was done in the last second, we probably should grow, 6 | ;; regardless of load factor. 7 | ;; This heuristic is...somehow not easy to reason with, and makes things 8 | ;; slower most of the time. 9 | (< (creation-time (hash-table-storage hash-table)) 10 | (- (get-internal-real-time) 11 | (* 1.000 internal-time-units-per-second)))) 12 | 13 | (defun help-copy (hash-table storage) 14 | ;; If the storage vector was already swapped out, bail out. 15 | (unless (eq storage (hash-table-storage hash-table)) 16 | (return-from help-copy)) 17 | (let* ((old-size (hash-table-size hash-table)) 18 | (slot-count (counter-value (table-slot-count storage))) 19 | (new-size 20 | (if (or (> (/ (hash-table-count hash-table) 21 | (float old-size)) 22 | (hash-table-rehash-threshold hash-table)) 23 | (and (not (zerop slot-count)) 24 | (> (/ (hash-table-count hash-table) 25 | (float slot-count)) 26 | 0.75))) 27 | (* old-size 2) 28 | old-size)) 29 | (megabytes (ash (* new-size 16) -20))) 30 | (flet ((continuation (new-vector) 31 | (return-from help-copy 32 | (copy-into storage new-vector hash-table)))) 33 | (unless (null (new-vector storage)) 34 | (continuation (new-vector storage))) 35 | ;; Offer to make the new vector. 36 | (when (atomics:cas (allocating-new-p storage) nil t) 37 | #+log-copying 38 | (format t "~&Creating a ~d element storage vector" new-size) 39 | (let ((new-vector (make-storage-vector new-size))) 40 | (atomic-setf (new-vector storage) new-vector) 41 | (continuation new-vector))) 42 | ;; Else, wait for another thread to create the new vector. 43 | (loop while (null (new-vector storage)) 44 | do (sleep (* 8e-5 megabytes))) 45 | (continuation (new-vector storage))))) 46 | 47 | ;;; Copying is done in "segments". Each thread repeatedly claims 48 | ;;; segments of the storage vector to copy into the new vector, until there 49 | ;;; are no more segments to copy. 50 | (defconstant +segment-size+ 4096) 51 | 52 | (defun next-segment-to-copy (storage size) 53 | (loop for old-value = (going-to-copy storage) 54 | for new-value = (+ old-value +segment-size+) 55 | do (when (>= old-value size) 56 | ;; Nothing more to copy. 57 | (return (values 0 nil))) 58 | (when (atomics:cas (going-to-copy storage) 59 | old-value new-value) 60 | (return (values old-value t))))) 61 | 62 | (defun report-finished-copying (new-storage) 63 | (declare (ignorable new-storage)) 64 | #+log-copying 65 | (format t "~&Finished copying ~d entries after ~8e seconds" 66 | (counter-value (table-count new-storage)) 67 | (/ (- (get-internal-real-time) (creation-time new-storage)) 68 | internal-time-units-per-second))) 69 | 70 | (defun copy-into (old-storage new-storage hash-table) 71 | (let ((metadata-table (metadata-table new-storage)) 72 | (hash-function (hash-table-hash hash-table)) 73 | (size (length (metadata-table old-storage)))) 74 | (loop 75 | (multiple-value-bind (start present?) 76 | (next-segment-to-copy old-storage size) 77 | (unless present? 78 | (bt:thread-yield) 79 | (return)) 80 | (copy-segment hash-table 81 | old-storage metadata-table new-storage 82 | start size hash-function) 83 | ;; Bump the copy progress. 84 | (loop for old-value = (finished-copying old-storage) 85 | for new-value = (+ old-value +segment-size+) 86 | until (atomics:cas (finished-copying old-storage) 87 | old-value (+ old-value +segment-size+)) 88 | ;; When we copied the last segment, install the new table. 89 | ;; Note that if a thread creates a recursive copy (which is 90 | ;; definitely going to be linked here before we return from 91 | ;; COPY-SEGMENT), we do not install the new table. 92 | finally (when (>= new-value size) 93 | (when (null (new-vector new-storage)) 94 | (atomic-setf (hash-table-storage hash-table) 95 | new-storage)) 96 | (report-finished-copying new-storage) 97 | (return-from copy-into))))))) 98 | 99 | (defun copy-segment (hash-table old-storage metadata new-storage 100 | start size hash-function) 101 | (loop for position from start 102 | below (min size (+ start +segment-size+)) 103 | do (let ((k (key old-storage position)) 104 | (v (value old-storage position))) 105 | ;; Grab the last value stored here. 106 | (loop until (atomics:cas (value old-storage position) 107 | v +copied+) 108 | do (setf v (value old-storage position))) 109 | (unless (or (eq k +empty+) 110 | (eq v +empty+)) 111 | ;; Store it in the new table. 112 | (store-copied-value hash-table 113 | new-storage metadata 114 | (funcall hash-function k) 115 | k v size))))) 116 | 117 | (defun store-copied-value (hash-table storage metadata hash key value size) 118 | "Attempt to copy a key and value." 119 | (declare (hash-table hash-table) 120 | (simple-vector storage) 121 | (metadata-vector metadata) 122 | (fixnum size) 123 | (fixnum hash) 124 | #.+optimizations+) 125 | ;; Copying should never store duplicate keys. We exploit this to 126 | ;; avoid testing keys, instead only copying into new entries. 127 | (dx-labels ((test (this-key) 128 | (declare (ignore this-key)) 129 | t) 130 | (mask (group metadata) 131 | (declare (ignore metadata)) 132 | (writable group)) 133 | (consume (this-key position h2) 134 | (declare (ignore this-key)) 135 | (loop for old-key = (key storage position) 136 | do (unless (eq old-key +empty+) 137 | (return-from consume)) 138 | (when (atomics:cas (key storage position) 139 | +empty+ key) 140 | (return))) 141 | (atomic-setf (metadata metadata position) h2) 142 | (loop for old-value = (value storage position) 143 | do (when (eq old-value +copied+) 144 | (return-from store-copied-value 145 | (recursive-copy hash-table storage 146 | hash key value size))) 147 | (when (atomics:cas (value storage position) 148 | old-value value) 149 | (return))) 150 | (increment-counter (table-count storage)) 151 | (return-from store-copied-value))) 152 | (call-with-positions storage metadata hash 153 | #'test #'mask #'consume) 154 | (recursive-copy hash-table storage hash key value size))) 155 | 156 | (defun recursive-copy (hash-table storage hash key value size) 157 | (flet ((continuation (new-storage) 158 | (let ((new-metadata (metadata-table new-storage))) 159 | (store-copied-value hash-table new-storage new-metadata 160 | hash key value (length new-metadata)) 161 | (copy-into storage new-storage hash-table)))) 162 | (loop 163 | (unless (null (new-vector storage)) 164 | (return (continuation (new-vector storage)))) 165 | (let ((new-storage (make-storage-vector (* 2 size)))) 166 | (when (atomics:cas (new-vector storage) nil new-storage) 167 | #+log-copying 168 | (warn "Failed to copy; doing a recursive copy.") 169 | (return (continuation new-storage))))))) 170 | -------------------------------------------------------------------------------- /Documentation/hash-table-design.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \usepackage{hyperref} 3 | \usepackage{amsmath} 4 | \usepackage{charter} 5 | 6 | \begin{document} 7 | 8 | \title{A design for a fast, mostly-concurrent hash table} 9 | \author{Hayley Patton} 10 | \date{Draft of \today} 11 | \maketitle 12 | 13 | We have wanted to design a concurrent hash table which is designed to 14 | handle epheremal mappings, which are removed quickly. Our decentralise2 15 | implementation creates many such mappings, storing information on where 16 | to retrieve objects in a concurrent hash table, then removing it when 17 | the objects are successfully retrieved. Cliff Click's hash table (the 18 | \texttt{NonBlockingHashMap}\footnote{\url{https://github.com/boundary/high-scale-lib/blob/master/src/main/java/org/cliffc/high_scale_lib/NonBlockingHashMap.java}}), 19 | while usually providing excellent performance with many threads, does 20 | not handle such an application as gracefully, as it ``pins'' keys to 21 | entries in the table, requiring periodic resizes to remove keys. 22 | 23 | After playing around with the idea, we decided we sadly cannot remove 24 | the pinning requirement, as it is required to maintain important 25 | invariants of the table, and Click chose to accept these problems 26 | because the alternative is impossible. So we are left to optimise, 27 | rather than totally replace, the design of the table. 28 | 29 | There is one such optimisation we can make: we can maintain a 30 | \emph{metadata} table, summarising the state of the main entry table 31 | in a compact form that can be searched with SIMD instructions. Matt 32 | Kulukundis presented a serial hash table using parallel probing in a 33 | presentation entitled Designing a Fast, Efficient, Cache-friendly Hash 34 | Table, Step by Step.\footnote{ 35 | \url{https://www.youtube.com/watch?v=ncHmEUmJZf4}} This 36 | optimisation may still improve the performance of applications which 37 | handle epheremal mappings, as discussed later. 38 | 39 | Another optimisation could reduce the overhead of copying, by simplifying 40 | the protocol between updating and copying threads, but we are not yet 41 | confident this optimisation would be safe. 42 | 43 | \section{A quick rundown of Click's hash table} 44 | 45 | Cliff Click's hash table can be simplified down to a linear-probing hash 46 | table, which stores entries consisting of keys, values and ``prime'' 47 | bits. Keys may, of course be ``empty'', and values may also be 48 | ``tombstoned'' instead of having a value, as previously mentioned. The 49 | table implementation also defines some limit on how many entries can be 50 | probed, before it has probed too long, and the table should be resized. 51 | 52 | Every operation begins by hashing the key, and producing an index into 53 | the entry table. Typically, we would use a modulo function to wrap the 54 | hash value into the range of acceptable indices; but we may use a 55 | faster bitwise-and function, should we restrain the table size to 56 | powers of 2. Instead of writing 57 | $$ hash\text{-}value \;\mathrm{mod}\; table\text{-}size $$ 58 | we may instead write 59 | $$ hash\text{-}value \otimes (table\text{-}size - 1) $$ 60 | This hash value is the initial value of the \emph{probe position} for 61 | every operation. 62 | 63 | \subsection{Finding a value} 64 | 65 | To find a value associated with a key (as the Common Lisp 66 | \texttt{gethash} function does, and as the Java method \texttt{get} 67 | does): 68 | \begin{itemize} 69 | \item We load the key at the probe position. 70 | \item If the key is equal to the provided key, we then load the value 71 | and prime bit. 72 | \begin{itemize} 73 | \item If the prime bit is set, wait for the current resize to 74 | finish, and search again. 75 | \item If the value is a tombstone, then there is no value stored for 76 | the key. Return nothing. 77 | \item Otherwise, return the value stored. 78 | \end{itemize} 79 | \item If we exceeded the probe limit, then an entry could not have 80 | been stored any further, so return nothing. 81 | \item Otherwise, increment the probe position (modulo the table size), 82 | and try again. 83 | \end{itemize} 84 | 85 | \subsection{Associating a key with a value} 86 | 87 | To associate a value with a key, either inserting or updating a mapping 88 | (as the Common Lisp \texttt{(setf gethash)} function does, and as the 89 | Java method \texttt{put} does): 90 | \begin{itemize} 91 | \item We first attempt to ``claim'' an entry, ensuring that the entry 92 | has the provided key stored in it. 93 | \begin{itemize} 94 | \item We load the key at the probe position. 95 | \item If the key is empty, we then attempt to CAS (compare-and-swap) 96 | the key with the provided key. 97 | \begin{itemize} 98 | \item If the CAS succeeds, we have successfully claimed this 99 | entry. 100 | \item If it did not succeed, load the key at the probe position 101 | again. 102 | \end{itemize} 103 | \item If the key (which may be the key loaded after a failed CAS 104 | from the previous step) is equal to the provided key, then we have 105 | already claimed that entry. 106 | \item If we exceed the probe limit while probing, then begin 107 | resizing, and start over in the new table. 108 | \end{itemize} 109 | \item We then update the value in the entry. 110 | \begin{itemize} 111 | \item We load the value and probe bit at the probe position. 112 | \item If the probe bit is set, we need to start all over using the 113 | new table. 114 | \item Otherwise, attempt to CAS the loaded value and probe bit with 115 | the provided value and a cleared bit. 116 | \begin{itemize} 117 | \item If the CAS succeeds, we have successfully inserted this entry. 118 | \item If the CAS fails, re-load the value, and try again. 119 | \end{itemize} 120 | \end{itemize} 121 | \end{itemize} 122 | 123 | 124 | The same procedure applies for removing a key, except that we insert a 125 | tombstone value, and if the key is not present, either by finding an 126 | empty entry first, or by exceeding the probe limit, we do not need to 127 | insert anything. 128 | 129 | \subsection{Resizing} 130 | 131 | Resizing the table is relatively easy: 132 | \begin{itemize} 133 | \item Initialize a new table. 134 | \item For each entry in the old table: 135 | \begin{itemize} 136 | \item Atomically and unconditionally set the probe bit, preventing 137 | new writes. 138 | \item Read the key and value. 139 | \item If the key is not empty, and the value is not a tombstone, 140 | then insert the key and value into the new table. 141 | \end{itemize} 142 | \item Atomically and unconditionally replace the table. 143 | \end{itemize} 144 | 145 | \section{How to introduce a metadata table} 146 | 147 | We first note that this table produces a lot of tombstones if the user 148 | removes mappings frequently, and so keys may have longer probe lengths, 149 | compared to a typical hash table. Improving the performance of probing 150 | could thus greatly improve this concurrent hash table. 151 | 152 | Kulukundis's table implements one method to improve the performance of 153 | probing, which is to summarise the state of the table into another table 154 | with smaller elements, which is called a \emph{metadata} table. This 155 | table condenses the normal entries, each two words (128 bits on modern 156 | computers), into eight bits of metadata. Each metadata element is either 157 | ``empty'', or the lowest bits of the hash of the key stored. 158 | 159 | \subsection{The effects of stale metadata} 160 | 161 | We cannot update the metadata table and the entry table simultaneously, 162 | so we must update one first. If we update the entry table first, we can 163 | then verify that all the operations should still work as intended. 164 | 165 | When finding a value associated with a key, we would search for metadata 166 | matching the lowest bits of the hash of the provided key. We may find 167 | that the metadata is still empty, although there is an entry stored in 168 | the entry table. This is unfortunate, but acceptable. We may only 169 | guarantee that an update is visible when the procedure has completed, 170 | and this would not pose a problem. 171 | 172 | When associating a value with a key, we would search for metadata which 173 | either matches the lowest bits of the provided key, or is empty. We find 174 | that the metadata is still empty, although there is an entry stored in 175 | the entry table. This should not pose an issue, as we will then load the 176 | key and proceed based on the loaded key anyway. 177 | 178 | When removing a key, we would again search for metadata which matches 179 | the lowest bits of the provided key. As mentioned previously, if the 180 | metadata is empty, but there is an entry in the entry table, then the 181 | entry is not visible to the find procedure yet, and so it is also 182 | acceptable to find no entry to remove. 183 | 184 | \subsection{What do we gain?} 185 | 186 | An immediate advantage of the metadata table is that most \emph{misses} 187 | will require considerably fewer memory operations, loading fewer bytes 188 | than probing the entry table. Kulukundis encodes an empty key as the 189 | byte \texttt{\#x80}, and a present key as the lowest 7 bits of its 190 | hash. Assuming the hash function is uniformly distributed, then we 191 | can determine there is a $ \frac{1}{2^7} $ (1 in 128) probability that 192 | we have a false positive, where the metadata bytes are equal, but the 193 | keys are not. This is an unlikely, but possible occurence. Provided 194 | that probe lengths are still reasonably short, it is likely that a 195 | find operation which misses a key will not load any values outside 196 | the metadata table. 197 | 198 | Another advantage appears if we consider probing multiple entries at 199 | once, perhaps using the single instruction-multiple data (or 200 | ``vectorised'') instructions provided by the processor used. For 201 | example, the AMD64 instruction set guarantees the existence of the 202 | SSE2 instructions and registers, and many high-performance ARM 203 | processors include the NEON extension. Both provide 128-bit registers, 204 | which can store 16 packed metadata bytes. It should be noted that 205 | retrieving many metadata bytes at once may lead to retrieving stale 206 | metadata more frequently, but we have already described why stale 207 | metadata should not be a problem. On some machines, particuarly those 208 | with the AVX2 and AVX512 extensions, it could be possible to use 209 | larger group sizes (32 and 64, respectively), but Kulukundis found 210 | that this did not provide any significant performance increase with 211 | 256 bits.\footnote{From a comment on the Cppcon presentation: ``Any 212 | reason why you chose 128 bits SSE over 256 bits AVX2, or even 213 | AVX512?'' ``We have done tests with 256bit versions and did not find 214 | a win over 128. We have not tried 512''} 215 | 216 | \section{Maybe a good idea: Using one ``prime'' bit} 217 | 218 | One early idea was to replace the per-entry prime bit with one bit per 219 | table, which would be tested after a successful update operation. When 220 | the bit is set, the updater should wait for a new table to be 221 | inserted, and replicate the update in the new table. This would 222 | greatly reduce the cache invalidation caused by resizing, and would 223 | even allow reads to never wait for resizes, but it makes the table 224 | susceptible to rollbacks. 225 | 226 | We must describe why individual prime bits were used first; Click 227 | sets the prime bit by wrapping values in a \texttt{Prime} object, and 228 | reading the prime bit requires performing a type check. As such, if one 229 | thread sets an entry to be primed, then a writer which attempts to CAS 230 | after observing a cleared bit will still fail. Using one prime bit 231 | weakens the barrier between writing and resizing; a thread can still 232 | perform one update, before it must wait for resizing to finish, and 233 | replicate changes. 234 | 235 | \subsection{What happens if we test after updating?} 236 | 237 | Suppose thread 1 begins resizing, and copies some entry in the table. 238 | Another thread, say thread 2, updates that entry, and then notices 239 | it must replicate the change in the new table. Until the new table is 240 | installed, the updated entry is visible. Immediately after the new table 241 | is installed, the old entry is visible, and the entry is said to have 242 | \emph{rolled back}. The entry will be updated soon after, but the old 243 | value is still visible for some time. 244 | 245 | This, however, could be alleviated by allowing updating threads to 246 | replicate updates to the new table before it is installed, so that the 247 | old value is not visible after installing. The table must be large 248 | enough to hold all newly inserted associations; but this is ensured as 249 | no more insertions can succeed than there were slots in the old table, 250 | and the new table is at least as large as the old. 251 | 252 | The updates would also have to be applied in order; or threads which 253 | updated while resizing could just enqueue keys to be copied again into 254 | the new table, which would ensure the newest value was 255 | inserted. Allowing such updates does not block the progress of 256 | copying, as the number of updates in this state is bounded by the 257 | number of threads which have not yet begun waiting for copying to 258 | finish. However, the copying thread(s) could not determine when there 259 | are no more copies to be made, and no threads are in the process of 260 | writing to the old table, so this solution could not be made to work. 261 | 262 | \subsection{Or maybe just simplify the prime test?} 263 | 264 | As forementioned, Click uses a wrapper object to implement the prime 265 | bit; but the value in the prime object is never used except by the 266 | copier, and that use can be factored out fairly easily (the copier 267 | thread which succeeds at CASing the prime object in can maintain a 268 | reference to the value without pulling it again from the prime 269 | object). We could then replace the prime object with a constant value, 270 | simplifying the type test into a pointer-equality test. 271 | 272 | A quick test suggests pointer tests can be faster: 273 | 274 | \begin{verbatim} 275 | CL-USER> (defstruct a) 276 | CL-USER> (defvar *foo* (make-a)) 277 | CL-USER> (the-cost-of-nothing:bench (a-p *foo*)) 278 | 1.65 nanoseconds 279 | CL-USER> (defvar *bar* '+bar+) 280 | CL-USER> (the-cost-of-nothing:bench (eq *bar* '+bar+)) 281 | 695.06 picoseconds 282 | \end{verbatim} 283 | 284 | We would also reduce allocation and memory accesses outside the table, 285 | which would have more pronounced effects when testing in a real hash 286 | table. 287 | 288 | \end{document} -------------------------------------------------------------------------------- /Code/hash-table.lisp: -------------------------------------------------------------------------------- 1 | (in-package :threadmill) 2 | 3 | (defconstant +empty+ '+empty+) 4 | (defconstant +copied+ '+copied+) 5 | ;; SBCL seems to have difficulties proving that we won't go out of 6 | ;; bounds. Fair enough, we have a funny way of addressing the table, 7 | ;; but we don't want to do bounds checks. 8 | (alexandria:define-constant +optimizations+ 9 | '(optimize (speed 3) (sb-c::insert-array-bounds-checks 0)) 10 | :test #'equal) 11 | 12 | (declaim (inline split-hash cheap-mod)) 13 | (defun split-hash (hash) 14 | "Split a hash into two parts (called H1 and H2). 15 | H1 is used to find a starting probe position in the table, and H2 is used as metadata for fast probing." 16 | (declare ((unsigned-byte 64) hash)) 17 | (floor hash 256)) 18 | (defun cheap-mod (number divisor) 19 | "A cheap and usually incorrect MOD, which works when DIVISOR is a power of two." 20 | (logand number (1- divisor))) 21 | 22 | (defconstant +probe-limit+ 23 | (floor 256 +metadata-entries-per-group+) 24 | "The maximum number of groups to probe.") 25 | 26 | (declaim (inline call-with-positions)) 27 | (defun call-with-positions (storage metadata hash 28 | test mask-generator continuation 29 | &key (after-group (constantly nil))) 30 | (declare (function test mask-generator continuation after-group) 31 | (fixnum hash) 32 | (simple-vector storage) 33 | (metadata-vector metadata) 34 | (optimize (speed 3) (safety 0))) 35 | (multiple-value-bind (h1 h2) 36 | (split-hash hash) 37 | (let* ((probed 0) 38 | (length (length metadata)) 39 | (groups (floor length +metadata-entries-per-group+)) 40 | (probe-limit (min groups +probe-limit+)) 41 | (probe-position (* +metadata-entries-per-group+ 42 | (cheap-mod h1 groups))) 43 | (expected-metadata (mask-h2 h2))) 44 | (declare (vector-index probe-position) 45 | (fixnum probed)) 46 | (loop 47 | (let ((group (metadata-group metadata probe-position))) 48 | (do-matches (entry-offset 49 | (funcall mask-generator group expected-metadata)) 50 | (let* ((entry-position (+ entry-offset probe-position)) 51 | (this-key (key storage entry-position))) 52 | (when (funcall test this-key) 53 | (funcall continuation 54 | this-key entry-position expected-metadata)))) 55 | (funcall after-group group probe-position)) 56 | (incf probed) 57 | (setf probe-position 58 | (cheap-mod (+ probe-position +metadata-entries-per-group+) 59 | length)) 60 | (when (>= probed probe-limit) 61 | (return-from call-with-positions)))))) 62 | 63 | (defmacro dx-labels (definitions &body body) 64 | (let ((names (mapcar #'first definitions))) 65 | `(labels ,definitions 66 | (declare (inline ,@names) 67 | (sb-int:truly-dynamic-extent 68 | ,@(loop for name in names collect `#',name))) 69 | ,@body))) 70 | 71 | (defun gethash (key hash-table &optional (default nil)) 72 | (declare (hash-table hash-table) 73 | #.+optimizations+) 74 | (let* ((storage (hash-table-storage hash-table)) 75 | (metadata (metadata-table storage)) 76 | (hash (funcall (hash-table-hash hash-table) key)) 77 | (test-function (hash-table-test hash-table))) 78 | (dx-labels ((test (this-key) 79 | (or (eq this-key key) 80 | (funcall test-function this-key key))) 81 | (mask (group metadata) 82 | (bytes metadata group)) 83 | (consume (this-key position h2) 84 | (declare (ignore this-key h2)) 85 | (let ((value (value storage position))) 86 | (when (eq value +empty+) 87 | (return-from gethash (values default nil))) 88 | (when (eq value +copied+) 89 | (help-copy hash-table storage) 90 | (return-from gethash (gethash key hash-table))) 91 | (return-from gethash 92 | (values value t)))) 93 | (test-empty (group base-position) 94 | ;; We only fill groups from start to end, so we can 95 | ;; just test the last entry to figure if 96 | ;; any were empty. 97 | (declare (ignore group)) 98 | (let ((last-in-group 99 | (+ base-position -1 +metadata-entries-per-group+))) 100 | (when (= +empty-metadata+ 101 | (metadata metadata last-in-group)) 102 | (return-from gethash (values default nil)))))) 103 | (call-with-positions storage metadata 104 | hash #'test #'mask #'consume 105 | :after-group #'test-empty) 106 | (values default nil)))) 107 | 108 | (declaim (inline claim-key)) 109 | (defun claim-key (storage metadata key this-key position test h2) 110 | "Attempt to claim a position in the table, returning values: 111 | NIL, NIL if another thread claimed it for another key first 112 | T, NIL if this position already was claimed with this key 113 | T, T if we successfully claimed this position" 114 | (declare (optimize (speed 3) (safety 0)) 115 | (vector-index position) 116 | (metadata-vector metadata) 117 | (simple-vector storage) 118 | ((unsigned-byte 8) h2) 119 | (function test)) 120 | (loop for value = (metadata metadata position) 121 | do (when (= value h2) 122 | (return)) 123 | (when (/= value +empty-metadata+) 124 | (return-from claim-key (values nil nil))) 125 | (when (cas-metadata metadata position 126 | +empty-metadata+ h2) 127 | (return))) 128 | (loop 129 | (unless (eq this-key +empty+) 130 | (when (or (eq this-key key) 131 | (funcall test this-key key)) 132 | (return-from claim-key (values t nil))) 133 | (return-from claim-key (values nil nil))) 134 | (when (atomics:cas (key storage position) +empty+ key) 135 | (increment-counter (table-slot-count storage)) 136 | (return-from claim-key (values t t))) 137 | (setf this-key (key storage position)))) 138 | 139 | (defun (setf gethash) (new-value key hash-table &optional default) 140 | (declare (ignore default) 141 | #.+optimizations+) 142 | (let* ((storage (hash-table-storage hash-table)) 143 | (metadata (metadata-table storage)) 144 | (hash (funcall (hash-table-hash hash-table) key)) 145 | (test-function (hash-table-test hash-table))) 146 | (dx-labels ((lose-and-resize () 147 | (help-copy hash-table storage) 148 | (return-from gethash 149 | (setf (gethash key hash-table) new-value))) 150 | (test (this-key) 151 | (or (eq this-key key) 152 | (eq this-key +empty+) 153 | (funcall test-function this-key key))) 154 | (mask (group metadata) 155 | (match-union (writable group) 156 | (bytes metadata group))) 157 | (consume (this-key position h2) 158 | (multiple-value-bind (ours? new?) 159 | (claim-key storage metadata 160 | key this-key position 161 | test-function h2) 162 | (unless ours? 163 | ;; Another thread got this position. 164 | (return-from consume)) 165 | (loop for old-value = (value storage position) 166 | do (when (eq old-value +copied+) 167 | (lose-and-resize)) 168 | (when (eq old-value new-value) 169 | ;; Sure, that'll do. 170 | (return)) 171 | (when (atomics:cas (value storage position) 172 | old-value new-value) 173 | (when (eq old-value +empty+) 174 | (increment-counter (table-count storage))) 175 | (return))) 176 | (return-from gethash new-value)))) 177 | (call-with-positions storage metadata 178 | hash #'test #'mask #'consume) 179 | (lose-and-resize)))) 180 | 181 | (defun modhash (key hash-table modifier) 182 | (declare (function modifier) 183 | #.+optimizations+) 184 | (let* ((storage (hash-table-storage hash-table)) 185 | (metadata (metadata-table storage)) 186 | (hash (funcall (hash-table-hash hash-table) key)) 187 | (test-function (hash-table-test hash-table))) 188 | (dx-labels ((test (this-key) 189 | (or (eq this-key key) 190 | (eq this-key +empty+) 191 | (funcall test-function this-key key))) 192 | (mask (group metadata) 193 | (match-union (writable group) 194 | (bytes metadata group))) 195 | (consume (this-key position h2) 196 | ;;; This wastes a slot if we transition from empty -> empty. 197 | ;;; Otherwise it's less of a hassle to implement just this one 198 | ;;; function rather than PUT-IF-MATCH, PUT-IF-ABSENT, etc. 199 | (multiple-value-bind (ours? new?) 200 | (claim-key storage metadata 201 | key this-key position 202 | test-function h2) 203 | (unless ours? 204 | ;; Another thread got this position. 205 | (return-from consume)) 206 | (loop 207 | (let ((value (value storage position))) 208 | (when (eq value +copied+) 209 | (help-copy hash-table storage) 210 | (return-from modhash 211 | (modhash key hash-table modifier))) 212 | (multiple-value-bind (new-value new-present?) 213 | (funcall modifier 214 | value (not (eq value +empty+))) 215 | (cond 216 | (new-present? 217 | (when (eq value new-value) 218 | ;; Nothing to do. 219 | (return-from modhash)) 220 | (when (atomics:cas (value storage position) 221 | value new-value) 222 | ;; We only increment if we just brought this slot 223 | ;; to life. 224 | (when (eq value +empty+) 225 | (increment-counter (table-count storage))) 226 | (return-from modhash))) 227 | (t 228 | (when (eq value +empty+) 229 | ;; Nothing to do. 230 | (return-from modhash)) 231 | (when (atomics:cas (value storage position) 232 | value +empty+) 233 | (unless (eq value +empty+) 234 | (decrement-counter (table-count storage))) 235 | (return-from modhash)))))))))) 236 | (call-with-positions storage metadata 237 | hash #'test #'mask #'consume)) 238 | (help-copy hash-table storage) 239 | (return-from modhash 240 | (modhash key hash-table modifier)))) 241 | 242 | (defun remhash (key hash-table) 243 | (declare #.+optimizations+) 244 | (let* ((storage (hash-table-storage hash-table)) 245 | (metadata (metadata-table storage)) 246 | (hash (funcall (hash-table-hash hash-table) key)) 247 | (test-function (hash-table-test hash-table))) 248 | (dx-labels ((test (this-key) 249 | (or (eq this-key key) 250 | (funcall test-function this-key key))) 251 | (mask (group metadata) 252 | (bytes metadata group)) 253 | (consume (this-key position h2) 254 | (declare (ignore this-key h2)) 255 | (loop for last-value = (value storage position) 256 | do (when (eq last-value +empty+) 257 | ;; We didn't succeed if someone else removed the 258 | ;; entry first. 259 | (return-from remhash nil)) 260 | (when (eq last-value +copied+) 261 | (help-copy hash-table storage) 262 | (return-from remhash 263 | (remhash key hash-table))) 264 | (when (atomics:cas (value storage position) 265 | last-value +empty+) 266 | (return))) 267 | (decrement-counter (table-count storage)) 268 | (return-from remhash t)) 269 | (test-empty (group base-position) 270 | ;; We only fill groups from start to end, so we can 271 | ;; just test the last metadata byte to figure if 272 | ;; any were empty. 273 | (declare (ignore group)) 274 | (let ((last-in-group 275 | (+ base-position -1 +metadata-entries-per-group+))) 276 | (when (= +empty-metadata+ (metadata metadata last-in-group)) 277 | (return-from remhash nil))))) 278 | (call-with-positions storage metadata hash 279 | #'test #'mask #'consume 280 | :after-group #'test-empty))) 281 | nil) 282 | 283 | (defun maphash (function hash-table) 284 | (declare (function function)) 285 | (loop for storage = (hash-table-storage hash-table) 286 | until (null (new-vector storage)) 287 | do (help-copy hash-table storage)) 288 | (tagbody try-again 289 | ;; Try to copy out the hash table contents to an alist. 290 | (let* ((storage (hash-table-storage hash-table)) 291 | (length (length (metadata-table storage))) 292 | (alist '())) 293 | (dotimes (n length) 294 | (let ((k (key storage n)) 295 | (v (value storage n))) 296 | (unless (or (eq k +empty+) 297 | (eq v +empty+)) 298 | (when (eq v +copied+) 299 | (help-copy hash-table storage) 300 | (go try-again)) 301 | (push (cons k v) alist)))) 302 | (mapc (lambda (pair) 303 | (funcall function 304 | (car pair) 305 | (cdr pair))) 306 | alist))) 307 | hash-table) 308 | 309 | (defun hash-table-count (hash-table) 310 | (counter-value (table-count (hash-table-storage hash-table)))) 311 | (defun hash-table-size (hash-table) 312 | (length (metadata-table (hash-table-storage hash-table)))) 313 | --------------------------------------------------------------------------------