├── .gitignore
├── LICENSE.txt
├── README.md
├── User-Guide.html
├── User-Guide.org
├── User-Guide.pdf
├── User-Guide.tex
├── make-hash-tests.asd
├── make-hash.asd
├── make-hash.lisp
├── package.lisp
└── tests.lisp
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore emacs backup files (at least, the ones I generate)
2 | .*~
3 | # Ignore scratch file
4 | .bonz
5 | .scratch
6 | # Ignore todo list
7 | TODO
8 | /auto/User-Guide.el
9 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2012, Christopher R. Genovese
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | * Redistributions of source code must retain the above copyright
8 | notice, this list of conditions and the following disclaimer.
9 |
10 | * Redistributions in binary form must reproduce the above copyright
11 | notice, this list of conditions and the following disclaimer in the
12 | documentation and/or other materials provided with the distribution.
13 |
14 | * The name of Christopher R. Genovese may not be used to endorse or
15 | promote products derived from this software without specific prior
16 | written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 | POSSIBILITY OF SUCH DAMAGE.
29 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Motivation and Overview
2 |
3 | Two common (and arguably apt) criticisms of hash tables in Common Lisp are
4 | that hash table initialization is bulky and awkward and that the
5 | representation of hash tables is not as integrated into the language as are
6 | the representations of lists and (to a degree) vectors.
7 |
8 | The `make-hash` package addresses these issues by supplying three
9 | useful, related mechanisms:
10 |
11 | 1. A hash table constructor `make-hash` with initialization that is
12 | concise, flexible, and extensible.
13 |
14 | See `make-hash`, `initialize-hash`, `hash-initializer-default-format`,
15 | and `make-hash-transformer` below.
16 |
17 | 2. Methods for defining hash-table factories with a customized
18 | set of initialization options, either as a globally or locally
19 | defined function.
20 |
21 | See `define-hash-factory`, `make-hash-factory`, and
22 | `*hash-factory-defaults*` below.
23 |
24 | 3. Readtable installers for defining a portable reader interface to
25 | the hash-table factories, either as (raw) delimited or dispatched
26 | reader macros.
27 |
28 | See `install-hash-reader` below.
29 |
30 | In particular, the function `make-hash` is a wrapper around the standard
31 | CL function `make-hash-table` with some additional keyword arguments
32 | that allow one to specify initial contents and format.
33 |
34 | As an illustration, consider the example on page 440 of the venerable
35 | _Common Lisp the Language, Second Edition_ by Guy Steele (CLtL2).
36 |
37 | (setq turtles (make-hash-table :size 9 :test 'eq))
38 | (setf (gethash 'howard-kaylan turtles) '(musician lead-singer))
39 | (setf (gethash 'john-barbata turtles) '(musician drummer))
40 | (setf (gethash 'leonardo turtles) '(ninja leader blue))
41 | (setf (gethash 'donatello turtles) '(ninja machines purple))
42 | (setf (gethash 'al-nichol turtles) '(musician guitarist))
43 | (setf (gethash 'mark-volman turtles) '(musician great-hair))
44 | (setf (gethash 'raphael turtles) '(ninja cool rude red))
45 | (setf (gethash 'michaelangelo turtles) '(ninja party-dude orange))
46 | (setf (gethash 'jim-pons turtles) '(musician bassist))
47 |
48 | (do-something turtles)
49 |
50 | This is not horrible by any means, but the repeated `setf`'s force an
51 | assignment-oriented block of statements and visually obscure the
52 | relationships in the table. And in practice, even more syntactic
53 | infrastructure is usually required (e.g., another level of let for a
54 | local definition, a loop for a larger hash table). While it is certainly
55 | a matter of taste which form one prefers, the goal of `make-hash` is to
56 | allow a more convenient, functional-style hash table construction that
57 | is consistent with constructors for lists, vectors, and arrays. Compare
58 | the above with
59 |
60 | (make-hash :size 9 :test 'eq
61 | :initial-contents '(howard-kaylan (musician lead-singer)
62 | jon-barbata (musician drummer)
63 | leonardo (ninja leader blue)
64 | donatello (ninja machines purple)
65 | al-nichol (musician guitarist)
66 | mark-volman (musician great-hair)
67 | raphael (ninja cool rude red)
68 | michaelangelo (ninja party-dude orange)
69 | jim-pons (musician bassist)))
70 |
71 | or, for example, with
72 |
73 | (make-hash :size 9 :test 'eq :init-format :lists
74 | :initial-contents '((howard-kaylan (musician lead-singer))
75 | (jon-barbata (musician drummer))
76 | (leonardo (ninja leader blue))
77 | (donatello (ninja machines purple))
78 | (al-nichol (musician guitarist))
79 | (mark-volman (musician great-hair))
80 | (raphael (ninja cool rude red))
81 | (michaelangelo (ninja party-dude orange))
82 | (jim-pons (musician bassist))))
83 |
84 | or with
85 |
86 | (make-hash :size 9 :test 'eq :init-format :keychain
87 | :initial-contents
88 | '(howard-kaylan jon-barbata leonardo
89 | donatello al-nichol mark-volman
90 | raphael michaelangelo jim-pons)
91 | :init-data
92 | '((musician lead-singer) (musician drummer) (ninja leader blue)
93 | (ninja machines purple) (musician guitarist) (musician great-hair)
94 | (ninja cool rude red) (ninja party-dude orange) (musician bassist)))
95 |
96 | or even with
97 |
98 | #{ howard-kaylan (musician lead-singer)
99 | jon-barbata (musician drummer)
100 | leonardo (ninja leader blue)
101 | donatello (ninja machines purple)
102 | al-nichol (musician guitarist)
103 | mark-volman (musician great-hair)
104 | raphael (ninja cool rude red)
105 | michaelangelo (ninja party-dude orange)
106 | jim-pons (musician bassist) }
107 |
108 | There are many other formats for the initial contents that would be
109 | convenient to use in other contexts, and make-hash supports a wide
110 | variety of them. Moreover, custom formats can be supported easily by
111 | defining a method for a single generic function, and default formats
112 | can be adjusted similarly. See below for more detail and examples.
113 |
114 | # Installation
115 |
116 | The simplest approach is to use quicklisp (www.quicklisp.org).
117 | With quicklisp installed, simply call `(ql:quickload "make-hash")`
118 | and quicklisp will do the rest.
119 |
120 | Otherwise, obtain the code from http://github.com/genovese/make-hash,
121 | cloning the repository or downloading and unpacking the tar/zip archive.
122 | Either load it directly or put the `make-hash` subdirectory
123 | where ASDF (www.cliki.net/asdf) can find the `.asd` file.
124 | With ASDF, call `(asdf:load-system "make-hash")` to load the
125 | package.
126 |
127 | For both quicklisp and ASDF, you may want to call
128 | `(use-package :make-hash)` to import the main functions. If you want to
129 | run the tests, which are in the package `make-hash-tests`, do the
130 | following.
131 |
132 | + In quicklisp:
133 |
134 | (ql:quickload "make-hash")
135 | (ql:quickload "make-hash-tests")
136 | (asdf:test-system "make-hash-tests")
137 |
138 | + With ASDF alone:
139 |
140 | (asdf:load-system "make-hash")
141 | (asdf:load-system "make-hash-tests")
142 | (asdf:test-system "make-hash-tests")
143 |
144 |
145 | # Examples
146 |
147 | The use of `make-hash` is pretty straightforward, and I think it will be
148 | clearer to see some examples before looking at the detailed specifications.
149 | It might help to scan these examples quickly on first read through and
150 | then come back after reading the specification in the ensuing sections.
151 | Here, I will assume that the predefined formats and defaults are in
152 | effect, although these can be overridden if desired.
153 |
154 | 1. No Initialization
155 |
156 | Use exactly like `make-hash-table`, with all standard or
157 | implementation-dependent keyword arguments.
158 |
159 | (make-hash)
160 | (make-hash :test #'equal)
161 | (make-hash :size 128 :rehash-size 1.75)
162 |
163 | 2. (Shallow) Copying an existing hash table
164 |
165 | (make-hash :initial-contents eql-hash-table)
166 | (make-hash :test (hash-table-test other-hash-table)
167 | :initial-contents other-hash-table)
168 |
169 | 3. Initializing from simple sequences containing keys and values
170 |
171 | (make-hash :initial-contents '(a 1 b 2 c 3 d 1 e 2 f 3 g 4))
172 | (make-hash :init-format :flat
173 | :initial-contents '(a 1 b 2 c 3 d 1 e 2 f 3 g 4))
174 | (make-hash :init-format :pairs
175 | :initial-contents '((a . 1) (b . 2) (c . 3)
176 | (d . 1) (e . 2) (f . 3) (g . 4)))
177 | (make-hash :init-format :lists
178 | :initial-contents '((a 1) (b 2) (c 3)
179 | (d 1) (e 2) (f 3) (g 4)))
180 | (make-hash :init-format :vectors
181 | :initial-contents '(#(a 1) #(b 2) #(c 3)
182 | #(d 1) #(e 2) #(f 3) #(g 4)))
183 | (make-hash :init-format :seqs
184 | :initial-contents '((a 1) #(b 2) (c 3)
185 | #(d 1) (e 2) #(f 3) #(g 4)))
186 |
187 | Here `:flat` is the default format, and the result in all these
188 | cases maps `a` `=>` 1, `b` `=>` 2, `c` `=>` 3, `d` `=>` 1, `e` `=>` 2,
189 | `f` `=>` 3, and `g` `=>` 4.
190 |
191 | 4. Initializing from separate sequences of keys and values
192 |
193 | (make-hash :init-format :keychain
194 | :initial-contents '(a b c d e f g)
195 | :init-data '(1 2 3 1 2 3 4))
196 | (make-hash :init-format :keychain
197 | :initial-contents '(a b c d e f g)
198 | :init-data #(1 2 3 1 2 3 4))
199 |
200 | The resulting tables are the same as in the last example.
201 |
202 | 5. Creating a hash table of keys and counts
203 |
204 | Given a sequence of objects, create a hash table with the unique
205 | objects as keys and the frequency counts in the sequence as values.
206 |
207 | (make-hash :init-format :keybag
208 | :initial-contents '(a b d d e c b a a e c c d a a c c e c c))
209 | (make-hash :init-format :keybag
210 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c))
211 |
212 | The results map `a` `=>` 5, `b` `=>` 2, `c` `=>` 7, `d` `=>` 3, and `e` `=>` 3.
213 |
214 | 6. Building a hash from selected keys in another associative map or database
215 |
216 | Here, the `:initial-contents` is a sequence of keys, and the corresponding
217 | values are the values for those keys in the map given as `:init-data`,
218 | or the `:init-default` if none exists.
219 |
220 | Let `turtles` be the hash table above from CLtL2. Suppose
221 | `turtles-alist` is an associative list with the same data and that
222 | `turtles-database-reader` is a function that reads an associated record
223 | from a database. We can extract a "sub-hash" whose keys are those
224 | corresponding to mutant, ninja turtles as follows.
225 |
226 | (make-hash :init-format :keys
227 | :initial-contents '(leonardo donatello raphael michaelangelo)
228 | :init-data turtles)
229 | (make-hash :init-format :keys
230 | :initial-contents '(leonardo donatello raphael michaelangelo)
231 | :init-data turtles-alist)
232 | (make-hash :init-format :keys
233 | :initial-contents '(leonardo donatello raphael michaelangelo)
234 | :init-data turtles-database-reader)
235 |
236 | 7. Initializing from repeated calls to a function
237 |
238 | The following initializes the hash table from a _simple_ CSV
239 | (comma-separated value) file, with no commas within fields, using the
240 | first field as the key and the list of remaining fields as the value.
241 | The function `parse-csv-line` acts on one line at a time and
242 | either initializes or skips using the return value convention described
243 | below.
244 |
245 | (use-package :cl-ppcre)
246 |
247 | (defun parse-csv-line (stream)
248 | (let ((line (read-line stream nil)))
249 | (cond
250 | ((null line)
251 | (values nil nil nil))
252 | ((scan "^\\s*$" line)
253 | (values t t t))
254 | (t
255 | (let ((fields
256 | (split "\\s*,\\s*" line :limit most-positive-fixnum)))
257 | (values (first fields) (rest fields) nil))))))
258 |
259 | (with-open-file (s "data.csv" :direction :input :if-does-not-exist nil)
260 | (make-hash :test #'equal :init-format :function
261 | :initial-contents #'parse-csv-line :init-data (list s)))
262 |
263 | The following initializes the hash table from the key-value pairs in an
264 | INI file. The function `parse-ini-line` acts on one line at a time and
265 | either initializes or skips using the return value convention described
266 | below.
267 |
268 | (use-package :cl-ppcre)
269 |
270 | (let ((ini-line-re
271 | (create-scanner
272 | "^\\s*(?:|;.*|\\[([^]]+)\\]|(\\w+)\\s*=\\s*(.*?))?\\s*$"))
273 | (current-section-name ""))
274 | (defun parse-ini-line (stream)
275 | (let ((line (read-line stream nil)))
276 | (unless line
277 | (setf current-section-name "")
278 | (return-from parse-ini-line (values nil nil nil)))
279 | (multiple-value-bind (beg end reg-begs reg-ends)
280 | (scan ini-line-re line)
281 | (declare (ignorable end))
282 | (unless beg
283 | (error "Improperly formatted INI line: ~A" line))
284 | (if (and (> (length reg-begs) 2) (aref reg-begs 1))
285 | (values
286 | (concatenate 'string
287 | current-section-name "/"
288 | (subseq line (aref reg-begs 1) (aref reg-ends 1)))
289 | (subseq line (aref reg-begs 2) (aref reg-ends 2))
290 | nil)
291 | (progn
292 | (when (and (> (length reg-begs) 0) (aref reg-begs 0))
293 | (setf current-section-name
294 | (subseq line (aref reg-begs 0) (aref reg-ends 0))))
295 | (values t t t)))))))
296 |
297 | (with-open-file (s "config.ini" :direction :input :if-does-not-exist nil)
298 | (make-hash :test #'equal :init-format :function
299 | :initial-contents #'parse-ini-line :init-data (list s)))
300 |
301 | 8. Transforming a hash built from a sequence of keys and values
302 |
303 | Passing a function as `:init-data` can be used to
304 | transform the initial contents as the hash is being initialized.
305 |
306 | (make-hash :init-format :flat
307 | :initial-contents '(a 1 b 2 c 3 d 1 e 2 f 3 g 4)
308 | :init-data (lambda (k v) (values k (* v v) nil)))
309 | (make-hash :init-format :pairs
310 | :initial-contents '((a . 1) (b . 2) (c . 3)
311 | (d . 1) (e . 2) (f . 3) (g . 4))
312 | :init-data (lambda (k v)
313 | (values (intern (symbol-name k) :keyword)
314 | (* v v))))
315 | (let ((scratch (make-hash)))
316 | (make-hash :init-format :lists
317 | :initial-contents '((a 1) (b 2) (c 3)
318 | (d 1) (e 2) (f 3) (g 4))
319 | :init-data (lambda (k v)
320 | (values v
321 | (setf (gethash v scratch)
322 | (cons k (gethash v scratch nil)))
323 | nil))))
324 |
325 | The first is a hash that maps `a` and `d` to 1, `b` and `e` to 4, `c` and `f` to 9,
326 | and `g` to 16. The second is the same except that the keys are the
327 | keywords with the same symbol-name (e.g., :a, :b). The third
328 | reverses the given alist, accumulating keys with repeated values in a list:
329 | 1 `=>` `(d a)`, 2 `=>` `(e b)`, 3 `=>` `(f c)`, and 4 `=>` `(g)`.
330 |
331 | 9. Transforming an existing hash table or alist
332 |
333 | (defun lastcar (list)
334 | (car (last list)))
335 |
336 | (defvar *pet-hash*
337 | (make-hash :initial-contents
338 | '(dog (mammal pet loyal 3) cat (mammal pet independent 1)
339 | eagle 0 cobra 0
340 | goldfish (fish pet flushed 1) hamster (mammal pet injured sad 2)
341 | corn-snake (reptile pet dog-like 1) crab (crustacean quiet 4)
342 | grasshopper (insect methusala 1) black-widow 0)))
343 |
344 | (make-hash :initial-contents *pet-hash*
345 | :init-data (make-hash-transformer :value #'lastcar #'atom))
346 |
347 | The result maps `dog` `=>` 3, `cat` `=>` 1, `goldfish` `=>` 1, `hamster` `=>` 2,
348 | `corn-snake` `=>` 1, `grasshopper` `=>` 1, and `crab` `=>` 4. If `*pet-hash*`
349 | had been an alist instead of a hash table, the call to `make-hash`
350 | would be unchanged. Note that lastcar is not called on an entry unless
351 | atom returns `nil`.
352 |
353 | 10. Transforming a keybag
354 |
355 | Create a hash recording counts for each key (see example 5) but filter
356 | on some constraint. A function for `:init-data` takes the key and count
357 | and sets the values according to the return convention described below.
358 | With a vector for `:init-data`, the count is an index into the vector
359 | for the new value. With a hash table, the count is used as key to
360 | lookup the new value.
361 |
362 | (make-hash :init-format :keybag
363 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c)
364 | :init-data (lambda (key count) (values key count (<= count 3))))
365 | (make-hash :init-format :keybag
366 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c)
367 | :init-data #(zero one two three four)
368 | :init-default 'more-than-four)
369 | (make-hash :init-format :keybag
370 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c)
371 | :init-data (make-hash :initial-contents '(3 "You're out!"))
372 | :init-default "Whatever!")
373 |
374 | The first gives a hash `a => 5`, `c => 7`; the entries for `b`, `d`, and `e` are skipped because their counts are at most 3.
375 | The second gives a hash `a => more-than-four`, `b => two`, `c => more-than-four`,
376 | `d => three`, `e => three`. And the third gives a hash with `a`, `b`, and `c`
377 | mapping to the string "Whatever!" and d and e mapping to "You're out!".
378 |
379 | 11. Creating Hash Factories
380 |
381 | Hash factories are shortcuts that encapsulate a specified set of hash creation options,
382 | primarily for use with literal hash creation with sequence-style init formats.
383 | The factories are functions that package their arguments (&rest style) and
384 | use the resulting list as the `:initial-contents` argument to `make-hash`
385 | with the given options. The difference between `define-hash-factory` and
386 | `make-hash-factory` is that the former defines a toplevel function, whereas
387 | the latter returns an anonymous function.
388 |
389 | (define-hash-factory qhash
390 | :init-format :flat
391 | :test #'eq :size 128
392 | :documentation "Construct moderate size hash tables for symbols.")
393 |
394 | (qhash 'a 1 'b 2 'c 3 'd 4 'x 100 'y -100 'z 0)
395 | (apply #'qhash '(a 1 b 2 c 3 d 4 x 100 y -100 z 0))
396 |
397 | (define-hash-factory ahash
398 | :init-format :pairs
399 | :init-data (lambda (k v)
400 | (if (stringp k) (intern (string-upcase k)) k))
401 | :documentation "Alist->hash, converting string keys to symbols.")
402 |
403 | (ahash ("foo" 10) ("bar" 20) ("zap" 30))
404 | (apply #'ahash '((a . 1) (b . 2) (c . 3) ("d" . 4) ("foo" . "bar")))
405 |
406 | (let ((h (make-hash-factory :init-format :keys :init-data *big-hash*)))
407 | (apply h key1 key2 key3 key4)) ; quick subhash of *big-hash*
408 |
409 | 12. Portable Reader Factories
410 |
411 | It may be desirable to use reader macros to stand-in for particular
412 | hash table constructors. These are hash factories that are installed in
413 | a readtable using `install-hash-reader` at toplevel. Both dispatched
414 | and raw delimited forms are supported, and the installer can accept a
415 | list of options or an existing factory.
416 |
417 | Here are three separate uses yielding `:a``=>`1, `:b``=>`2, `:c``=>`3, `:d``=>`4.
418 |
419 | (install-hash-reader ()) ; default settings and options
420 | #{:a 1 :b 2 :c 3 :d 4}
421 |
422 |
423 | (install-hash-reader '(:init-format :pairs)
424 | :use-dispatch t :open-char #\[ :close-char #\])
425 | #['(:a . 1) '(:b . 2) '(:c . 3) '(:d . 4)]
426 |
427 |
428 | (install-hash-reader '(:init-format :lists)
429 | :use-dispatch nil :open-char #\{ :close-char #\})
430 | {'(:a 1) '(:b 2) '(:c 3) '(:d 4)}
431 |
432 |
433 | This accepts a readtable to modify (current readtable by default) and works
434 | well with the :named-readtables package.
435 |
436 | # Creating Hash Tables
437 |
438 | The function `make-hash` is an interface to the CL standard function
439 | `make-hash-table` that also allows flexible initialization. It accepts all
440 | the standard and implementation-dependent keyword arguments that the
441 | standard `make-hash-table` does but also accepts a few additional keyword
442 | arguments that can specify the initial contents of the table (analogously
443 | to the CL standard function `make-array`). The operation of the make-hash
444 | initializer is designed to handle all the common cases easily while
445 | enabling powerful abstractions where needed. See the Examples section
446 | above for examples.
447 |
448 | The new keyword arguments are:
449 |
450 | + `:initial-contents` _object_
451 |
452 | If the supplied object is non-nil, the object is used to initialize
453 | the created hash table in accordance with the `:init-format` argument.
454 | For some formats, the `:init-data` argument may also be needed to
455 | supply supplementary information for the initializer. The built-in
456 | formats support the cases where object is either a hash table or
457 | sequence from which the keys and values can be extracted. See the
458 | subsection below for a detailed description of the possibilities.
459 |
460 | + `:init-format` _keyword_
461 |
462 | A keyword specifying the structure of the initialization contents
463 | and auxiliary data given by the `:initial-contents` and `:init-data`
464 | arguments. Built-in support is provided for :hash, :flat, :pairs,
465 | :lists, :vectors, :seqs, :keys, :keychain, :keybag, and :function.
466 | These are described in detail in the subsection below.
467 |
468 | When an initializer format is not supplied, it is computed by
469 | calling the generic function `hash-initializer-default-format` on
470 | the given `:initial-contents` object. A method for this function
471 | should be defined whenever the function `initialize-hash` is
472 | extended to handle a new class of `:initial-contents` objects. Methods
473 | can be overridden to change the default used in existing cases.
474 |
475 | + `:init-data` _object_
476 |
477 | Auxiliary data used for initialization with some formats. Its
478 | structure and meaning depend on the value of `:init-format`, as
479 | described in the subsection below.
480 |
481 | + `:init-default` _value_
482 |
483 | Default value to use in indirect initialization when the value for
484 | the given key cannot be determined from the `:initial-contents` and
485 | `:init-data` for the particular `:init-format` supplied.
486 |
487 | If no :initial-contents argument is supplied, the hash table is not
488 | initialized, and `make-hash` behaves exactly like the standard
489 | function `make-hash-table`. For many formats, initialization only
490 | requires an :initial-contents argument. See _Examples_ for more.
491 |
492 | ## Functions as `:init-data` (or `:initial-contents`)
493 |
494 | For most of the pre-defined formats, a function can be
495 | passed as the `:init-data`, and with the `:function` format,
496 | a function can be passed as the `:initial-contents` as well.
497 | These functions are expected to return three values
498 | _KEY VALUE [BAD-VALUE]_
499 | that are used (under some conditions) to create a new key-value
500 | entry in the hash table being initialized. Here, BAD-VALUE
501 | is a **ternary** value: nil (or missing) means to use KEY and VALUE
502 | as is; t means to skip creating this entry entirely, and any
503 | other non-nil value means to associate KEY to the specified
504 | `:init-default` value _instead_ of VALUE.
505 |
506 | In the description of the predefined formats below, such function
507 | arguments are used in one of three ways:
508 |
509 | 1. Entry transformation: _INIT-KEY INIT-VALUE -> KEY VALUE [BAD-VALUE]_
510 |
511 | The key and value specified by `:initial-contents` (_INIT-KEY INIT-VALUE_)
512 | are passed to the function and the return values used as described above.
513 | (Formats `:hash`, `:flat`, `:pairs`, `:lists`, `:vectors`, `:seqs`.)
514 |
515 | 2. Key transformation: _INIT-KEY -> KEY VALUE [BAD-VALUE]_
516 |
517 | With format `:keys`, the key specified by `:initial-contents` is
518 | passed to the function and the return values used as described above.
519 |
520 | 3. Entry generation: _&rest ARGS -> KEY VALUE [BAD-VALUE]_
521 |
522 | With format `:function`, the `:initial-contents` argument is a function.
523 | This function is applied repeatedly to _ARGS_ and the return values
524 | used as described above. However, in this case, the first time
525 | that KEY is nil, initialization stops.
526 |
527 | See also the documentation for the function `make-hash-transformer`
528 | which creates a function suitable for use in this way from a simpler
529 | function on keys or entries.
530 |
531 | ## Predefined Initialization Formats
532 |
533 | The `:init-format` argument is a keyword that determines how the
534 | keyword arguments `:initial-contents` and `:init-data` are interpreted.
535 | If `:init-format` is not supplied, the default format is determined
536 | by the type of `:initial-contents`.
537 |
538 | There are four basic cases in the pre-defined initialization support:
539 |
540 | 1. Initializing from an existing hash table
541 |
542 | When `:init-format` is `:hash` or by default if `:initial-contents` is
543 | a hash-table, the new hash table is initialized by a shallow copy of
544 | the initial contents table, with shared structure in keys and values.
545 | If `:init-data` is a function, that function is used for entry
546 | transformation of the hash table given in `:initial-contents`.
547 |
548 | 2. Initializing from a sequence (or sequences) specifying key-value pairs.
549 |
550 | When `:init-format` is `:flat`, `:pairs`, `:lists`, `:vectors`, or
551 | `:seqs`, the `:initial-contents` should be a sequence that specifies a
552 | collection of key-value pairs. The only difference among these formats
553 | is the expected structure of the sequence's elements. For `:flat`, the
554 | keys and values alternate; for `:pairs`, it is a sequence of cons
555 | pairs (e.g., an alist); for `:lists`, `:vectors`, and `:seqs`, it is a
556 | sequence of lists, vectors, or arbitrary sequences respectively of
557 | which the first two elements of each give the corresponding key and
558 | value. In these cases, if `:init-data` is nil or missing, the key-value
559 | pairs are used as is; if `:init-data` is a function, the function is
560 | used for entry transformation, as described above, for each pair.
561 |
562 | When `:init-format` is `:keychain`, the `:initial-contents` should
563 | be a sequence of keys and `:init-data` should be a sequence of
564 | corresponding values _in the same order_. The table is initialized
565 | with the resultant key-value pairs.
566 |
567 | When `:init-format` is `:keys`, the `:initial-contents` should be a
568 | sequence of keys. The corresponding value is obtained by looking
569 | up the key in the hash table, alist, or function (via key transformation,
570 | see above) that is passed as `:init-data`, which in this case
571 | is required.
572 |
573 | 3. Initializing from a bag/multiset of keys.
574 |
575 | When `:init-format` is `:keybag`, the `:initial-contents` should be a
576 | sequence representing a _multiset_ (a collection with possibly
577 | repeated elements) of keys. The hash table is initialized to map the
578 | unique elements from that multiset (as keys) to the number of times
579 | that element appears in the multiset (as values).
580 |
581 | In this case, if `:init-data` is a vector, hash table, or function,
582 | the count is used to find the corresponding value by indexing into the
583 | vector, looking up the value associated with count in the data
584 | hash-table, or calling the function with the key and count. When a
585 | value cannot be found, the default is used instead, subject to the
586 | value of BAD-VALUE in the function case.
587 |
588 | 4. Initializing from a function.
589 |
590 | When `:init-format` is `:function` or `:initial-contents` is a function,
591 | the hash table is initialized by using the function for entry generation
592 | as described above.
593 |
594 | See also the documentation for `make-hash` for a relatively succinct
595 | table describing these options. Keep in mind that the interpretation of
596 | the formats is specified by methods of the `initialize-hash` generic
597 | function, and the default formats for different `:initial-contents` types
598 | by methods of the `hash-initializer-default-format`.
599 |
600 | # Defining Custom Initialization Formats
601 |
602 | Initialization by `make-hash` is controlled by the generic function
603 | `initialize-hash`. Defining new methods for this function, or overriding
604 | existing methods, makes it easy to extend the hash table initialization,
605 | to add or modify formats, change behaviors, and so forth.
606 |
607 | The function `initialize-hash` takes five arguments: the hash table being
608 | initialized, the format specifier, the initial contents source object, the
609 | auxiliary data (`:init-data`) object, and the default value (`:init-default`).
610 | The format is usually a keyword with eql specialization. The contents
611 | source and data object are specialized on type.
612 |
613 | # Specifying Default Formats
614 |
615 | When no `:init-format` argument is given to `make-hash`, the default format
616 | is determined by calling a suitable method of the generic function
617 | `hash-initializer-default-format`, passing the `:initial-contents` argument.
618 | The predefined methods use format `:hash` given a hash table, `:flat` given
619 | a sequence, and `:function` given a function. More flexibility may be
620 | desired in particular applications.
621 |
622 | # Hash Table Factories
623 |
624 | When specific patterns of hash table construction options are used repeatedly,
625 | it can be helpful to encapsulate those patterns in a simple way.
626 | Hash table factories are shortcut functions that create a hash table using
627 | prespecified construction options. Any of the keyword arguments to `make-hash`,
628 | except for `:initial-contents`, can be passed to the factory constructor
629 | and will be used for creating the hash table when the factory is called.
630 | The arguments in the factory call are packaged `&rest`-style in a list
631 | and used as the `:initial-contents`. There are two factory constructors:
632 | `define-hash-factory` creates a toplevel function of a given name
633 | and `make-hash-factory` creates an anonymous function.
634 |
635 | # Reader Representations
636 |
637 | Similarly, it might be desirable for the hash factories to be represented
638 | by syntax at read time via reader macros. The macro `install-hash-reader`
639 | updates a given readtable (the current readtable by default) so that
640 | a dispatched or raw delimited form creates a hash table. The effect
641 | is identical to the use of the hash table factories, except syntactically.
642 | Indeed, a factory can be passed directly to the `install-hash-reader`.
643 |
644 | Calls to this macro must occur at toplevel to have effect. It is designed
645 | to be as portable as possible and to work well with the named-readtables
646 | package. Common examples would be the use of #{} or {} to represent hash
647 | tables.
648 |
649 | # Dictionary
650 | ## make-hash [Function]
651 |
652 | **make-hash** _\&key initial-contents init-format init-data init-default ..._ `=>` _hash-table_
653 |
654 | Creates, initializes if requested, and returns a new hash table.
655 |
656 | Keyword options include all those of the standard `make-hash-table`, any
657 | extension options allowed by the given implementation, and the additional
658 | keyword options to control initialization: `:initial-contents`, the main
659 | source for information filling the table; `:init-format`, a keyword
660 | specifying how the initialization options are interpreted; `:init-data`,
661 | auxiliary data needed for initialization in some formats; and
662 | `:init-default`, a default value used when the value for a key cannot be
663 | initialized. See the description above in _Creating Hash Tables_. Users can
664 | support other types/configurations (or alter the default handling) by
665 | extending the generic function `initialize-hash` in this package; see
666 | _Defining Custom Initialization Formats_.
667 |
668 | ## make-hash-transformer [Function]
669 |
670 | **make-hash-transformer** _domain function &optional badp_ `=>` _function_
671 |
672 | Transform FUNCTION to be suitable for use as the `:init-data` (or
673 | `:initial-contents`) argument to `make-hash`. DOMAIN specifies the
674 | signature of FUNCTION and is one of the keywords `:key`, `:value`, or
675 | `:entry`, indicating that FUNCTION takes a key, a value, or a key and a
676 | value, respectively. BADP is a function with the same argument signature
677 | as FUNCTION that follows the return convention described above (_Functions as..._).
678 | Specifically, it returns a ternary value: nil means that the transformed
679 | entry should be used as is, t means that the entry should be skipped, and
680 | any other non-nil value means that the key should be used with a default.
681 | Note that FUNCTION is _not_ called for an entry if BADP returns a non-nil
682 | value.
683 |
684 | The returned function accepts a key and a value (the value is optional
685 | with DOMAIN :key) and returns three values: the key, the value, and the
686 | bad-value ternary for that entry.
687 |
688 | ## initialize-hash [Generic Function]
689 |
690 | **initialize-hash** _table form source data default_
691 |
692 | Creates and adds an entry to TABLE using info of format FORM in SOURCE
693 | and DATA. SOURCE contains the main contents, and DATA (optionally)
694 | contains auxiliary information or objects required for initialization
695 | for some formats. DEFAULT is the value that should be stored in the table
696 | when an appropriate value associated to a key cannot be found. Adding or
697 | redefining methods for this function allows extension or modification of
698 | the initialization mechanism.
699 |
700 | Note the convention, used by the predefined methods, that functions
701 | passed as either SOURCE or DATA are expected to return three values,
702 | using the convention described above (_Functions as..._).
703 |
704 | ## hash-initializer-default-format [Generic Function]
705 |
706 | **hash-initializer-default-format** _source_ `=>` _keyword or error_
707 |
708 | Selects an initializer format based on the given initial contents SOURCE.
709 | For example, the default format for sequence contents is `:flat`;
710 | to change it to `:pairs` so that an alist is expected as `:initial-contents`
711 | by default, do the following:
712 |
713 | (defmethod hash-initializer-default-format ((source list))
714 | :pairs)
715 |
716 | ## `*hash-factory-defaults*` [Special Variable]
717 |
718 | Hash table creation options used as defaults by hash factory
719 | constructors. These option specifications are passed last to make-hash by
720 | the hash factories and so are overridden by options passed as explicit
721 | arguments to the factory constructor.
722 |
723 | Changing this variable affects the options used by every hash factory
724 | that does not fully specify its options. This includes default calls to
725 | the reader constructors. Of particular note are the `:test` and
726 | `:init-format` options.
727 |
728 | ## define-hash-factory [Macro]
729 |
730 | **define-hash-factory** _name &key ...hash-options..._
731 |
732 | Create a hash-table factory NAME that calls `make-hash` with options
733 | specified by the hash-options arguments. The defined
734 | function packages its arguments as a list, which it passes as
735 | the `:initial-contents` argument to `make-hash`.
736 |
737 | The hash-options are alternating keyword-value pairs. The supplied
738 | keyword arguments precede and thus override the options in
739 | `*hash-factory-defaults*`, which is intended to allow one to use short
740 | names or customized policies in simple calling patterns. Complex
741 | initialization patterns may need the full power of `make-hash`
742 | itself.
743 |
744 | ## make-hash-factory [Function]
745 |
746 | **make-hash-factory** _&key ...hash-options..._ `=>` _factory-function_
747 |
748 | Like define-hash-factory but creates and returns an anonymous factory
749 | function.
750 |
751 | ## install-hash-reader [Macro]
752 |
753 | **install-hash-reader** _options &key readtable use-dispatch allow-numbered-dispatch open-char close-char dispatch-char_
754 |
755 | Creates a hash table factory specified by OPTIONS and installs it
756 | in READTABLE (the current readtable by default). To have effect,
757 | this must be called at toplevel.
758 |
759 | OPTIONS is either a list of keyword-value pairs (as would be passed to
760 | `make-hash` or `make-hash-factory`) or a hash factory function.
761 | READTABLE is a readtable object, `*readtable*` by default.
762 |
763 | The keyword arguments control how the reader is modified as follows:
764 |
765 | + USE-DISPATCH (t by default) determines whether the reader macro uses a
766 | dispatch character DISPATCH-CHAR before OPEN-CHAR. If non-nil, a
767 | dispatch character is used and is registered in READTABLE. If this is
768 | nil, then OPEN-CHAR and CLOSE-CHAR will be a raw delimited construct.
769 |
770 | + ALLOW-NUMBERED-DISPATCH (nil by default) allows a dispatched reader
771 | macro to modify its hash test when given numeric arguments between
772 | DISPATCH-CHAR and OPEN-CHAR. This only applies when USE-DISPATCH is
773 | non-nil and when OPTIONS is a list, not a factory function. The goal
774 | here is to make it easy to reuse reader factories in several contexts.
775 |
776 | If nil, numbered dispatch is not supported. If t, numeric arguments
777 | 0, 1, 2, and 3 correspond to hash tests `eq`, `eql`, `equal`, and
778 | `equalp` respectively. If a sequence of symbols or functions,
779 | those functions are used for the hash test given a numeric
780 | argument from 0 below the length of the sequence. In either case,
781 | dispatch _without_ a numeric argument uses the originally specified
782 | options.
783 |
784 | Note: This is an experimental feature and may be discontinued in
785 | future versions if it proves more confusing than helpful.
786 |
787 | + OPEN-CHAR (default open-brace) is the character that delimits the
788 | beginning of the hash-table contents. If USE-DISPATCH is non-nil,
789 | this character must be preceded by DISPATCH-CHAR, and optionally
790 | a numeric argument.
791 |
792 | + CLOSE-CHAR (default close-brace) is the character that delimits
793 | the end of the hash-table contents.
794 |
795 | + DISPATCH-CHAR (default \#) is the character used to indicate a
796 | dispatched reader macro. When (and only when) USE-DISPATCH is non-nil,
797 | READTABLE is modified to register this as a dispatch and a
798 | non-terminating macro character via `make-dispatch-macro-character`.
799 | Note that there can be more than one dispatch character in a read
800 | table.
801 |
802 |
--------------------------------------------------------------------------------
/User-Guide.org:
--------------------------------------------------------------------------------
1 | #+TITLE: User Guide for Common Lisp Package =make-hash=
2 | #+AUTHOR: Christopher Genovese (=genovese@cmu.edu=)
3 | #+DATE: 30 Jun 2012\vspace*{-0.5cm}
4 |
5 | * Motivation and Overview
6 |
7 | Two common (and arguably apt) criticisms of hash tables in Common Lisp are
8 | that hash table initialization is bulky and awkward and that the
9 | representation of hash tables is not as integrated into the language as are
10 | the representations of lists and (to a degree) vectors.
11 |
12 | The =make-hash= package addresses these issues by supplying three
13 | useful, related mechanisms:
14 |
15 | 1. A hash table constructor =make-hash= with initialization that is
16 | concise, flexible, and extensible.
17 |
18 | See =make-hash=, =initialize-hash=, =hash-initializer-default-format=,
19 | and =make-hash-transformer= below.
20 |
21 | 2. Methods for defining hash-table factories with a customized
22 | set of initialization options, either as a globally or locally
23 | defined function.
24 |
25 | See =define-hash-factory=, =make-hash-factory=, and \newline
26 | =*hash-factory-defaults*= below.
27 |
28 | 3. Readtable installers for defining a portable reader interface to
29 | the hash-table factories, either as (raw) delimited or dispatched
30 | reader macros.
31 |
32 | See =install-hash-reader= below.
33 |
34 | #+LaTeX: \noindent
35 | In particular, the function =make-hash= is a wrapper around the standard
36 | CL function =make-hash-table= with some additional keyword arguments
37 | that allow one to specify initial contents and format.
38 |
39 | As an illustration, consider the example on page 440 of the venerable
40 | /Common Lisp the Language, Second Edition/ by Guy Steele [CLtL2].
41 |
42 | #+begin_src lisp
43 | (setq turtles (make-hash-table :size 9 :test 'eq))
44 | (setf (gethash 'howard-kaylan turtles) '(musician lead-singer))
45 | (setf (gethash 'john-barbata turtles) '(musician drummer))
46 | (setf (gethash 'leonardo turtles) '(ninja leader blue))
47 | (setf (gethash 'donatello turtles) '(ninja machines purple))
48 | (setf (gethash 'al-nichol turtles) '(musician guitarist))
49 | (setf (gethash 'mark-volman turtles) '(musician great-hair))
50 | (setf (gethash 'raphael turtles) '(ninja cool rude red))
51 | (setf (gethash 'michaelangelo turtles) '(ninja party-dude orange))
52 | (setf (gethash 'jim-pons turtles) '(musician bassist))
53 | #+end_src
54 |
55 | #+LaTeX: \noindent
56 | This is not horrible by any means, but the repeated =setf='s force an
57 | assignment-oriented block of statements and visually obscure the
58 | relationships in the table. And in practice, even more syntactic
59 | infrastructure is usually required (e.g., another level of let for a
60 | local definition, a loop for a larger hash table). While it is certainly
61 | a matter of taste which form one prefers, the goal of =make-hash= is to
62 | allow a more convenient, functional-style hash table construction that
63 | is consistent with constructors for lists, vectors, and arrays. Compare
64 | the above with any of the following:
65 |
66 | #+begin_src lisp
67 | (make-hash :size 9 :test 'eq
68 | :initial-contents '(howard-kaylan (musician lead-singer)
69 | john-barbata (musician drummer)
70 | leonardo (ninja leader blue)
71 | donatello (ninja machines purple)
72 | al-nichol (musician guitarist)
73 | mark-volman (musician great-hair)
74 | raphael (ninja cool rude red)
75 | michaelangelo (ninja party-dude orange)
76 | jim-pons (musician bassist)))
77 | #+end_src
78 |
79 | #+begin_src lisp
80 | (make-hash :size 9 :test 'eq :init-format :lists
81 | :initial-contents '((howard-kaylan (musician lead-singer))
82 | (john-barbata (musician drummer))
83 | (leonardo (ninja leader blue))
84 | (donatello (ninja machines purple))
85 | (al-nichol (musician guitarist))
86 | (mark-volman (musician great-hair))
87 | (raphael (ninja cool rude red))
88 | (michaelangelo (ninja party-dude orange))
89 | (jim-pons (musician bassist))))
90 | #+end_src
91 |
92 | #+begin_src lisp
93 | (make-hash :size 9 :test 'eq :init-format :keychain
94 | :initial-contents
95 | '(howard-kaylan john-barbata leonardo
96 | donatello al-nichol mark-volman
97 | raphael michaelangelo jim-pons)
98 | :init-data
99 | '((musician lead-singer) (musician drummer) (ninja leader blue)
100 | (ninja machines purple) (musician guitarist) (musician great-hair)
101 | (ninja cool rude red) (ninja party-dude orange) (musician bassist)))
102 | #+end_src
103 |
104 | #+begin_src lisp
105 | #{ howard-kaylan (musician lead-singer)
106 | john-barbata (musician drummer)
107 | leonardo (ninja leader blue)
108 | donatello (ninja machines purple)
109 | al-nichol (musician guitarist)
110 | mark-volman (musician great-hair)
111 | raphael (ninja cool rude red)
112 | michaelangelo (ninja party-dude orange)
113 | jim-pons (musician bassist) }
114 | #+end_src
115 |
116 | There are many other formats for the initial contents that would be
117 | convenient to use in other contexts, and make-hash supports a wide
118 | variety of them. Moreover, custom formats can be supported easily by
119 | defining a method for a single generic function, and default formats
120 | can be adjusted similarly. See below for more detail and examples.
121 |
122 | * Installation
123 |
124 | The simplest approach is to use quicklisp (www.quicklisp.org).
125 | With quicklisp installed, simply call =(ql:quickload "make-hash")=
126 | and quicklisp will do the rest.
127 |
128 | Otherwise, obtain the code from http://github.com/genovese/make-hash,
129 | cloning the repository or downloading and unpacking the tar/zip archive.
130 | Either load it directly or put the =make-hash= subdirectory
131 | where ASDF (www.cliki.net/asdf) can find the =.asd= file.
132 | With ASDF, call =(asdf:load-system "make-hash")= to load the
133 | package.
134 |
135 | For both quicklisp and ASDF, you may want to call
136 | =(use-package :make-hash)= to import the main functions. If you want to
137 | run the tests, which are in the package =make-hash-tests=, do the
138 | following.
139 |
140 | + In quicklisp:
141 | #+begin_src lisp
142 | (ql:quickload "make-hash")
143 | (ql:quickload "make-hash-tests")
144 | (asdf:test-system "make-hash-tests")
145 | #+end_src
146 |
147 | + With ASDF alone:
148 | #+begin_src lisp
149 | (asdf:load-system "make-hash")
150 | (asdf:load-system "make-hash-tests")
151 | (asdf:test-system "make-hash-tests")
152 | #+end_src
153 |
154 | * Examples
155 |
156 | The use of =make-hash= is pretty straightforward, and I think it will be
157 | clearer to see some examples before looking at the detailed specifications.
158 | It might help to scan these examples quickly on first read through and
159 | then come back after reading the specification in the ensuing sections.
160 | Here, I will assume that the predefined formats and defaults are in
161 | effect, although these can be overridden if desired.
162 |
163 | 1. No Initialization
164 |
165 | Use exactly like =make-hash-table=, with all standard or
166 | implementation-dependent keyword arguments.
167 |
168 | #+begin_src lisp
169 | (make-hash)
170 | (make-hash :test #'equal)
171 | (make-hash :size 128 :rehash-size 1.75)
172 | #+end_src
173 |
174 | 2. (Shallow) Copying an existing hash table
175 |
176 | #+begin_src lisp
177 | (make-hash :initial-contents eql-hash-table)
178 | (make-hash :test (hash-table-test other-hash-table)
179 | :initial-contents other-hash-table)
180 | #+end_src
181 |
182 | 3. Initializing from simple sequences containing keys and values
183 |
184 | #+begin_src lisp
185 | (make-hash :initial-contents '(a 1 b 2 c 3 d 1 e 2 f 3 g 4))
186 | (make-hash :init-format :flat
187 | :initial-contents '(a 1 b 2 c 3 d 1 e 2 f 3 g 4))
188 | (make-hash :init-format :pairs
189 | :initial-contents '((a . 1) (b . 2) (c . 3)
190 | (d . 1) (e . 2) (f . 3) (g . 4)))
191 | (make-hash :init-format :lists
192 | :initial-contents '((a 1) (b 2) (c 3)
193 | (d 1) (e 2) (f 3) (g 4)))
194 | (make-hash :init-format :vectors
195 | :initial-contents '(#(a 1) #(b 2) #(c 3)
196 | #(d 1) #(e 2) #(f 3) #(g 4)))
197 | (make-hash :init-format :seqs
198 | :initial-contents '((a 1) #(b 2) (c 3)
199 | #(d 1) (e 2) #(f 3) #(g 4)))
200 | #+end_src
201 |
202 | Here =:flat= is the default format, and the result in all these
203 | cases maps =a= \to 1, =b= \to 2, =c= \to 3, =d= \to 1, =e= \to 2,
204 | =f= \to 3, and =g= \to 4.
205 |
206 | 4. Initializing from separate sequences of keys and values
207 |
208 | #+begin_src lisp
209 | (make-hash :init-format :keychain
210 | :initial-contents '(a b c d e f g)
211 | :init-data '(1 2 3 1 2 3 4))
212 | (make-hash :init-format :keychain
213 | :initial-contents '(a b c d e f g)
214 | :init-data #(1 2 3 1 2 3 4))
215 | #+end_src
216 |
217 | The resulting tables are the same as in the last example.
218 |
219 | 5. Creating a hash table of keys and counts
220 |
221 | Given a sequence of objects, create a hash table with the unique
222 | objects as keys and the frequency counts in the sequence as values.
223 |
224 | #+begin_src lisp
225 | (make-hash :init-format :keybag
226 | :initial-contents '(a b d d e c b a a e c c d a a c c e c c))
227 | (make-hash :init-format :keybag
228 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c))
229 | #+end_src
230 |
231 | The results map =a= \to 5, =b= \to 2, =c= \to 7, =d= \to 3, and =e= \to 3.
232 |
233 | 6. Building a hash from selected keys in another associative map or database
234 |
235 | Here, the =:initial-contents= is a sequence of keys, and the corresponding
236 | values are the values for those keys in the map given as =:init-data=,
237 | or the =:init-default= if none exists.
238 |
239 | Let =turtles= be the hash table above from CLtL2. Suppose
240 | =turtles-alist= is an associative list with the same data and that
241 | =turtles-database-reader= is a function that reads an associated record
242 | from a database. We can extract a ``sub-hash'' whose keys are those
243 | corresponding to mutant, ninja turtles as follows.
244 |
245 | #+begin_src lisp
246 | (make-hash :init-format :keys
247 | :initial-contents '(leonardo donatello raphael michaelangelo)
248 | :init-data turtles)
249 | (make-hash :init-format :keys
250 | :initial-contents '(leonardo donatello raphael michaelangelo)
251 | :init-data turtles-alist)
252 | (make-hash :init-format :keys
253 | :initial-contents '(leonardo donatello raphael michaelangelo)
254 | :init-data turtles-database-reader)
255 | #+end_src
256 |
257 | 7. Initializing from repeated calls to a function
258 |
259 | The following initializes the hash table from a /simple/ CSV
260 | (comma-separated value) file, with no commas within fields, using the
261 | first field as the key and the list of remaining fields as the value.
262 | The function =parse-csv-line= acts on one line at a time and
263 | either initializes an entry or skips the line using the return value
264 | convention described below.
265 |
266 | #+begin_src lisp
267 | (use-package :cl-ppcre)
268 |
269 | (defun parse-csv-line (stream)
270 | (let ((line (read-line stream nil)))
271 | (cond
272 | ((null line)
273 | (values nil nil nil))
274 | ((scan "^\\s*$" line)
275 | (values t t t))
276 | (t
277 | (let ((fields
278 | (split "\\s*,\\s*" line :limit most-positive-fixnum)))
279 | (values (first fields) (rest fields) nil))))))
280 |
281 | (with-open-file (s "data.csv" :direction :input :if-does-not-exist nil)
282 | (make-hash :test #'equal :init-format :function
283 | :initial-contents #'parse-csv-line :init-data (list s)))
284 | #+end_src
285 |
286 | The following initializes the hash table from the key-value pairs in an
287 | INI file. The function =parse-ini-line= acts on one line at a time and
288 | either initializes or skips using the return value convention described
289 | below.
290 |
291 | #+begin_src lisp
292 | (use-package :cl-ppcre)
293 |
294 | (let ((ini-line-re
295 | (create-scanner
296 | "^\\s*(?:|;.*|\\[([^]]+)\\]|(\\w+)\\s*=\\s*(.*?))?\\s*$"))
297 | (current-section-name ""))
298 | (defun parse-ini-line (stream)
299 | (let ((line (read-line stream nil)))
300 | (unless line
301 | (setf current-section-name "")
302 | (return-from parse-ini-line (values nil nil nil)))
303 | (multiple-value-bind (beg end reg-begs reg-ends)
304 | (scan ini-line-re line)
305 | (declare (ignorable end))
306 | (unless beg
307 | (error "Improperly formatted INI line: ~A" line))
308 | (if (and (> (length reg-begs) 2) (aref reg-begs 1))
309 | (values
310 | (concatenate 'string
311 | current-section-name "/"
312 | (subseq line (aref reg-begs 1) (aref reg-ends 1)))
313 | (subseq line (aref reg-begs 2) (aref reg-ends 2))
314 | nil)
315 | (progn
316 | (when (and (> (length reg-begs) 0) (aref reg-begs 0))
317 | (setf current-section-name
318 | (subseq line (aref reg-begs 0) (aref reg-ends 0))))
319 | (values t t t)))))))
320 |
321 | (with-open-file (s "config.ini" :direction :input :if-does-not-exist nil)
322 | (make-hash :test #'equal :init-format :function
323 | :initial-contents #'parse-ini-line :init-data (list s)))
324 | #+end_src
325 |
326 | 8. Transforming a hash built from a sequence of keys and values
327 |
328 | Passing a function as =:init-data= can be used to
329 | transform the initial contents as the hash is being initialized.
330 |
331 | #+begin_src lisp
332 | (make-hash :init-format :flat
333 | :initial-contents '(a 1 b 2 c 3 d 1 e 2 f 3 g 4)
334 | :init-data (lambda (k v) (values k (* v v) nil)))
335 | (make-hash :init-format :pairs
336 | :initial-contents '((a . 1) (b . 2) (c . 3)
337 | (d . 1) (e . 2) (f . 3) (g . 4))
338 | :init-data (lambda (k v)
339 | (values (intern (symbol-name k) :keyword)
340 | (* v v))))
341 | (let ((scratch (make-hash)))
342 | (make-hash :init-format :lists
343 | :initial-contents '((a 1) (b 2) (c 3)
344 | (d 1) (e 2) (f 3) (g 4))
345 | :init-data (lambda (k v)
346 | (values v
347 | (setf (gethash v scratch)
348 | (cons k (gethash v scratch nil)))
349 | nil))))
350 | #+end_src
351 |
352 | The first is a hash that maps =a= and =d= to 1, =b= and =e= to 4, =c= and =f= to 9,
353 | and =g= to 16. The second is the same except that the keys are the
354 | keywords with the same symbol-name (e.g., :a, :b). The third
355 | reverses the given alist, accumulating repeated values in a list:
356 | 1 \to =(d a)=, 2 \to =(e b)=, 3 \to =(f c)=, and 4 \to =(g)=.
357 |
358 | 9. Transforming an existing hash table or alist
359 |
360 | #+begin_src lisp
361 | (defun lastcar (list)
362 | (car (last list)))
363 |
364 | (defvar *pet-hash*
365 | (make-hash :initial-contents
366 | '(dog (mammal pet loyal 3) cat (mammal pet independent 1)
367 | eagle 0 cobra 0
368 | goldfish (fish pet flushed 1) hamster (mammal pet injured sad 2)
369 | corn-snake (reptile pet dog-like 1) crab (crustacean quiet 4)
370 | grasshopper (insect methusala 1) black-widow 0)))
371 |
372 | (make-hash :initial-contents *pet-hash*
373 | :init-data (make-hash-transformer :value #'lastcar #'atom))
374 | #+end_src
375 |
376 | The result maps =dog= \to 3, =cat= \to 1, =goldfish= \to 1, =hamster= \to 2,
377 | =corn-snake= \to 1, =grasshopper= \to 1, and =crab= \to 4. If =*pet-hash*=
378 | had been an alist instead of a hash table, the call to =make-hash=
379 | would be unchanged. Note that lastcar is not called on an entry unless
380 | atom returns =nil=.
381 |
382 | 10. Transforming a keybag
383 |
384 | Create a hash recording counts for each key (see example 5) but filter
385 | on some constraint. A function for =:init-data= takes the key and count
386 | and sets the values according to the return convention described below.
387 | With a vector for =:init-data=, the count is an index into the vector
388 | for the new value. With a hash table, the count is used as key to
389 | lookup the new value.
390 |
391 | #+begin_src lisp
392 | (make-hash :init-format :keybag
393 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c)
394 | :init-data (lambda (key count) (values key count (<= count 3))))
395 | (make-hash :init-format :keybag
396 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c)
397 | :init-data #(zero one two three four)
398 | :init-default 'more-than-four)
399 | (make-hash :init-format :keybag
400 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c)
401 | :init-data (make-hash :initial-contents '(3 "You're out!"))
402 | :init-default "Whatever!")
403 | #+end_src
404 |
405 | The first gives a hash a \to 5, c \to 7 (b, d, and e are skipped because their counts are at most 3).
406 | The second gives a hash a \to more-than-four, b \to two, c \to more-than-four,
407 | d \to three, e \to three. And the third gives a hash with a, b, and c
408 | mapping to the string "Whatever!" and d and e mapping to "You're out!".
409 |
410 | 11. Creating Hash Factories
411 |
412 | Hash factories are shortcuts that encapsulate a specified set of hash creation options,
413 | primarily for use with literal hash creation with sequence-style init formats.
414 | The factories are functions that package their arguments (&rest style) and
415 | use the resulting list as the =:initial-contents= argument to =make-hash=
416 | with the given options. The difference between =define-hash-factory= and
417 | =make-hash-factory= is that the former defines a toplevel function, whereas
418 | the latter returns an anonymous function.
419 |
420 | #+begin_src lisp
421 | (define-hash-factory qhash
422 | :init-format :flat
423 | :test #'eq :size 128
424 | :documentation "Construct moderate size hash tables for symbols.")
425 |
426 | (qhash 'a 1 'b 2 'c 3 'd 4 'x 100 'y -100 'z 0)
427 | (apply #'qhash '(a 1 b 2 c 3 d 4 x 100 y -100 z 0))
428 |
429 | (define-hash-factory ahash
430 | :init-format :pairs
431 | :init-data (lambda (k v)
432 | (values (if (stringp k) (intern (string-upcase k)) k) v))
433 | :documentation "Alist->hash, converting string keys to symbols.")
434 |
435 | (ahash '("foo" . 10) '("bar" . 20) '("zap" . 30))
436 | (apply #'ahash '((a . 1) (b . 2) (c . 3) ("d" . 4) ("foo" . "bar")))
437 |
438 | (let ((h (make-hash-factory :init-format :keys :init-data *big-hash*)))
439 | (funcall h key1 key2 key3 key4)) ; quick subhash of *big-hash*
440 | #+end_src
441 |
442 | 12. Portable Reader Factories
443 |
444 | It may be desirable to use reader macros to stand-in for particular
445 | hash table constructors. These are hash factories that are installed in
446 | a readtable using =install-hash-reader= at toplevel. Both dispatched
447 | and raw delimited forms are supported, and the installer can accept a
448 | list of options or an existing factory.
449 |
450 | Here are three separate uses yielding =:a=\to1, =:b=\to2, =:c=\to3, =:d=\to4.
451 |
452 | #+begin_src lisp
453 | (install-hash-reader ()) ; default settings and options
454 | #{:a 1 :b 2 :c 3 :d 4}
455 | #+end_src
456 |
457 | #+begin_src lisp
458 | (install-hash-reader '(:init-format :pairs)
459 | :use-dispatch t :open-char #\[ :close-char #\])
460 | #['(:a . 1) '(:b . 2) '(:c . 3) '(:d . 4)]
461 | #+end_src
462 |
463 | #+begin_src lisp
464 | (install-hash-reader '(:init-format :lists)
465 | :use-dispatch nil :open-char #\{ :close-char #\})
466 | {'(:a 1) '(:b 2) '(:c 3) '(:d 4)}
467 | #+end_src
468 |
469 | This accepts a readtable to modify (current readtable by default) and works
470 | well with the :named-readtables package.
471 |
472 | * Creating Hash Tables
473 |
474 | The function =make-hash= is an interface to the CL standard function
475 | =make-hash-table= that also allows flexible initialization. It accepts all
476 | the standard and implementation-dependent keyword arguments that the
477 | standard =make-hash-table= does but also accepts a few additional keyword
478 | arguments that can specify the initial contents of the table (analogously
479 | to the CL standard function =make-array=). The operation of the make-hash
480 | initializer is designed to handle all the common cases easily while
481 | enabling powerful abstractions where needed. See the Examples section
482 | above for examples.
483 |
484 | The new keyword arguments are:
485 |
486 | + =:initial-contents= /object/
487 |
488 | If the supplied object is non-nil, the object is used to initialize
489 | the created hash table in accordance with the =:init-format= argument.
490 | For some formats, the =:init-data= argument may also be needed to
491 | supply supplementary information for the initializer. The built-in
492 | formats support the cases where object is either a hash table or
493 | sequence from which the keys and values can be extracted. See the
494 | subsection below for a detailed description of the possibilities.
495 |
496 | + =:init-format= /keyword/
497 |
498 | A keyword specifying the structure of the initialization contents
499 | and auxiliary data given by the =:initial-contents= and =:init-data=
500 | arguments. Built-in support is provided for :hash, :flat, :pairs,
501 | :lists, :vectors, :seqs, :keys, :keychain, :keybag, and :function.
502 | These are described in detail in the subsection below.
503 |
504 | When an initializer format is not supplied, it is computed by
505 | calling the generic function =hash-initializer-default-format= on
506 | the given =:initial-contents= object. A method for this function
507 | should be defined whenever the function =initialize-hash= is
508 | extended to handle a new class of =:initial-contents= objects. Methods
509 | can be overridden to change the default used in existing cases.
510 |
511 | + =:init-data= /object/
512 |
513 | Auxiliary data used for initialization with some formats. Its
514 | structure and meaning depend on the value of =:init-format=, as
515 | described in the subsection below.
516 |
517 | + =:init-default= /value/
518 |
519 | Default value to use in indirect initialization when the value for
520 | the given key cannot be determined from the =:initial-contents= and
521 | =:init-data= for the particular =:init-format= supplied.
522 |
523 | If no :initial-contents argument is supplied, the hash table is not
524 | initialized, and =make-hash= behaves exactly like the standard
525 | function =make-hash-table=. For many formats, initialization only
526 | requires an :initial-contents argument. See [[Examples]] for more.
527 |
528 | ** Functions as =:init-data= (or =:initial-contents=)
529 |
530 | For most of the pre-defined formats, a function can be
531 | passed as the =:init-data=, and with the =:function= format,
532 | a function can be passed as the =:initial-contents= as well.
533 | These functions are expected to return three values
534 | /KEY VALUE [BAD-VALUE]/
535 | that are used (under some conditions) to create a new key-value
536 | entry in the hash table being initialized. Here, BAD-VALUE
537 | is a *ternary* value: nil (or missing) means to use KEY and VALUE
538 | as is; t means to skip creating this entry entirely, and any
539 | other non-nil value means to associate KEY to the specified
540 | =:init-default= value /instead/ of VALUE.
541 |
542 | In the description of the predefined formats below, such function
543 | arguments are used in one of three ways:
544 |
545 | 1. Entry transformation: /INIT-KEY INIT-VALUE -> KEY VALUE [BAD-VALUE]/
546 |
547 | The key and value specified by =:initial-contents= (/INIT-KEY INIT-VALUE/)
548 | are passed to the function and the return values used as described above.
549 | (Formats =:hash=, =:flat=, =:pairs=, =:lists=, =:vectors=, =:seqs=.)
550 |
551 | 2. Key transformation: /INIT-KEY -> KEY VALUE [BAD-VALUE]/
552 |
553 | With format =:keys=, the key specified by =:initial-contents= is
554 | passed to the function and the return values used as described above.
555 |
556 | 3. Entry generation: /&rest ARGS -> KEY VALUE [BAD-VALUE]/
557 |
558 | With format =:function=, the =:initial-contents= argument is a function.
559 | This function is applied repeatedly to /ARGS/ and the return values
560 | used as described above. However, in this case, the first time
561 | that KEY is nil, initialization stops.
562 |
563 | See also the documentation for the function =make-hash-transformer=
564 | which creates a function suitable for use in this way from a simpler
565 | function on keys or entries.
566 |
567 | ** Predefined Initialization Formats
568 |
569 | The =:init-format= argument is a keyword that determines how the
570 | keyword arguments =:initial-contents= and =:init-data= are interpreted.
571 | If =:init-format= is not supplied, the default format is determined
572 | by the type of =:initial-contents=.
573 |
574 | There are four basic cases in the pre-defined initialization support:
575 |
576 | 1. Initializing from an existing hash table
577 |
578 | When =:init-format= is =:hash= or by default if =:initial-contents= is
579 | a hash-table, the new hash table is initialized by a shallow copy of
580 | the initial contents table, with shared structure in keys and values.
581 | If =:init-data= is a function, that function is used for entry
582 | transformation of the hash table given in =:initial-contents=.
583 |
584 | 2. Initializing from a sequence (or sequences) specifying key-value pairs.
585 |
586 | When =:init-format= is =:flat=, =:pairs=, =:lists=, =:vectors=, or
587 | =:seqs=, the =:initial-contents= should be a sequence that specifies a
588 | collection of key-value pairs. The only difference among these formats
589 | is the expected structure of the sequence's elements. For =:flat=, the
590 | keys and values alternate; for =:pairs=, it is a sequence of cons
591 | pairs (e.g., an alist); for =:lists=, =:vectors=, and =:seqs=, it is a
592 | sequence of lists, vectors, or arbitrary sequences respectively of
593 | which the first two elements of each give the corresponding key and
594 | value. In these cases, if =:init-data= is nil or missing, the key-value
595 | pairs are used as is; if =:init-data= is a function, the function is
596 | used for entry transformation, as described above, for each pair.
597 |
598 | When =:init-format= is =:keychain=, the =:initial-contents= should
599 | be a sequence of keys and =:init-data= should be a sequence of
600 | corresponding values /in the same order/. The table is initialized
601 | with the resultant key-value pairs.
602 |
603 | When =:init-format= is =:keys=, the =:initial-contents= should be a
604 | sequence of keys. The corresponding value is obtained by looking
605 | up the key in the hash table, alist, or function (via key transformation,
606 | see above) that is passed as =:init-data=, which in this case
607 | is required.
608 |
609 | 3. Initializing from a bag/multiset of keys.
610 |
611 | When =:init-format= is =:keybag=, the =:initial-contents= should be a
612 | sequence representing a /multiset/ (a collection with possibly
613 | repeated elements) of keys. The hash table is initialized to map the
614 | unique elements from that multiset (as keys) to the number of times
615 | that element appears in the multiset (as values).
616 |
617 | In this case, if =:init-data= is a vector, hash table, or function,
618 | the count is used to find the corresponding value by indexing into the
619 | vector, looking up the value associated with count in the data
620 | hash-table, or calling the function with the key and count. When a
621 | value cannot be found, the default is used instead, subject to the
622 | value of BAD-VALUE in the function case.
623 |
624 | 4. Initializing from a function.
625 |
626 | When =:init-format= is =:function= or =:initial-contents= is a function,
627 | the hash table is initialized by using the function for entry generation
628 | as described above.
629 |
630 | #+LaTeX: \noindent
631 | See also the documentation for =make-hash= for a relatively succinct
632 | table describing these options. Keep in mind that the interpretation of
633 | the formats is specified by methods of the =initialize-hash= generic
634 | function, and the default formats for different =:initial-contents= types
635 | by methods of the =hash-initializer-default-format=.
636 |
637 | * Defining Custom Initialization Formats
638 |
639 | Initialization by =make-hash= is controlled by the generic function
640 | =initialize-hash=. Defining new methods for this function, or overriding
641 | existing methods, makes it easy to extend the hash table initialization,
642 | to add or modify formats, change behaviors, and so forth.
643 |
644 | The function =initialize-hash= takes five arguments: the hash table being
645 | initialized, the format specifier, the initial contents source object, the
646 | auxiliary data (=:init-data=) object, and the default value (=:init-default=).
647 | The format is usually a keyword with eql specialization. The contents
648 | source and data object are specialized on type.
649 |
650 | * Specifying Default Formats
651 |
652 | When no =:init-format= argument is given to =make-hash=, the default format
653 | is determined by calling a suitable method of the generic function
654 | =hash-initializer-default-format=, passing the =:initial-contents= argument.
655 | The predefined methods use format =:hash= given a hash table, =:flat= given
656 | a sequence, and =:function= given a function. More flexibility may be
657 | desired in particular applications.
658 |
659 | * Hash Table Factories
660 |
661 | When specific patterns of hash table construction options are used repeatedly,
662 | it can be helpful to encapsulate those patterns in a simple way.
663 | Hash table factories are shortcut functions that create a hash table using
664 | prespecified construction options. Any of the keyword arguments to =make-hash=,
665 | except for =:initial-contents=, can be passed to the factory constructor
666 | and will be used for creating the hash table when the factory is called.
667 | The arguments in the factory call are packaged =&rest=-style in a list
668 | and used as the =:initial-contents=. There are two factory constructors:
669 | =define-hash-factory= creates a toplevel function of a given name
670 | and =make-hash-factory= creates an anonymous function.
671 |
672 | * Reader Representations
673 |
674 | Similarly, it might be desirable for the hash factories to be represented
675 | by syntax at read time via reader macros. The macro =install-hash-reader=
676 | updates a given readtable (the current readtable by default) so that
677 | a dispatched or raw delimited form creates a hash table. The effect
678 | is identical to the use of the hash table factories, except syntactically.
679 | Indeed, a factory can be passed directly to the =install-hash-reader=.
680 |
681 | Calls to this macro must occur at toplevel to have effect. It is designed
682 | to be as portable as possible and to work well with the named-readtables
683 | package. Common examples would be the use of #{} or {} to represent hash
684 | tables.
685 |
686 | * Dictionary
687 | ** make-hash [Function]
688 |
689 | *make-hash* /\&key initial-contents init-format init-data init-default ... \to hash-table/
690 |
691 | Creates, initializes if requested, and returns a new hash table.
692 |
693 | Keyword options include all those of the standard =make-hash-table=, any
694 | extension options allowed by the given implementation, and the additional
695 | keyword options to control initialization: =:initial-contents=, the main
696 | source for information filling the table; =:init-format=, a keyword
697 | specifying how the initialization options are interpreted; =:init-data=,
698 | auxiliary data needed for initialization in some formats; and
699 | =:init-default=, a default value used when the value for a key cannot be
700 | initialized. See the description above in [[Creating Hash Tables]]. Users can
701 | support other types/configurations (or alter the default handling) by
702 | extending the generic function =initialize-hash= in this package; see
703 | [[Defining Custom Initialization Formats]].
704 |
705 | ** make-hash-transformer [Function]
706 |
707 | *make-hash-transformer* /domain function &optional badp \to function/
708 |
709 | Transform FUNCTION to be suitable for use as the =:init-data= (or
710 | =:initial-contents=) argument to =make-hash=. DOMAIN specifies the
711 | signature of FUNCTION and is one of the keywords =:key=, =:value=, or
712 | =:entry=, indicating that FUNCTION takes a key, a value, or a key and a
713 | value, respectively. BADP is a function with the same argument signature
714 | as FUNCTION that follows the return convention described [[Functions as =:init-data= (or =:initial-contents=)][above]].
715 | Specifically, it returns a ternary value: nil means that the transformed
716 | entry should be used as is, t means that the entry should be skipped, and
717 | any other non-nil value means that the key should be used with a default.
718 | Note that FUNCTION is /not/ called for an entry if BADP returns a non-nil
719 | value.
720 |
721 | The returned function accepts a key and a value (the value is optional
722 | with DOMAIN :key) and returns three values: the key, the value, and the
723 | bad-value ternary for that entry.
724 |
725 | ** initialize-hash [Generic Function]
726 |
727 | *initialize-hash* /table form source data default/
728 |
729 | Creates and adds an entry to TABLE using info of format FORM in SOURCE
730 | and DATA. SOURCE contains the main contents, and DATA (optionally)
731 | contains auxiliary information or objects required for initialization
732 | for some formats. DEFAULT is the value that should be stored in the table
733 | when an appropriate value associated to a key cannot be found. Adding or
734 | redefining methods for this function allows extension or modification of
735 | the initialization mechanism.
736 |
737 | Note the convention, used by the predefined methods, that functions
738 | passed as either SOURCE or DATA are expected to return three values,
739 | using the convention described [[Functions as =:init-data= (or =:initial-contents=)][above]].
740 |
741 | ** hash-initializer-default-format [Generic Function]
742 |
743 | *hash-initializer-default-format* /source \to keyword or error/
744 |
745 | Selects an initializer format based on the given initial contents SOURCE.
746 | For example, the default format for sequence contents is =:flat=;
747 | to change it to =:pairs= so that an alist is expected as =:initial-contents=
748 | by default, do the following:
749 |
750 | #+begin_src lisp
751 | (defmethod hash-initializer-default-format ((source list))
752 | :pairs)
753 | #+end_src
754 |
755 | ** =*hash-factory-defaults*= [Special Variable]
756 |
757 | Hash table creation options used as defaults by hash factory
758 | constructors. These option specifications are passed last to make-hash by
759 | the hash factories and so are overridden by options passed as explicit
760 | arguments to the factory constructor.
761 |
762 | Changing this variable affects the options used by every hash factory
763 | that does not fully specify its options. This includes default calls to
764 | the reader constructors. Of particular note are the =:test= and
765 | =:init-format= options.
766 |
767 | ** define-hash-factory [Macro]
768 |
769 | *define-hash-factory* /name &key ...hash-options.../
770 |
771 | Create a hash-table factory NAME that calls =make-hash= with options
772 | specified by the hash-options arguments. The defined
773 | function packages its arguments as a list, which it passes as
774 | the =:initial-contents= argument to =make-hash=.
775 |
776 | The hash-options are alternating keyword-value pairs. The supplied
777 | keyword arguments precede and thus override the options in
778 | =*hash-factory-defaults*=, which is intended to allow one to use short
779 | names or customized policies in simple calling patterns. Complex
780 | initialization patterns may need the full power of =make-hash=
781 | itself.
782 |
783 | ** make-hash-factory [Function]
784 |
785 | *make-hash-factory* /&key ...hash-options... \to factory-function/
786 |
787 | Like define-hash-factory but creates and returns an anonymous factory
788 | function.
789 |
790 | ** install-hash-reader [Macro]
791 |
792 | *install-hash-reader* /options &key readtable use-dispatch allow-numbered-dispatch open-char close-char dispatch-char/
793 |
794 | Creates a hash table factory specified by OPTIONS and installs it
795 | in READTABLE (the current readtable by default). To have effect,
796 | this must be called at toplevel.
797 |
798 | OPTIONS is either a list of keyword-value pairs (as would be passed to
799 | =make-hash= or =make-hash-factory=) or a hash factory function.
800 | READTABLE is a readtable object, =*readtable*= by default.
801 |
802 | The keyword arguments control how the reader is modified as follows:
803 |
804 | + USE-DISPATCH (t by default) determines whether the reader macro uses a
805 | dispatch character DISPATCH-CHAR before OPEN-CHAR. If non-nil, a
806 | dispatch character is used and is registered in READTABLE. If this is
807 | nil, then OPEN-CHAR and CLOSE-CHAR will be a raw delimited construct.
808 |
809 | + ALLOW-NUMBERED-DISPATCH (nil by default) allows a dispatched reader
810 | macro to modify its hash test when given numeric arguments between
811 | DISPATCH-CHAR and OPEN-CHAR. This only applies when USE-DISPATCH is
812 | non-nil and when OPTIONS is a list, not a factory function. The goal
813 | here is to make it easy to reuse reader factories in several contexts.
814 |
815 | If nil, numbered dispatch is not supported. If t, numeric arguments
816 | 0, 1, 2, and 3 correspond to hash tests =eq=, =eql=, =equal=, and
817 | =equalp= respectively. If a sequence of symbols or functions,
818 | those functions are used for the hash test given a numeric
819 | argument from 0 below the length of the sequence. In either case,
820 | dispatch /without/ a numeric argument uses the originally specified
821 | options.
822 |
823 | Note: This is /an experimental feature and may be discontinued in
824 | future versions/ if it proves more confusing than helpful.
825 |
826 | + OPEN-CHAR (default open-brace) is the character that delimits the
827 | beginning of the hash-table contents. If USE-DISPATCH is non-nil,
828 | this character must be preceded by DISPATCH-CHAR, and optionally
829 | a numeric argument.
830 |
831 | + CLOSE-CHAR (default close-brace) is the character that delimits
832 | the end of the hash-table contents.
833 |
834 | + DISPATCH-CHAR (default \#) is the character used to indicate a
835 | dispatched reader macro. When (and only when) USE-DISPATCH is non-nil,
836 | READTABLE is modified to register this as a dispatch and a
837 | non-terminating macro character via =make-dispatch-macro-character=.
838 | Note that there can be more than one dispatch character in a read
839 | table.
840 |
841 |
--------------------------------------------------------------------------------
/User-Guide.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/genovese/make-hash/ae0909cd8e697520a1085fac6f54ac2b448ebd21/User-Guide.pdf
--------------------------------------------------------------------------------
/User-Guide.tex:
--------------------------------------------------------------------------------
1 | % Created 2012-07-03 Tue 15:31
2 | \documentclass[11pt]{article}
3 | \usepackage[utf8]{inputenc}
4 | \usepackage[T1]{fontenc}
5 | \usepackage{fixltx2e}
6 | \usepackage{graphicx}
7 | \usepackage{longtable}
8 | \usepackage{float}
9 | \usepackage{wrapfig}
10 | \usepackage{soul}
11 | \usepackage{textcomp}
12 | \usepackage{marvosym}
13 | \usepackage{wasysym}
14 | \usepackage{latexsym}
15 | \usepackage{amssymb}
16 | \usepackage{hyperref}
17 | \tolerance=1000
18 | \providecommand{\alert}[1]{\textbf{#1}}
19 |
20 | \title{User Guide for Common Lisp Package \texttt{make-hash}}
21 | \author{Christopher Genovese (\texttt{genovese@cmu.edu})}
22 | \date{30 Jun 2012\vspace*{-0.5cm}}
23 |
24 | \begin{document}
25 |
26 | \maketitle
27 |
28 | \setcounter{tocdepth}{3}
29 | \tableofcontents
30 | \vspace*{1cm}
31 |
32 | \section{Motivation and Overview}
33 | \label{sec-1}
34 |
35 |
36 | Two common (and arguably apt) criticisms of hash tables in Common Lisp are
37 | that hash table initialization is bulky and awkward and that the
38 | representation of hash tables is not as integrated into the language as are
39 | the representations of lists and (to a degree) vectors.
40 |
41 | The \texttt{make-hash} package addresses these issues by supplying three
42 | useful, related mechanisms:
43 |
44 | \begin{enumerate}
45 | \item A hash table constructor \texttt{make-hash} with initialization that is
46 | concise, flexible, and extensible.
47 |
48 | See \texttt{make-hash}, \texttt{initialize-hash}, \texttt{hash-initializer-default-format},
49 | and \texttt{make-hash-transformer} below.
50 | \item Methods for defining hash-table factories with a customized
51 | set of initialization options, either as a globally or locally
52 | defined function.
53 |
54 | See \texttt{define-hash-factory}, \texttt{make-hash-factory}, and \newline
55 | \texttt{*hash-factory-defaults*} below.
56 | \item Readtable installers for defining a portable reader interface to
57 | the hash-table factories, either as (raw) delimited or dispatched
58 | reader macros.
59 |
60 | See \texttt{install-hash-reader} below.
61 | \end{enumerate}
62 |
63 | \noindent
64 | In particular, the function \texttt{make-hash} is a wrapper around the standard
65 | CL function \texttt{make-hash-table} with some additional keyword arguments
66 | that allow one to specify initial contents and format.
67 |
68 | As an illustration, consider the example on page 440 of the venerable
69 | \emph{Common Lisp the Language, Second Edition} by Guy Steele [CLtL2].
70 |
71 |
72 | \begin{verbatim}
73 | (setq turtles (make-hash-table :size 9 :test 'eq))
74 | (setf (gethash 'howard-kaylan turtles) '(musician lead-singer))
75 | (setf (gethash 'john-barbata turtles) '(musician drummer))
76 | (setf (gethash 'leonardo turtles) '(ninja leader blue))
77 | (setf (gethash 'donatello turtles) '(ninja machines purple))
78 | (setf (gethash 'al-nichol turtles) '(musician guitarist))
79 | (setf (gethash 'mark-volman turtles) '(musician great-hair))
80 | (setf (gethash 'raphael turtles) '(ninja cool rude red))
81 | (setf (gethash 'michaelangelo turtles) '(ninja party-dude orange))
82 | (setf (gethash 'jim-pons turtles) '(musician bassist))
83 | \end{verbatim}
84 |
85 |
86 |
87 |
88 | \noindent
89 | This is not horrible by any means, but the repeated \texttt{setf}'s force an
90 | assignment-oriented block of statements and visually obscure the
91 | relationships in the table. And in practice, even more syntactic
92 | infrastructure is usually required (e.g., another level of let for a
93 | local definition, a loop for a larger hash table). While it is certainly
94 | a matter of taste which form one prefers, the goal of \texttt{make-hash} is to
95 | allow a more convenient, functional-style hash table construction that
96 | is consistent with constructors for lists, vectors, and arrays. Compare
97 | the above with any of the following:
98 |
99 |
100 | \begin{verbatim}
101 | (make-hash :size 9 :test 'eq
102 | :initial-contents '(howard-kaylan (musician lead-singer)
103 | jon-barbata (musician drummer)
104 | leonardo (ninja leader blue)
105 | donatello (ninja machines purple)
106 | al-nichol (musician guitarist)
107 | mark-volman (musician great-hair)
108 | raphael (ninja cool rude red)
109 | michaelangelo (ninja party-dude orange)
110 | jim-pons (musician bassist)))
111 | \end{verbatim}
112 |
113 |
114 |
115 |
116 |
117 | \begin{verbatim}
118 | (make-hash :size 9 :test 'eq :init-format :lists
119 | :initial-contents '((howard-kaylan (musician lead-singer))
120 | (jon-barbata (musician drummer))
121 | (leonardo (ninja leader blue))
122 | (donatello (ninja machines purple))
123 | (al-nichol (musician guitarist))
124 | (mark-volman (musician great-hair))
125 | (raphael (ninja cool rude red))
126 | (michaelangelo (ninja party-dude orange))
127 | (jim-pons (musician bassist))))
128 | \end{verbatim}
129 |
130 |
131 |
132 |
133 |
134 | \begin{verbatim}
135 | (make-hash :size 9 :test 'eq :init-format :keychain
136 | :initial-contents
137 | '(howard-kaylan jon-barbata leonardo
138 | donatello al-nichol mark-volman
139 | raphael michaelangelo jim-pons)
140 | :init-data
141 | '((musician lead-singer) (musician drummer) (ninja leader blue)
142 | (ninja machines purple) (musician guitarist) (musician great-hair)
143 | (ninja cool rude red) (ninja party-dude orange) (musician bassist)))
144 | \end{verbatim}
145 |
146 |
147 |
148 |
149 |
150 | \begin{verbatim}
151 | #{ howard-kaylan (musician lead-singer)
152 | jon-barbata (musician drummer)
153 | leonardo (ninja leader blue)
154 | donatello (ninja machines purple)
155 | al-nichol (musician guitarist)
156 | mark-volman (musician great-hair)
157 | raphael (ninja cool rude red)
158 | michaelangelo (ninja party-dude orange)
159 | jim-pons (musician bassist) }
160 | \end{verbatim}
161 |
162 |
163 |
164 |
165 | There are many other formats for the initial contents that would be
166 | convenient to use in other contexts, and make-hash supports a wide
167 | variety of them. Moreover, custom formats can be supported easily by
168 | defining a method for a single generic function, and default formats
169 | can be adjusted similarly. See below for more detail and examples.
170 | \section{Installation}
171 | \label{sec-2}
172 |
173 |
174 | The simplest approach is to use quicklisp (www.quicklisp.org).
175 | With quicklisp installed, simply call \texttt{(ql:quickload "make-hash")}
176 | and quicklisp will do the rest.
177 |
178 | Otherwise, obtain the code from \href{http://github.com/genovese/make-hash}{http://github.com/genovese/make-hash},
179 | cloning the repository or downloading and unpacking the tar/zip archive.
180 | Either load it directly or put the \texttt{make-hash} subdirectory
181 | where ASDF (www.cliki.net/asdf) can find the \texttt{.asd} file.
182 | With ASDF, call \texttt{(asdf:load-system "make-hash")} to load the
183 | package.
184 |
185 | For both quicklisp and ASDF, you may want to call
186 | \texttt{(use-package :make-hash)} to import the main functions. If you want to
187 | run the tests, which are in the package \texttt{make-hash-tests}, do the
188 | following.
189 |
190 | \begin{itemize}
191 | \item In quicklisp:
192 | \end{itemize}
193 |
194 | \begin{verbatim}
195 | (ql:quickload "make-hash")
196 | (ql:quickload "make-hash-tests")
197 | (asdf:test-system "make-hash-tests")
198 | \end{verbatim}
199 |
200 |
201 |
202 |
203 | \begin{itemize}
204 | \item With ASDF alone:
205 | \end{itemize}
206 |
207 | \begin{verbatim}
208 | (asdf:load-system "make-hash")
209 | (asdf:load-system "make-hash-tests")
210 | (asdf:test-system "make-hash-tests")
211 | \end{verbatim}
212 |
213 |
214 |
215 | \section{Examples}
216 | \label{sec-3}
217 |
218 |
219 | The use of \texttt{make-hash} is pretty straightforward, and I think it will be
220 | clearer to see some examples before looking at the detailed specifications.
221 | It might help to scan these examples quickly on first read through and
222 | then come back after reading the specification in the ensuing sections.
223 | Here, I will assume that the predefined formats and defaults are in
224 | effect, although these can be overridden if desired.
225 |
226 | \begin{enumerate}
227 | \item No Initialization
228 |
229 | Use exactly like \texttt{make-hash-table}, with all standard or
230 | implementation-dependent keyword arguments.
231 |
232 |
233 | \begin{verbatim}
234 | (make-hash)
235 | (make-hash :test #'equal)
236 | (make-hash :size 128 :rehash-size 1.75)
237 | \end{verbatim}
238 | \item (Shallow) Copying an existing hash table
239 |
240 |
241 | \begin{verbatim}
242 | (make-hash :initial-contents eql-hash-table)
243 | (make-hash :test (hash-table-test other-hash-table)
244 | :initial-contents other-hash-table)
245 | \end{verbatim}
246 | \item Initializing from simple sequences containing keys and values
247 |
248 |
249 | \begin{verbatim}
250 | (make-hash :initial-contents '(a 1 b 2 c 3 d 1 e 2 f 3 g 4))
251 | (make-hash :init-format :flat
252 | :initial-contents '(a 1 b 2 c 3 d 1 e 2 f 3 g 4))
253 | (make-hash :init-format :pairs
254 | :initial-contents '((a . 1) (b . 2) (c . 3)
255 | (d . 1) (e . 2) (f . 3) (g . 4)))
256 | (make-hash :init-format :lists
257 | :initial-contents '((a 1) (b 2) (c 3)
258 | (d 1) (e 2) (f 3) (g 4)))
259 | (make-hash :init-format :vectors
260 | :initial-contents '(#(a 1) #(b 2) #(c 3)
261 | #(d 1) #(e 2) #(f 3) #(g 4)))
262 | (make-hash :init-format :seqs
263 | :initial-contents '((a 1) #(b 2) (c 3)
264 | #(d 1) (e 2) #(f 3) #(g 4)))
265 | \end{verbatim}
266 |
267 |
268 |
269 |
270 | Here \texttt{:flat} is the default format, and the result in all these
271 | cases maps \texttt{a} $\to$ 1, \texttt{b} $\to$ 2, \texttt{c} $\to$ 3, \texttt{d} $\to$ 1, \texttt{e} $\to$ 2,
272 | \texttt{f} $\to$ 3, and \texttt{g} $\to$ 4.
273 | \item Initializing from separate sequences of keys and values
274 |
275 |
276 | \begin{verbatim}
277 | (make-hash :init-format :keychain
278 | :initial-contents '(a b c d e f g)
279 | :init-data '(1 2 3 1 2 3 4))
280 | (make-hash :init-format :keychain
281 | :initial-contents '(a b c d e f g)
282 | :init-data #(1 2 3 1 2 3 4))
283 | \end{verbatim}
284 |
285 |
286 |
287 |
288 | The resulting tables are the same as in the last example.
289 | \item Creating a hash table of keys and counts
290 |
291 | Given a sequence of objects, create a hash table with the unique
292 | objects as keys and the frequency counts in the sequence as values.
293 |
294 |
295 | \begin{verbatim}
296 | (make-hash :init-format :keybag
297 | :initial-contents '(a b d d e c b a a e c c d a a c c e c c))
298 | (make-hash :init-format :keybag
299 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c))
300 | \end{verbatim}
301 |
302 |
303 |
304 |
305 | The results map \texttt{a} $\to$ 5, \texttt{b} $\to$ 2, \texttt{c} $\to$ 7, \texttt{d} $\to$ 3, and \texttt{e} $\to$ 3.
306 | \item Building a hash from selected keys in another associative map or database
307 |
308 | Here, the \texttt{:initial-contents} is a sequence of keys, and the corresponding
309 | values are the values for those keys in the map given as \texttt{:init-data},
310 | or the \texttt{:init-default} if none exists.
311 |
312 | Let \texttt{turtles} be the hash table above from CLtL2. Suppose
313 | \texttt{turtles-alist} is an associative list with the same data and that
314 | \texttt{turtles-database-reader} is a function that reads an associated record
315 | from a database. We can extract a ``sub-hash'' whose keys are those
316 | corresponding to mutant, ninja turtles as follows.
317 |
318 |
319 | \begin{verbatim}
320 | (make-hash :init-format :keys
321 | :initial-contents '(leonardo donatello raphael michaelangelo)
322 | :init-data turtles)
323 | (make-hash :init-format :keys
324 | :initial-contents '(leonardo donatello raphael michaelangelo)
325 | :init-data turtles-alist)
326 | (make-hash :init-format :keys
327 | :initial-contents '(leonardo donatello raphael michaelangelo)
328 | :init-data turtles-database-reader)
329 | \end{verbatim}
330 | \item Initializing from repeated calls to a function
331 |
332 | The following initializes the hash table from a \emph{simple} CSV
333 | (comma-separated value) file, with no commas within fields, using the
334 | first field as the key and the list of remaining fields as the value.
335 | The function \texttt{parse-csv-line} acts on one line at a time, skipping
336 | blank lines, and either initializes or skips an entry using the return value
337 | convention described below.
338 |
339 |
340 | \begin{verbatim}
341 | (use-package :cl-ppcre)
342 |
343 | (defun parse-csv-line (stream)
344 | (let ((line (read-line stream nil)))
345 | (cond
346 | ((null line)
347 | (values nil nil nil))
348 | ((scan "^\\s*$" line)
349 | (values t t t))
350 | (t
351 | (let ((fields
352 | (split "\\s*,\\s*" line :limit most-positive-fixnum)))
353 | (values (first fields) (rest fields) nil))))))
354 |
355 | (with-open-file (s "data.csv" :direction :input :if-does-not-exist nil)
356 | (make-hash :test #'equal :init-format :function
357 | :initial-contents #'parse-csv-line :init-data (list s)))
358 | \end{verbatim}
359 |
360 |
361 |
362 |
363 | The following initializes the hash table from the key-value pairs in an
364 | INI file. The function \texttt{parse-ini-line} acts on one line at a time and
365 | either initializes or skips using the return value convention described
366 | below.
367 |
368 |
369 | \begin{verbatim}
370 | (use-package :cl-ppcre)
371 |
372 | (let ((ini-line-re
373 | (create-scanner
374 | "^\\s*(?:|;.*|\\[([^]]+)\\]|(\\w+)\\s*=\\s*(.*?))?\\s*$"))
375 | (current-section-name ""))
376 | (defun parse-ini-line (stream)
377 | (let ((line (read-line stream nil)))
378 | (unless line
379 | (setf current-section-name "")
380 | (return-from parse-ini-line (values nil nil nil)))
381 | (multiple-value-bind (beg end reg-begs reg-ends)
382 | (scan ini-line-re line)
383 | (declare (ignorable end))
384 | (unless beg
385 | (error "Improperly formatted INI line: ~A" line))
386 | (if (and (> (length reg-begs) 2) (aref reg-begs 1))
387 | (values
388 | (concatenate 'string
389 | current-section-name "/"
390 | (subseq line (aref reg-begs 1) (aref reg-ends 1)))
391 | (subseq line (aref reg-begs 2) (aref reg-ends 2))
392 | nil)
393 | (progn
394 | (when (and (> (length reg-begs) 0) (aref reg-begs 0))
395 | (setf current-section-name
396 | (subseq line (aref reg-begs 0) (aref reg-ends 0))))
397 | (values t t t)))))))
398 |
399 | (with-open-file (s "config.ini" :direction :input :if-does-not-exist nil)
400 | (make-hash :test #'equal :init-format :function
401 | :initial-contents #'parse-ini-line :init-data (list s)))
402 | \end{verbatim}
403 | \item Transforming a hash built from a sequence of keys and values
404 |
405 | Passing a function as \texttt{:init-data} can be used to
406 | transform the initial contents as the hash is being initialized.
407 |
408 |
409 | \begin{verbatim}
410 | (make-hash :init-format :flat
411 | :initial-contents '(a 1 b 2 c 3 d 1 e 2 f 3 g 4)
412 | :init-data (lambda (k v) (values k (* v v) nil)))
413 | (make-hash :init-format :pairs
414 | :initial-contents '((a . 1) (b . 2) (c . 3)
415 | (d . 1) (e . 2) (f . 3) (g . 4))
416 | :init-data (lambda (k v)
417 | (values (intern (symbol-name k) :keyword)
418 | (* v v))))
419 | (let ((scratch (make-hash)))
420 | (make-hash :init-format :lists
421 | :initial-contents '((a 1) (b 2) (c 3)
422 | (d 1) (e 2) (f 3) (g 4))
423 | :init-data (lambda (k v)
424 | (values v
425 | (setf (gethash v scratch)
426 | (cons k (gethash v scratch nil)))
427 | nil))))
428 | \end{verbatim}
429 |
430 |
431 |
432 |
433 | The first is a hash that maps \texttt{a} and \texttt{d} to 1, \texttt{b} and \texttt{e} to 4, \texttt{c} and \texttt{f} to 9,
434 | and \texttt{g} to 16. The second is the same except that the keys are the
435 | keywords with the same symbol-name (e.g., :a, :b). The third
436 | reverses the given alist, accumulating repeated values in a list:
437 | 1 $\to$ \texttt{(d a)}, 2 $\to$ \texttt{(e b)}, 3 $\to$ \texttt{(f c)}, and 4 $\to$ \texttt{(g)}.
438 | \item Transforming an existing hash table or alist
439 |
440 |
441 | \begin{verbatim}
442 | (defun lastcar (list)
443 | (car (last list)))
444 |
445 | (defvar *pet-hash*
446 | (make-hash :initial-contents
447 | '(dog (mammal pet loyal 3) cat (mammal pet independent 1)
448 | eagle 0 cobra 0
449 | goldfish (fish pet flushed 1) hamster (mammal pet injured sad 2)
450 | corn-snake (reptile pet dog-like 1) crab (crustacean quiet 4)
451 | grasshopper (insect methusala 1) black-widow 0)))
452 |
453 | (make-hash :initial-contents *pet-hash*
454 | :init-data (make-hash-transformer :value #'lastcar #'atom))
455 | \end{verbatim}
456 |
457 |
458 |
459 |
460 | The result maps \texttt{dog} $\to$ 3, \texttt{cat} $\to$ 1, \texttt{goldfish} $\to$ 1, \texttt{hamster} $\to$ 2,
461 | \texttt{corn-snake} $\to$ 1, \texttt{grasshopper} $\to$ 1, and \texttt{crab} $\to$ 4. If \texttt{*pet-hash*}
462 | had been an alist instead of a hash table, the call to \texttt{make-hash}
463 | would be unchanged. Note that lastcar is not called on an entry unless
464 | atom returns \texttt{nil}.
465 | \item Transforming a keybag
466 |
467 | Create a hash recording counts for each key (see example 7) but filter
468 | on some constraint. A function for \texttt{:init-data} takes the key and count
469 | and sets the values according to the return convention described below.
470 | With a vector for \texttt{:init-data}, the count is an index into the vector
471 | for the new value. With a hash table, the count is used as key to
472 | lookup the new value.
473 |
474 |
475 | \begin{verbatim}
476 | (make-hash :init-format :keybag
477 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c)
478 | :init-data (lambda (key count) (values key count (<= count 3))))
479 | (make-hash :init-format :keybag
480 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c)
481 | :init-data #(zero one two three four)
482 | :init-default 'more-than-four)
483 | (make-hash :init-format :keybag
484 | :initial-contents #(a b d d e c b a a e c c d a a c c e c c)
485 | :init-data (make-hash :initial-contents '(3 "You're out!"))
486 | :init-default "Whatever!")
487 | \end{verbatim}
488 |
489 |
490 |
491 |
492 | The first gives a hash a $\to$ 5, c $\to$ 7; b, d, and e are skipped because \texttt{(<= count 3)} returns \texttt{t} for them.
493 | The second gives a hash a $\to$ more-than-four, b $\to$ two, c $\to$ more-than-four,
494 | d $\to$ three, e $\to$ three. And the third gives a hash with a, b, and c
495 | mapping to the string ``Whatever!'' and d and e mapping to ``You're out!''.
496 | \item Creating Hash Factories
497 |
498 | Hash factories are shortcuts that encapsulate a specified set of hash creation options,
499 | primarily for use with literal hash creation with sequence-style init formats.
500 | The factories are functions that package their arguments (\&rest style) and
501 | use the resulting list as the \texttt{:initial-contents} argument to \texttt{make-hash}
502 | with the given options. The difference between \texttt{define-hash-factory} and
503 | \texttt{make-hash-factory} is that the former defines a toplevel function, whereas
504 | the latter returns an anonymous function.
505 |
506 |
507 | \begin{verbatim}
508 | (define-hash-factory qhash
509 | :init-format :flat
510 | :test #'eq :size 128
511 | :documentation "Construct moderate size hash tables for symbols.")
512 |
513 | (qhash 'a 1 'b 2 'c 3 'd 4 'x 100 'y -100 'z 0)
514 | (apply #'qhash '(a 1 b 2 c 3 d 4 x 100 y -100 z 0))
515 |
516 | (define-hash-factory ahash
517 | :init-format :pairs
518 | :init-data (lambda (k v)
519 | (values (if (stringp k) (intern (string-upcase k)) k) v))
520 | :documentation "Alist->hash, converting string keys to symbols.")
521 |
522 | (ahash '("foo" . 10) '("bar" . 20) '("zap" . 30))
523 | (apply #'ahash '((a . 1) (b . 2) (c . 3) ("d" . 4) ("foo" . "bar")))
524 |
525 | (let ((h (make-hash-factory :init-format :keys :init-data *big-hash*)))
526 | (funcall h key1 key2 key3 key4)) ; quick subhash of *big-hash*
527 | \end{verbatim}
528 | \item Portable Reader Factories
529 |
530 | It may be desirable to use reader macros to stand-in for particular
531 | hash table constructors. These are hash factories that are installed in
532 | a readtable using \texttt{install-hash-reader} at toplevel. Both dispatched
533 | and raw delimited forms are supported, and the installer can accept a
534 | list of options or an existing factory.
535 |
536 | Here are three separate uses yielding \texttt{:a} $\to$ 1, \texttt{:b} $\to$ 2, \texttt{:c} $\to$ 3, \texttt{:d} $\to$ 4.
537 |
538 |
539 | \begin{verbatim}
540 | (install-hash-reader ()) ; default settings and options
541 | #{:a 1 :b 2 :c 3 :d 4}
542 | \end{verbatim}
543 |
544 |
545 |
546 |
547 |
548 | \begin{verbatim}
549 | (install-hash-reader '(:init-format :pairs)
550 | :use-dispatch t :open-char #\[ :close-char #\])
551 | #['(:a . 1) '(:b . 2) '(:c . 3) '(:d . 4)]
552 | \end{verbatim}
553 |
554 |
555 |
556 |
557 |
558 | \begin{verbatim}
559 | (install-hash-reader '(:init-format :lists)
560 | :use-dispatch nil :open-char #\{ :close-char #\})
561 | {'(:a 1) '(:b 2) '(:c 3) '(:d 4)}
562 | \end{verbatim}
563 |
564 |
565 |
566 |
567 | This accepts a readtable to modify (current readtable by default) and works
568 | well with the :named-readtables package.
569 | \end{enumerate}
570 | \section{Creating Hash Tables}
571 | \label{sec-4}
572 |
573 |
574 | The function \texttt{make-hash} is an interface to the CL standard function
575 | \texttt{make-hash-table} that also allows flexible initialization. It accepts all
576 | the standard and implementation-dependent keyword arguments that the
577 | standard \texttt{make-hash-table} does but also accepts a few additional keyword
578 | arguments that can specify the initial contents of the table (analogously
579 | to the CL standard function \texttt{make-array}). The operation of the make-hash
580 | initializer is designed to handle all the common cases easily while
581 | enabling powerful abstractions where needed. See the \hyperref[sec-3]{Examples}
582 | section above for examples.
583 |
584 | The new keyword arguments are:
585 |
586 | \begin{itemize}
587 | \item \texttt{:initial-contents} \emph{object}
588 |
589 | If the supplied object is non-nil, the object is used to initialize
590 | the created hash table in accordance with the \texttt{:init-format} argument.
591 | For some formats, the \texttt{:init-data} argument may also be needed to
592 | supply supplementary information for the initializer. The built-in
593 | formats support the cases where object is either a hash table or
594 | sequence from which the keys and values can be extracted. See the
595 | subsection below for a detailed description of the possibilities.
596 | \item \texttt{:init-format} \emph{keyword}
597 |
598 | A keyword specifying the structure of the initialization contents
599 | and auxiliary data given by the \texttt{:initial-contents} and \texttt{:init-data}
600 | arguments. Built-in support is provided for :hash, :flat, :pairs,
601 | :lists, :vectors, :seqs, :keys, :keychain, :keybag, and :function.
602 | These are described in detail in the subsection below.
603 |
604 | When an initializer format is not supplied, it is computed by
605 | calling the generic function \texttt{hash-initializer-default-format} on
606 | the given \texttt{:initial-contents} object. A method for this function
607 | should be defined whenever the function \texttt{initialize-hash} is
608 | extended to handle a new class of \texttt{:initial-contents} objects. Methods
609 | can be overridden to change the default used in existing cases.
610 | \item \texttt{:init-data} \emph{object}
611 |
612 | Auxiliary data used for initialization with some formats. Its
613 | structure and meaning depend on the value of \texttt{:init-format}, as
614 | described in the subsection below.
615 | \item \texttt{:init-default} \emph{value}
616 |
617 | Default value to use in indirect initialization when the value for
618 | the given key cannot be determined from the \texttt{:initial-contents} and
619 | \texttt{:init-data} for the particular \texttt{:init-format} supplied.
620 | \end{itemize}
621 |
622 | If no :initial-contents argument is supplied, the hash table is not
623 | initialized, and \texttt{make-hash} behaves exactly like the standard
624 | function \texttt{make-hash-table}. For many formats, initialization only
625 | requires an :initial-contents argument. See \hyperref[sec-3]{Examples} for more.
626 | \subsection{Functions as \texttt{:init-data} (or \texttt{:initial-contents})}
627 | \label{sec-4-1}
628 |
629 |
630 | For most of the pre-defined formats, a function can be
631 | passed as the \texttt{:init-data}, and with the \texttt{:function} format,
632 | a function can be passed as the \texttt{:initial-contents} as well.
633 | These functions are expected to return three values
634 | \emph{KEY VALUE [BAD-VALUE]}
635 | that are used (under some conditions) to create a new key-value
636 | entry in the hash table being initialized. Here, BAD-VALUE
637 | is a \textbf{ternary} value: nil (or missing) means to use KEY and VALUE
638 | as is; t means to skip creating this entry entirely, and any
639 | other non-nil value means to associate KEY to the specified
640 | \texttt{:init-default} value \emph{instead} of VALUE.
641 |
642 | In the description of the predefined formats below, such function
643 | arguments are used in one of three ways:
644 |
645 | \begin{enumerate}
646 | \item Entry transformation: \emph{INIT-KEY INIT-VALUE -> KEY VALUE [BAD-VALUE]}
647 |
648 | The key and value specified by \texttt{:initial-contents} (\emph{INIT-KEY INIT-VALUE})
649 | are passed to the function and the return values used as described above.
650 | (Formats \texttt{:hash}, \texttt{:flat}, \texttt{:pairs}, \texttt{:lists}, \texttt{:vectors}, \texttt{:seqs}.)
651 | \item Key transformation: \emph{INIT-KEY -> KEY VALUE [BAD-VALUE]}
652 |
653 | With format \texttt{:keys}, the key specified by \texttt{:initial-contents} is
654 | passed to the function and the return values used as described above.
655 | \item Entry generation: \emph{\&rest ARGS -> KEY VALUE [BAD-VALUE]}
656 |
657 | With format \texttt{:function}, the \texttt{:initial-contents} argument is a function.
658 | This function is applied repeatedly to \emph{ARGS} and the return values
659 | used as described above. However, in this case, the first time
660 | that KEY is nil, initialization stops.
661 | \end{enumerate}
662 |
663 | See also the documentation for the function \texttt{make-hash-transformer}
664 | which creates a function suitable for use in this way from a simpler
665 | function on keys or entries.
666 | \subsection{Predefined Initialization Formats}
667 | \label{sec-4-2}
668 |
669 |
670 | The \texttt{:init-format} argument is a keyword that determines how the
671 | keyword arguments \texttt{:initial-contents} and \texttt{:init-data} are interpreted.
672 | If \texttt{:init-format} is not supplied, the default format is determined
673 | by the type of \texttt{:initial-contents}.
674 |
675 | There are four basic cases in the pre-defined initialization support:
676 |
677 | \begin{enumerate}
678 | \item Initializing from an existing hash table
679 |
680 | When \texttt{:init-format} is \texttt{:hash} or by default if \texttt{:initial-contents} is
681 | a hash-table, the new hash table is initialized by a shallow copy of
682 | the initial contents table, with shared structure in keys and values.
683 | If \texttt{:init-data} is a function, that function is used for entry
684 | transformation of the hash table given in \texttt{:initial-contents}.
685 | \item Initializing from a sequence (or sequences) specifying key-value pairs.
686 |
687 | When \texttt{:init-format} is \texttt{:flat}, \texttt{:pairs}, \texttt{:lists}, \texttt{:vectors}, or
688 | \texttt{:seqs}, the \texttt{:initial-contents} should be a sequence that specifies a
689 | collection of key-value pairs. The only difference among these formats
690 | is the expected structure of the sequence's elements. For \texttt{:flat}, the
691 | keys and values alternate; for \texttt{:pairs}, it is a sequence of cons
692 | pairs (e.g., an alist); for \texttt{:lists}, \texttt{:vectors}, and \texttt{:seqs}, it is a
693 | sequence of lists, vectors, or arbitrary sequences respectively of
694 | which the first two elements of each give the corresponding key and
695 | value. In these cases, if \texttt{:init-data} is nil or missing, the key-value
696 | pairs are used as is; if \texttt{:init-data} is a function, the function is
697 | used for entry transformation, as described above, for each pair.
698 |
699 | When \texttt{:init-format} is \texttt{:keychain}, the \texttt{:initial-contents} should
700 | be a sequence of keys and \texttt{:init-data} should be a sequence of
701 | corresponding values \emph{in the same order}. The table is initialized
702 | with the resultant key-value pairs.
703 |
704 | When \texttt{:init-format} is \texttt{:keys}, the \texttt{:initial-contents} should be a
705 | sequence of keys. The corresponding value is obtained by looking
706 | up the key in the hash table, alist, or function (via key mapping,
707 | see above) that is passed as \texttt{:init-data}, which in this case
708 | is required.
709 | \item Initializing from a bag/multiset of keys.
710 |
711 | When \texttt{:init-format} is \texttt{:keybag}, the \texttt{:initial-contents} should be a
712 | sequence representing a \emph{multiset} (a collection with possibly
713 | repeated elements) of keys. The hash table is initialized to map the
714 | unique elements from that multiset (as keys) to the number of times
715 | that element appears in the multiset (as values).
716 |
717 | In this case, if \texttt{:init-data} is a vector, hash table, or function,
718 | the count is used to find the corresponding value by indexing into the
719 | vector, looking up the value associated with count in the data
720 | hash-table, or calling the function with the key and count. When a
721 | value cannot be found, the default is used instead, subject to the
722 | value of BAD-VALUE in the function case.
723 | \item Initializing from a function.
724 |
725 | When \texttt{:init-format} is \texttt{:function} or \texttt{:initial-contents} is a function,
726 | the hash table is initialized by using the function for entry generation
727 | as described above.
728 | \end{enumerate}
729 |
730 | \noindent
731 | See also the documentation for \texttt{make-hash} for a relatively succinct
732 | table describing these options. Keep in mind that the interpretation of
733 | the formats is specified by methods of the \texttt{initialize-hash} generic
734 | function, and the default formats for different \texttt{:initial-contents} types
735 | by methods of the \texttt{hash-initializer-default-format}.
736 |
737 | \section{Defining Custom Initialization Formats}
738 | \label{sec-5}
739 |
740 |
741 | Initialization by \texttt{make-hash} is controlled by the generic function
742 | \texttt{initialize-hash}. Defining new methods for this function, or overriding
743 | existing methods, makes it easy to extend the hash table initialization,
744 | to add or modify formats, change behaviors, and so forth.
745 |
746 | The function \texttt{initialize-hash} takes five arguments: the hash table being
747 | initialized, the format specifier, the initial contents source object, the
748 | auxiliary data (\texttt{:init-data}) object, and the default value (\texttt{:init-default}).
749 | The format is usually a keyword with eql specialization. The contents
750 | source and data object are specialized on type.
751 | \section{Specifying Default Formats}
752 | \label{sec-6}
753 |
754 |
755 | When no \texttt{:init-format} argument is given to \texttt{make-hash}, the default format
756 | is determined by calling a suitable method of the generic function
757 | \texttt{hash-initializer-default-format}, passing the \texttt{:initial-contents} argument.
758 | The predefined methods use format \texttt{:hash} given a hash table, \texttt{:flat} given
759 | a sequence, and \texttt{:function} given a function. More flexibility may be
760 | desired in particular applications.
761 | \section{Hash Table Factories}
762 | \label{sec-7}
763 |
764 |
765 | When specific patterns of hash table construction options are used repeatedly,
766 | it can be helpful to encapsulate those patterns in a simple way.
767 | Hash table factories are shortcut functions that create a hash table using
768 | prespecified construction options. Any of the keyword arguments to \texttt{make-hash},
769 | except for \texttt{:initial-contents}, can be passed to the factory constructor
770 | and will be used for creating the hash table when the factory is called.
771 | The arguments in the factory call are packaged \texttt{\&rest}-style in a list
772 | and used as the \texttt{:initial-contents}. There are two factory constructors:
773 | \texttt{define-hash-factory} creates a toplevel function of a given name
774 | and \texttt{make-hash-factory} creates an anonymous function.
775 | \section{Reader Representations}
776 | \label{sec-8}
777 |
778 |
779 | Similarly, it might be desirable for the hash factories to be represented
780 | by syntax at read time via reader macros. The macro \texttt{install-hash-reader}
781 | updates a given readtable (the current readtable by default) so that
782 | a dispatched or raw delimited form creates a hash table. The effect
783 | is identical to the use of the hash table factories, except syntactically.
784 | Indeed, a factory can be passed directly to the \texttt{install-hash-reader}.
785 |
786 | Calls to this macro must occur at toplevel to have effect. It is designed
787 | to be as portable as possible and to work well with the named-readtables
788 | package. Common examples would be the use of \#\{\} or \{\} to represent hash
789 | tables.
790 | \section{Dictionary}
791 | \label{sec-9}
792 | \subsection{make-hash [Function]}
793 | \label{sec-9-1}
794 |
795 |
796 | \textbf{make-hash} \emph{\&key initial-contents init-format init-data init-default \ldots{} $\to$ hash-table}
797 |
798 | Creates, initializes if requested, and returns a new hash table.
799 |
800 | Keyword options include all those of the standard \texttt{make-hash-table}, any
801 | extension options allowed by the given implementation, and the additional
802 | keyword options to control initialization: \texttt{:initial-contents}, the main
803 | source for information filling the table; \texttt{:init-format}, a keyword
804 | specifying how the initialization options are interpreted; \texttt{:init-data},
805 | auxiliary data needed for initialization in some formats; and
806 | \texttt{:init-default}, a default value used when the value for a key cannot be
807 | initialized. See the description above in \hyperref[sec-4]{Creating Hash Tables}. Users can
808 | support other types/configurations (or alter the default handling) by
809 | extending the generic function \texttt{initialize-hash} in this package; see
810 | \hyperref[sec-5]{Defining Custom Initialization Formats}.
811 |
812 | \subsection{make-hash-transformer [Function]}
813 | \label{sec-9-2}
814 |
815 |
816 | \textbf{make-hash-transformer} \emph{domain function \&optional badp $\to$ function}
817 |
818 | Transform FUNCTION to be suitable for use as the \texttt{:init-data} (or
819 | \texttt{:initial-contents}) argument to \texttt{make-hash}. DOMAIN specifies the
820 | signature of FUNCTION and is one of the keywords \texttt{:key}, \texttt{:value}, or
821 | \texttt{:entry}, indicating that FUNCTION takes a key, a value, or a key and a
822 | value, respectively. BADP is a function with the same argument signature
823 | as FUNCTION that follows the return convention described \hyperref[sec-4-1]{above}.
824 | Specifically, it returns a ternary value: nil means that the transformed
825 | entry should be used as is, t means that the entry should be skipped, and
826 | any other non-nil value means that the key should be used with a default.
827 | Note that FUNCTION is \emph{not} called for an entry if BADP returns a non-nil
828 | value.
829 |
830 | The returned function accepts a key and a value (the value is optional
831 | with DOMAIN :key) and returns three values: the key, the value, and the
832 | bad-value ternary for that entry.
833 | \subsection{initialize-hash [Generic Function]}
834 | \label{sec-9-3}
835 |
836 |
837 | \textbf{initialize-hash} \emph{table form source data default}
838 |
839 | Creates and adds an entry to TABLE using info of format FORM in SOURCE
840 | and DATA. SOURCE contains the main contents, and DATA (optionally)
841 | contains auxiliary information or objects required for initialization
842 | for some formats. DEFAULT is the value that should be stored in the table
843 | when an appropriate value associated to a key cannot be found. Adding or
844 | redefining methods for this function allows extension or modification of
845 | the initialization mechanism.
846 |
847 | Note the convention, used by the predefined methods, that functions
848 | passed as either SOURCE or DATA are expected to return three values,
849 | using the convention described \hyperref[sec-4-1]{above}.
850 | \subsection{hash-initializer-default-format [Generic Function]}
851 | \label{sec-9-4}
852 |
853 |
854 | \textbf{hash-initializer-default-format} \emph{source $\to$ keyword or error}
855 |
856 | Selects an initializer format based on the given initial contents SOURCE.
857 | For example, the default format for sequence contents is \texttt{:flat};
858 | to change it to \texttt{:pairs} so that an alist is expected as \texttt{:initial-contents}
859 | by default, do the following:
860 |
861 |
862 | \begin{verbatim}
863 | (defmethod hash-initializer-default-format ((source list))
864 | :pairs)
865 | \end{verbatim}
866 |
867 |
868 |
869 |
870 | \subsection{\texttt{*hash-factory-defaults*} [Special Variable]}
871 | \label{sec-9-5}
872 |
873 |
874 | Hash table creation options used as defaults by hash factory
875 | constructors. These option specifications are passed last to make-hash by
876 | the hash factories and so are overridden by options passed as explicit
877 | arguments to the factory constructor.
878 |
879 | Changing this variable affects the options used by every hash factory
880 | that does not fully specify its options. This includes default calls to
881 | the reader constructors. Of particular note are the \texttt{:test} and
882 | \texttt{:init-format} options.
883 | \subsection{define-hash-factory [Macro]}
884 | \label{sec-9-6}
885 |
886 |
887 | \textbf{define-hash-factory} \emph{name \&key \ldots{}hash-options\ldots{}}
888 |
889 | Create a hash-table factory NAME that calls \texttt{make-hash} with options
890 | given by the hash-options arguments. The defined
891 | function packages its arguments as a list, which it passes as
892 | the \texttt{:initial-contents} argument to \texttt{make-hash}.
893 |
894 | The hash-options are alternating keyword-value pairs. The supplied
895 | keyword arguments precede and thus override the options in
896 | \texttt{*hash-factory-defaults*}, which is intended to allow one to use short
897 | names or customized policies in simple calling patterns. Complex
898 | initialization patterns may need the full power of \texttt{make-hash}
899 | itself.
900 | \subsection{make-hash-factory [Function]}
901 | \label{sec-9-7}
902 |
903 |
904 | \textbf{make-hash-factory} \emph{\&key \ldots{}hash-options\ldots{} $\to$ factory-function}
905 |
906 | Like define-hash-factory but creates and returns an anonymous factory
907 | function.
908 | \subsection{install-hash-reader [Macro]}
909 | \label{sec-9-8}
910 |
911 |
912 | \textbf{install-hash-reader} \emph{options \&key readtable use-dispatch allow-numbered-dispatch open-char close-char dispatch-char}
913 |
914 | Creates a hash table factory specified by OPTIONS and installs it
915 | in READTABLE (the current readtable by default). To have effect,
916 | this must be called at toplevel.
917 |
918 | OPTIONS is either a list of keyword-value pairs (as would be passed to
919 | \texttt{make-hash} or \texttt{make-hash-factory}) or a hash factory function.
920 | READTABLE is a readtable object, \texttt{*readtable*} by default.
921 |
922 | The keyword arguments control how the reader is modified as follows:
923 |
924 | \begin{itemize}
925 | \item USE-DISPATCH (t by default) determines whether the reader macro uses a
926 | dispatch character DISPATCH-CHAR before OPEN-CHAR. If non-nil, a
927 | dispatch character is used and is registered in READTABLE. If this is
928 | nil, then OPEN-CHAR and CLOSE-CHAR will be a raw delimited construct.
929 | \item ALLOW-NUMBERED-DISPATCH (nil by default) allows a dispatched reader
930 | macro to modify its hash test when given numeric arguments between
931 | DISPATCH-CHAR and OPEN-CHAR. This only applies when USE-DISPATCH is
932 | non-nil and when OPTIONS is a list, not a factory function. The goal
933 | here is to make it easy to reuse reader factories in several contexts.
934 |
935 | If nil, numbered dispatch is not supported. If t, numeric arguments
936 | 0, 1, 2, and 3 correspond to hash tests \texttt{eq}, \texttt{eql}, \texttt{equal}, and
937 | \texttt{equalp} respectively. If a sequence of symbols or functions,
938 | those functions are used for the hash test given a numeric
939 | argument from 0 below the length of the sequence. In either case,
940 | dispatch \emph{without} a numeric argument uses the originally specified
941 | options.
942 |
943 | Note: This is \emph{an experimental feature and may be discontinued in future versions} if it proves more confusing than helpful.
944 | \item OPEN-CHAR (default open-brace) is the character that delimits the
945 | beginning of the hash-table contents. If USE-DISPATCH is non-nil,
946 | this character must be preceded by DISPATCH-CHAR, and optionally
947 | a numeric argument.
948 | \item CLOSE-CHAR (default close-brace) is the character that delimits
949 | the end of the hash-table contents.
950 | \item DISPATCH-CHAR (default \#) is the character used to indicate a
951 | dispatched reader macro. When (and only when) USE-DISPATCH is non-nil,
952 | READTABLE is modified to register this as a dispatch and a
953 | non-terminating macro character via \texttt{make-dispatch-macro-character}.
954 | Note that there can be more than one dispatch character in a read
955 | table.
956 | \end{itemize}
957 |
958 | \end{document}
--------------------------------------------------------------------------------
/make-hash-tests.asd:
--------------------------------------------------------------------------------
1 | (asdf:defsystem #:make-hash-tests ; test-suite system, defined separately from the make-hash library system
2 | :depends-on (:make-hash :fiveam) ; the library under test plus the FiveAM test framework
3 | :components ((:file "tests")))
4 |
5 | (in-package :asdf) ; the method below is defined from inside ASDF's own package
6 | (defmethod perform ((o test-op) (c (eql (find-system "make-hash-tests")))) ; test-op hook for this one system object
7 | (flet ((run-tests (&rest args)
8 | (apply (intern (string '#:run-tests) '#:make-hash-tests) args))) ; RUN-TESTS is looked up by name at call time
9 | (run-tests)))
10 | (in-package :cl-user) ; switch to CL-USER for any forms read after this point
11 |
12 |
--------------------------------------------------------------------------------
/make-hash.asd:
--------------------------------------------------------------------------------
1 | ;;;; make-hash.asd
2 |
3 | (asdf:defsystem #:make-hash
4 | :serial t ; each component depends on the ones listed before it
5 | :components ((:file "package") ; presumably defines the MAKE-HASH package that make-hash.lisp enters
6 | (:file "make-hash")))
7 |
8 |
--------------------------------------------------------------------------------
/make-hash.lisp:
--------------------------------------------------------------------------------
1 | ;;;; make-hash -- hash table creation with flexible, extensible initializers
2 | ;;;
3 | ;;; Copyright (C) 2012 Christopher R. Genovese, all rights reserved.
4 | ;;; License: See file LICENSE.txt in this distribution.
5 | ;;;
6 | ;;; Author: Christopher Genovese
7 | ;;; Maintainer: Christopher R. Genovese
8 | ;;; URL: http://github.com/genovese/make-hash
9 | ;;;
10 | ;;; Version: 1.0.2
11 | ;;; Update#: 15
12 | ;;; Created: Wed 18 Apr 2012 at 09:55 EDT
13 | ;;; Last-Updated: Sun 21 Apr 2013 at 11:00 EDT
14 | ;;; Updated By: Christopher R. Genovese
15 |
16 |
17 | (in-package #:make-hash)
18 |
19 |
20 | ;;; Error conditions
21 |
22 | (define-condition make-hash-error (error)
23 | ((text :initarg :text :reader make-hash-error-text) ; used as the FORMAT control string when reporting
24 | (data :initarg :data :reader make-hash-error-data)) ; handed to FORMAT as one single argument
25 | (:documentation "Error encountered during hash table creation or initialization.")
26 | (:default-initargs :text "" :data nil)
27 | (:report
28 | (lambda (c s) ; C is the condition being reported, S the output stream
29 | (format s (make-hash-error-text c) (make-hash-error-data c)))))
30 |
31 | (define-condition unknown-initialization-pattern (make-hash-error)
32 | () ; no new slots; TEXT and DATA are inherited from make-hash-error
33 | (:documentation "Attempt to initialize-hash with an unrecognized
34 | pattern of argument specialization or unrecognized format."))
35 |
36 | (define-condition unknown-default-initializer (make-hash-error)
37 | () ; no new slots; TEXT and DATA are inherited from make-hash-error
38 | (:documentation "Attempt to make-hash without :init-format when no
39 | default initialization is defined for the given initial-contents."))
40 |
41 | (define-condition unmatched-closing-delimiter (make-hash-error)
42 | () ; no new slots; TEXT and DATA are inherited from make-hash-error
43 | (:documentation "Reader can find no closing delimiter for literal
44 | hash table."))
45 |
46 | (define-condition numeric-dispatch-bounds-error (make-hash-error)
47 | () ; no new slots; TEXT and DATA are inherited from make-hash-error
48 | (:documentation "A numeric argument to a dispatch reader
49 | macro is out of bounds." ))
50 |
51 |
52 | ;;; Hash initialization support
53 | ;;;
54 | ;;; There are many different types and formats for the initialization of a
55 | ;;; hash table that arise in practice, and I would like to support these
56 | ;;; as naturally as possible. It is also desirable to offer flexibility
57 | ;;; that will allow users to adapt the initialization to their needs
58 | ;;; while maintaining clarity and concision. As such, the initialization
59 | ;;; is performed by generic function `initialize-hash' that uses multi-dispatch
60 | ;;; to handle the many cases modularly and extensibly. This kind of dispatching
61 | ;;; is ideally suited to CLOS, and the arguments to `initialize-hash' are
62 | ;;; specialized for each case, as described below.
63 | ;;;
64 | ;;; In addition, the generic function `hash-initializer-default-format' gives
65 | ;;; a default format (:init-format to `make-hash') to use for a given
66 | ;;; source (:initial-contents to `make-hash'), allowing easy, contingent control
67 | ;;; within an application or at the REPL.
68 | ;;;
69 | ;;; The initialization mechanism can be extended or adapted by the user by
70 | ;;; defining or redefining methods of these two generic functions.
71 | ;;;
72 | ;;; The initialize-hash generic function takes five (specialized) arguments:
73 | ;;;
74 | ;;; + table -- the hash-table being initialized;
75 | ;;; + form -- the value of the :init-format argument to make-hash;
76 | ;;; + source -- principal data source for the initialization contents,
77 | ;;; the value of the :initial-contents argument to make-hash;
78 | ;;; + data -- auxiliary data for the initialization,
79 | ;;; the value of the :init-data argument to make-hash;
80 | ;;; + default -- a default value when the value for a key is not available,
81 | ;;; the value of the :init-default argument to make-hash.
82 | ;;;
83 | ;;; The form argument is typically a keyword. Built-in support is provided for
84 | ;;; :flat, :hash, :pairs, :lists, :vectors, :seqs, :keys, :keychain, :keybag, and
85 | ;;; :function. These are described in the documentation to `make-hash'.
86 | ;;;
87 | ;;; Function arguments given as source or data are expected to return three
88 | ;;; values KEY VALUE [BAD-VALUE] that are used (under some conditions) to
89 | ;;; create a new entry KEY . VALUE in the hash to be initialized. Here,
90 | ;;; BAD-VALUE is a *ternary* value: nil means to use KEY and VALUE as is;
91 | ;;; t means to skip creating this entry; and any other non-nil object means
92 | ;;; to associate to KEY the specified *default* value instead of VALUE.
93 | ;;;
94 | ;;; In the built-in initialization support, such functions are used in one
95 | ;;; of three related ways:
96 | ;;;
97 | ;;; 1. Entry transformation: KEY0 VALUE0 -> KEY VALUE [BAD-VALUE]
98 | ;;; ftype: (function (t t) (values t t &optional t))
99 | ;;;
100 | ;;; Given an associative map (formats :hash, :pairs, :flat, :lists, :vectors)
101 | ;;; with entries (KEY0 . VALUE0) transformed to (KEY . VALUE) by the function.
102 | ;;; See above for the interpretation of bad-value, which is the same for all
103 | ;;; three cases.
104 | ;;;
105 | ;;; 2. Key mapping: KEY -> KEY VALUE [BAD-VALUE]
106 | ;;; ftype: (function (t) (values t t &optional t))
107 | ;;;
108 | ;;; Given a key (format :keys), this creates value from key and creates
109 | ;;; pair (KEY . VALUE). See above for the interpretation of bad-value.
110 | ;;;
111 | ;;; 3. Entry generation: &rest ARGS -> KEY VALUE [BAD-VALUE]
112 | ;;; ftype: (function (&rest t) (values t t &optional t))
113 | ;;;
114 | ;;; Each call produces a new (KEY . VALUE) pair subject to two provisos,
115 | ;;; i) the interpretation of bad-value, and ii) if the returned key is nil
116 | ;;; then generation halts.
117 | ;;;
118 | ;;; All three signatures are supported by the following construct.
119 | ;;;
120 |
121 | (defmacro set-transformed-entry (entry0 default via transform into table
122 |                                  &optional on condition do)
123 |   "Expand into code that applies TRANSFORM to an entry and stores the result.
124 | ENTRY0 supplies the arguments for TRANSFORM: when it is a list form, its
125 | elements are spliced in as the argument forms of a `funcall'; otherwise
126 | it is a form evaluating to an argument list that is handed to `apply'.
127 | TRANSFORM must return the values KEY, VALUE, and optionally BAD-VALUE.
128 | When BAD-VALUE is nil, KEY is associated with VALUE in TABLE; when it
129 | is exactly t, no entry is made; for any other non-nil BAD-VALUE, KEY is
130 | associated with DEFAULT instead. TABLE is the hash table that is
131 | modified.
132 |
133 | If ON is supplied (its value is irrelevant), the expansion first calls
134 | the function named by CONDITION on the returned KEY and evaluates the
135 | form DO when the result is non-nil. This test runs *before* BAD-VALUE
136 | is examined and before anything is stored.
137 |
138 | VIA and INTO are syntactic noise words; their values are ignored."
139 |   (declare (ignore via into))
140 |   (let ((key-var (gensym "key"))
141 |         (val-var (gensym "val"))
142 |         (bad-var (gensym "bad")))
143 |     `(multiple-value-bind (,key-var ,val-var ,bad-var)
144 |          ,(if (listp entry0)
145 |               `(funcall ,transform ,@entry0)
146 |               `(apply ,transform ,entry0))
147 |        ,@(when on `((when (,condition ,key-var) ,do)))
148 |        (unless (eq ,bad-var t)
149 |          (setf (gethash ,key-var ,table) (if ,bad-var ,default ,val-var))))))
150 |
151 |
152 | (defgeneric initialize-hash (table form source data default)
153 | (:documentation
154 | "Create and add entries to TABLE using info of format FORM in SOURCE and DATA.
155 | SOURCE contains the main contents, and DATA contains auxiliary
156 | information or objects required for initialization. DEFAULT is the value
157 | that should be stored in the table when an appropriate value associated
158 | to a key cannot be found. See `make-hash' for details on configurations
159 | with predefined support. Adding or redefining methods for this function
160 | allows extension or modification of the initialization mechanism.
161 |
162 | Note the convention, used by the predefined methods, that functions
163 | passed as either SOURCE or DATA are expected to return three values
164 |
165 | KEY VALUE [BAD-VALUE]
166 |
167 | that are used (under certain conditions) to create a new entry KEY .
168 | VALUE in the TABLE to be initialized. Here, BAD-VALUE is a *ternary*
169 | value: nil means to use KEY and VALUE as is; t means to skip creating
170 | this entry; and any other non-nil object means to associate to KEY the
171 | specified DEFAULT instead of VALUE."))
172 |
173 |
174 | ;; Unspecialized method to catch unsupported configurations
175 |
176 | (defmethod initialize-hash ((table t) (form t) (source t) (data t) default)
177 |   "Catch-all method: signal an error for an unsupported argument configuration."
178 |   (let* ((arguments (list table form source data default))
179 |          (descriptors (mapcar (lambda (arg)
180 |                                 (if (keywordp arg) arg (class-name (class-of arg))))
181 |                               arguments)))
182 |     (error 'unknown-initialization-pattern
183 |            :text (format nil "Unsupported signature (~{~A~^ ~}) for initialize-hash"
184 |                          descriptors)
185 |            :data arguments)))
186 |
187 |
188 | ;; Initialize from existing hash table either shallow copy or transformed
189 |
190 | (defmethod initialize-hash
191 |     ((table hash-table) (form (eql :hash)) (source hash-table) (data null) default)
192 |   "Copy every entry of SOURCE into TABLE; keys and values are shared, not duplicated."
193 |   (maphash (lambda (key val) (setf (gethash key table) val))
194 |            source))
195 |
196 | (defmethod initialize-hash
197 |     ((table hash-table) (form (eql :hash)) (source hash-table) (data function) default)
198 |   "Pass each entry of SOURCE through function DATA and store the results in TABLE."
199 |   (maphash (lambda (key0 val0)
200 |              (set-transformed-entry (key0 val0) default via data into table)) source))
201 |
202 |
203 | ;; Initialize from flat sequence, optionally transforming entries
204 |
205 | (defmethod initialize-hash
206 |     ((table hash-table) (form (eql :flat)) (source list) (data null) default)
207 |   "SOURCE is a plist-style list (key1 val1 key2 val2 ...) whose pairs are stored in TABLE."
208 |   (loop for (key val) on source by #'cddr
209 |         do (setf (gethash key table) val)))
210 |
211 | (defmethod initialize-hash
212 |     ((table hash-table) (form (eql :flat)) (source list) (data function) default)
213 |   "SOURCE is a plist-style list of alternating keys and values; each pair is
214 | passed through the function DATA and the result is stored in TABLE."
215 |   (do ((tail source (cddr tail)))
216 |       ((atom tail))
217 |     (set-transformed-entry ((first tail) (second tail)) default via data into table)))
218 |
219 | (defmethod initialize-hash
220 |     ((table hash-table) (form (eql :flat)) (source vector) (data null) default)
221 |   "SOURCE is a vector #(key1 val1 key2 val2 ...) whose pairs are stored in TABLE."
222 |   (do ((n (length source)) (i 0 (+ i 2))) ((>= i n))
223 |     (setf (gethash (aref source i) table) (aref source (1+ i)))))
224 |
225 | (defmethod initialize-hash
226 |     ((table hash-table) (form (eql :flat)) (source vector) (data function) default)
227 |   "SOURCE is a vector of alternating keys and values; each pair is passed
228 | through the function DATA and the result is stored in TABLE."
229 |   (do ((n (length source)) (i 0 (+ i 2)))
230 |       ((>= i n))
231 |     (set-transformed-entry ((aref source i) (aref source (1+ i))) default
232 |                            via data into table)))
233 |
234 |
235 | ;; Initialize from existing alist (or avector), optionally transforming entries
236 |
237 | (defmethod initialize-hash
238 |     ((table hash-table) (form (eql :pairs)) (source list) (data null) default)
239 |   "SOURCE is an association list; each (key . value) pair is stored in TABLE."
240 |   (loop for entry in source
241 |         do (setf (gethash (car entry) table) (cdr entry))))
242 |
243 | (defmethod initialize-hash
244 |     ((table hash-table) (form (eql :pairs)) (source list) (data function) default)
245 |   "SOURCE is an association list; each (key . value) pair is passed through
246 | the function DATA and the result is stored in TABLE."
247 |   (loop for entry in source do
248 |     (set-transformed-entry ((car entry) (cdr entry)) default
249 |                            via data into table)))
250 |
251 | (defmethod initialize-hash
252 |     ((table hash-table) (form (eql :pairs)) (source vector) (data null) default)
253 |   "SOURCE is a vector of (key . value) conses, each of which is stored in TABLE."
254 |   (map nil (lambda (entry) (setf (gethash (car entry) table) (cdr entry)))
255 |        source))
256 |
257 | (defmethod initialize-hash
258 |     ((table hash-table) (form (eql :pairs)) (source vector) (data function) default)
259 |   "SOURCE is a vector of (key . value) conses; each pair is passed through the
260 | function DATA and the result is stored in TABLE."
261 |   (map nil (lambda (entry)
262 |              (set-transformed-entry ((car entry) (cdr entry)) default via data into table))
263 |        source))
264 |
265 |
266 | ;; Initialize from sequence of lists, optionally transforming entries
267 |
268 | (defmethod initialize-hash
269 |     ((table hash-table) (form (eql :lists)) (source list) (data null) default)
270 |   "SOURCE is a list of lists (key val ...); the first two elements of each
271 | are stored in TABLE as a key-value pair."
272 |   (loop for entry in source
273 |         do (setf (gethash (first entry) table) (second entry))))
274 |
275 | (defmethod initialize-hash
276 |     ((table hash-table) (form (eql :lists)) (source list) (data function) default)
277 |   "SOURCE is a list of lists (key val ...); the first two elements of each
278 | are passed through the function DATA and the result is stored in TABLE."
279 |   (loop for entry in source do
280 |     (set-transformed-entry ((first entry) (second entry)) default
281 |                            via data into table)))
282 |
283 | (defmethod initialize-hash
284 |     ((table hash-table) (form (eql :lists)) (source vector) (data null) default)
285 |   "SOURCE is a vector of lists (key val ...); the first two elements of each
286 | are stored in TABLE as a key-value pair. No transformation is applied."
287 |   (map nil (lambda (entry) (setf (gethash (first entry) table) (second entry)))
288 |        source))
289 |
290 | (defmethod initialize-hash
291 |     ((table hash-table) (form (eql :lists)) (source vector) (data function) default)
292 |   "SOURCE is a vector of lists (key val ...); the first two elements of each
293 | are passed through the function DATA and the result is stored in TABLE."
294 |   (map nil (lambda (entry)
295 |              (set-transformed-entry ((first entry) (second entry)) default via data into table))
296 |        source))
297 |
298 |
299 | ;; Initialize from sequence of vectors, optionally transforming entries
300 |
301 | (defmethod initialize-hash
302 |     ((table hash-table) (form (eql :vectors)) (source list) (data null) default)
303 |   "SOURCE is a list of vectors #(key val ...); the first two elements of each
304 | are stored in TABLE as a key-value pair."
305 |   (loop for entry in source
306 |         do (setf (gethash (aref entry 0) table) (aref entry 1))))
307 |
308 | (defmethod initialize-hash
309 |     ((table hash-table) (form (eql :vectors)) (source list) (data function) default)
310 |   "SOURCE is a list of vectors #(key val ...); the first two elements of each
311 | are passed through the function DATA and the result is stored in TABLE."
312 |   (loop for entry in source do
313 |     (set-transformed-entry ((aref entry 0) (aref entry 1)) default
314 |                            via data into table)))
315 |
316 | (defmethod initialize-hash
317 |     ((table hash-table) (form (eql :vectors)) (source vector) (data null) default)
318 |   "SOURCE is a vector of vectors #(key val ...); the first two elements of each
319 | are stored in TABLE as a key-value pair. No transformation is applied."
320 |   (map nil (lambda (entry) (setf (gethash (aref entry 0) table) (aref entry 1)))
321 |        source))
322 |
323 | (defmethod initialize-hash
324 |     ((table hash-table) (form (eql :vectors)) (source vector) (data function) default)
325 |   "SOURCE is a vector of vectors #(key val ...); the first two elements of each
326 | are passed through the function DATA and the result is stored in TABLE."
327 |   (map nil (lambda (entry)
328 |              (set-transformed-entry ((aref entry 0) (aref entry 1)) default via data into table))
329 |        source))
330 |
331 |
332 | ;; Initialize from sequence of sequences, optionally transforming entries
333 |
334 | (defmethod initialize-hash
335 |     ((table hash-table) (form (eql :seqs)) (source list) (data null) default)
336 |   "SOURCE is a list of sequences [key val ...], each a list or a vector; the
337 | first two elements of each are stored in TABLE as a key-value pair."
338 |   (loop for entry in source
339 |         do (setf (gethash (elt entry 0) table) (elt entry 1))))
340 |
341 | (defmethod initialize-hash
342 |     ((table hash-table) (form (eql :seqs)) (source list) (data function) default)
343 |   "SOURCE is a list of sequences [key val ...]; the first two elements of each
344 | are passed through the function DATA and the result is stored in TABLE."
345 |   (loop for entry in source do
346 |     (set-transformed-entry ((elt entry 0) (elt entry 1)) default
347 |                            via data into table)))
348 |
349 | (defmethod initialize-hash
350 |     ((table hash-table) (form (eql :seqs)) (source vector) (data null) default)
351 |   "SOURCE is a vector of sequences [key val ...], each a list or a vector; the
352 | first two elements of each are stored in TABLE. No transformation is applied."
353 |   (map nil (lambda (entry) (setf (gethash (elt entry 0) table) (elt entry 1)))
354 |        source))
355 |
356 | (defmethod initialize-hash
357 |     ((table hash-table) (form (eql :seqs)) (source vector) (data function) default)
358 |   "SOURCE is a vector of sequences [key val ...]; the first two elements of each
359 | are passed through the function DATA and the result is stored in TABLE."
360 |   (map nil (lambda (entry)
361 |              (set-transformed-entry ((elt entry 0) (elt entry 1)) default via data into table))
362 |        source))
363 |
364 |
365 | ;; Initialize from given keys and another associative map (alist or hash)
366 |
367 | (defmethod initialize-hash
368 |     ((table hash-table) (form (eql :keys)) (source list) (data hash-table) default)
369 |   "SOURCE is a list of keys; TABLE receives each key with its value from hash table DATA, or DEFAULT if absent."
370 |   (dolist (key source)
371 |     (setf (gethash key table) (gethash key data default))))
372 |
373 | (defmethod initialize-hash
374 |     ((table hash-table) (form (eql :keys)) (source list) (data list) default)
375 |   "SOURCE is a list of keys; TABLE receives each key with its value from alist DATA, or DEFAULT if absent."
376 |   (dolist (key source)
377 |     (let ((entry (assoc key data :test (hash-table-test table))))
378 |       (setf (gethash key table) (if entry (cdr entry) default)))))
379 |
380 | (defmethod initialize-hash
381 |     ((table hash-table) (form (eql :keys)) (source list) (data function) default)
382 |   "SOURCE is a list of keys; DATA is a function mapping each key to the entry stored in TABLE."
383 |   (dolist (key source)
384 |     (set-transformed-entry (key) default via data into table)))
385 |
386 | (defmethod initialize-hash
387 |     ((table hash-table) (form (eql :keys)) (source vector) (data hash-table) default)
388 |   "SOURCE is a vector of keys; TABLE receives each key with its value from hash table DATA, or DEFAULT if absent."
389 |   (map nil (lambda (key) (setf (gethash key table) (gethash key data default)))
390 |        source))
391 |
392 | (defmethod initialize-hash
393 |     ((table hash-table) (form (eql :keys)) (source vector) (data list) default)
394 |   "SOURCE is a vector of keys; TABLE receives each key with its value from alist DATA, or DEFAULT if absent."
395 |   (loop for key across source do
396 |     (let ((entry (assoc key data :test (hash-table-test table))))
397 |       (setf (gethash key table) (if entry (cdr entry) default)))))
398 |
399 | (defmethod initialize-hash
400 |     ((table hash-table) (form (eql :keys)) (source vector) (data function) default)
401 |   "SOURCE is a vector of keys; DATA is a function mapping each key to the entry stored in TABLE."
402 |   (map nil (lambda (key) (set-transformed-entry (key) default via data into table))
403 |        source))
404 |
405 |
406 | ;; Initialize from parallel ordered sequences of keys and values
407 |
408 | (defmethod initialize-hash
409 |     ((table hash-table) (form (eql :keychain)) (source list) (data list) default)
410 |   "SOURCE is a list of keys and DATA a parallel list of values; TABLE receives the pairs.
411 | Keys beyond the end of DATA are stored with DEFAULT as their value."
412 |   (let ((values-left data))
413 |     (dolist (key source)
414 |       (setf (gethash key table) (if values-left (pop values-left) default)))))
415 |
416 | (defmethod initialize-hash
417 |     ((table hash-table) (form (eql :keychain)) (source vector) (data list) default)
418 |   "SOURCE is a vector of keys and DATA a parallel list of values; TABLE receives the pairs.
419 | Keys beyond the end of DATA are stored with DEFAULT as their value."
420 |   (let ((values-left data))
421 |     (loop for key across source do
422 |       (setf (gethash key table) (if values-left (pop values-left) default)))))
423 |
424 | (defmethod initialize-hash
425 |     ((table hash-table) (form (eql :keychain)) (source list) (data vector) default)
426 |   "SOURCE is a list of keys and DATA a parallel vector of values; TABLE receives the pairs.
427 | Keys beyond the end of DATA are stored with DEFAULT as their value."
428 |   (let ((n (length data)) (i 0))
429 |     (dolist (key source)
430 |       (setf (gethash key table) (if (< i n) (aref data i) default))
431 |       (incf i))))
432 |
433 | (defmethod initialize-hash
434 |     ((table hash-table) (form (eql :keychain)) (source vector) (data vector) default)
435 |   "SOURCE is a vector of keys and DATA a parallel vector of values; TABLE receives the pairs.
436 | Keys beyond the end of DATA are stored with DEFAULT as their value."
437 |   (let ((n (length data)))
438 |     (dotimes (i (length source))
439 |       (setf (gethash (aref source i) table)
440 |             (if (< i n) (aref data i) default)))))
441 |
442 |
443 | ;; Initialize from a bag (multiset) of keys, where the counts determine the values
444 |
445 | (defmacro bag-seq-to-hash (seq seqtype hash)
446 |   "Expand into a loop that tallies the elements of a bag/multiset into a hash table.
447 | SEQ is a form yielding the sequence of elements, repetitions allowed.
448 | SEQTYPE is the literal symbol list or vector and selects how the loop
449 | walks SEQ. HASH is a form yielding a hash table; the count stored under
450 | each element is incremented once per occurrence, starting from 0 if absent."
451 |   (declare (type (member list vector) seqtype))
452 |   (let ((element (gensym "KEY"))
453 |         (iterate-by (if (eq seqtype 'list) 'in 'across)))
454 |     `(loop for ,element ,iterate-by ,seq do (incf (gethash ,element ,hash 0)))))
455 |
456 | (defmethod initialize-hash
457 |     ((table hash-table) (form (eql :keybag)) (source list) (data null) default)
458 |   "SOURCE is a bag/multiset of keys given as a list; TABLE maps each distinct key
459 | to the number of times it occurs. DEFAULT is not used."
460 |   (dolist (key source) (incf (gethash key table 0))))
461 |
462 | (defmethod initialize-hash
463 |     ((table hash-table) (form (eql :keybag)) (source vector) (data null) default)
464 |   "SOURCE is a bag/multiset of keys given as a vector; TABLE maps each distinct key
465 | to the number of times it occurs. DEFAULT is not used."
466 |   (map nil (lambda (key) (incf (gethash key table 0))) source))
467 |
468 | (defmethod initialize-hash
469 |     ((table hash-table) (form (eql :keybag)) (source list) (data vector) default)
470 |   "SOURCE is a list of keys forming a bag; each key's value is the element of vector
471 | DATA indexed by that key's count, or DEFAULT when the count is >= the length of DATA."
472 |   (let ((tally
473 |           (make-hash-table :test (hash-table-test table)
474 |                            :size (hash-table-size table)
475 |                            :rehash-size (hash-table-rehash-size table)
476 |                            :rehash-threshold (hash-table-rehash-threshold table)))
477 |         (limit (length data)))
478 |     (bag-seq-to-hash source list tally)
479 |     (maphash (lambda (key count)
480 |                (setf (gethash key table) (if (< count limit) (aref data count) default)))
481 |              tally)))
482 |
483 | (defmethod initialize-hash
484 |     ((table hash-table) (form (eql :keybag)) (source vector) (data vector) default)
485 |   "SOURCE is a vector of keys forming a bag; each key's value is the element of vector
486 | DATA indexed by that key's count, or DEFAULT when the count is >= the length of DATA."
487 |   (let ((tally
488 |           (make-hash-table :test (hash-table-test table)
489 |                            :size (hash-table-size table)
490 |                            :rehash-size (hash-table-rehash-size table)
491 |                            :rehash-threshold (hash-table-rehash-threshold table)))
492 |         (limit (length data)))
493 |     (bag-seq-to-hash source vector tally)
494 |     (maphash (lambda (key count)
495 |                (setf (gethash key table) (if (< count limit) (aref data count) default)))
496 |              tally)))
497 |
498 | (defmethod initialize-hash
499 |     ((table hash-table) (form (eql :keybag)) (source list) (data hash-table) default)
500 |   "SOURCE is a list of keys forming a bag; each key's value is the entry of hash table
501 | DATA found under that key's count, or DEFAULT when DATA has no such entry."
502 |   (let ((tally
503 |           (make-hash-table :test (hash-table-test table)
504 |                            :size (hash-table-size table)
505 |                            :rehash-size (hash-table-rehash-size table)
506 |                            :rehash-threshold (hash-table-rehash-threshold table))))
507 |     (bag-seq-to-hash source list tally)
508 |     (maphash (lambda (key count) (setf (gethash key table) (gethash count data default)))
509 |              tally)))
510 |
511 | (defmethod initialize-hash
512 |     ((table hash-table) (form (eql :keybag)) (source vector) (data hash-table) default)
513 |   "SOURCE is a vector of keys forming a bag; each key's value is the entry of hash table
514 | DATA found under that key's count, or DEFAULT when DATA has no such entry."
515 |   (let ((tally
516 |           (make-hash-table :test (hash-table-test table)
517 |                            :size (hash-table-size table)
518 |                            :rehash-size (hash-table-rehash-size table)
519 |                            :rehash-threshold (hash-table-rehash-threshold table))))
520 |     (bag-seq-to-hash source vector tally)
521 |     (maphash (lambda (key count) (setf (gethash key table) (gethash count data default)))
522 |              tally)))
523 |
524 | (defmethod initialize-hash
525 |     ((table hash-table) (form (eql :keybag)) (source list) (data function) default)
526 |   "SOURCE is a list of keys forming a bag; DATA maps each key and its count to the entry stored in TABLE."
527 |   (let ((tally
528 |           (make-hash-table :test (hash-table-test table)
529 |                            :size (hash-table-size table)
530 |                            :rehash-size (hash-table-rehash-size table)
531 |                            :rehash-threshold (hash-table-rehash-threshold table))))
532 |     (bag-seq-to-hash source list tally)
533 |     (maphash (lambda (key count)
534 |                (set-transformed-entry (key count) default via data into table)) tally)))
535 |
536 | (defmethod initialize-hash
537 |     ((table hash-table) (form (eql :keybag)) (source vector) (data function) default)
538 |   "SOURCE is a vector of keys forming a bag; DATA maps each key and its count to the entry stored in TABLE."
539 |   (let ((tally
540 |           (make-hash-table :test (hash-table-test table)
541 |                            :size (hash-table-size table)
542 |                            :rehash-size (hash-table-rehash-size table)
543 |                            :rehash-threshold (hash-table-rehash-threshold table))))
544 |     (bag-seq-to-hash source vector tally)
545 |     (maphash (lambda (key count)
546 |                (set-transformed-entry (key count) default via data into table)) tally)))
547 |
548 |
549 | ;; Initialize from repeated calls to a function returning entries until nil
550 |
551 | (defmethod initialize-hash
552 |     ((table hash-table) (form (eql :function)) (source function) (data list) default)
553 |   "SOURCE is a function that generates entries for TABLE; generation stops as soon
554 | as its primary value is nil. DATA is the (possibly empty) argument list applied on every call."
555 |   (block iteration
556 |     (loop (set-transformed-entry data default via source into table
557 |                                  at null (return-from iteration)))))
558 |
559 |
560 |
561 | ;; DWIM Default Initializer Format
562 |
563 | (defgeneric hash-initializer-default-format (source)
564 | (:documentation
565 | "Return the keyword naming the initializer format assumed for initial contents SOURCE."))
566 |
567 | (defmethod hash-initializer-default-format ((source hash-table))
568 | :hash) ; a hash table defaults to the :hash format
569 |
570 | (defmethod hash-initializer-default-format ((source list))
571 | :flat) ; a list defaults to the :flat format
572 |
573 | (defmethod hash-initializer-default-format ((source vector))
574 | :flat) ; a vector defaults to the :flat format
575 |
576 | (defmethod hash-initializer-default-format ((source function))
577 | :function) ; a function defaults to the :function format
578 |
579 | (defmethod hash-initializer-default-format ((source t))
580 |   ;; TODO: define restarts to allow the user to choose a format.
581 |   (error 'unknown-default-initializer
582 |          :text (format nil "No default initializer for source of class ~A"
583 |                        (class-name (class-of source)))
584 |          :data source))
585 |
586 |
587 |
588 | ;; Main Exported Entry Point
589 |
590 | (defun make-hash (&rest hash-options &key
591 | (initial-contents nil)
592 | (init-format
593 | (hash-initializer-default-format initial-contents))
594 | (init-data nil)
595 | (init-default nil)
596 | &allow-other-keys)
597 | "Create, initialize, and return a new hash table.
598 |
599 | Keyword options include all those of the standard `make-hash-table', any
600 | extension options allowed by the given implementation, and the following
601 | four additional keyword arguments to control initialization. Users can
602 | support other types/configurations (or alter the default handling) by
603 | extending the generic function `initialize-hash' in this package.
604 |
605 | :INITIAL-CONTENTS object
606 |
607 | If the supplied object is non-nil, the object is used to initialize
608 | the created hash table in accordance with the INIT-FORMAT argument.
609 | For some formats, the INIT-DATA argument may also be needed to
610 | supply supplementary information for the initializer. The built-in
611 | formats support the cases where object is either a hash table or
612 | sequence from which the keys and values can be extracted. See the
613 | table below for a detailed description of the possibilities.
614 |
615 | :INIT-FORMAT keyword
616 |
617 | A keyword specifying the structure of the initialization contents
618 | and auxiliary data given by the INITIAL-CONTENTS and INIT-DATA
619 | arguments. Built-in support is provided for :hash, :flat, :pairs,
620 | :lists, :vectors, :seqs, :keys, :keychain, :keybag, and :function.
621 | These are described in detail below. When an initializer format is
622 | not supplied, it is computed by calling the generic function
623 | `hash-initializer-default-format' on the given INITIAL-CONTENTS
624 | object. A method for this function should be defined whenever the
625 | function `initialize-hash' is extended to handle a new class of
626 | INITIAL-CONTENTS objects. Methods can be overridden to change the
627 | default used in existing cases.
628 |
629 | :INIT-DATA object
630 |
631 | Auxiliary data used for initialization. Its structure and meaning
632 | depends on the value of INIT-FORMAT; see below for details.
633 |
634 | :INIT-DEFAULT value
635 |
636 | Default value to use in indirect initialization when the value for
637 | the given key cannot be determined from the INITIAL-CONTENTS and
638 | INIT-DATA for the particular INIT-FORMAT supplied.
639 |
640 | Note that in cases where INITIAL-CONTENTS or INIT-DATA is a function,
641 | that function should return three values. The primary value is the
642 | key to set in the hash table; the secondary value is the value associated
643 | with that key; and the optional third value is a ternary indicator of whether
644 | the key's value or entry should be used. For the latter, `nil' means that
645 | the returned key and value should be stored in the table; `t' means that
646 | the entry should be *skipped*; and any other non-nil value means that
647 | the key should be used with the supplied default value.
648 |
649 | For the built-in initialization methods, such functions are used in three
650 | ways: entry transformation -- taking a key and value as arguments, key
651 | mapping -- taking just a key as argument, and entry generation -- taking
652 | arbitrary &rest arguments and returning nil when iteration should stop.
653 | See below for more detail.
654 |
655 | The following table describes the built-in initialization formats and
656 | how the INITIAL-CONTENTS and INIT-DATA arguments are interpreted.
657 | Either vectors or lists can be used interchangeably for passing sequences
658 | to the latter arguments, except when data is an alist with :keys and a
659 | list of arguments with :function. But in those latter cases, the user
660 | can coerce a vector to list first if necessary.
661 |
662 | Format Contents Data Description
663 | --------- -------- ---- -----------
664 | :hash hash-table null Shallow copy of given hash table with
665 | shared structure in keys and values.
666 | function Entry transformation of given hash table,
667 | shared structure is possible.
668 |
669 | :flat list or null List or vector of alternating keys and values
670 | vector (key1 val1 ...) or #(key1 val1 ...),
671 | in the style of a plist.
672 | function Entry transformation of supplied
673 | keys and values via given function
674 |
675 | :pairs list or null List or vector of (key . value) cons pairs,
676 | vector ((key1 . val1) ...) or #((key1 . val1)...).
677 | function Entry transformation of supplied
678 | key-value pairs via given function
679 |
680 | :lists list or null List or vector of (key value) lists,
681 | vector with only the first two elements of each used
682 | function Entry transformation of supplied
683 | keys and values via given function
684 |
685 | :vectors list or null List or vector of #(key value) vectors,
686 | vector with only the first two elements of each used
687 | function Entry transformation of supplied
688 | keys and values via given function
689 |
690 | :seqs list or null List or vector of [key value] sequences,
691 | vector with each sequence either a list or vector
692 | and only the first two elements of each used
693 | function Entry transformation of supplied
694 | keys and values via given function
695 |
696 | :keys list or hash-table Contents is a list or vector of keys that
697 | vector are looked up in the hash-table data. These
698 | key-value pairs are used for initialization,
699 | with keys that are not found associated with
700 | the given default.
701 |
702 | list or list Contents is a list or vector of keys that
703 | vector are looked up in the alist data. These
704 | key-value pairs are used for initialization,
705 | with keys that are not found associated with
706 | the given default.
707 |
708 | list or function Contents is a list or vector of keys that
709 | vector are passed to the function data for key
710 | mapping. The resulting key-value pairs
711 | (allowing skips or defaults via bad-value)
712 | are used for initialization.
713 |
714 | :keychain list or list or Contents is a list or vector of keys, and
715 | vector vector data is a parallel list or vector of values
716 | given *in the same order*. Corresponding
717 | key-value pairs are used in initialization.
718 |
719 | :keybag list or null Contents is a bag/multiset represented
720 | vector as a list or vector. The hash table is
721 | initialized to associate each unique key
722 | to its count in the multiset.
723 |
724 | list or vector Contents is a bag/multiset represented
725 | vector as a list or vector. Data is a vector to
726 | be indexed by the counts of each key.
727 | Hash table is initialized to associate
728 | each key to the value in data at index
729 | equal to that key's count. Counts outside
730 | the bounds of data are associated to
731 | the default value.
732 |
733 | list or hash-table Contents is a bag/multiset represented
734 | vector as a list or vector. Data is a hash-table
735 | with positive integer keys representing a
736 | sparse vector. Hash table is initialized to
737 | associate each key to the value in data that
738 | is associated with that key's count. Counts
739 | not in data are associated to the default
740 | value.
741 |
742 | list or function Contents is a bag/multiset represented
743 | vector as a list or vector. Data is a function
744 | of two arguments KEY and COUNT, the latter
745 | a positive integer. Table is initialized to
746 | associate each key to the value returned
747 | by data on that key and its count. Data
748 | satisfies the bad-value convention described
749 | earlier.
750 |
751 | :function function list or Contents is a function that is applied to
752 | null the values in the list data. Hash table
753 | is initialized by entry generation until
754 | the function returns a nil primary value.
755 | "
756 | (let* ((hash (apply #'make-hash-table :allow-other-keys t hash-options)))
757 | (when initial-contents ; nil contents: the new table is returned empty
758 | (initialize-hash
759 | hash init-format initial-contents init-data init-default))
760 | hash))
761 |
762 |
763 | ;; Convenient way to specify transformation in function-based initialization
764 |
765 | (defun make-hash-transformer (domain f &optional (badp (constantly nil)))
766 |   "Wrap F, a function on DOMAIN, as a function usable with `make-hash'.
767 | DOMAIN is one of the keywords :key, :value, or :entry, and says what F
768 | receives: the key, the value, or both the key and the value. BADP is
769 | called with the same arguments as F and returns a ternary value: nil
770 | keeps the transformed entry, t requests that the entry be skipped, and
771 | any other non-nil value requests that the key be paired with a default.
772 | F is *not* called for an entry on which BADP returns a non-nil value;
773 | nil is returned as that entry's value instead.
774 |
775 | The result is a function of a key and a value (the value may be omitted
776 | when DOMAIN is :key) that returns three values: the unchanged key, the
777 | new value, and the bad-value ternary. It is therefore suitable as the
778 | :initial-contents or :init-data argument to `make-hash' for formats
779 | that accept a function there."
780 |   (declare (type (member :key :value :entry) domain)
781 |            (type function f badp))
782 |   (flet ((guarded (key &rest args)
783 |            ;; BADP is consulted first; F runs only when BADP returns nil.
784 |            (let ((bad? (apply badp args)))
785 |              (values key (if bad? nil (apply f args)) bad?))))
786 |     (ecase domain
787 |       (:key
788 |        (lambda (key &optional val)
789 |          (declare (ignorable val))
790 |          (guarded key key)))
791 |       (:value
792 |        (lambda (key val)
793 |          (guarded key val)))
794 |       (:entry
795 |        (lambda (key val) (guarded key key val))))))
796 |
797 |
798 | ;; Hash factory constructors
799 | ;;
800 | ;; It can be useful to have a shortcut for hash-table creation when
801 | ;; making many literal hashes of small-to-moderate size with one or a
802 | ;; few initialization policies. The routines here provide such a
803 | ;; mechanism. Both `define-hash-factory' and `make-hash-factory' return
804 | ;; functions, named and anonymous respectively, that package their
805 | ;; arguments as the initial contents in a call to `make-hash'. The
806 | ;; options passed to `make-hash' can be specified when the factory
807 | ;; is created but are otherwise taken from the dynamic variable
808 | ;; `*hash-factory-defaults*'. For example, we can do the following:
809 | ;;
810 | ;; (define-hash-factory hash :test #'equal :init-format :flat)
811 | ;; (define-hash-factory hash-pairs :test #'eql :init-format :pairs)
812 | ;;
813 | ;; (do-something-with-hash
814 | ;; (hash "A" 1 "B" 2 :C 3))
815 | ;;
816 | ;; (do-something-with-hash
817 | ;; (hash-pairs '(a . "aardvaark") '(b . "Borges") '(c . "cerulean")))
818 | ;;
819 | ;; (let ((subhash-big
820 | ;; (make-hash-factory :init-format :keys :init-data some-big-hash)))
821 | ;; (in-some-loop-with-some-variables
822 | ;; (let ((X (funcall subhash-big key-u key-v key-w key-x key-y key-z)))
823 | ;; (do-stuff-with-hash X))))
824 | ;;
825 | ;; For simple things and often at the REPL, this can be clearer and
826 | ;; more convenient, though the generality of `make-hash' may be needed
827 | ;; in most cases.
828 |
(defvar *hash-factory-defaults*
  (list :test             #'eql
        :init-format      :flat
        :init-data        nil
        :init-default     nil
        :size             127
        :rehash-size      1.5
        :rehash-threshold 1)
  "Property list of fallback options used by the hash factory constructors.
The hash factories pass these option specifications to make-hash last,
so any option given explicitly to the factory constructor overrides the
corresponding entry here.

Changing this variable affects every hash factory that does not fully
specify its own options, including default calls to the reader
constructors. The :test and :init-format options are of particular
note.")
847 |
(defmacro define-hash-factory (name &rest hash-options &key &allow-other-keys)
  "Define a function NAME that builds hash tables via `make-hash'.
The defined function collects all of its arguments into a list and
passes that list as the :initial-contents argument to `make-hash'.
HASH-OPTIONS are alternating keywords and value forms; they are spliced
into the call as further keyword arguments, ahead of -- and therefore
taking precedence over -- the options in `*hash-factory-defaults*'.

This is meant for short names or customized policies in simple calling
patterns; complex initialization may need the full power of `make-hash'
itself."
  (declare (type symbol name))
  (let ((contents-var (gensym "contents")))
    ;; Expands to:
    ;;   (defun NAME (&rest G)
    ;;     (apply #'make-hash :initial-contents G
    ;;            <hash-options...> *hash-factory-defaults*))
    (list 'defun name (list '&rest contents-var)
          (append (list 'apply '(function make-hash)
                        :initial-contents contents-var)
                  hash-options
                  (list '*hash-factory-defaults*)))))
864 |
(defun make-hash-factory (&rest hash-options &key &allow-other-keys)
  "Return an anonymous hash-table factory built on `make-hash'.
The returned function collects all of its arguments into a list and
passes that list as the :initial-contents argument to `make-hash'.
HASH-OPTIONS are alternating keywords and values supplied as further
keyword arguments, ahead of -- and therefore taking precedence over --
the options in `*hash-factory-defaults*'. The defaults are read each
time the factory is called, not when it is created.

This is meant for short names or customized policies in simple calling
patterns; complex initialization may need the full power of `make-hash'
itself."
  (flet ((factory (&rest hash-contents)
           (let ((options (append hash-options *hash-factory-defaults*)))
             (apply #'make-hash :initial-contents hash-contents options))))
    #'factory))
879 |
880 |
881 | ;; Reader Support
882 | ;;
883 | ;; As with the hash factory commentary above, it can often be pleasant
884 | ;; and useful to have a reader-macro for representing hash-table factories,
885 | ;; analogous to those for lists and vectors. The functions below,
886 | ;; `install-hash-reader' and `install-hash-dispatch-reader', provide two
887 | ;; portable ways of setting those macros in a given readtable, named
888 | ;; or otherwise. The former defines terminating macro characters to
889 | ;; act as delimiters for the hash factory, such as { ... }. The latter
890 | ;; is analogous except that it requires a dispatch character first,
891 | ;; # by default, such as #{ ... }. Typically, only one would be used for
892 | ;; any particular readtable, but it is flexible.
893 |
(defun read-close-delimiter (stream char)
  "Reader macro function for a closing delimiter CHAR read on STREAM
outside of a delimited construct. Always signals an error of type
`unmatched-closing-delimiter' whose data names the character and the
stream."
  ;; `char-name' is allowed to return NIL for characters without a name
  ;; (graphic characters in particular), in which case the message would
  ;; otherwise read \"Unmatched closing nil\". Fall back to the character
  ;; itself when no name is available.
  (let* ((name (char-name char))
         (label (if name
                    (substitute #\Space #\_ (string-downcase name))
                    (string char))))
    (error 'unmatched-closing-delimiter
           :text "Unmatched closing ~A on stream ~A"
           :data (list label stream))))
899 |
900 | ;; ATTN change install-hash-reader to install-hash-delimited-reader
901 | ;; and then make install-hash-reader a macro that expands to an
902 | ;; eval-when. Add a :hash-factory argument (nil by default)
903 | ;; for constructing the main hash, add a :use-dispatch argument
904 | ;; (t by default) that determines whether delimited or dispatch version
905 | ;; is used. Change :custom-test to :alternative-test
906 | ;; Add a dispatch arg table argument :dispatch-arg-table that
907 | ;; takes the number after the dispatch
908 | ;; character and gets the factory from that or uses alternative-test
909 | ;; if the number is too high, or default if no number. Set up a special
910 | ;; variable with the default case
911 | ;; ATTN: Could put the dispatch arg table as the argument to use-dispatch
912 | ;; it should be a vector and it will be used for getting the factory
913 | ;; from numeric arguments. (Is this worth supporting??)
914 | ;; This gets rid of the dispatch-arg-table keyword which is annoying.
915 | ;; Consider getting rid of alternate-test then because that can be
916 | ;; included in the table
917 |
(defvar *numbered-dispatch-defaults*
  (map 'vector #'symbol-function '(eq eql equal equalp))
  "Hash table tests used by dispatch reader macros with numeric arguments.
A numeric argument of n to the reader macro selects the test at index n;
an index at or beyond the length of this sequence raises an error.")
923 |
(defmacro install-hash-reader (options
                               &key (readtable '*readtable*)
                                 (use-dispatch t)
                                 (allow-numbered-dispatch nil)
                                 (open-char #\{) (close-char #\})
                                 (dispatch-char #\#))
  "Create a hash table factory specified by OPTIONS and install it in
READTABLE (the current readtable by default). To have effect, this
must be called at toplevel.

OPTIONS is either a list of keyword-value pairs (as would be passed to
`make-hash' or `make-hash-factory') or a hash factory function.
READTABLE is a readtable object, `*readtable*' by default.

The keyword arguments control how the reader is modified as follows:

  + USE-DISPATCH (t by default) determines whether the reader macro
    uses a dispatch character DISPATCH-CHAR before OPEN-CHAR. If
    non-nil, a dispatch character is used and is registered in
    READTABLE. If nil, OPEN-CHAR and CLOSE-CHAR form a raw delimited
    construct.

  + ALLOW-NUMBERED-DISPATCH (nil by default) allows a dispatched reader
    macro to modify its hash test when given a numeric argument between
    DISPATCH-CHAR and OPEN-CHAR. This only applies when USE-DISPATCH is
    non-nil and when OPTIONS is a list, not a factory function. The
    goal is to make it easy to reuse reader factories in several
    contexts.

    If nil, numbered dispatch is not supported. If t, the tests are
    taken from `*numbered-dispatch-defaults*', so that numeric
    arguments 0, 1, 2, and 3 correspond to the hash tests `eq', `eql',
    `equal', and `equalp' respectively. If a sequence of symbols or
    functions, those functions are used for the hash test given a
    numeric argument from 0 below the length of the sequence. In either
    case, dispatch without a numeric argument uses the originally
    specified options.

    Note: This is an experimental feature and may be discontinued in
    future versions if it proves more confusing than helpful.

  + OPEN-CHAR (default open-brace) is the character that delimits the
    beginning of the hash-table contents. If USE-DISPATCH is non-nil,
    this character must be preceded by DISPATCH-CHAR and, optionally,
    a numeric argument.

  + CLOSE-CHAR (default close-brace) is the character that delimits
    the end of the hash-table contents.

  + DISPATCH-CHAR (default #) is the character used to indicate a
    dispatched reader macro. When (and only when) USE-DISPATCH is
    non-nil, READTABLE is modified to register this as a dispatch and
    a non-terminating macro character via
    `make-dispatch-macro-character'. Note that there can be more than
    one dispatch character in a readtable."
  (let ((delimited-form
          (list 'install-hash-delimited-reader
                options readtable open-char close-char)))
    (flet ((dispatch-form (test-table)
             (list 'install-hash-dispatch-reader
                   options readtable dispatch-char
                   open-char close-char test-table)))
      (list 'eval-when '(:compile-toplevel :load-toplevel :execute)
            (cond
              ;; Literal nil: the choice is known at expansion time.
              ((null use-dispatch)
               delimited-form)
              ;; Literal t: likewise known at expansion time.
              ((eq use-dispatch t)
               (dispatch-form (if (eq allow-numbered-dispatch t)
                                  '*numbered-dispatch-defaults*
                                  allow-numbered-dispatch)))
              ;; Any other form: uncommon, so only here does the
              ;; expansion carry the run-time choice between the two.
              (t
               (let ((dispatch (gensym "use-dispatch"))
                     (numargs (gensym "allow-numargs"))
                     (argvec (gensym "argvec")))
                 `(let* ((,dispatch ,use-dispatch)
                         (,numargs (and ,dispatch ,allow-numbered-dispatch))
                         (,argvec (if (eq ,numargs t)
                                      *numbered-dispatch-defaults*
                                      ,numargs)))
                    (if ,dispatch
                        ,(dispatch-form argvec)
                        ,delimited-form)))))))))
1001 |
(defun install-hash-delimited-reader (options readtable open-char close-char)
  "Install a delimited hash-table reader macro in READTABLE.
OPEN-CHAR becomes a terminating macro character that reads forms up to
CLOSE-CHAR and applies a hash factory to them. CLOSE-CHAR becomes a
terminating macro character that signals an error via
`read-close-delimiter' when read on its own.

OPTIONS is either a list of keyword-value pairs, from which a factory
is built with `make-hash-factory', or an existing hash factory
function, which is used as is."
  (declare (type character open-char close-char))
  ;; `install-hash-reader' documents OPTIONS as a list or a factory
  ;; function; applying `make-hash-factory' to a function would fail,
  ;; so use a function directly.
  (let ((hash-factory (if (functionp options)
                          options
                          (apply #'make-hash-factory options))))
    (flet ((hash-reader (stream char)
             (declare (ignore char))
             (apply hash-factory (read-delimited-list close-char stream t))))
      (set-macro-character open-char #'hash-reader nil readtable)
      (set-macro-character close-char #'read-close-delimiter nil readtable))))
1010 |
(defun install-hash-dispatch-reader (options readtable dispatch-char
                                     open-char close-char numeric-arg-table)
  "Install a dispatched hash-table reader macro in READTABLE.
DISPATCH-CHAR followed by OPEN-CHAR reads forms up to CLOSE-CHAR and
applies a hash factory to them. CLOSE-CHAR becomes a terminating macro
character that signals an error via `read-close-delimiter' when read on
its own.

OPTIONS is either a list of keyword-value pairs, from which a factory
is built with `make-hash-factory', or an existing hash factory
function, which is used as is.

NUMERIC-ARG-TABLE is a sequence of hash tests (possibly empty or nil).
When OPTIONS is a list, a numeric argument n between DISPATCH-CHAR and
OPEN-CHAR selects a factory built from OPTIONS with :test overridden by
element n of the table; a numeric argument at or beyond the table's
length signals `numeric-dispatch-bounds-error'. When OPTIONS is a
factory function no numbered variants exist, so any numeric argument is
out of bounds. Without a numeric argument the main factory is used."
  (let* ((prebuilt-p (functionp options))
         (factory (if prebuilt-p
                      options
                      (apply #'make-hash-factory options)))
         (alt-factories
           (if prebuilt-p
               (vector)
               (map 'vector
                    #'(lambda (test)
                        ;; Leading :test takes precedence over any
                        ;; :test later in OPTIONS.
                        (apply #'make-hash-factory :test test options))
                    numeric-arg-table))))
    (flet ((hash-dispatch-reader (stream char num)
             (declare (ignore char))
             (let ((create
                     (cond
                       ;; NUM is nil when no numeric argument was given.
                       ((not (numberp num))
                        factory)
                       ((< num (length alt-factories))
                        (aref alt-factories num))
                       (t
                        (error 'numeric-dispatch-bounds-error
                               :text "Numeric argument to dispatch reader-macro out of bounds."
                               :data `(>= ,num ,(length alt-factories)))))))
               (apply create (read-delimited-list close-char stream t)))))
      ;; Only turn DISPATCH-CHAR into a dispatching macro character when
      ;; it is not one already. `get-dispatch-macro-character' signals an
      ;; error exactly in that case. Calling
      ;; `make-dispatch-macro-character' unconditionally signals an error
      ;; on SBCL and, on implementations that install a fresh dispatch
      ;; table instead, would discard the existing sub-character macros.
      (unless (ignore-errors
               (get-dispatch-macro-character dispatch-char open-char readtable)
               t)
        (make-dispatch-macro-character dispatch-char t readtable))
      (set-dispatch-macro-character dispatch-char open-char
                                    #'hash-dispatch-reader readtable)
      (set-macro-character close-char #'read-close-delimiter nil readtable))))
1037 |
1038 |
1039 | ;;;; make-hash.lisp ends here
1040 |
--------------------------------------------------------------------------------
/package.lisp:
--------------------------------------------------------------------------------
1 | ;;;; package.lisp
2 |
(defpackage #:make-hash
  (:use #:cl)
  ;; Hash table creation and initialization
  (:export #:make-hash
           #:make-hash-transformer
           #:initialize-hash
           #:hash-initializer-default-format)
  ;; Hash factories
  (:export #:*hash-factory-defaults*
           #:define-hash-factory
           #:make-hash-factory)
  ;; Reader support
  (:export #:install-hash-reader)
  ;; Error reporting
  (:export #:make-hash-error))
14 |
15 |
16 |
--------------------------------------------------------------------------------