├── .gitignore ├── LICENSE ├── README.mediawiki ├── examples └── examples.lisp ├── group-by.asd ├── group-by.lisp └── tests └── group-by.lisp /.gitignore: -------------------------------------------------------------------------------- 1 | .svn 2 | build 3 | dist 4 | *~ 5 | *# 6 | *.fasl 7 | .git 8 | _darcs 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ;; Copyright (c) 2011 Russ Tyndall , Acceleration.net http://www.acceleration.net 2 | 3 | ;; Permission is hereby granted, free of charge, to any person obtaining 4 | ;; a copy of this software and associated documentation files (the 5 | ;; "Software"), to deal in the Software without restriction, including 6 | ;; without limitation the rights to use, copy, modify, merge, publish, 7 | ;; distribute, sublicense, and/or sell copies of the Software, and to 8 | ;; permit persons to whom the Software is furnished to do so, subject to 9 | ;; the following conditions: 10 | 11 | ;; The above copyright notice and this permission notice shall be 12 | ;; included in all copies or substantial portions of the Software. 13 | 14 | ;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | ;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | ;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | ;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | ;; LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | ;; OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | ;; WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.mediawiki: -------------------------------------------------------------------------------- 1 | = group-by = 2 | A Common Lisp library to help group data into trees (of various 3 | formats) based on common/shared values 4 | 5 | == API == 6 | 7 | === group-by === 8 | 9 | groups the list into an alist using the key function and value function to group by key, 10 | with a list of all values for that key. 11 | 12 | ''key'': is used to determine the key in the a-list
13 | ''value'': is used to determine the value in the a-list
14 | ''key-fn'': is passed as the :key to assoc (essentially the key of your key)
15 | ''test'': is passed as the :test to assoc
16 | 17 | eg: (group-by '((a 1 2) (a 3 4) (b 5 6)))
18 | => ((A (1 2) (3 4)) (B (5 6)))
19 | 20 | eg: (group-by '((a 1 2) (a 3 4) (b 5 6)) :value #'identity)
21 | => ((A (A 1 2) (A 3 4)) (B (B 5 6)))
22 | 23 | === make-grouped-list, grouped-list === 24 | 25 | Given a list of input, produce a grouped-list CLOS object that contains 26 | the original list, configuration about the groupings and the result tree 27 | of grouped-list objects 28 | 29 | ''keys'': a list of keys to group by
30 | ''tests'': a list of tests to compare the keys with
31 | 32 | ''grouping-implmentation'': What data structure should be used to perform the grouping
33 | '':list, :hash-table''
34 | The implementation doesnt change the output, but it does change 35 | the performance characteristics of the grouped-object (see: 36 | grouped-list-speed-tester for help deciding which to use) 37 | 38 | ---- 39 | 40 |
 41 | For the following docs consider the grouped list (as from examples)
 42 | 
 43 | grouped-list
 44 |   russ
 45 |     PROJ-A
 46 |       (list of timeclock records)
 47 |     PROJ-B
 48 |       (list of timeclock records)
 49 |     PROJ-C
 50 |       (list of timeclock records)
 51 |   bob
 52 |     PROJ-A
 53 |       (list of timeclock records)
 54 |     PROJ-B
 55 |       (list of timeclock records)
 56 |     PROJ-C
 57 |       (list of timeclock records)
 58 | 
 59 | 
60 | 61 | ==== key-value ==== 62 | 63 | Returns the key-value that this grouped-list represents 64 | 65 | nil for the root
66 | the key we are grouping under otherwise
67 | 68 | EG: the <gl russ> node returns the string "russ" 69 | 70 | ==== child-groupings ==== 71 | 72 | Returns the direct child grouped-lists of the current grouped-list 73 | 74 | If called on the root grouped list, will return a list of (<gl russ> <gl bob>)
75 | If called on the "russ" grouped list, will return a list of (<gl PROJ-A> <gl PROJ-B> <gl PROJ-C>)
76 | 77 | ==== items-in-group ==== 78 | 79 | Returns all the items in a grouped list. 80 | 81 | ''key-values'':If specified, return only items that match 82 | 83 | When called on the root (items-in-group gl) of a grouped-list returns all 84 | of the items that list groups (leaf nodes of the tree). 85 | 86 | When called as (items-in-group gl "russ" :proj-a) returns the items under 87 | the key proj-A that are found under toplevel key "russ" 88 | 89 | === group-by-repeated === 90 | Sames as group-by, but groups on multiple keys and tests (into an alist tree) 91 | 92 | == Examples == 93 | 94 | * The [https://github.com/bobbysmith007/group-by/blob/master/examples/examples.lisp#L1 examples file] contains some examples increasing in complexity 95 | * [https://github.com/bobbysmith007/group-by/blob/master/tests/group-by.lisp#L83 The tests file also contains running examples] 96 | 97 | == Authors == 98 | * [http://www.acceleration.net/ Acceleration.net] [http://www.acceleration.net/programming/donate-to-acceleration-net/ Donate] 99 | ** [http://russ.unwashedmeme.com/blog Russ Tyndall] 100 | ** [http://the.unwashedmeme.com/blog Nathan Bird] 101 | ** [http://ryepup.unwashedmeme.com/blog Ryan Davis] 102 | 103 |
104 | ;; Copyright (c) 2011 Russ Tyndall , Acceleration.net http://www.acceleration.net
105 | ;; All rights reserved.
106 | ;;
107 | ;; Redistribution and use in source and binary forms, with or without
108 | ;; modification, are permitted provided that the following conditions are
109 | ;; met:
110 | ;;
111 | ;;  - Redistributions of source code must retain the above copyright
112 | ;;    notice, this list of conditions and the following disclaimer.
113 | ;;
114 | ;;  - Redistributions in binary form must reproduce the above copyright
115 | ;;    notice, this list of conditions and the following disclaimer in the
116 | ;;    documentation and/or other materials provided with the distribution.
117 | ;;
118 | ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
119 | ;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
120 | ;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
121 | ;; A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
122 | ;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
123 | ;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
124 | ;; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
125 | ;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
126 | ;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
127 | ;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
128 | ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
129 | 
-------------------------------------------------------------------------------- /examples/examples.lisp: -------------------------------------------------------------------------------- 1 | (in-package :group-by) 2 | 3 | (defparameter +example-timeclock-data+ 4 | `(("russ" 1 :proj-a) 5 | ("russ" 2 :proj-a) 6 | ("bob" 1 :proj-a) 7 | ("russ" 2 :proj-b) 8 | ("bob" 1 :proj-b) 9 | ("bob" 1 :proj-b) 10 | ("russ" 2 :proj-b) 11 | ("bob" 1 :proj-c) 12 | ("russ" 4 :proj-c))) 13 | 14 | (group-by +example-timeclock-data+) 15 | ;; results in 16 | '(("russ" 17 | (1 :proj-a) (2 :proj-a) (2 "time on proj b") 18 | (2 "time on proj b") (4 :proj-c)) 19 | ("bob" 20 | (1 :proj-a) (1 "time on proj b") (1 "time on proj b") 21 | (1 :proj-c))) 22 | 23 | (defparameter +example-multiple-grouped-timeclock-data+ 24 | (group-by-repeated 25 | +example-timeclock-data+ 26 | :keys (list #'first #'third) 27 | :tests (list #'string-equal #'eql))) 28 | ;; results in 29 | '(("bob" 30 | (:proj-c ("bob" 1 :proj-c)) 31 | (:proj-b ("bob" 1 :proj-b) ("bob" 1 :proj-b)) 32 | (:proj-a ("bob" 1 :proj-a))) 33 | ("russ" 34 | (:proj-c ("russ" 4 :proj-c)) 35 | (:proj-b ("russ" 2 :proj-b) ("russ" 2 :proj-b)) 36 | (:proj-a ("russ" 2 :proj-a) ("russ" 1 :proj-a)))) 37 | 38 | (defparameter +example-grouped-list-timeclock-data-alist+ 39 | (make-grouped-list 40 | +example-timeclock-data+ 41 | :keys (list #'first #'third) 42 | :tests (list #'string-equal #'eql))) 43 | 44 | (defclass example-timeclock-record () 45 | ((name :accessor name :initarg :name :initform nil) 46 | (hours :accessor hours :initarg :hours :initform 0) 47 | (proj :accessor proj :initarg :proj :initform nil))) 48 | 49 | (defun example-tcr (name hours proj) 50 | (make-instance 'example-timeclock-record :name name :hours hours :proj proj)) 51 | 52 | (defparameter +example-timeclock-objs+ 53 | (iter top (for i from 0 to 10) 54 | (iter (for rec in +example-timeclock-data+) 55 | (in top (collect (apply #'example-tcr rec)))))) 56 | 57 | (defun timeclock-report-rec-print (gl &optional (spaces "")) 58 | (let ((further-groups (child-groupings gl))) 59 | (if further-groups 60 | (iter 61 | (with hours = 0) 62 | (for group in further-groups) 63 | (format T "~?~A~%" spaces () (key-value group) ) 64 | (incf hours (timeclock-report-rec-print 65 | group (concatenate 'string spaces "~2,1@T"))) 66 | (finally 67 | (format T "~?Total: ~D~%" spaces () hours ) 68 | (return hours))) 69 | (iter (for kid in (items-in-group gl)) 70 | (sum (hours kid) into hours) 71 | (finally 72 | (format T "~?Total: ~D~%" spaces () hours ) 73 | (return hours)))))) 74 | 75 | (defun print-timeclock-report () 76 | (let ((by-person-project 77 | (make-grouped-list 78 | +example-timeclock-objs+ 79 | :keys (list #'name #'proj) 80 | :tests (list #'equalp #'eql) 81 | :grouping-implementation :hash-table)) 82 | (by-project-person 83 | (make-grouped-list 84 | +example-timeclock-objs+ 85 | :keys (list #'proj #'name) 86 | :tests (list #'eql #'equalp) 87 | :grouping-implementation :list))) 88 | 89 | (format T "Hours BY Project > Person~%-----------~%") 90 | (timeclock-report-rec-print by-project-person) 91 | (format T "~%~%Hours BY Person > Project~%-----------~%") 92 | (timeclock-report-rec-print by-person-project) 93 | )) 94 | 95 | #| 96 | Hours BY Project > Person 97 | ----------- 98 | PROJ-C 99 | russ 100 | Total: 44 101 | bob 102 | Total: 11 103 | Total: 55 104 | PROJ-B 105 | bob 106 | Total: 22 107 | russ 108 | Total: 44 109 | Total: 66 110 | PROJ-A 111 | bob 112 | Total: 11 113 | russ 114 | Total: 33 115 | Total: 44 116 | Total: 165 117 | 118 | 119 | Hours BY Person > Project 120 | ----------- 121 | russ 122 | PROJ-A 123 | Total: 33 124 | PROJ-B 125 | Total: 44 126 | PROJ-C 127 | Total: 44 128 | Total: 121 129 | bob 130 | PROJ-A 131 | Total: 11 132 | PROJ-B 133 | Total: 22 134 | PROJ-C 135 | Total: 11 136 | Total: 44 137 | Total: 165 138 | |# 139 | 140 | 141 | (defparameter +example-speedtest-timeclock-objs+ 142 | (iter top (for i from 0 to 100) 143 | (iter (for rec in +example-timeclock-data+) 144 | (in top (collect (apply #'example-tcr rec)))))) 145 | 146 | (defun speed-test-example () 147 | (format *trace-output* "~%build-gl-speed-test~%") 148 | (grouped-list-speed-tester 149 | :iterations 100 150 | :list +example-speedtest-timeclock-objs+ 151 | :keys (list #'name #'proj) 152 | :tests (list #'string-equal #'eql)) 153 | 154 | (format *trace-output* "~%~%build-gl-speed-test with-item-access 155 | This shows how implementations differ based on workload~%") 156 | (grouped-list-speed-tester 157 | :iterations 10 158 | :list +example-speedtest-timeclock-objs+ 159 | :keys (list #'name #'proj) 160 | :tests (list #'string-equal #'eql) 161 | :actions (lambda (gl) 162 | (iter (for c from 0 to 1000 ) 163 | (iter 164 | (for i in '("russ" "bob")) 165 | (items-in-group gl i) 166 | (iter (for j in `(:proj-a :proj-b :proj-c)) 167 | (items-in-group gl i j))))))) 168 | 169 | #| 170 | build-gl-speed-test 171 | Grouping Implentation Speed Tests 172 | 173 | HASH-TABLE Implementation 174 | Evaluation took: 175 | 0.181 seconds of real time 176 | 0.160000 seconds of total run time (0.120000 user, 0.040000 system) 177 | [ Run times consist of 0.080 seconds GC time, and 0.080 seconds non-GC time. ] 178 | 88.40% CPU 179 | 92 lambdas converted 180 | 451,161,165 processor cycles 181 | 4,939,600 bytes consed 182 | 183 | 184 | 185 | LIST Implementation 186 | Evaluation took: 187 | 0.107 seconds of real time 188 | 0.100000 seconds of total run time (0.080000 user, 0.020000 system) 189 | 93.46% CPU 190 | 46 lambdas converted 191 | 265,815,870 processor cycles 192 | 2,749,600 bytes consed 193 | 194 | 195 | 196 | build-gl-speed-test with-item-access 197 | This shows how implementations differ based on workload 198 | Grouping Implentation Speed Tests 199 | 200 | HASH-TABLE Implementation 201 | Evaluation took: 202 | 0.873 seconds of real time 203 | 0.860000 seconds of total run time (0.760000 user, 0.100000 system) 204 | [ Run times consist of 0.330 seconds GC time, and 0.530 seconds non-GC time. ] 205 | 98.51% CPU 206 | 2,175,013,267 processor cycles 207 | 294,658,896 bytes consed 208 | 209 | 210 | 211 | LIST Implementation 212 | Evaluation took: 213 | 1.031 seconds of real time 214 | 0.990000 seconds of total run time (0.900000 user, 0.090000 system) 215 | [ Run times consist of 0.460 seconds GC time, and 0.530 seconds non-GC time. ] 216 | 96.02% CPU 217 | 2,572,489,710 processor cycles 218 | 293,604,720 bytes consed 219 | |# 220 | 221 | (defparameter +example-names+ #("russ" "alice" "bob" "charlie")) 222 | (defparameter +example-projects+ #(:proj-a :proj-b :proj-c :proj-d)) 223 | 224 | (defun incremental-grouping-examples () 225 | "An example of building a grouped list up from individual items 226 | rather than starting with a full list then grouping it" 227 | (iter (for type in `(:hash-table :tree :alist)) 228 | (iter 229 | (with gl = (make-grouped-list 230 | nil 231 | :keys (list #'name #'proj) 232 | :tests (list #'string-equal #'eql) 233 | :grouping-implementation type)) 234 | (for i from 0 to 1000) 235 | (for tcr = 236 | (example-tcr 237 | (alexandria:random-elt +example-names+) 238 | (random 10) 239 | (alexandria:random-elt +example-projects+))) 240 | (add-item-to-grouping tcr gl) 241 | (finally (timeclock-report-rec-print gl) 242 | (format T "----------------------~%"))))) -------------------------------------------------------------------------------- /group-by.asd: -------------------------------------------------------------------------------- 1 | ;; -*- lisp -*- 2 | 3 | (eval-when (:compile-toplevel :load-toplevel :execute) 4 | (unless (find-package :group-by.system) 5 | (defpackage :group-by.system 6 | (:use :common-lisp :asdf)))) 7 | 8 | (in-package group-by.system) 9 | 10 | (defsystem :group-by 11 | :description "A Common Lisp library to help group data into trees (of various 12 | formats) based on common/shared values" 13 | :licence "BSD" 14 | :version "0.1" 15 | :components ((:file "group-by")) 16 | :depends-on (:iterate :alexandria)) 17 | 18 | (defsystem :group-by-test 19 | :description "A Common Lisp library to help group data into trees (of various 20 | formats) based on common/shared values" 21 | :licence "BSD" 22 | :version "0.1" 23 | :components ((:module :tests 24 | :serial t 25 | :components ((:file "group-by")))) 26 | :depends-on (:group-by :lisp-unit2)) 27 | 28 | (defmethod asdf:perform ((o asdf:test-op) (c (eql (find-system :group-by)))) 29 | (asdf:oos 'asdf:load-op :group-by-test) 30 | (let ((*package* (find-package :group-by-test))) 31 | (eval (read-from-string " 32 | (run-tests :package :group-by-test 33 | :name :group-by 34 | :run-contexts #'with-summary-context)")))) 35 | 36 | ;; Copyright (c) 2011 Russ Tyndall , Acceleration.net http://www.acceleration.net 37 | 38 | ;; Permission is hereby granted, free of charge, to any person obtaining 39 | ;; a copy of this software and associated documentation files (the 40 | ;; "Software"), to deal in the Software without restriction, including 41 | ;; without limitation the rights to use, copy, modify, merge, publish, 42 | ;; distribute, sublicense, and/or sell copies of the Software, and to 43 | ;; permit persons to whom the Software is furnished to do so, subject to 44 | ;; the following conditions: 45 | 46 | ;; The above copyright notice and this permission notice shall be 47 | ;; included in all copies or substantial portions of the Software. 48 | 49 | ;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 50 | ;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 51 | ;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 52 | ;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 53 | ;; LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 54 | ;; OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 55 | ;; WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /group-by.lisp: -------------------------------------------------------------------------------- 1 | (defpackage :group-by 2 | (:use :cl :cl-user :iterate) 3 | (:export 4 | :group-by 5 | :categorize-item 6 | :grouped-list 7 | :make-grouped-list 8 | :add-item-to-grouping 9 | :key-value 10 | :child-groupings 11 | :items-in-group 12 | :parent-grouping 13 | :keys :tests 14 | :make-child-grouped-list 15 | :group-by-repeated 16 | :grouped-list-speed-tester)) 17 | 18 | (in-package :group-by) 19 | 20 | (defun group-by (list &key (key #'car) (value #'cdr) (key-fn #'identity) (test #'equal)) 21 | "groups the list into an alist using the key function and value function to group by key, 22 | with a list of all values for that key. 23 | 24 | key is used to determine the key in the a-list 25 | value is used to determin the value in the a-list 26 | key-fn is passed as the :key to assoc 27 | test is passed as the :test to assoc 28 | 29 | eg: (group-by '((a 1 2) (a 3 4) (b 5 6))) 30 | => ((A (1 2) (3 4)) (B (5 6)))" 31 | ;; we keep 2 alist - ( key . value-list-head ) & ( key . value-list-tail ) 32 | ;; so we can collect at the end (which saves us infinitesimal time and space) 33 | (iter (for i in list) 34 | (for k = (funcall key i)) 35 | (for v = (cons (funcall value i) nil)) 36 | (for cell = (assoc k tails :test test :key key-fn)) 37 | (cond 38 | (cell (setf (cddr cell) v 39 | (cdr cell) v)) 40 | (t ;; dont reuse this cons cell, we want two distinct ones 41 | (collect (cons k v) into results) 42 | (collect (cons k v) into tails))) 43 | (finally (return results)))) 44 | 45 | (defgeneric categorize-item (item root &key &allow-other-keys ) 46 | (:documentation "Insert a new item into a grouped list ")) 47 | 48 | (defmethod categorize-item (item (root list) &key keys tests &allow-other-keys) 49 | "Categorize a new item into an alist as produced by group-by-repeated 50 | This will create new category nodes if necessary" 51 | (if (null keys) 52 | (push item root) 53 | (let ((key (funcall (first keys) item))) 54 | (let ((data (assoc key root :test (or (first tests) #'equal)))) 55 | (if data 56 | ;; Add the rest of the categorization to the 57 | ;; data of this item 58 | (setf (cdr data) (categorize-item 59 | item (cdr data) 60 | :keys (rest keys) 61 | :tests (rest tests))) 62 | ;; we have no data for this node, build a new subtree 63 | (push (cons key (categorize-item 64 | item nil 65 | :keys (rest keys) 66 | :tests (rest tests))) 67 | root))))) 68 | root) 69 | 70 | (defun group-by-repeated (list &key keys tests) 71 | "Returns an alist tree that represents the items in the list as categorized 72 | by keys (compared with tests) 73 | ex: ((a 3 sam) (c 4 bob) (a 3 ted)) 74 | 75 | 76 | keys: a list of key functions that describe the categorizations in order 77 | tests: how we are testing whether or not two keys are equal, defaults to #'equal 78 | " 79 | (let (root) 80 | (iter (for item in list) 81 | (setf root (categorize-item item root :keys keys :tests tests))) 82 | root)) 83 | 84 | (defclass grouped-list () 85 | ((orig-list :accessor orig-list :initarg :orig-list :initform nil) 86 | (grouping-implementation 87 | :accessor grouping-implementation :initarg :grouping-implementation :initform :list 88 | :documentation 89 | "What data structure should be used to perform the grouping :list, :hash-table") 90 | (keys :accessor keys :initarg :keys :initform nil 91 | :documentation "A list of key functions we will use to group the list") 92 | (tests :accessor tests :initarg :tests :initform nil 93 | :documentation "A list of test functions we will use to test key equality 94 | tree: defaults to #'equal 95 | hash-table: this be a single hash-equality symbol (defaults to 'equal)") 96 | (%child-groupings :accessor %child-groupings :initarg :%child-groupings :initform nil) 97 | (%items :accessor %items :initarg :%items :initform nil) 98 | (parent-grouping :accessor parent-grouping :initarg :parent :initform nil 99 | :documentation "If this is a subgrouping of another grouped-list, what is the parent grouping we are apart of (mostly for testing)") 100 | (key-value :accessor key-value :initarg :key-value :initform nil 101 | :documentation "If this is a subgrouping of another grouped-list, what is the key this grouped-list represents in the parent grouping (mostly for testing)")) 102 | (:documentation "This class represents a list that we have grouped by multiple key values 103 | ala one of the group-by-repeatedly functions ")) 104 | 105 | (defgeneric child-groupings (grouped-list) 106 | (:method ((gl grouped-list)) 107 | (case (grouping-implementation gl) 108 | (:hash-table (iter (for (k v) in-hashtable (%child-groupings gl)) 109 | (collect v))) 110 | (T (%child-groupings gl))))) 111 | 112 | (defun make-grouped-list (inp &key tests keys (grouping-implementation :alist)) 113 | "Given a list of input, produce a grouped-list CLOS object that contains 114 | the original list, configuration about the groupings and the result tree 115 | of grouped-list objects 116 | 117 | ''keys'': a list of keys to group by
118 | ''tests'': a list of tests to compare the keys with
119 | 120 | ''grouping-implmentation'': What data structure should be used to perform the grouping
121 | '':alist, :tree , :hash-table''
122 | The implementation doesnt change the output, but it does change 123 | the performance characteristics of the grouped-object (see: 124 | grouped-list-speed-tester for help deciding which to use) 125 | " 126 | (make-instance 'grouped-list 127 | :tests tests 128 | :keys keys 129 | :grouping-implementation grouping-implementation 130 | :list inp)) 131 | 132 | (defmethod initialize-instance :after ((o grouped-list) &key list &allow-other-keys) 133 | (unless (listp (keys o)) (setf (keys o) (list (keys o)))) 134 | (unless (listp (tests o)) (setf (tests o) (list (tests o)))) 135 | (when (eql :hash-table (grouping-implementation o)) 136 | (setf (%child-groupings o) 137 | (make-hash-table :test (or (first (tests o)) 'equal)))) 138 | 139 | (when list ;; only do this if we are not a child-grouped-list 140 | (setf (orig-list o) list) 141 | (iter (for x in list) 142 | (add-item-to-grouping x o)))) 143 | 144 | (defun find-single-sub-category (gl key-value &key test) 145 | (case (grouping-implementation gl) 146 | (:hash-table (gethash key-value (%child-groupings gl))) 147 | (t (find key-value (%child-groupings gl) :key #'key-value :test test)))) 148 | 149 | (defmethod categorize-item (item (root grouped-list) &key &allow-other-keys) 150 | (iter 151 | (with node = root) 152 | (with tests = (tests root)) 153 | (with keys = (keys root)) 154 | (for keyfn in keys) 155 | (for testfn = (or (first tests) #'equal)) 156 | (setf tests (rest tests)) 157 | (for key = (funcall keyfn item)) 158 | (setf node 159 | (or (find-single-sub-category node key :test testfn) 160 | (make-child-grouped-list node key item))) 161 | (finally 162 | (push item (%items node)))) 163 | root) 164 | 165 | (defgeneric add-item-to-grouping (item grouped-list) 166 | (:method (item (gl grouped-list)) 167 | "puts a new item in the grouping of the grouped list (but not in the original list)" 168 | (categorize-item item gl))) 169 | 170 | (defgeneric %grouping-items (grouped-list) 171 | (:method ((gl grouped-list)) 172 | "Returns the items in a given group" 173 | (append (%items gl) 174 | (iter 175 | (for cgl in (child-groupings gl)) 176 | (nconcing (%grouping-items cgl)))))) 177 | 178 | (defmethod make-child-grouped-list ((gl grouped-list) key-value grouped-list) 179 | (let ((c (make-instance 180 | 'grouped-list 181 | :orig-list (orig-list gl) 182 | :keys (rest (keys gl)) 183 | :tests (rest (tests gl)) 184 | :grouping-implementation (grouping-implementation gl) 185 | :parent-grouping gl 186 | :key-value key-value))) 187 | (case (grouping-implementation gl) 188 | (:hash-table 189 | (setf (gethash key-value (%child-groupings gl)) c)) 190 | (t (push c (%child-groupings gl)))) 191 | c)) 192 | 193 | (defgeneric items-in-group (grouped-list &rest keys) 194 | (:documentation 195 | " a list of key values that will produce a list of all the items in a given group") 196 | (:method ((gl grouped-list) &rest key-values) 197 | (let ((subgroup gl) 198 | (tests (tests gl))) 199 | (iter 200 | (for key in key-values) 201 | (for test = (or (first tests) #'equal)) 202 | (setf tests (rest tests)) 203 | (setf subgroup (find-single-sub-category subgroup key :test test))) 204 | 205 | ;; Get all the items for that subgrouping (for alists this is a list we just produced) 206 | ;; and that list will simply pass through 207 | (%grouping-items subgroup)))) 208 | 209 | (defun grouped-list-speed-tester (&key list keys tests hash-tests (iterations 10) actions) 210 | "A function to help assess which implementation will work best in your given scenario 211 | actions : (lambda (gl) ...) -to help test whatever grouped list 212 | operations you will need to do repeatedly 213 | 214 | " 215 | (format *trace-output* "Grouping Implentation Speed Tests" ) 216 | (format *trace-output* "~%~%HASH-TABLE Implementation~%" ) 217 | (time 218 | (iter (for i from 1 to iterations) 219 | (let ((gl (make-instance 220 | 'grouped-list 221 | :list list :keys keys :tests hash-tests 222 | :grouping-implementation :hash-table))) 223 | (when actions (funcall actions gl))))) 224 | (format *trace-output* "~%~%LIST Implementation~%" ) 225 | (time 226 | (iter (for i from 1 to iterations) 227 | (let ((gl (make-instance 'grouped-list :list list :keys keys :tests tests 228 | :grouping-implementation :list))) 229 | (when actions (funcall actions gl))) 230 | ))) 231 | -------------------------------------------------------------------------------- /tests/group-by.lisp: -------------------------------------------------------------------------------- 1 | (defpackage :group-by-test 2 | (:use :cl :cl-user :group-by :lisp-unit2 :iter)) 3 | 4 | (in-package :group-by-test) 5 | 6 | (defvar +lower-case-ascii-alphabet+ 7 | "abcdefghijklmnopqrstuvwxyz" 8 | "All the lower case letters in 7 bit ASCII.") 9 | 10 | (defun random-string (&optional (length 32) (alphabet +lower-case-ascii-alphabet+)) 11 | "Returns a random alphabetic string. 12 | 13 | The returned string will contain LENGTH characters chosen from 14 | the vector ALPHABET. 15 | " 16 | (iter (with id = (make-string length)) 17 | (with alphabet-length = (length alphabet)) 18 | (for i below length) 19 | (setf (cl:aref id i) 20 | (cl:aref alphabet (random alphabet-length))) 21 | (finally (return id)))) 22 | 23 | (defun info (message &rest arguments) 24 | (format *standard-output* message arguments)) 25 | 26 | (defun test-data (&optional (num-rows 25) (depth 5)) 27 | " This function returns (:list data :keys keys :tests tests) 28 | for application to make-instance in make-test-data-instance 29 | 30 | the data returned is a list of arrays with keys representing 31 | each successive datum in the array. These datums are alternating 32 | random strings and numbers with bounds such there there should 33 | always be some overlap 34 | " 35 | (list 36 | :list 37 | (let ((alphabet (if (< num-rows (length +lower-case-ascii-alphabet+)) 38 | (subseq +lower-case-ascii-alphabet+ 39 | 0 (floor (+ 1 (/ num-rows 3)))) 40 | +lower-case-ascii-alphabet+))) 41 | (iter (for i from 1 to num-rows) 42 | (collect (apply #'vector 43 | (iter (for i from 1 to depth) 44 | (collect 45 | (case (mod i 2) 46 | (0 (random-string 47 | (truncate (+ 1 (/ num-rows (length alphabet) 3))) 48 | alphabet)) 49 | (1 (random (max (/ num-rows 10) 3)))))))))) 50 | :keys (iter (for i from 0 below depth) 51 | (collect (alexandria:rcurry #'elt i))) 52 | :tests (iter (for i from 1 to depth) 53 | (collect 54 | (case (mod i 2) 55 | (0 #'equalp) 56 | (1 #'eql)))))) 57 | 58 | (defmethod print-object ((o grouped-list) s) 59 | ;; This is way slow so dont have this in live code and you might wish to undefine it 60 | ;; for more accurate speed tests 61 | (print-unreadable-object (o s :type t :identity t) 62 | (format s "p-key:~a " (key-value o)) 63 | (format s "num-kids:~a " (length (child-groupings o))) 64 | (format s "num-data:~a " (length (items-in-group o))))) 65 | 66 | (defun make-test-data-instance (test-data &rest other-keywords) 67 | (let* ((args (append (list 'grouped-list) 68 | test-data 69 | other-keywords)) 70 | (o (apply #'make-instance args))) 71 | o 72 | )) 73 | 74 | (defparameter +test-timeclock-data+ 75 | `(("Russ" 1 "time on proj A") 76 | ("Russ" 2 "time on proj A") 77 | ("Bob" 1 "time on proj A") 78 | ("Russ" 2 "time on proj B") 79 | ("Bob" 1 "time on proj B") 80 | ("Bob" 1 "time on proj B") 81 | ("Russ" 2 "time on proj B") 82 | ("Bob" 1 "time on proj C") 83 | ("Russ" 4 "time on proj C"))) 84 | 85 | (define-test basic-group-by () 86 | (let ((res (group-by +test-timeclock-data+))) 87 | (assert-equal 2 (length res) "Grouped by the two employees") 88 | (assert-equal 89 | "Russ" (car (first res)) 90 | "items should have same order, Russ should be first key") 91 | (assert-equal 92 | 5 (length (cdr (assoc "Russ" res :test #'string-equal))) 93 | "russ had 5 records") 94 | (assert-equal "Bob" (car (second res))) 95 | (assert-equal 96 | 4 (length (cdr (assoc "Bob" res :test #'string-equal))) 97 | "Bob had 4 records") 98 | )) 99 | 100 | (define-test basic-group-by-2 () 101 | (let* ((res (group-by (second (test-data 25 3)) 102 | :key #'(lambda (s) (elt s 1)) 103 | :value #'identity 104 | :test #'string-equal))) 105 | (iter (for (k . data) in res) 106 | (iter (for s in data) 107 | (assert-equal k (elt s 1))) 108 | (summing (length data) into row-cnt) 109 | (finally 110 | (assert-equal 25 row-cnt))) 111 | )) 112 | 113 | (define-test basic-grouped-list () 114 | (let* ((res (make-grouped-list 115 | (second (test-data 25 3)) 116 | :keys (list #'(lambda (s) (elt s 1)))))) 117 | (iter 118 | (for gl in (child-groupings res)) 119 | (for kv = (key-value gl)) 120 | (iter (for item in (items-in-group gl)) 121 | (assert-equal kv (elt item 1)))) 122 | (assert-equal 25 (length (items-in-group res))) 123 | )) 124 | 125 | (define-test basic-grouped-list2 () 126 | (assert-true 127 | (apply #'make-instance 'grouped-list (test-data 25 3)))) 128 | 129 | (define-test run-accuracy-tests () 130 | (let ((num-rows 1000) (depth 5)) 131 | (labels ((assertions (g1 g2) 132 | ;; all grouped lists contain the same number of children 133 | (assert-equal 134 | (length (items-in-group g1)) 135 | (length (items-in-group g2))) 136 | 137 | ;; all grouped lists contain the same data 138 | (assert-true (null (set-difference 139 | (items-in-group g1) 140 | (items-in-group g2) 141 | :test #'equalp ))) 142 | 143 | ;; assert that all children should actually be in this group 144 | (when (parent-grouping g1) 145 | (let ((pk (key-value g1))) 146 | (when pk 147 | (let* ((test (first (tests (parent-grouping g1)))) 148 | (key (first (keys (parent-grouping g1))))) 149 | (iter (for item in (items-in-group g1)) 150 | (assert-true (funcall test (funcall key item) pk)))))))) 151 | 152 | ;; A function to run the tests on each sub grouping tree 153 | (recursert (g1 g2) 154 | ;;(break "recursert:~%~a~%~a~%~a" g1 g2 g3) 155 | (assertions g1 g2) 156 | (let* ((k1 (child-groupings g1)) 157 | (k2 (child-groupings g2)) 158 | (pred (when k1 159 | (if (numberp (key-value (first k1))) 160 | #'< 161 | #'string<)))) 162 | (when k1 163 | (setf k1 (sort k1 pred :key #'key-value)) 164 | (setf k2 (sort k2 pred :key #'key-value))) 165 | ;(break "~A" (list k1 k2 k3)) 166 | (iter (for kg1 in k1) 167 | (for kg2 in k2) 168 | (recursert kg1 kg2))))) 169 | (let* ((data (test-data num-rows depth)) 170 | (lgl (make-test-data-instance data :grouping-implementation :list)) 171 | (hgl (make-test-data-instance data :grouping-implementation :hash-table))) 172 | (recursert lgl hgl))))) 173 | 174 | (defun %run-creation-speed-tests (&key (num-rows 1000) (depth 5) (times 10)) 175 | (macrolet ((time-to-log (&body body) 176 | `(info 177 | (with-output-to-string (*trace-output*) 178 | (time (progn ,@body)))))) 179 | (let ((test-data (iter (for i from 1 to times) 180 | (collect (test-data num-rows depth))))) 181 | (info "Grouping Implentation Speed Tests" ) 182 | (info "~%HASH-TABLE Implementation~%" ) 183 | (time-to-log 184 | (iter (for data in test-data) 185 | (make-test-data-instance data :grouping-implementation :hash-table))) 186 | 187 | (info "~%LIST Implementation~%" ) 188 | (time-to-log 189 | (iter (for data in test-data) 190 | (make-test-data-instance data :grouping-implementation :list))) 191 | 192 | ))) 193 | 194 | (define-test run-creation-speed-tests (:tags '(speed)) 195 | (%run-creation-speed-tests)) 196 | 197 | --------------------------------------------------------------------------------