├── .gitignore
├── LICENSE
├── README.mediawiki
├── examples
└── examples.lisp
├── group-by.asd
├── group-by.lisp
└── tests
└── group-by.lisp
/.gitignore:
--------------------------------------------------------------------------------
1 | .svn
2 | build
3 | dist
4 | *~
5 | *#
6 | *.fasl
7 | .git
8 | _darcs
9 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | ;; Copyright (c) 2011 Russ Tyndall , Acceleration.net http://www.acceleration.net
2 |
3 | ;; Permission is hereby granted, free of charge, to any person obtaining
4 | ;; a copy of this software and associated documentation files (the
5 | ;; "Software"), to deal in the Software without restriction, including
6 | ;; without limitation the rights to use, copy, modify, merge, publish,
7 | ;; distribute, sublicense, and/or sell copies of the Software, and to
8 | ;; permit persons to whom the Software is furnished to do so, subject to
9 | ;; the following conditions:
10 |
11 | ;; The above copyright notice and this permission notice shall be
12 | ;; included in all copies or substantial portions of the Software.
13 |
14 | ;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | ;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | ;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | ;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | ;; LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | ;; OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | ;; WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.mediawiki:
--------------------------------------------------------------------------------
1 | = group-by =
2 | A Common Lisp library to help group data into trees (of various
3 | formats) based on common/shared values
4 |
5 | == API ==
6 |
7 | === group-by ===
8 |
9 | groups the list into an alist using the key function and value function to group by key,
10 | with a list of all values for that key.
11 |
12 | ''key'': is used to determine the key in the a-list
13 | ''value'': is used to determine the value in the a-list
14 | ''key-fn'': is passed as the :key to assoc (essentially the key of your key)
15 | ''test'': is passed as the :test to assoc
16 |
17 | eg: (group-by '((a 1 2) (a 3 4) (b 5 6)))
18 | => ((A (1 2) (3 4)) (B (5 6)))
19 |
20 | eg: (group-by '((a 1 2) (a 3 4) (b 5 6)) :value #'identity)
21 | => ((A (A 1 2) (A 3 4)) (B (B 5 6)))
22 |
23 | === make-grouped-list, grouped-list ===
24 |
25 | Given a list of input, produce a grouped-list CLOS object that contains
26 | the original list, configuration about the groupings and the result tree
27 | of grouped-list objects
28 |
29 | ''keys'': a list of keys to group by
30 | ''tests'': a list of tests to compare the keys with
31 |
32 | ''grouping-implmentation'': What data structure should be used to perform the grouping
33 | '':list, :hash-table''
34 | The implementation doesnt change the output, but it does change
35 | the performance characteristics of the grouped-object (see:
36 | grouped-list-speed-tester for help deciding which to use)
37 |
38 | ----
39 |
40 |
41 | For the following docs consider the grouped list (as from examples) 42 | 43 | grouped-list 44 | russ 45 | PROJ-A 46 | (list of timeclock records) 47 | PROJ-B 48 | (list of timeclock records) 49 | PROJ-C 50 | (list of timeclock records) 51 | bob 52 | PROJ-A 53 | (list of timeclock records) 54 | PROJ-B 55 | (list of timeclock records) 56 | PROJ-C 57 | (list of timeclock records) 58 | 59 |60 | 61 | ==== key-value ==== 62 | 63 | Returns the key-value that this grouped-list represents 64 | 65 | nil for the root
104 | ;; Copyright (c) 2011 Russ Tyndall , Acceleration.net http://www.acceleration.net 105 | ;; All rights reserved. 106 | ;; 107 | ;; Redistribution and use in source and binary forms, with or without 108 | ;; modification, are permitted provided that the following conditions are 109 | ;; met: 110 | ;; 111 | ;; - Redistributions of source code must retain the above copyright 112 | ;; notice, this list of conditions and the following disclaimer. 113 | ;; 114 | ;; - Redistributions in binary form must reproduce the above copyright 115 | ;; notice, this list of conditions and the following disclaimer in the 116 | ;; documentation and/or other materials provided with the distribution. 117 | ;; 118 | ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 119 | ;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 120 | ;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 121 | ;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 122 | ;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 123 | ;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 124 | ;; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 125 | ;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 126 | ;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 127 | ;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 128 | ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 129 |-------------------------------------------------------------------------------- /examples/examples.lisp: -------------------------------------------------------------------------------- 1 | (in-package :group-by) 2 | 3 | (defparameter +example-timeclock-data+ 4 | `(("russ" 1 :proj-a) 5 | ("russ" 2 :proj-a) 6 | ("bob" 1 :proj-a) 7 | ("russ" 2 :proj-b) 8 | ("bob" 1 :proj-b) 9 | ("bob" 1 :proj-b) 10 | ("russ" 2 :proj-b) 11 | ("bob" 1 :proj-c) 12 | ("russ" 4 :proj-c))) 13 | 14 | (group-by +example-timeclock-data+) 15 | ;; results in 16 | '(("russ" 17 | (1 :proj-a) (2 :proj-a) (2 "time on proj b") 18 | (2 "time on proj b") (4 :proj-c)) 19 | ("bob" 20 | (1 :proj-a) (1 "time on proj b") (1 "time on proj b") 21 | (1 :proj-c))) 22 | 23 | (defparameter +example-multiple-grouped-timeclock-data+ 24 | (group-by-repeated 25 | +example-timeclock-data+ 26 | :keys (list #'first #'third) 27 | :tests (list #'string-equal #'eql))) 28 | ;; results in 29 | '(("bob" 30 | (:proj-c ("bob" 1 :proj-c)) 31 | (:proj-b ("bob" 1 :proj-b) ("bob" 1 :proj-b)) 32 | (:proj-a ("bob" 1 :proj-a))) 33 | ("russ" 34 | (:proj-c ("russ" 4 :proj-c)) 35 | (:proj-b ("russ" 2 :proj-b) ("russ" 2 :proj-b)) 36 | (:proj-a ("russ" 2 :proj-a) ("russ" 1 :proj-a)))) 37 | 38 | (defparameter +example-grouped-list-timeclock-data-alist+ 39 | (make-grouped-list 40 | +example-timeclock-data+ 41 | :keys (list #'first #'third) 42 | :tests (list #'string-equal #'eql))) 43 | 44 | (defclass example-timeclock-record () 45 | ((name :accessor name :initarg :name :initform nil) 46 | (hours :accessor hours :initarg :hours :initform 0) 47 | (proj :accessor proj :initarg :proj :initform nil))) 48 | 49 | (defun example-tcr (name hours proj) 50 | (make-instance 'example-timeclock-record :name name :hours hours :proj proj)) 51 | 52 | (defparameter +example-timeclock-objs+ 53 | (iter top (for i from 0 to 10) 54 | (iter (for rec in +example-timeclock-data+) 55 | (in top (collect (apply #'example-tcr rec)))))) 56 | 57 | (defun timeclock-report-rec-print (gl &optional (spaces "")) 58 | (let ((further-groups (child-groupings gl))) 59 | (if further-groups 60 | (iter 61 | (with hours = 0) 62 | (for group in further-groups) 63 | (format T "~?~A~%" spaces () (key-value group) ) 64 | (incf hours (timeclock-report-rec-print 65 | group (concatenate 'string spaces "~2,1@T"))) 66 | (finally 67 | (format T "~?Total: ~D~%" spaces () hours ) 68 | (return hours))) 69 | (iter (for kid in (items-in-group gl)) 70 | (sum (hours kid) into hours) 71 | (finally 72 | (format T "~?Total: ~D~%" spaces () hours ) 73 | (return hours)))))) 74 | 75 | (defun print-timeclock-report () 76 | (let ((by-person-project 77 | (make-grouped-list 78 | +example-timeclock-objs+ 79 | :keys (list #'name #'proj) 80 | :tests (list #'equalp #'eql) 81 | :grouping-implementation :hash-table)) 82 | (by-project-person 83 | (make-grouped-list 84 | +example-timeclock-objs+ 85 | :keys (list #'proj #'name) 86 | :tests (list #'eql #'equalp) 87 | :grouping-implementation :list))) 88 | 89 | (format T "Hours BY Project > Person~%-----------~%") 90 | (timeclock-report-rec-print by-project-person) 91 | (format T "~%~%Hours BY Person > Project~%-----------~%") 92 | (timeclock-report-rec-print by-person-project) 93 | )) 94 | 95 | #| 96 | Hours BY Project > Person 97 | ----------- 98 | PROJ-C 99 | russ 100 | Total: 44 101 | bob 102 | Total: 11 103 | Total: 55 104 | PROJ-B 105 | bob 106 | Total: 22 107 | russ 108 | Total: 44 109 | Total: 66 110 | PROJ-A 111 | bob 112 | Total: 11 113 | russ 114 | Total: 33 115 | Total: 44 116 | Total: 165 117 | 118 | 119 | Hours BY Person > Project 120 | ----------- 121 | russ 122 | PROJ-A 123 | Total: 33 124 | PROJ-B 125 | Total: 44 126 | PROJ-C 127 | Total: 44 128 | Total: 121 129 | bob 130 | PROJ-A 131 | Total: 11 132 | PROJ-B 133 | Total: 22 134 | PROJ-C 135 | Total: 11 136 | Total: 44 137 | Total: 165 138 | |# 139 | 140 | 141 | (defparameter +example-speedtest-timeclock-objs+ 142 | (iter top (for i from 0 to 100) 143 | (iter (for rec in +example-timeclock-data+) 144 | (in top (collect (apply #'example-tcr rec)))))) 145 | 146 | (defun speed-test-example () 147 | (format *trace-output* "~%build-gl-speed-test~%") 148 | (grouped-list-speed-tester 149 | :iterations 100 150 | :list +example-speedtest-timeclock-objs+ 151 | :keys (list #'name #'proj) 152 | :tests (list #'string-equal #'eql)) 153 | 154 | (format *trace-output* "~%~%build-gl-speed-test with-item-access 155 | This shows how implementations differ based on workload~%") 156 | (grouped-list-speed-tester 157 | :iterations 10 158 | :list +example-speedtest-timeclock-objs+ 159 | :keys (list #'name #'proj) 160 | :tests (list #'string-equal #'eql) 161 | :actions (lambda (gl) 162 | (iter (for c from 0 to 1000 ) 163 | (iter 164 | (for i in '("russ" "bob")) 165 | (items-in-group gl i) 166 | (iter (for j in `(:proj-a :proj-b :proj-c)) 167 | (items-in-group gl i j))))))) 168 | 169 | #| 170 | build-gl-speed-test 171 | Grouping Implentation Speed Tests 172 | 173 | HASH-TABLE Implementation 174 | Evaluation took: 175 | 0.181 seconds of real time 176 | 0.160000 seconds of total run time (0.120000 user, 0.040000 system) 177 | [ Run times consist of 0.080 seconds GC time, and 0.080 seconds non-GC time. ] 178 | 88.40% CPU 179 | 92 lambdas converted 180 | 451,161,165 processor cycles 181 | 4,939,600 bytes consed 182 | 183 | 184 | 185 | LIST Implementation 186 | Evaluation took: 187 | 0.107 seconds of real time 188 | 0.100000 seconds of total run time (0.080000 user, 0.020000 system) 189 | 93.46% CPU 190 | 46 lambdas converted 191 | 265,815,870 processor cycles 192 | 2,749,600 bytes consed 193 | 194 | 195 | 196 | build-gl-speed-test with-item-access 197 | This shows how implementations differ based on workload 198 | Grouping Implentation Speed Tests 199 | 200 | HASH-TABLE Implementation 201 | Evaluation took: 202 | 0.873 seconds of real time 203 | 0.860000 seconds of total run time (0.760000 user, 0.100000 system) 204 | [ Run times consist of 0.330 seconds GC time, and 0.530 seconds non-GC time. ] 205 | 98.51% CPU 206 | 2,175,013,267 processor cycles 207 | 294,658,896 bytes consed 208 | 209 | 210 | 211 | LIST Implementation 212 | Evaluation took: 213 | 1.031 seconds of real time 214 | 0.990000 seconds of total run time (0.900000 user, 0.090000 system) 215 | [ Run times consist of 0.460 seconds GC time, and 0.530 seconds non-GC time. ] 216 | 96.02% CPU 217 | 2,572,489,710 processor cycles 218 | 293,604,720 bytes consed 219 | |# 220 | 221 | (defparameter +example-names+ #("russ" "alice" "bob" "charlie")) 222 | (defparameter +example-projects+ #(:proj-a :proj-b :proj-c :proj-d)) 223 | 224 | (defun incremental-grouping-examples () 225 | "An example of building a grouped list up from individual items 226 | rather than starting with a full list then grouping it" 227 | (iter (for type in `(:hash-table :tree :alist)) 228 | (iter 229 | (with gl = (make-grouped-list 230 | nil 231 | :keys (list #'name #'proj) 232 | :tests (list #'string-equal #'eql) 233 | :grouping-implementation type)) 234 | (for i from 0 to 1000) 235 | (for tcr = 236 | (example-tcr 237 | (alexandria:random-elt +example-names+) 238 | (random 10) 239 | (alexandria:random-elt +example-projects+))) 240 | (add-item-to-grouping tcr gl) 241 | (finally (timeclock-report-rec-print gl) 242 | (format T "----------------------~%"))))) -------------------------------------------------------------------------------- /group-by.asd: -------------------------------------------------------------------------------- 1 | ;; -*- lisp -*- 2 | 3 | (eval-when (:compile-toplevel :load-toplevel :execute) 4 | (unless (find-package :group-by.system) 5 | (defpackage :group-by.system 6 | (:use :common-lisp :asdf)))) 7 | 8 | (in-package group-by.system) 9 | 10 | (defsystem :group-by 11 | :description "A Common Lisp library to help group data into trees (of various 12 | formats) based on common/shared values" 13 | :licence "BSD" 14 | :version "0.1" 15 | :components ((:file "group-by")) 16 | :depends-on (:iterate :alexandria)) 17 | 18 | (defsystem :group-by-test 19 | :description "A Common Lisp library to help group data into trees (of various 20 | formats) based on common/shared values" 21 | :licence "BSD" 22 | :version "0.1" 23 | :components ((:module :tests 24 | :serial t 25 | :components ((:file "group-by")))) 26 | :depends-on (:group-by :lisp-unit2)) 27 | 28 | (defmethod asdf:perform ((o asdf:test-op) (c (eql (find-system :group-by)))) 29 | (asdf:oos 'asdf:load-op :group-by-test) 30 | (let ((*package* (find-package :group-by-test))) 31 | (eval (read-from-string " 32 | (run-tests :package :group-by-test 33 | :name :group-by 34 | :run-contexts #'with-summary-context)")))) 35 | 36 | ;; Copyright (c) 2011 Russ Tyndall , Acceleration.net http://www.acceleration.net 37 | 38 | ;; Permission is hereby granted, free of charge, to any person obtaining 39 | ;; a copy of this software and associated documentation files (the 40 | ;; "Software"), to deal in the Software without restriction, including 41 | ;; without limitation the rights to use, copy, modify, merge, publish, 42 | ;; distribute, sublicense, and/or sell copies of the Software, and to 43 | ;; permit persons to whom the Software is furnished to do so, subject to 44 | ;; the following conditions: 45 | 46 | ;; The above copyright notice and this permission notice shall be 47 | ;; included in all copies or substantial portions of the Software. 48 | 49 | ;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 50 | ;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 51 | ;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 52 | ;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 53 | ;; LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 54 | ;; OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 55 | ;; WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /group-by.lisp: -------------------------------------------------------------------------------- 1 | (defpackage :group-by 2 | (:use :cl :cl-user :iterate) 3 | (:export 4 | :group-by 5 | :categorize-item 6 | :grouped-list 7 | :make-grouped-list 8 | :add-item-to-grouping 9 | :key-value 10 | :child-groupings 11 | :items-in-group 12 | :parent-grouping 13 | :keys :tests 14 | :make-child-grouped-list 15 | :group-by-repeated 16 | :grouped-list-speed-tester)) 17 | 18 | (in-package :group-by) 19 | 20 | (defun group-by (list &key (key #'car) (value #'cdr) (key-fn #'identity) (test #'equal)) 21 | "groups the list into an alist using the key function and value function to group by key, 22 | with a list of all values for that key. 23 | 24 | key is used to determine the key in the a-list 25 | value is used to determin the value in the a-list 26 | key-fn is passed as the :key to assoc 27 | test is passed as the :test to assoc 28 | 29 | eg: (group-by '((a 1 2) (a 3 4) (b 5 6))) 30 | => ((A (1 2) (3 4)) (B (5 6)))" 31 | ;; we keep 2 alist - ( key . value-list-head ) & ( key . value-list-tail ) 32 | ;; so we can collect at the end (which saves us infinitesimal time and space) 33 | (iter (for i in list) 34 | (for k = (funcall key i)) 35 | (for v = (cons (funcall value i) nil)) 36 | (for cell = (assoc k tails :test test :key key-fn)) 37 | (cond 38 | (cell (setf (cddr cell) v 39 | (cdr cell) v)) 40 | (t ;; dont reuse this cons cell, we want two distinct ones 41 | (collect (cons k v) into results) 42 | (collect (cons k v) into tails))) 43 | (finally (return results)))) 44 | 45 | (defgeneric categorize-item (item root &key &allow-other-keys ) 46 | (:documentation "Insert a new item into a grouped list ")) 47 | 48 | (defmethod categorize-item (item (root list) &key keys tests &allow-other-keys) 49 | "Categorize a new item into an alist as produced by group-by-repeated 50 | This will create new category nodes if necessary" 51 | (if (null keys) 52 | (push item root) 53 | (let ((key (funcall (first keys) item))) 54 | (let ((data (assoc key root :test (or (first tests) #'equal)))) 55 | (if data 56 | ;; Add the rest of the categorization to the 57 | ;; data of this item 58 | (setf (cdr data) (categorize-item 59 | item (cdr data) 60 | :keys (rest keys) 61 | :tests (rest tests))) 62 | ;; we have no data for this node, build a new subtree 63 | (push (cons key (categorize-item 64 | item nil 65 | :keys (rest keys) 66 | :tests (rest tests))) 67 | root))))) 68 | root) 69 | 70 | (defun group-by-repeated (list &key keys tests) 71 | "Returns an alist tree that represents the items in the list as categorized 72 | by keys (compared with tests) 73 | ex: ((a 3 sam) (c 4 bob) (a 3 ted)) 74 | 75 | 76 | keys: a list of key functions that describe the categorizations in order 77 | tests: how we are testing whether or not two keys are equal, defaults to #'equal 78 | " 79 | (let (root) 80 | (iter (for item in list) 81 | (setf root (categorize-item item root :keys keys :tests tests))) 82 | root)) 83 | 84 | (defclass grouped-list () 85 | ((orig-list :accessor orig-list :initarg :orig-list :initform nil) 86 | (grouping-implementation 87 | :accessor grouping-implementation :initarg :grouping-implementation :initform :list 88 | :documentation 89 | "What data structure should be used to perform the grouping :list, :hash-table") 90 | (keys :accessor keys :initarg :keys :initform nil 91 | :documentation "A list of key functions we will use to group the list") 92 | (tests :accessor tests :initarg :tests :initform nil 93 | :documentation "A list of test functions we will use to test key equality 94 | tree: defaults to #'equal 95 | hash-table: this be a single hash-equality symbol (defaults to 'equal)") 96 | (%child-groupings :accessor %child-groupings :initarg :%child-groupings :initform nil) 97 | (%items :accessor %items :initarg :%items :initform nil) 98 | (parent-grouping :accessor parent-grouping :initarg :parent :initform nil 99 | :documentation "If this is a subgrouping of another grouped-list, what is the parent grouping we are apart of (mostly for testing)") 100 | (key-value :accessor key-value :initarg :key-value :initform nil 101 | :documentation "If this is a subgrouping of another grouped-list, what is the key this grouped-list represents in the parent grouping (mostly for testing)")) 102 | (:documentation "This class represents a list that we have grouped by multiple key values 103 | ala one of the group-by-repeatedly functions ")) 104 | 105 | (defgeneric child-groupings (grouped-list) 106 | (:method ((gl grouped-list)) 107 | (case (grouping-implementation gl) 108 | (:hash-table (iter (for (k v) in-hashtable (%child-groupings gl)) 109 | (collect v))) 110 | (T (%child-groupings gl))))) 111 | 112 | (defun make-grouped-list (inp &key tests keys (grouping-implementation :alist)) 113 | "Given a list of input, produce a grouped-list CLOS object that contains 114 | the original list, configuration about the groupings and the result tree 115 | of grouped-list objects 116 | 117 | ''keys'': a list of keys to group by