├── .gitignore ├── xecto.asd ├── LICENSE ├── mpsc-queue.lisp ├── xecto-impl-reshape.lisp ├── xecto-impl-reduce.lisp ├── parallel-futures.lisp ├── work-units.lisp ├── xecto-impl-scan.lisp ├── status.lisp ├── README.md ├── xecto-impl-map.lisp ├── loop-nest-transpose.lisp ├── xecto-impl.lisp ├── vector-futures.lisp ├── futures.lisp ├── work-stack.lisp ├── thread-pool.lisp ├── parallel-primitives.lisp └── affine-arrays.lisp /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.fasl 3 | 4 | -------------------------------------------------------------------------------- /xecto.asd: -------------------------------------------------------------------------------- 1 | (asdf:defsystem "xecto" 2 | :version "0.0.1" 3 | :licence "3-clause BSD" 4 | :license "3-clause BSD" 5 | :description "Xecto is a simple parallel vector-processing library" 6 | :author "Paul Khuong" 7 | :depends-on ("sb-queue") 8 | :components 9 | ((:file "status") 10 | (:file "work-units") 11 | (:file "work-stack" :depends-on ("work-units")) 12 | (:file "thread-pool" :depends-on ("work-stack")) 13 | (:file "futures" :depends-on ("work-stack" "status")) 14 | (:file "parallel-futures" :depends-on ("thread-pool" "futures")) 15 | (:file "parallel-primitives" :depends-on ("status" 16 | "work-stack" "thread-pool" 17 | "futures" "parallel-futures")) 18 | (:file "vector-futures" :depends-on ("parallel-futures")) 19 | (:file "loop-nest-transpose") 20 | (:file "xecto-impl" :depends-on ("vector-futures" "loop-nest-transpose")) 21 | (:file "xecto-impl-reshape" :depends-on ("xecto-impl")) 22 | (:file "xecto-impl-map" :depends-on ("xecto-impl")) 23 | (:file "xecto-impl-reduce" :depends-on ("xecto-impl")) 24 | (:file "xecto-impl-scan" :depends-on ("xecto-impl")))) 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011-2015, Paul Khuong 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of string-case nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /mpsc-queue.lisp: -------------------------------------------------------------------------------- 1 | (defpackage "MPSC-QUEUE" 2 | (:use "CL" "SB-EXT") 3 | (:shadow cl:get) 4 | (:export "QUEUE" "P" "MAKE" "PUT" "GET" "P")) 5 | 6 | (in-package "MPSC-QUEUE") 7 | 8 | (defstruct (queue 9 | (:constructor %make-queue (head))) 10 | (head nil :type list) 11 | (tail nil :type list)) 12 | 13 | (declaim (inline p)) 14 | (defun p (x) 15 | (queue-p x)) 16 | 17 | (defun slow-get (queue) 18 | (declare (type queue queue)) 19 | (let ((head (queue-head queue))) 20 | (when head (return-from slow-get head))) 21 | (let ((tail (loop ; stupid. It's just an xchg 22 | (let ((tail (queue-tail queue))) 23 | (when (eql (cas (queue-tail queue) tail nil) 24 | tail) 25 | (return tail)))))) 26 | (setf (queue-head queue) (reverse tail)))) 27 | 28 | (declaim (inline get put)) 29 | (defun get (queue &optional default) 30 | (declare (type queue queue)) 31 | (let ((head (queue-head queue))) 32 | (cond ((or head 33 | (setf head (slow-get queue))) 34 | (destructuring-bind (value . next) head 35 | (setf (queue-head queue) next 36 | (car head) nil 37 | (cdr head) nil) 38 | (values value t))) 39 | (t 40 | (values default nil))))) 41 | 42 | (defun put (queue value) 43 | (declare (type queue queue)) 44 | (let ((cons (list value))) 45 | (loop 46 | (let ((tail (queue-tail queue))) 47 | (setf (cdr cons) tail) 48 | (when (eql tail (cas (queue-tail queue) tail cons)) 49 | (return value)))))) 50 | (declaim (notinline get put)) 51 | 52 | (defun make (&optional initial-contents constructor &rest args) 53 | (let ((contents (coerce initial-contents 'list))) 54 | (if constructor 55 | (apply constructor :head contents :tail nil args) 56 | (%make-queue initial-contents)))) 57 | -------------------------------------------------------------------------------- /xecto-impl-reshape.lisp: -------------------------------------------------------------------------------- 1 | (in-package "XECTO-IMPL") 2 | 3 | (defun transpose (xecto i j) 4 | (declare (type xecto xecto) 5 | (type index i j)) 6 | (when (= i j) 7 | (return-from transpose xecto)) 8 | (when (> i j) 9 | (rotatef i j)) 10 | (let ((shape (copy-seq (xecto-shape xecto)))) 11 | (assert (< j (length shape))) 12 | (let ((last (aref shape j))) 13 | (replace shape shape :start1 (1+ i) :start2 i :end2 (1+ j)) 14 | (setf (aref shape i) last)) 15 | (setf (xecto-shape xecto) (intern-shape shape))) 16 | xecto) 17 | 18 | (defun slice (xecto dimension begin &optional end step) 19 | ;; FIXME: -ve step 20 | (unless step 21 | (setf step 1)) 22 | (let* ((shape (copy-seq (xecto-shape xecto))) 23 | (len (car (aref shape dimension))) 24 | (stride (cdr (aref shape dimension))) 25 | (offset (xecto-offset xecto))) 26 | (unless end 27 | (setf end (truncate (- len begin) step))) 28 | (incf offset (* begin stride)) 29 | (setf (aref shape dimension) (cons end (* step stride))) 30 | (setf (xecto-shape xecto) (intern-shape shape) 31 | (xecto-offset xecto) offset) 32 | xecto)) 33 | 34 | (defun remove-index (vector index) 35 | (remove-if (constantly t) vector :start index :count 1)) 36 | 37 | (defun select (xecto dimension &optional value) 38 | (unless value 39 | (setf value 0)) 40 | (let* ((shape (copy-seq (xecto-shape xecto))) 41 | (offset (xecto-offset xecto))) 42 | (destructuring-bind (dim . stride) (aref shape dimension) 43 | (assert (< value dim)) 44 | (setf (xecto-shape xecto) (intern-shape 45 | (remove-index shape dimension)) 46 | (xecto-offset xecto) (+ offset 47 | (* stride value))))) 48 | xecto) 49 | 50 | (defun replicate (xecto &rest dimensions) 51 | (declare (dynamic-extent dimensions)) 52 | (setf (xecto-shape xecto) (intern-shape 53 | (concatenate 'simple-vector 54 | (mapcar (lambda (dim) 55 | (cons dim 0)) 56 | dimensions) 57 | (xecto-shape xecto)))) 58 | xecto) 59 | 60 | (defun extend-shape-or-die (result-shape shape) 61 | (declare (type shape result-shape shape)) 62 | (when (eql result-shape shape) 63 | (return-from extend-shape-or-die shape)) 64 | (assert (every (lambda (x y) 65 | (= (car x) (car y))) 66 | result-shape 67 | shape)) 68 | (when (= (length result-shape) (length shape)) 69 | (return-from extend-shape-or-die shape)) 70 | (let ((new-shape (make-array (length result-shape)))) 71 | (replace new-shape shape) 72 | (loop for i from (length shape) below (length new-shape) 73 | for (dim) across result-shape 74 | do (setf (aref new-shape i) (cons dim 0))) 75 | (intern-shape new-shape))) 76 | -------------------------------------------------------------------------------- /xecto-impl-reduce.lisp: -------------------------------------------------------------------------------- 1 | (in-package "XECTO-IMPL") 2 | 3 | (defun reduce-xecto (fun arg) 4 | (declare (type xecto arg)) 5 | (let* ((shape (xecto-shape arg)) 6 | (spine (aref shape 0)) 7 | (bulk (remove-index shape 0))) 8 | (multiple-value-bind (r-size r-shape) 9 | (%canonical-size-and-shape bulk) 10 | (execute-reduce fun r-size r-shape 11 | spine 12 | (xecto-loop-nest:optimize (cons 0 r-shape) 13 | (cons (xecto-offset arg) bulk)) 14 | arg)))) 15 | 16 | (defun compute-reduce-tasks (function spine pattern arg) 17 | (let ((tasks (make-array 16 :fill-pointer 0 :adjustable t)) 18 | (data (xecto-data arg))) 19 | (destructuring-bind (offsets . loop) pattern 20 | (declare (type (simple-array index (2)) offsets) 21 | (type simple-vector loop)) 22 | (labels 23 | ((rec (depth offsets) 24 | (declare (type (simple-array index (2)) offsets)) 25 | (let ((offsets (copy-seq offsets))) 26 | (if (= depth (length loop)) 27 | (vector-push-extend 28 | (let ((offsets (copy-seq offsets))) 29 | (lambda (dst index) index 30 | (execute-subreduce dst function spine 31 | offsets data))) 32 | tasks) 33 | (destructuring-bind (trip . strides) (aref loop depth) 34 | (declare (type (simple-array fixnum (2)) strides)) 35 | (loop repeat trip do 36 | (rec (1+ depth) offsets) 37 | (map-into offsets #'+ 38 | offsets strides))))))) 39 | (rec 0 offsets))) 40 | (coerce tasks 'simple-vector))) 41 | 42 | (defun execute-subreduce (destination function spine offsets arg) 43 | (declare (type vector-future:vector-future destination arg) 44 | (type (simple-array index (2)) offsets)) 45 | (destructuring-bind (repeat . stride) spine 46 | (declare (type index repeat stride)) 47 | (let* ((dst-vec (vector-future:data destination)) 48 | (dst-off (aref offsets 0)) 49 | (src-vec (vector-future:data arg)) 50 | (src-off (aref offsets 1)) 51 | (acc (aref src-vec src-off))) 52 | (declare (type double-float acc) 53 | (type index dst-off src-off)) 54 | (if (eql function #'+) 55 | (loop repeat (1- repeat) do 56 | (incf src-off stride) 57 | (incf acc (aref src-vec src-off))) 58 | (loop repeat (1- repeat) do 59 | (incf src-off stride) 60 | (setf acc (funcall function acc (aref src-vec src-off))))) 61 | (setf (aref dst-vec dst-off) acc)))) 62 | 63 | (defun execute-reduce (fun r-size r-shape spine pattern arg) 64 | (let* ((tasks (compute-reduce-tasks fun spine pattern arg)) 65 | (data (vector-future:make r-size 66 | (list (xecto-data arg)) 67 | tasks))) 68 | (%make-xecto r-shape data))) 69 | -------------------------------------------------------------------------------- /parallel-futures.lisp: -------------------------------------------------------------------------------- 1 | (defpackage "PARALLEL-FUTURE" 2 | (:use "CL" "SB-EXT") 3 | (:export "*CONTEXT*" "WITH-CONTEXT" 4 | "FUTURE" "P" 5 | "MAKE")) 6 | 7 | ;;; Parallel futures: hooking up futures with the work queue 8 | ;;; 9 | ;;; A parallel is a future with an execution in three periods: 10 | ;;; - a list designator of setup functions 11 | ;;; - a vector of subtasks to execute in parallel 12 | ;;; - a list designator of cleanup functions 13 | ;;; 14 | ;;; When a parallel future is ready for execution, a task that 15 | ;;; executes the setup functions and pushes the subtasks to 16 | ;;; the local stack is created. That task is enqueued or pushed 17 | ;;; to the local stack. 18 | ;;; Once the setup functions have been executed, the subtasks are 19 | ;;; pushed as a bulk-task. 20 | ;;; Once the bulk-task is completed, the cleanup functions are executed, 21 | ;;; and the future is marked as done. 22 | 23 | (in-package "PARALLEL-FUTURE") 24 | 25 | (defvar *context* (work-queue:make 2)) 26 | 27 | (defmacro with-context ((count) &body body) 28 | (let ((context (gensym "CONTEXT"))) 29 | `(let* ((,context nil)) 30 | (unwind-protect (progn 31 | (setf ,context (work-queue:make ,count)) 32 | (let ((*context* ,context)) 33 | ,@body)) 34 | (when ,context 35 | (work-queue:stop ,context)))))) 36 | 37 | (defstruct (future 38 | (:include future:future)) 39 | (setup nil :type (or list symbol function))) 40 | 41 | (declaim (inline p)) 42 | (defun p (x) 43 | (future-p x)) 44 | 45 | (defun map-list-designator (functions argument) 46 | (etypecase functions 47 | (null) 48 | (list 49 | (dolist (function functions) 50 | (funcall function argument))) 51 | ((or symbol function) 52 | (funcall functions argument))) 53 | nil) 54 | 55 | (defun future-push-self (future) 56 | (declare (type future future)) 57 | (let ((setup (future-setup future))) 58 | (setf (future-setup future) nil) 59 | (work-queue:push-self 60 | (lambda () 61 | (map-list-designator setup future) 62 | (cond ((plusp (future-waiting future)) 63 | (work-queue:push-self future)) 64 | ((zerop (future-remaining future)) 65 | (map-list-designator (future-cleanup future) future) 66 | (setf (future-cleanup future) nil)) 67 | (t (error "Mu?")))) 68 | (or (work-queue:current-queue) 69 | *context*)))) 70 | 71 | (defun make (dependencies setup subtasks cleanup &optional constructor &rest arguments) 72 | (declare (type simple-vector dependencies subtasks)) 73 | (let* ((count (length subtasks)) 74 | (future (apply (or constructor #'make-future) 75 | :function #'future-push-self 76 | :dependencies dependencies 77 | :setup setup 78 | :subtasks subtasks 79 | :waiting count 80 | :remaining count 81 | :cleanup (if (listp cleanup) 82 | (append cleanup (list #'future:mark-done)) 83 | (list cleanup #'future:mark-done)) 84 | arguments))) 85 | (future:mark-dependencies future))) 86 | -------------------------------------------------------------------------------- /work-units.lisp: -------------------------------------------------------------------------------- 1 | (defpackage "WORK-UNIT" 2 | (:use "CL" "SB-EXT") 3 | (:export "TASK" "TASK-P" "TASK-FUNCTION" 4 | "BULK-TASK" "BULK-TASK-P" "BULK-TASK-WAITING" "BULK-TASK-REMAINING" 5 | "BULK-TASK-SUBTASK-FUNCTION" "BULK-TASK-SUBTASKS" 6 | "BULK-TASK-CLEANUP" 7 | "TASK-DESIGNATOR" 8 | "EXECUTE-TASK" "%BULK-FIND-TASK")) 9 | 10 | (in-package "WORK-UNIT") 11 | 12 | (defstruct (task 13 | (:constructor nil)) 14 | (function (error "Missing arg") :type (or symbol function))) 15 | 16 | (defstruct (bulk-task 17 | (:constructor nil)) 18 | ;; count waiting to be executed, initially (length subtasks) 19 | (waiting (error "Missing arg") :type word) 20 | ;; count not done yet, initially (length subtasks) 21 | (remaining (error "Missing arg") :type word) 22 | (subtask-function nil :type (or null symbol function)) 23 | (subtasks (error "Missing arg") :type simple-vector) 24 | (cleanup nil :type (or list symbol function))) 25 | 26 | (deftype task-designator () 27 | `(or (and symbol (not null)) function task bulk-task)) 28 | 29 | (defun execute-task (task) 30 | (etypecase task 31 | ((or symbol function) 32 | (funcall task)) 33 | (task 34 | (let ((function (task-function task))) 35 | (when (and function 36 | (eql (cas (task-function task) function nil) 37 | function)) 38 | (funcall function task)))))) 39 | 40 | (declaim (inline random-bit)) 41 | (defun random-bit (state max) 42 | (return-from random-bit 0) 43 | (let ((random (logand (1- (ash 1 (integer-length max))) 44 | (random (1+ most-positive-fixnum) 45 | state)))) 46 | (if (zerop random) 47 | 0 48 | (logxor random (1- random))))) 49 | 50 | (declaim (inline %bulk-find-task)) 51 | (defun %bulk-find-task (bulk hint random) 52 | (declare (type fixnum hint) 53 | (type (or null bulk-task) bulk) 54 | (type random-state random)) 55 | (when (null bulk) 56 | (return-from %bulk-find-task (values nil nil))) 57 | (let* ((subtasks (bulk-task-subtasks bulk)) 58 | (begin hint) 59 | (end (length subtasks))) 60 | (flet ((acquire (index) 61 | (let ((x (aref subtasks index))) 62 | (when (and x 63 | (eql (cas (svref subtasks index) x nil) 64 | x)) 65 | (atomic-decf (bulk-task-waiting bulk)) 66 | (return-from %bulk-find-task 67 | (values x index))))) 68 | (quick-check () 69 | (when (zerop (bulk-task-waiting bulk)) 70 | (return-from %bulk-find-task 71 | (values nil nil))))) 72 | (declare (inline acquire quick-check)) 73 | (when (>= begin end) 74 | (setf begin 0)) 75 | (quick-check) 76 | (acquire begin) 77 | (setf begin (mod (logxor begin (random-bit random end)) 78 | end)) 79 | (loop 80 | (quick-check) 81 | (let ((index (position nil subtasks :start begin :end end :test-not #'eql))) 82 | (cond (index 83 | (setf begin (1+ index)) 84 | (acquire index)) 85 | ((zerop begin) 86 | (return (values nil nil))) 87 | (t 88 | (setf begin 0 89 | end hint)))))))) 90 | 91 | -------------------------------------------------------------------------------- /xecto-impl-scan.lisp: -------------------------------------------------------------------------------- 1 | (in-package "XECTO-IMPL") 2 | 3 | (defun scan-xecto (fun arg) 4 | (declare (type xecto arg)) 5 | (let ((shape (xecto-shape arg))) 6 | (multiple-value-bind (r-size r-shape) 7 | (%canonical-size-and-shape shape) 8 | (let ((src-spine (aref shape 0)) 9 | (dst-spine (aref r-shape 0)) 10 | (src-bulk (remove-index shape 0)) 11 | (dst-bulk (remove-index r-shape 0))) 12 | (assert (eql (car src-spine) (car dst-spine))) 13 | (execute-scan fun r-size r-shape 14 | (list (car dst-spine) (cdr dst-spine) (cdr src-spine)) 15 | (xecto-loop-nest:optimize (cons 0 dst-bulk) 16 | (cons (xecto-offset arg) src-bulk)) 17 | arg))))) 18 | 19 | (defun compute-scan-tasks (function spine pattern arg) 20 | (let ((tasks (make-array 16 :fill-pointer 0 :adjustable t)) 21 | (data (xecto-data arg))) 22 | (destructuring-bind (offsets . loop) pattern 23 | (declare (type (simple-array index (2)) offsets) 24 | (type simple-vector loop)) 25 | (labels 26 | ((rec (depth offsets) 27 | (declare (type (simple-array index (2)) offsets)) 28 | (let ((offsets (copy-seq offsets))) 29 | (if (= depth (length loop)) 30 | (vector-push-extend 31 | (let ((offsets (copy-seq offsets))) 32 | (lambda (dst index) index 33 | (execute-subscan dst function spine 34 | offsets data))) 35 | tasks) 36 | (destructuring-bind (trip . strides) (aref loop depth) 37 | (declare (type (simple-array fixnum (2)) strides)) 38 | (loop repeat trip do 39 | (rec (1+ depth) offsets) 40 | (map-into offsets #'+ 41 | offsets strides))))))) 42 | (rec 0 offsets))) 43 | (coerce tasks 'simple-vector))) 44 | 45 | (defun execute-subscan (destination function spine offsets arg) 46 | (declare (type vector-future:vector-future destination arg) 47 | (type (simple-array index (2)) offsets)) 48 | (destructuring-bind (repeat dst-stride src-stride) spine 49 | (declare (type index repeat) 50 | (type fixnum dst-stride src-stride)) 51 | (let* ((dst-vec (vector-future:data destination)) 52 | (dst-off (aref offsets 0)) 53 | (src-vec (vector-future:data arg)) 54 | (src-off (aref offsets 1)) 55 | (acc (aref src-vec src-off))) 56 | (declare (type double-float acc) 57 | (type index dst-off src-off)) 58 | (setf (aref dst-vec dst-off) acc) 59 | (if (eql function #'+) 60 | (loop repeat (1- repeat) do 61 | (incf src-off src-stride) 62 | (incf dst-off dst-stride) 63 | (setf acc (setf (aref dst-vec dst-off) 64 | (+ acc (aref src-vec src-off))))) 65 | (loop repeat (1- repeat) do 66 | (incf src-off src-stride) 67 | (incf dst-off dst-stride) 68 | (setf (aref dst-vec dst-off) acc) 69 | (setf acc (funcall function acc (aref src-vec src-off)))))))) 70 | 71 | (defun execute-scan (fun r-size r-shape spine pattern arg) 72 | (let* ((tasks (compute-scan-tasks fun spine pattern arg)) 73 | (data (vector-future:make r-size 74 | (list (xecto-data arg)) 75 | tasks))) 76 | (%make-xecto r-shape data))) 77 | -------------------------------------------------------------------------------- /status.lisp: -------------------------------------------------------------------------------- 1 | (defpackage "STATUS" 2 | (:use "CL" "SB-EXT" "SB-THREAD") 3 | (:export "SLOW-STATUS" "DEFINE-STATUS-TYPE")) 4 | 5 | (in-package "STATUS") 6 | 7 | (defstruct (slow-status 8 | (:constructor nil)) 9 | (status nil :type t) 10 | (lock (make-mutex) :type mutex 11 | :read-only t) 12 | (cvar (make-waitqueue) :type waitqueue 13 | :read-only t)) 14 | 15 | (defmacro define-status-type (type-name 16 | (&key (fast-type t) 17 | (status-type t) 18 | (default-status '(error "Status missing")) 19 | (constructor 20 | (intern 21 | (format nil "MAKE-~A" 22 | (symbol-name type-name)))) 23 | (final-states '())) 24 | fast-accessor 25 | status-function 26 | wait-function 27 | upgrade-function) 28 | `(progn 29 | (defstruct (,type-name 30 | (:constructor ,constructor) 31 | (:include slow-status 32 | (status ,default-status :type ,status-type)))) 33 | 34 | (defun ,status-function (value) 35 | (declare (type ,fast-type value)) 36 | (let ((%status (,fast-accessor value))) 37 | (if (typep %status ',type-name) 38 | (slow-status-status %status) 39 | %status))) 40 | 41 | (defun ,wait-function (value &rest stopping-conditions) 42 | (declare (type ,fast-type value)) 43 | (declare (dynamic-extent stopping-conditions)) 44 | (let (slow 45 | slow-status) 46 | (loop 47 | (let ((%status (,fast-accessor value))) 48 | (when (typep %status ',type-name) 49 | (setf slow-status %status) 50 | (return)) 51 | (when (member %status stopping-conditions) 52 | (return-from ,wait-function %status)) 53 | (if slow 54 | (setf (slow-status-status slow) %status) 55 | (setf slow (,constructor :status %status))) 56 | (when (eql (cas (,fast-accessor value) %status slow) 57 | %status) 58 | (setf slow-status slow) 59 | (return)))) 60 | (let* ((slow-status slow-status) 61 | (lock (slow-status-lock slow-status)) 62 | (cvar (slow-status-cvar slow-status))) 63 | (declare (type slow-status slow-status)) 64 | (return-from ,wait-function 65 | (with-mutex (lock) 66 | (loop 67 | (let ((status (slow-status-status slow-status))) 68 | (when (member status stopping-conditions) 69 | (return status))) 70 | (condition-wait cvar lock))))))) 71 | 72 | (defun ,upgrade-function (value to &rest from) 73 | (declare (type ,fast-type value)) 74 | (declare (dynamic-extent from)) 75 | (let (slow-status) 76 | (loop 77 | (let ((%status (,fast-accessor value))) 78 | (when (typep %status ',type-name) 79 | (setf slow-status %status) 80 | (return)) 81 | (when (or (not (member %status from)) 82 | (eql (compare-and-swap (,fast-accessor value) 83 | %status to) 84 | %status)) 85 | (return-from ,upgrade-function %status)))) 86 | (with-mutex ((slow-status-lock slow-status)) 87 | (let ((status (slow-status-status slow-status))) 88 | (when (member status from) 89 | (setf (slow-status-status slow-status) to) 90 | (when (or ,@(mapcar (lambda (x) 91 | `(eql to ',x)) 92 | final-states)) 93 | (setf (,fast-accessor value) to)) 94 | (condition-broadcast (slow-status-cvar slow-status))) 95 | status)))))) 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Xecto: 80% of xectors 2 | ===================== 3 | 4 | This is a library for regular array parallelism in CL. It's also an 5 | "80%" library: the goal isn't to extract all the performance there is, 6 | but to allow programmers to easily express computations with a decent 7 | overhead compared to hand-written loops, in common cases. Uncommon 8 | cases should either be handled specially, or rewritten to better fit 9 | Xecto's simplistic (and simple) approach. 10 | 11 | Overview: 12 | 13 | * Structured data 14 | * Typed arrays 15 | * Views (later) 16 | * Reshaping 17 | * Bulk operations 18 | - Copy on write 19 | - Parallelism 20 | * SIMD 21 | * Thread-level 22 | 23 | Structured data 24 | --------------- 25 | 26 | _Very lazily sketched out; this looks more like an interface design 27 | issue_ 28 | 29 | Xecto provides homogeneously, but dynamically-typed bulk structured 30 | data. Arrays of structures are actually always structures of 31 | specialised, homogeneous arrays. 32 | 33 | [more stuff, when it's better thought out] 34 | 35 | Think data frames: it's not just a bunch of vectors, there's a level 36 | of semantic typing. 37 | 38 | Upgrading rules?? 39 | 40 | Typed arrays 41 | ------------ 42 | 43 | _Prototyped; just more of the same_ 44 | 45 | Arrays in Xecto are typed: currently, we only have arrays of double 46 | floats, but we'll also have arrays of single floats, machine integers, 47 | etc. What's the stance on T arrays? No clue. 48 | 49 | Arrays can be of arbitrary rank and size; plans include allocating 50 | them from the foreign heap (or directly via `mmap`). 51 | 52 | Reshaping 53 | --------- 54 | 55 | _Prototyped_ 56 | 57 | As with many similar libraries, each array is represented as a flat 58 | data vector and shape information; shape information includes the 59 | dimensions, and an affine transformation from the coordinates to an 60 | index in the data vector. 61 | 62 | Data vectors are immutable, but reference counted to help with 63 | copy-on-write. However, arrays themselves are mutable: the copying 64 | (if any) is transparent, and references to data vectors are updated as 65 | needed. 66 | 67 | This means that operations like slicing or transposition are nearly 68 | free and do not directly entail copying or allocation. 69 | 70 | Bulk operations 71 | --------------- 72 | 73 | _Half prototyped_ 74 | 75 | Usual stuff: map, reduce, scan. 76 | 77 | Some amount of recycling rule: single element is replicated as needed, 78 | but nothing more. 79 | 80 | Reduce and scan work on the first dimension of the single input; the 81 | reduced/scanned function is then applied, map-like on the remaining 82 | dimensions. 83 | 84 | 85 | ## Copy on write 86 | 87 | _Not yet, but it's a SMOP_ 88 | 89 | Mention ! variants (foo-into). Expresses partial writing, but also 90 | storage reuse. 91 | 92 | `mmap` and `tmpfs` for TLB-level copying. 93 | 94 | ## Parallelism 95 | 96 | The library is "just" a minimally smart interpreter; the upside is 97 | that the primitives are chosen so they execute efficiently. 98 | 99 | Rather than working with scalars, the primitives compute on strided 100 | spans of vectors (for inputs and outputs), much like level 1 BLAS 101 | operations. This allows for SIMD-level parallelism 102 | 103 | This also means that each operation boils down to a large number of 104 | specialized primitive calls; that's where thread-level parallelism 105 | comes in. 106 | 107 | ### SIMD 108 | 109 | _Hard part done_ 110 | 111 | Each map operation boils down to a perfect nest of for-loops. The 112 | nesting is reordered for locality: we attempt to get monotonous 113 | address sequences as much as possible, especially in the result 114 | vector. Loops are also merged when possible to increase trip counts 115 | and reduce nesting depth. Finally, we attempt to ensure a trip count 116 | in the innermost loop, to better exploit the primitives. 117 | 118 | Primitives are pre-compiled and specialised for some key trip count 119 | and stride values. That's how we get SIMD. 120 | 121 | Execution then proceeds by first finding the primitive corresponding 122 | to the operation and the inner loop's stride and trip count, and 123 | interprets the remainder of the loop nest. 124 | 125 | ### TLP 126 | 127 | _Half-designed_ 128 | 129 | As shown earlier, each operation is executed as a perfect loop nest 130 | that gives rise to a number of primitive operations. The key to 131 | exploiting threads is that operations are implemented as futures 132 | (with a thread pool and task stealing). 133 | 134 | Each outer loop is executed a couple times to yield a small number of 135 | tasks. Tasks then note dependencies, which gives us pipelining. 136 | 137 | -> Note: probably want early dealloc. 138 | 139 | Futures are triggered via a stack and task stealing, so we get 140 | locality for free. 141 | 142 | NUMA awareness via hashing on middle bits of written addresses. 143 | 144 | -------------------------------------------------------------------------------- /xecto-impl-map.lisp: -------------------------------------------------------------------------------- 1 | (in-package "XECTO-IMPL") 2 | 3 | (defun map-xecto (fun arg &rest args) 4 | (declare (type xecto arg)) 5 | (let* ((args (cons arg args)) 6 | (shapes (mapcar #'xecto-shape args))) 7 | (multiple-value-bind (r-size r-shape) 8 | (%canonical-size-and-shape (reduce (lambda (x y) 9 | (if (> (length x) (length y)) 10 | x y)) 11 | shapes)) 12 | (map-into shapes (lambda (x) 13 | (extend-shape-or-die r-shape x)) 14 | shapes) 15 | (apply 'execute-map 16 | fun r-size r-shape 17 | (apply 'xecto-loop-nest:optimize 18 | (cons 0 r-shape) 19 | (mapcar (lambda (xecto shape) 20 | (cons (xecto-offset xecto) shape)) 21 | args shapes)) 22 | args)))) 23 | 24 | (defvar *max-inner-loop-count* (ash 1 16)) 25 | 26 | (defun compute-map-tasks (function pattern &rest arguments) 27 | (let ((tasks (make-array 16 :fill-pointer 0 :adjustable t)) 28 | (data (map 'simple-vector #'xecto-data arguments)) 29 | (max-count *max-inner-loop-count*)) 30 | (destructuring-bind (offsets . loop) pattern 31 | (declare (type (simple-array index 1) offsets) 32 | (type simple-vector loop)) 33 | (labels 34 | ((rec (depth offsets) 35 | (declare (type (simple-array index 1) offsets)) 36 | (let ((offsets (copy-seq offsets))) 37 | (destructuring-bind (trip . strides) (aref loop depth) 38 | (if (= depth (1- (length loop))) 39 | (loop for i below trip by max-count 40 | do (let* ((start i) 41 | (count (min max-count 42 | (- trip start)))) 43 | (vector-push-extend 44 | (let ((offsets (copy-seq offsets)) 45 | (loop (cons count strides))) 46 | (lambda (dst index) index 47 | (execute-submap dst function 48 | offsets 49 | loop 50 | data))) 51 | tasks) 52 | (map-into offsets 53 | (lambda (x inc) 54 | (+ x (* inc max-count))) 55 | offsets strides))) 56 | (loop repeat trip do 57 | (rec (1+ depth) offsets) 58 | (map-into offsets #'+ 59 | offsets strides))))))) 60 | (rec 0 offsets))) 61 | (coerce tasks 'simple-vector))) 62 | 63 | (defun execute-submap (destination function offsets loop arguments) 64 | (declare (type vector-future:vector-future destination) 65 | (type (simple-array index 1) offsets) 66 | (type (cons index (simple-array fixnum 1)) loop) 67 | (type (simple-array vector-future:vector-future 1) 68 | arguments)) 69 | (let ((data (make-array (1+ (length arguments)))) 70 | (offsets (copy-seq offsets))) 71 | (declare (type (simple-array (simple-array double-float 1) 1) data)) 72 | (setf (aref data 0) (vector-future:data destination)) 73 | (loop for i from 1 below (length data) do 74 | (setf (aref data i) (vector-future:data (aref arguments (1- i))))) 75 | (destructuring-bind (repeat . strides) loop 76 | (if (eql function #'+) 77 | (loop for i below repeat do 78 | (setf (aref (aref data 0) (aref offsets 0)) 79 | (let ((acc 0d0)) 80 | (declare (optimize speed)) 81 | (declare (double-float acc)) 82 | (loop for j from 1 below (length data) 83 | do (incf acc (aref (aref data j) (aref offsets j)))) 84 | acc)) 85 | (map-into offsets #'+ offsets strides)) 86 | (loop for i below repeat do 87 | (setf (aref (aref data 0) (aref offsets 0)) 88 | (apply function 89 | (loop for j from 1 below (length data) 90 | collect (aref (aref data j) (aref offsets j))))) 91 | (map-into offsets #'+ offsets strides)))))) 92 | 93 | (defun execute-map (fun r-size r-shape 94 | pattern 95 | &rest args) 96 | (let* ((tasks (apply 'compute-map-tasks fun pattern args)) 97 | (data (vector-future:make r-size 98 | (mapcar #'xecto-data args) 99 | tasks))) 100 | (%make-xecto r-shape data))) 101 | -------------------------------------------------------------------------------- /loop-nest-transpose.lisp: -------------------------------------------------------------------------------- 1 | (defpackage "XECTO-LOOP-NEST" 2 | (:use "CL") 3 | (:shadow "OPTIMIZE") 4 | (:export "OPTIMIZE" "*MINIMAL-INNER-TRIP-COUNT*")) 5 | 6 | ;;; Optimize perfect constant loop nests. 7 | ;;; Input: (offset . shape)+ 8 | ;;; offset: initial offset of index, 9 | ;;; shape: simple-vector of (trip-count . stride) 10 | ;;; 11 | ;;; All the shapes must be compatible: trip count is the 12 | ;;; same at each nesting depth, and total depth is the same. 13 | ;;; 14 | ;;; Optimize will reorder the nesting to have lower strides 15 | ;;; at the bottom, merge outer loops that can be expressed 16 | ;;; as additional iterations of an inner one, and reorder the 17 | ;;; nesting again to ensure a *minimal-inner-trip-count*, as 18 | ;;; much as possible. 19 | ;;; 20 | ;;; The output is (offsets . loops), where offsets is a 21 | ;;; (simple-array index 1) of initial offsets, and loops 22 | ;;; a simple-vector of (trip-count . strides), where strides 23 | ;;; is a (simple-array fixnum 1). 24 | 25 | (in-package "XECTO-LOOP-NEST") 26 | 27 | (deftype index () 28 | '(and unsigned-byte fixnum)) 29 | 30 | (defun remove-index (vector index) 31 | (remove-if (constantly t) vector :start index :count 1)) 32 | 33 | (defun shapes-compatible-p (shapes) 34 | (let ((shape (aref shapes 0))) 35 | (loop for i from 1 below (length shapes) 36 | for other = (aref shapes i) 37 | always (every (lambda (x y) 38 | (= (car x) (car y))) 39 | shape other)))) 40 | 41 | (defun lex-compare (x y) 42 | (map nil (lambda (x y) 43 | (let ((x (abs x)) 44 | (y (abs y))) 45 | (cond ((< x y) (return-from lex-compare -1)) 46 | ((> x y) (return-from lex-compare 1))))) 47 | x y) 48 | 0) 49 | 50 | (defun transpose-shapes (offsets shapes) 51 | (declare (type (simple-array index 1) offsets) 52 | (type simple-vector shapes)) 53 | (assert (shapes-compatible-p shapes)) 54 | (let* ((dimensions (map 'simple-vector #'car (aref shapes 0))) 55 | (pattern (make-array (length dimensions))) 56 | (n (length shapes))) 57 | (dotimes (i (length dimensions) pattern) 58 | (let ((strides (make-array n :element-type 'fixnum)) 59 | (count (aref dimensions i))) 60 | (dotimes (j n) 61 | (setf (aref strides j) 62 | (cdr (aref (aref shapes j) i)))) 63 | (let ((nz (find 0 strides :test-not 'eql))) 64 | (when (and nz 65 | (minusp nz)) 66 | (map-into offsets (lambda (stride offset) 67 | (+ offset (* stride count))) 68 | strides offsets) 69 | (map-into strides #'- strides))) 70 | (setf (aref pattern i) (cons count strides)))))) 71 | 72 | (defun merge-pattern-1 (pattern) 73 | (declare (type simple-vector pattern)) 74 | (let ((len (length pattern))) 75 | (loop 76 | for i from (1- len) downto 0 77 | for (i-count . i-strides) = (aref pattern i) 78 | do (loop 79 | for j from (1- i) downto 0 80 | for (j-count . j-strides) = (aref pattern j) 81 | do (when (every (lambda (i-stride j-stride) 82 | (= (* i-stride i-count) j-stride)) 83 | i-strides j-strides) 84 | (setf (car (aref pattern i)) (* i-count j-count)) 85 | (return-from merge-pattern-1 (remove-index pattern j))))))) 86 | 87 | (defun merge-pattern (pattern) 88 | (declare (type simple-vector pattern)) 89 | (loop 90 | (let ((new-pattern (merge-pattern-1 pattern))) 91 | (if new-pattern 92 | (setf pattern new-pattern) 93 | (return pattern))))) 94 | 95 | (defvar *minimal-inner-trip-count* 16) 96 | 97 | (defun ensure-minimal-trip-count (pattern) 98 | (declare (type simple-vector pattern)) 99 | (let ((best-index nil) 100 | (best-count 0)) 101 | (loop for i upfrom 0 102 | for (count) across pattern 103 | for clamped-count = (min count *minimal-inner-trip-count*) 104 | do (when (>= clamped-count best-count) 105 | (setf best-index i))) 106 | (assert best-index) 107 | (let ((inner-loop (aref pattern best-index))) 108 | (replace pattern pattern :start1 best-index :start2 (1+ best-index)) 109 | (setf (aref pattern (1- (length pattern))) inner-loop)) 110 | pattern)) 111 | 112 | (defun optimize (offset-and-shape &rest offsets-and-shapes) 113 | (let* ((data (cons offset-and-shape offsets-and-shapes)) 114 | (offsets (map '(simple-array index 1) #'car data)) 115 | (shapes (map 'simple-vector #'cdr data)) 116 | (pattern (transpose-shapes offsets shapes))) 117 | (sort pattern (lambda (x y) 118 | (ecase (lex-compare (cdr x) (cdr y)) 119 | (-1 nil) 120 | ( 0 (< (car x) (car y))) 121 | ( 1 t)))) 122 | (cons offsets 123 | (if (zerop (length pattern)) 124 | (make-array 1 :initial-element 125 | (cons 1 (make-array (length data) 126 | :element-type 'fixnum 127 | :initial-element 0))) 128 | (ensure-minimal-trip-count (merge-pattern pattern)))))) 129 | -------------------------------------------------------------------------------- /xecto-impl.lisp: -------------------------------------------------------------------------------- 1 | (defpackage "XECTO-IMPL" 2 | (:use "CL" "SB-EXT" "SB-THREAD")) 3 | 4 | ;;; Internal primitive xecto stuff 5 | 6 | (in-package "XECTO-IMPL") 7 | 8 | ;; shape representation 9 | 10 | (deftype index () 11 | '(and unsigned-byte fixnum)) 12 | 13 | (deftype shape (&optional rank) 14 | `(simple-array (cons index fixnum) 15 | (,rank))) 16 | 17 | (defglobal **shape-table-lock** (make-mutex :name "SHAPE TABLE LOCK")) 18 | (defglobal **shape-table** (make-hash-table :test #'equalp 19 | :weakness :key-and-value)) 20 | 21 | (declaim (ftype (function (shape) (values shape &optional)) intern-shape)) 22 | (defun intern-shape (shape) 23 | (declare (type shape shape)) 24 | (with-mutex (**shape-table-lock**) 25 | (or (gethash shape **shape-table**) 26 | (setf (gethash shape **shape-table**) 27 | shape)))) 28 | 29 | (defstruct (xecto 30 | (:constructor %make-xecto (%shape %data 31 | &optional (offset 0) 32 | &aux (shape (intern-shape %shape)) 33 | (handle (list %data)))) 34 | (:copier %copy-xecto)) 35 | (shape nil :type shape) 36 | (offset nil :type index) 37 | (%data nil :type vector-future:vector-future) 38 | (handle nil :type (cons vector-future:vector-future null))) 39 | 40 | (defun xecto-data (xecto) 41 | (declare (type xecto xecto)) 42 | (xecto-%data xecto)) 43 | 44 | (defun (setf xecto-data) (data xecto) 45 | (declare (type vector-future:vector-future data) 46 | (type xecto xecto)) 47 | (vector-future:retain data) 48 | (vector-future:release (xecto-%data xecto)) 49 | (setf (xecto-%data xecto) data 50 | (car (xecto-handle xecto)) data) 51 | data) 52 | 53 | (defun set-finalizer (xecto) 54 | (finalize xecto (let ((handle (xecto-handle xecto))) 55 | (lambda () 56 | (vector-future:release (car handle)))))) 57 | 58 | (defun size-and-shape (dimensions) 59 | (unless (listp dimensions) 60 | (setf dimensions (list dimensions))) 61 | (let* ((rdim (reverse dimensions)) 62 | (shape (make-array (length rdim))) 63 | (len (length shape)) 64 | (stride 1)) 65 | (loop for i upfrom 1 66 | for dim in rdim 67 | do (assert (typep dim 'index)) 68 | (setf (aref shape (- len i)) (cons dim stride) 69 | stride (* stride dim))) 70 | (values stride shape))) 71 | 72 | (defun %canonical-size-and-shape (shape) 73 | (declare (type shape shape)) 74 | (let* ((len (length shape)) 75 | (canon (make-array len)) 76 | (stride 1)) 77 | (loop for i upfrom 1 upto len 78 | for dim = (car (aref shape (- len i))) 79 | do (assert (typep dim 'index)) 80 | (setf (aref canon (- len i)) (cons dim stride) 81 | stride (* stride dim))) 82 | (values stride (intern-shape canon)))) 83 | 84 | (defun make-xecto (dimensions &key initial-element) 85 | (multiple-value-bind (size shape) 86 | (size-and-shape dimensions) 87 | (let ((xecto (%make-xecto shape 88 | (vector-future:make size 89 | '() 90 | (if initial-element 91 | (vector (lambda (data index) index 92 | (fill (vector-future:data data) 93 | (float initial-element 1d0)))) 94 | #()))))) 95 | (set-finalizer xecto) 96 | xecto))) 97 | 98 | (defun wait (xecto &rest condition) 99 | (values xecto (apply 'future:wait (xecto-data xecto) condition))) 100 | 101 | (defun copy-xecto (xecto &key shape offset) 102 | (let ((new (%make-xecto (or shape 103 | (xecto-shape xecto)) 104 | (xecto-data xecto) 105 | (or offset 106 | (xecto-offset xecto))))) 107 | (vector-future:retain (xecto-data new)) 108 | (set-finalizer new))) 109 | 110 | #|| 111 | (gc :full t) 112 | (setf *print-circle* t *print-length* 20) 113 | (defvar xx) 114 | (defvar yy) 115 | (sb-thread:join-thread 116 | (sb-thread:make-thread 117 | (lambda () 118 | (parallel-future:with-context (11) 119 | (let () 120 | #+nil ((xx (make-xecto '(16 16) :initial-element 1)) 121 | (yy (transpose (make-xecto '(16 16) :initial-element 5) 0 1))) 122 | (setf xx (make-xecto '(16384 16384) :initial-element 1) 123 | yy (transpose (make-xecto '(16384 16384) :initial-element 5) 0 1)) 124 | (wait xx :done) 125 | (wait yy :done) 126 | (time (let ((x (map-xecto #'+ xx yy)) 127 | (y (scan-xecto #'+ xx))) 128 | (wait (reduce-xecto #'+ (reduce-xecto #'+ (map-xecto #'+ x y))) 129 | :done))) 130 | (setf *print-length* 20 xx nil yy nil) 131 | (sleep 1) 132 | (gc :full t) 133 | (room) 134 | (sleep 1) 135 | (gc :full t) 136 | (room)) 137 | parallel-future:*context*)))) 138 | 139 | Evaluation took: 140 | 104.652 seconds of real time 141 | 104.462529 seconds of total run time (101.230327 user, 3.232202 system) 142 | [ Run times consist of 0.248 seconds GC time, and 104.215 seconds non-GC time. ] 143 | 99.82% CPU 144 | 145 | Evaluation took: 146 | 49.085 seconds of real time 147 | 97.618102 seconds of total run time (94.157885 user, 3.460217 system) 148 | [ Run times consist of 0.256 seconds GC time, and 97.363 seconds non-GC time. ] 149 | 198.88% CPU 150 | 151 | Evaluation took: 152 | 26.356 seconds of real time 153 | 103.402463 seconds of total run time (100.010251 user, 3.392212 system) 154 | [ Run times consist of 0.404 seconds GC time, and 102.999 seconds non-GC time. ] 155 | 392.33% CPU 156 | 157 | Evaluation took: 158 | 14.404 seconds of real time 159 | 108.718794 seconds of total run time (105.314582 user, 3.404212 system) 160 | [ Run times consist of 0.612 seconds GC time, and 108.107 seconds non-GC time. ] 161 | 754.78% CPU 162 | 163 | Evaluation took: 164 | 11.439 seconds of real time 165 | 110.318895 seconds of total run time (106.422651 user, 3.896244 system) 166 | [ Run times consist of 0.896 seconds GC time, and 109.423 seconds non-GC time. ] 167 | 964.41% CPU 168 | ||# 169 | -------------------------------------------------------------------------------- /vector-futures.lisp: -------------------------------------------------------------------------------- 1 | (defpackage "VECTOR-FUTURE" 2 | (:use "CL" "SB-EXT") 3 | (:export "VECTOR-FUTURE" 4 | "RETAIN" 5 | "RELEASE" 6 | "MAKE" 7 | "DATA")) 8 | 9 | ;;; A vector-future is a parallel future that is also 10 | ;;; an on-demand-allocated vector. 11 | ;;; 12 | ;;; There's also a reference count in prevision of storage reuse 13 | ;;; and/or foreign allocation. 14 | ;;; 15 | ;;; retain/release update the reference count. 16 | ;;; data returns the data vector for a vector-future 17 | ;;; make creates a new vector future with a refcount of 1. 18 | ;;; (make allocation dependencies tasks) 19 | ;;; allocation defines the allocation procedure. 20 | ;;; integer -> make a brand new array 21 | ;;; vector-future: make a copy of the vector 22 | ;;; dependencies: list of dependencies (vector-futures) 23 | ;;; tasks: vector of functions (subtasks) 24 | 25 | (in-package "VECTOR-FUTURE") 26 | 27 | (defstruct (vector-future 28 | (:include parallel-future:future)) 29 | (refcount 0 :type word) 30 | (size 0 :type (and unsigned-byte fixnum)) 31 | (%data nil :type (or null (simple-array double-float 1))) 32 | (handle (list nil) :type cons :read-only t)) 33 | 34 | (defun vector-future-data (vector-future) 35 | (vector-future-%data vector-future)) 36 | 37 | (defun (setf vector-future-data) (value vector-future) 38 | (setf (vector-future-%data vector-future) value 39 | (car (vector-future-handle vector-future)) (make-weak-pointer value)) 40 | value) 41 | 42 | (defun data (vector-future) 43 | (declare (type vector-future vector-future)) 44 | (the (not null) 45 | (vector-future-data vector-future))) 46 | 47 | (defun retain (future) 48 | (when (zerop (atomic-incf (vector-future-refcount future))) 49 | (assert (null (vector-future-data future)))) 50 | nil) 51 | 52 | (defun release (future) 53 | (when (= 1 (atomic-decf (vector-future-refcount future))) 54 | (let ((data (vector-future-data future))) 55 | (when data 56 | (setf (vector-future-data future) nil) 57 | (sb-kernel:%shrink-vector data 0)))) 58 | nil) 59 | 60 | (defun finalize-vector-future (future) 61 | (declare (type vector-future future)) 62 | (finalize future (let ((handle (vector-future-handle future))) 63 | (lambda () 64 | (let* ((data (shiftf (car handle) nil)) 65 | (vector (and data (weak-pointer-value data)))) 66 | (when vector 67 | (sb-kernel:%shrink-vector vector 0)))))) 68 | future) 69 | 70 | (defun make-allocator (allocation) 71 | ;; finalize this 72 | (etypecase allocation 73 | ((and unsigned-byte fixnum) 74 | (lambda (data) 75 | (declare (type vector-future data)) 76 | (retain data) ;; maybe we should just abort here... 77 | (setf (vector-future-data data) 78 | (make-array allocation :element-type 'double-float)) 79 | nil)) 80 | (vector-future 81 | (retain allocation) 82 | (lambda (data) 83 | (declare (type vector-future data)) 84 | (retain data) 85 | (let ((source (vector-future-data allocation))) 86 | (declare (type (simple-array double-float 1) source)) 87 | (cond ((= 1 (vector-future-refcount allocation)) 88 | (shiftf (vector-future-data data) 89 | (vector-future-data allocation) 90 | nil)) 91 | (t 92 | (setf (vector-future-data data) 93 | (make-array (length source) 94 | :element-type 'double-float 95 | :initial-contents source)))) 96 | (release allocation)) 97 | nil)))) 98 | 99 | (defun make-deallocator (dependencies) 100 | (map nil #'retain dependencies) 101 | (lambda (data) 102 | (declare (type vector-future data)) 103 | (release data) 104 | (map nil #'release dependencies))) 105 | 106 | (defun make (allocation dependencies tasks 107 | &optional constructor 108 | &rest arguments) 109 | (declare (dynamic-extent arguments)) 110 | (let ((size (if (vector-future-p allocation) 111 | (vector-future-size allocation) 112 | allocation))) 113 | (finalize-vector-future 114 | (apply 'parallel-future:make 115 | (coerce (remove-duplicates 116 | (if (vector-future-p allocation) 117 | (adjoin allocation dependencies) 118 | dependencies)) 119 | 'simple-vector) 120 | (make-allocator allocation) 121 | (coerce tasks 'simple-vector) 122 | (make-deallocator dependencies) 123 | (or constructor #'make-vector-future) 124 | :size size 125 | :refcount 1 126 | :%data nil 127 | :handle (list nil) 128 | arguments)))) 129 | 130 | #|| 131 | ;; demo 132 | 133 | (defun pmap (fun x y) 134 | (let* ((args (list x y)) 135 | (size (min (vector-future-size x) 136 | (vector-future-size y)))) 137 | (make size args 138 | (loop with step = (max 1 (round size 16)) 139 | for i from 0 below size by step 140 | collect 141 | (let ((end (min size (+ i step))) 142 | (start i)) 143 | (lambda (data) 144 | (let ((r (vector-future-data data)) 145 | (x (vector-future-data x)) 146 | (y (vector-future-data y))) 147 | (declare (type (simple-array double-float 1) r x y)) 148 | (loop for i from start below end 149 | do (setf (aref r i) 150 | (funcall fun (aref x i) (aref y i))))))))))) 151 | 152 | (defun test (n) 153 | (let* ((src (make n '() '())) 154 | (one (pmap (lambda (x y) x y 155 | 1d0) 156 | src src)) 157 | (two (pmap (lambda (x y) x y 158 | 2d0) 159 | src src)) 160 | (three (pmap #'+ one two)) 161 | (four (pmap #'* two two))) 162 | (release src) 163 | (release one) 164 | (release two) 165 | (future:wait two :done) 166 | (format t "rc: ~A ~A~%" 167 | (vector-future-refcount two) 168 | (future:status two)) 169 | (sleep 1) 170 | (future:wait three :done) 171 | (future:wait four :done) 172 | (format t "rc: ~A~%" (vector-future-refcount two)) 173 | (values src one two three four))) 174 | ||# 175 | -------------------------------------------------------------------------------- /futures.lisp: -------------------------------------------------------------------------------- 1 | (defpackage "FUTURE" 2 | (:use "CL" "SB-EXT" "SB-THREAD") 3 | (:export "FUTURE" 4 | "DEPENDENTS" 5 | "STATUS" "WAIT" "CANCEL" 6 | "MARK-DEPENDENCIES" "THAW" "MARK-DONE")) 7 | 8 | ;;; Infrastructure for futures: lenient-evaluated values 9 | ;;; 10 | ;;; A future is a computation with a set of dependencies; whenever 11 | ;;; all the dependencies for a computation have been fully executed, 12 | ;;; it too is executed. 13 | ;;; 14 | ;;; In order to do so, each future also tracks its dependents 15 | ;;; in list of weak pointers. When the future is marked as done, the 16 | ;;; depcount (number of yet unfulfilled dependencies) of its 17 | ;;; dependents is decremented. When all the dependencies are 18 | ;;; fullfilled (depcount is zero), the future is recursively executed. 19 | ;;; 20 | ;;; For convenience, futures are also bulk-tasks, but this is irrelevant 21 | ;;; to the interface. 22 | ;;; 23 | ;;; Slots in a FUTURE: 24 | ;;; function: list designator of functions to be called on execution; 25 | ;;; they receive the future as their single argument. 26 | ;;; dependents: list of weak pointers to dependents, initialized 27 | ;;; to zero and updated on demand. 28 | ;;; dependencies: vector of dependencies 29 | ;;; depcount: number of dependencies yet to be fullfilled, updated on 30 | ;;; demand. 31 | ;;; %status: current status of the future. Upgraded to a slow, lock-ful 32 | ;;; representation as needed. 33 | ;;; 34 | ;;; A future goes through a few stages: 35 | ;;; 36 | ;;; :orphan is the initial stage. The future is initialized, but not 37 | ;;; yet linked to its dependencies. 38 | ;;; :frozen futures have been linked to their dependencies (via 39 | ;;; MARK-DEPENDENCIES), but not been marked for execution. 40 | ;;; :waiting futures have been marked for execution (via THAW), 41 | ;;; and will wait until all their dependencies are satisfied. 42 | ;;; :running futures have had all their dependencies satisfied 43 | ;;; :done futures have finished executing 44 | ;;; :cancelled futures have been cancalled 45 | ;;; 46 | ;;; STATUS and WAIT can be used to poll a future's current status or wait 47 | ;;; until it becomes equal to a value in a set of status. 48 | ;;; 49 | ;;; CANCEL marks a future as cancelled, unless it is already executing. 50 | ;;; 51 | ;;; Creating a future should follow this pattern: 52 | ;;; - Allocate a future 53 | ;;; - MARK-DEPENDENCIES 54 | ;;; - Maybe walk its DEPENDENTS list for analyses 55 | ;;; - THAW it 56 | ;;; - Maybe WAIT until :cancelled or :done 57 | 58 | (in-package "FUTURE") 59 | 60 | (deftype status () 61 | '(member :orphan :frozen :waiting :running :done :cancelled)) 62 | 63 | (defstruct (future 64 | (:include work-stack:bulk-task) 65 | (:constructor nil)) 66 | (function nil :type (or list symbol function)) 67 | (dependents nil :type (or list (member :done :cancelled))) 68 | (dependencies nil :type simple-vector) 69 | (depcount 0 :type word) 70 | (%status :orphan :type (or status slow-status))) 71 | 72 | (defun dependents (future) 73 | (let ((dependents (future-dependents future))) 74 | (and (listp dependents) 75 | dependents))) 76 | 77 | (status:define-status-type slow-status 78 | (:fast-type future 79 | :status-type status 80 | :default-status :orphan 81 | :final-states (:done :cancelled)) 82 | future-%status status wait status-upgrade) 83 | 84 | (defun execute (future) 85 | (unless (eql (status-upgrade future :running :waiting) 86 | :waiting) 87 | (return-from execute)) 88 | (let ((function (future-function future))) 89 | (etypecase function 90 | (null) 91 | (cons 92 | (dolist (function function) 93 | (funcall function future))) 94 | ((or symbol function) 95 | (funcall function future))) 96 | (setf (future-function future) nil)) 97 | nil) 98 | 99 | (defun cancel (future) 100 | (declare (type future future)) 101 | (let ((status (status-upgrade future :cancelled :orphan :frozen :waiting))) 102 | (when (member status '(:frozen :waiting)) 103 | ;; recursively mark as cancelled? 104 | (setf (future-dependents future) :cancelled)) 105 | status)) 106 | 107 | (defun thaw (future &key (recursive t)) 108 | (declare (type future future)) 109 | (labels ((rec (future) 110 | (declare (type future future)) 111 | (case (status-upgrade future :waiting :frozen) 112 | (:orphan (error "Thawing orphan future")) 113 | (:frozen 114 | (when recursive 115 | (map nil #'rec (future-dependencies future))) 116 | (when (zerop (future-depcount future)) 117 | (execute future)))))) 118 | (rec future)) 119 | future) 120 | 121 | (defun mark-dependencies (future &key (thaw t) (recursive nil)) 122 | (declare (type future future)) 123 | (assert (eql (status-upgrade future :frozen :orphan) :orphan)) 124 | (let ((wp (make-weak-pointer future))) 125 | (flet ((mark-dep (dep) 126 | (declare (type future dep)) 127 | (ecase (status dep) 128 | (:orphan 129 | (if recursive 130 | (mark-dependencies dep :thaw thaw :recursive t) 131 | (error "Dependency is an orphan"))) 132 | ((:frozen :waiting :running)) 133 | (:done 134 | (return-from mark-dep)) 135 | (:cancelled 136 | (error "Dependency cancelled"))) 137 | (let ((cons (list wp))) 138 | (atomic-incf (future-depcount future)) 139 | (loop 140 | (let ((dependents (future-dependents dep))) 141 | (setf (cdr cons) dependents) 142 | (cond ((eql dependents :done) 143 | (atomic-decf (future-depcount future)) 144 | (return-from mark-dep)) 145 | ((eql dependents :cancelled) 146 | ;; cancel self? 147 | (atomic-decf (future-depcount future)) 148 | (error "Dependency cancelled")) 149 | ((eql (cas (future-dependents dep) 150 | dependents cons) 151 | dependents) 152 | (return-from mark-dep)))))))) 153 | (declare (dynamic-extent #'mark-dep)) 154 | (map nil #'mark-dep (future-dependencies future)) 155 | (when thaw (thaw future))))) 156 | 157 | (defun mark-done (future) 158 | (declare (type future future)) 159 | (unless (eql :running (status-upgrade future :done :running)) 160 | (return-from mark-done)) 161 | (setf (future-dependencies future) #()) 162 | (let ((dependents 163 | (loop 164 | (let ((dependents (future-dependents future))) 165 | (when (or (eql dependents :done) 166 | (eql dependents :cancelled)) 167 | (return-from mark-done)) 168 | (when (eql (cas (future-dependents future) 169 | dependents :done) 170 | dependents) 171 | (return dependents)))))) 172 | (dolist (wp dependents) 173 | (let ((value (weak-pointer-value wp))) 174 | (when (and value 175 | (= 1 (atomic-decf (future-depcount value))) 176 | (eql :waiting (status value))) 177 | (execute value)))))) 178 | -------------------------------------------------------------------------------- /work-stack.lisp: -------------------------------------------------------------------------------- 1 | (defpackage "WORK-STACK" 2 | (:use "CL" "SB-EXT" "SB-THREAD" "WORK-UNIT") 3 | (:shadow cl:push) 4 | (:export "TASK" "TASK-P" "BULK-TASK" "BULK-TASK-P" 5 | "TASK-DESIGNATOR" 6 | "EXECUTE-TASK" 7 | "STACK" "MAKE" "P" 8 | "PUSH" "PUSH-ALL" "STEAL" "RUN-ONE")) 9 | 10 | ;;; Work-unit stack 11 | ;;; 12 | ;;; Normal task-stealing stack, with special support for tasks composed 13 | ;;; of subtasks. 14 | ;;; 15 | ;;; A task designator is either a function designator, a task, or a 16 | ;;; bulk-task. 17 | ;;; 18 | ;;; A function designator is called, and a task's fun is called with the 19 | ;;; task as its only argument. 20 | ;;; 21 | ;;; When only those are used, the work stack is a normal stack of task 22 | ;;; units, with PUSH to insert a new task (PUSH-ALL to insert a sequence 23 | ;;; of tasks), STEAL to get one task from the bottom of the stack, and 24 | ;;; RUN-ONE to execute and pop the topmost task. 25 | ;;; 26 | ;;; Bulk-task objects represent a set of subtasks to be executed, and 27 | ;;; a sequence of operations to perform once all the subtasks have been 28 | ;;; completed. 29 | ;;; 30 | ;;; Task stealing of bulk tasks is special: bulk tasks have multiple 31 | ;;; owners, so bulk tasks aren't stolen as much as forcibly shared. All 32 | ;;; the workers that share a bulk task cooperate to complete the subtasks; 33 | ;;; the last worker to finish executing a subtask then executes the 34 | ;;; cleanups. 35 | ;;; 36 | ;;; Subtasks and cleanups are functions that are called with the 37 | ;;; subtask as their one argument. 38 | ;;; 39 | ;;; Cooperating threads avoid hammering the same subtasks by 40 | ;;; beginning/resuming their search for remaining subtasks from different 41 | ;;; indices: PUSH/PUSH-ALL take an optional argument to determine the 42 | ;;; fraction of the subtask vector from which to initialise the thread's 43 | ;;; search (defaults to 0). Incidentally, this is also useful for 44 | ;;; locality, when the subtasks are sorted right. 45 | 46 | (in-package "WORK-STACK") 47 | 48 | (defconstant +stacklet-size+ 128) 49 | 50 | (declaim (inline split-index)) 51 | (defun split-index (index) 52 | (multiple-value-bind (major minor) 53 | (truncate index +stacklet-size+) 54 | (cond ((plusp minor) 55 | (values major minor)) 56 | ((zerop major) 57 | (values 0 0)) 58 | (t 59 | (values (1- major) +stacklet-size+))))) 60 | 61 | (defstruct stack 62 | (stacklets (error "Foo") :type (array (simple-vector #.+stacklet-size+) 1) 63 | :read-only t) 64 | (top 0 :type (and unsigned-byte fixnum)) 65 | (bottom 0 :type (and unsigned-byte fixnum))) 66 | 67 | (defun make () 68 | (make-stack :stacklets (make-array 16 :fill-pointer 0 :adjustable t))) 69 | 70 | (declaim (inline p)) 71 | (defun p (x) 72 | (stack-p x)) 73 | 74 | (defun %update-stack-top (stack) 75 | (declare (type stack stack)) 76 | (let ((top (stack-top stack))) 77 | (when (zerop top) 78 | (return-from %update-stack-top)) 79 | (multiple-value-bind (major minor) (split-index top) 80 | (let* ((stacklets (stack-stacklets stack)) 81 | (stacklet (aref stacklets major)) 82 | (position (position nil stacklet :from-end t :end minor :test-not #'eql))) 83 | (cond (position 84 | (setf (stack-top stack) (+ (* major +stacklet-size+) 85 | position 1))) 86 | (t 87 | (setf (stack-top stack) (* major +stacklet-size+)) 88 | (%update-stack-top stack))))))) 89 | 90 | (defun %push (stack value) 91 | (declare (type stack stack) (type (not null) value)) 92 | (%update-stack-top stack) 93 | (multiple-value-bind (stacklet index) 94 | (truncate (stack-top stack) +stacklet-size+) 95 | (let ((stacklets (stack-stacklets stack))) 96 | (loop while (<= (length stacklets) stacklet) 97 | do (vector-push-extend (make-array +stacklet-size+ :initial-element nil) 98 | stacklets)) 99 | (let ((stacklet (aref stacklets stacklet))) 100 | (setf (aref stacklet index) value) 101 | (incf (stack-top stack)) 102 | value)))) 103 | 104 | (defun steal (stack) 105 | (declare (type stack stack)) 106 | (labels ((update-bottom (i) 107 | (when (/= i (stack-bottom stack)) 108 | (setf (stack-bottom stack) i))) 109 | (sub-steal (begin end) 110 | (declare (type (and fixnum unsigned-byte) begin end)) 111 | (loop with stacklets = (stack-stacklets stack) 112 | for i from begin below (max end (length stacklets)) 113 | for stacklet = (aref stacklets i) 114 | do 115 | (let ((start 0)) 116 | (loop 117 | (let* ((position (position nil stacklet 118 | :start start 119 | :test-not #'eql)) 120 | (x (and position 121 | (aref stacklet position)))) 122 | (cond ((null position) 123 | (return)) 124 | ((null x) 125 | (setf start (1+ position))) 126 | ((consp x) 127 | (let ((bulk (cdr x))) 128 | (when (and bulk 129 | (plusp (bulk-task-waiting bulk))) 130 | (update-bottom i) 131 | (return-from steal bulk))) 132 | (setf (cdr x) nil) 133 | (setf start position) 134 | (when (eql x (cas (svref stacklet position) x nil)) 135 | (incf start))) 136 | ((eql x (cas (svref stacklet position) x nil)) 137 | (update-bottom i) 138 | (return-from steal x))))))))) 139 | (declare (inline update-bottom)) 140 | (let ((bottom (stack-bottom stack)) 141 | (top (ceiling (stack-top stack) +stacklet-size+))) 142 | (cond ((>= bottom top) 143 | (update-bottom 0) 144 | (sub-steal 0 top)) 145 | (t 146 | (sub-steal bottom top) 147 | (sub-steal 0 bottom) 148 | (update-bottom 0) 149 | nil))))) 150 | 151 | ;; bulk tasks are represented, on-stack as conses: the CAR is a hint 152 | ;; wrt where to start looking for subtasks, and the CDR is the bulk-task 153 | ;; object. When we're done with the bulk-task, the CDR is NIL. 154 | (declaim (inline bulk-task-hintify)) 155 | (defun bulk-task-hintify (x &optional (hint 0)) 156 | (declare (type (real 0 1) hint)) 157 | (etypecase x 158 | ((or function symbol task) x) 159 | (bulk-task 160 | (cons (truncate (* hint (length (bulk-task-subtasks x)))) 161 | x)))) 162 | 163 | (defun push (stack x &optional (hint 0)) 164 | (when x 165 | (%push stack (bulk-task-hintify x hint)))) 166 | 167 | (defun push-all (stack values &optional (hint 0)) 168 | (map nil (lambda (x) 169 | (when x 170 | (%push stack (bulk-task-hintify x hint)))) 171 | values)) 172 | 173 | (defun pop-one-task (stack) 174 | (declare (type stack stack)) 175 | (loop 176 | (when (zerop (stack-top stack)) 177 | (return nil)) 178 | (multiple-value-bind (major minor) (split-index (stack-top stack)) 179 | (let* ((stacklets (stack-stacklets stack)) 180 | (stacklet (aref stacklets major)) 181 | (position (position nil stacklet :from-end t 182 | :end minor 183 | :test-not #'eql))) 184 | (cond (position 185 | (let ((x (aref stacklet position))) 186 | (etypecase x 187 | (null) 188 | (cons 189 | (let ((bulk-task (cdr x))) 190 | (when (and bulk-task 191 | (plusp (bulk-task-waiting bulk-task))) 192 | (setf (stack-top stack) (+ 1 (* major +stacklet-size+) 193 | position)) 194 | (return x))) 195 | (setf (cdr x) nil 196 | (svref stacklet position) nil 197 | (stack-top stack) (+ (* major +stacklet-size+) 198 | position)) 199 | (barrier (:memory))) 200 | ((or task symbol function) 201 | (setf (stack-top stack) (+ (* major +stacklet-size+) 202 | position)) 203 | (when (eql (cas (svref stacklet position) x nil) x) 204 | (return x)))))) 205 | ((zerop major) 206 | (setf (stack-top stack) 0) 207 | (return nil)) 208 | (t 209 | (setf (stack-top stack) (* major +stacklet-size+)))))))) 210 | 211 | (declaim (inline bulk-find-task)) 212 | (defun bulk-find-task (hint-and-bulk random-state) 213 | (declare (type cons hint-and-bulk)) 214 | (destructuring-bind (hint . bulk) hint-and-bulk 215 | (declare (type fixnum hint) 216 | (type (or null bulk-task) bulk)) 217 | (when (null bulk) 218 | (return-from bulk-find-task (values nil nil nil))) 219 | (multiple-value-bind (task index) 220 | (%bulk-find-task bulk hint random-state) 221 | (cond (task 222 | (setf (car hint-and-bulk) index) 223 | (values task index bulk)) 224 | (t 225 | (setf (cdr hint-and-bulk) nil) 226 | (values nil nil nil)))))) 227 | 228 | (defun run-one (stack random-state) 229 | (let ((task (pop-one-task stack)) 230 | subtask subtask-index bulk-task) 231 | (cond ((not task) nil) 232 | ((atom task) 233 | (execute-task task) 234 | t) 235 | ((setf (values subtask subtask-index bulk-task) 236 | (bulk-find-task task random-state)) 237 | (let* ((bulk-task bulk-task) 238 | (function (bulk-task-subtask-function bulk-task))) 239 | (declare (type bulk-task bulk-task)) 240 | (if function 241 | (funcall function subtask bulk-task subtask-index) 242 | (funcall subtask bulk-task subtask-index)) 243 | (when (= (atomic-decf (bulk-task-remaining bulk-task)) 244 | 1) 245 | (setf (cdr task) nil) 246 | (setf (bulk-task-subtasks bulk-task) #() 247 | (bulk-task-subtask-function bulk-task) nil) 248 | (let ((cleanup (bulk-task-cleanup bulk-task))) 249 | (etypecase cleanup 250 | (null) 251 | (cons 252 | (dolist (cleanup cleanup) 253 | (funcall cleanup bulk-task))) 254 | ((or function symbol) 255 | (funcall cleanup bulk-task)))) 256 | (setf (bulk-task-cleanup bulk-task) nil))) 257 | t) 258 | (t 259 | (run-one stack random-state))))) 260 | -------------------------------------------------------------------------------- /thread-pool.lisp: -------------------------------------------------------------------------------- 1 | (defpackage "WORK-QUEUE" 2 | (:use "CL" "SB-EXT" "SB-THREAD") 3 | (:export "TASK" "TASK-P" "BULK-TASK" "BULK-TASK-P" 4 | "TASK-DESIGNATOR" 5 | "QUEUE" "MAKE" "P" "ALIVE-P" 6 | "ENQUEUE" "ENQUEUE-ALL" "STOP" 7 | "PUSH-SELF" "PUSH-SELF-ALL" 8 | "PROGRESS-UNTIL" 9 | "CURRENT-QUEUE" "WORKER-ID" "WORKER-COUNT") 10 | (:import-from "WORK-STACK" 11 | "TASK" "TASK-P" 12 | "BULK-TASK" "BULK-TASK-P" 13 | "TASK-DESIGNATOR")) 14 | 15 | ;;; Work-unit queue/stack, with thread-pool 16 | ;;; 17 | ;;; Normal work queue: created with a fixed number of worker threads, 18 | ;;; and a shared FIFO of work units (c.f. work-stack). 19 | ;;; 20 | ;;; However, each worker also has a work-stack. This way, tasks can 21 | ;;; spawn new tasks recursively, while enjoying temporal locality and 22 | ;;; skipping in front of the rest of the queue. 23 | ;;; 24 | ;;; ENQUEUE/ENQUEUE-ALL insert work units in the queue. 25 | ;;; 26 | ;;; PUSH-SELF/PUSH-SELF-ALL insert work units in the worker's local 27 | ;;; stack, or, if not executed by a worker, punt to ENQUEUE/ENQUEUE-ALL. 28 | ;;; 29 | ;;; Note that the work-stacks support task-stealing, so pushing to the 30 | ;;; local stack does not reduce parallelism. 31 | 32 | (in-package "WORK-QUEUE") 33 | 34 | (defconstant +max-thread-count+ 1024) 35 | (deftype thread-count () 36 | `(integer 1 ,+max-thread-count+)) 37 | (deftype thread-id () 38 | `(mod ,+max-thread-count+)) 39 | 40 | (defstruct (queue 41 | (:constructor %make-queue)) 42 | (locks (error "foo") :type (simple-array mutex 1) 43 | :read-only t) 44 | (cvar (make-waitqueue) :type waitqueue 45 | :read-only t) 46 | (nthread (error "foo") :type thread-count 47 | :read-only t) 48 | (state (error "foo") :type cons 49 | :read-only t) 50 | (queue (sb-queue:make-queue) :type sb-queue:queue) 51 | (stacks (error "Foo") :type (simple-array work-stack:stack 1) 52 | :read-only t) 53 | (threads (error "Foo") :type (simple-array t 1) 54 | :read-only t) 55 | (randoms (error "Foo") :type (simple-array random-state 1) 56 | :read-only t)) 57 | 58 | (declaim (inline p)) 59 | (defun p (x) 60 | (queue-p x)) 61 | 62 | (defun grab-task (queue stacks i) 63 | (declare (type thread-id i) 64 | (type simple-vector stacks)) 65 | (let ((task (sb-queue:dequeue queue))) 66 | (when task 67 | (return-from grab-task task))) 68 | (let* ((n (length stacks)) 69 | (lb (integer-length (1- n))) 70 | (ceil (ash 1 lb)) 71 | (ceil-1 (1- ceil)) 72 | (scaled (ceiling (ash i lb) n))) 73 | (declare (type thread-count n ceil) 74 | (type thread-id scaled)) 75 | ;; scaled is the least value such that 76 | ;; (truncate (* scaled n) ceil) = i. 77 | (dotimes (j ceil) 78 | (let* ((scaledp (* n (logxor scaled j))) 79 | (unscaled (ash scaledp (- lb))) 80 | (r (logand scaledp ceil-1))) 81 | (when (< r n) ;; when (logxor scaled j) is the least value 82 | ;; such that the truncation yields unscaled 83 | (let ((task (or (work-stack:steal (aref stacks unscaled)) 84 | (sb-queue:dequeue queue)))) 85 | (when task 86 | (return-from grab-task task)))))))) 87 | 88 | (declaim (type (or null thread-id) *worker-id*)) 89 | (defvar *worker-id* nil) 90 | (defvar *worker-hint* 0) 91 | (defvar *current-queue* nil) 92 | 93 | (declaim (inline current-queue worker-id worker-count)) 94 | (defun current-queue (&optional default) 95 | (if *current-queue* 96 | (weak-pointer-value *current-queue*) 97 | default)) 98 | 99 | (defun worker-id () 100 | *worker-id*) 101 | 102 | (defun worker-count (&optional (queue (current-queue))) 103 | (and queue (queue-nthread queue))) 104 | 105 | (defun loop-get-task (state lock cvar queue stacks i 106 | &optional max-time) 107 | (flet ((try () 108 | (when (eql (car state) :done) 109 | (return-from loop-get-task nil)) 110 | (let ((task (grab-task queue stacks i))) 111 | (when task 112 | (return-from loop-get-task task))))) 113 | (declare (inline try)) 114 | (let ((timeout 1e-4) 115 | (total 0d0) 116 | (fast t)) 117 | (declare (single-float timeout) 118 | (double-float total)) 119 | (loop 120 | (if fast 121 | (dotimes (i 128) 122 | (try) 123 | (loop repeat (* i 128) 124 | do (spin-loop-hint))) 125 | (try)) 126 | ;; Don't do this at home. 127 | (setf fast nil) 128 | (with-mutex (lock) 129 | (if (condition-wait cvar lock :timeout timeout) 130 | (setf fast t) 131 | (grab-mutex lock))) 132 | (when (and max-time 133 | (> (incf total timeout) max-time)) 134 | (return :timeout)) 135 | (setf timeout (min 1.0 (* timeout 1.1))))))) 136 | 137 | (declaim (inline %worker-loop)) 138 | (defun %worker-loop (weak-queue index hint &optional poll-function wait-time) 139 | (declare (muffle-conditions code-deletion-note)) 140 | (let* ((wait-time (and poll-function (or wait-time 1))) 141 | (wqueue (or (weak-pointer-value weak-queue) 142 | (return-from %worker-loop))) 143 | (i index) 144 | (state (queue-state wqueue)) 145 | (cvar (queue-cvar wqueue)) 146 | (locks (queue-locks wqueue)) 147 | (lock (aref locks i)) 148 | (queue (queue-queue wqueue)) 149 | (stacks (queue-stacks wqueue)) 150 | (stack (aref stacks i)) 151 | (random (aref (queue-randoms wqueue) i))) 152 | (labels ((poll () 153 | (when poll-function 154 | (let ((x (funcall poll-function))) 155 | (when x (return-from %worker-loop x))))) 156 | (work () 157 | (loop while (progn 158 | (poll) 159 | (work-stack:run-one stack random)) 160 | do (when (eq (car state) :done) 161 | (return-from %worker-loop))))) 162 | (declare (inline poll work)) 163 | (work) 164 | (let ((task (if (and wait-time (zerop wait-time)) 165 | (grab-task queue stacks i) 166 | (loop-get-task state lock cvar 167 | queue stacks i 168 | wait-time)))) 169 | (cond ((not task) 170 | (poll) 171 | (return-from %worker-loop)) 172 | (poll-function 173 | (when (eq task :timeout) 174 | (poll) 175 | (return-from %worker-loop))) 176 | (t 177 | (assert (not (eq task :timeout))))) 178 | (if (bulk-task-p task) 179 | (work-stack:push stack task hint) 180 | (work-stack:execute-task task)) 181 | (work) 182 | (setf queue nil))))) 183 | 184 | (defun %make-worker (wqueue i &optional binding-names binding-compute) 185 | (declare (type queue wqueue)) 186 | (let* ((state (queue-state wqueue)) 187 | (nthread (queue-nthread wqueue)) 188 | (hint (float (/ i nthread) 1d0)) 189 | (weak-queue (make-weak-pointer wqueue))) 190 | (make-thread 191 | (lambda (&aux (*worker-id* i) (*current-queue* weak-queue) (*worker-hint* hint)) 192 | (progv binding-names (mapcar (lambda (x) 193 | (if (functionp x) (funcall x) x)) 194 | binding-compute) 195 | (loop 196 | (flet ((inner () 197 | (%worker-loop weak-queue i hint))) 198 | (declare (notinline inner)) 199 | (inner) 200 | (sb-sys:scrub-control-stack) 201 | (when (eq (car state) :done) 202 | (return)))))) 203 | :name (format nil "Work queue worker ~A/~A" i nthread)))) 204 | 205 | (defun progress-until (condition) 206 | (let* ((condition (if (functionp condition) 207 | condition (fdefinition condition))) 208 | (i (worker-id)) 209 | (hint *worker-hint*) 210 | (weak-queue *current-queue*) 211 | (state (queue-state 212 | (or (current-queue) 213 | (error "Not in recursive wait?!"))))) 214 | (tagbody 215 | retry 216 | (flet ((check () 217 | (let ((value (funcall condition))) 218 | (when value 219 | (return-from progress-until value))))) 220 | (declare (inline check)) 221 | (%worker-loop weak-queue i hint #'check 0) 222 | (unless (eql :done (car state)) 223 | (go retry)) 224 | (check))))) 225 | 226 | (defun make (nthread &optional constructor &rest arguments) 227 | (declare (type thread-count nthread) 228 | (dynamic-extent arguments)) 229 | (let* ((threads (make-array nthread)) 230 | (default-bindings (getf arguments :bindings)) 231 | (arguments (loop for (key value) on arguments by #'cddr 232 | unless (eql key :bindings) 233 | nconc (list key value))) 234 | (wqueue (apply (or constructor #'%make-queue) 235 | :locks (map-into (make-array nthread) #'make-mutex) 236 | :cvar (make-waitqueue) 237 | :nthread nthread 238 | :state (list :running) 239 | :queue (sb-queue:make-queue) 240 | :stacks (map-into (make-array nthread) #'work-stack:make) 241 | :threads threads 242 | :randoms (let ((i 0)) 243 | (map-into (make-array nthread) 244 | (lambda () 245 | (seed-random-state (incf i))))) 246 | arguments))) 247 | (finalize wqueue (let ((cvar (queue-cvar wqueue)) 248 | (state (queue-state wqueue))) 249 | (lambda () 250 | (setf (car state) :done) 251 | (condition-broadcast cvar)))) 252 | (let ((binding-names (mapcar #'car default-bindings)) 253 | (binding-values (mapcar #'cdr default-bindings))) 254 | (dotimes (i nthread wqueue) 255 | (setf (aref threads i) 256 | (%make-worker wqueue i binding-names binding-values)))))) 257 | 258 | (defun stop (queue) 259 | (declare (type queue queue)) 260 | (setf (car (queue-state queue)) :done) 261 | (condition-broadcast (queue-cvar queue)) 262 | nil) 263 | 264 | (defun alive-p (queue) 265 | (declare (type queue queue)) 266 | (eql (car (queue-state queue)) :running)) 267 | 268 | (defun enqueue (task &optional (queue (current-queue))) 269 | (declare (type task-designator task) 270 | (type queue queue)) 271 | (assert (and (alive-p queue) 'enqueue)) 272 | (sb-queue:enqueue task (queue-queue queue)) 273 | (condition-broadcast (queue-cvar queue)) 274 | nil) 275 | 276 | (defun enqueue-all (tasks &optional (queue (current-queue))) 277 | (declare (type queue queue)) 278 | (assert (and (alive-p queue) 'enqueue-all)) 279 | (let ((queue (queue-queue queue))) 280 | (map nil (lambda (task) 281 | (sb-queue:enqueue task queue)) 282 | tasks)) 283 | (condition-broadcast (queue-cvar queue)) 284 | nil) 285 | 286 | (defun push-self (task &optional (queue (current-queue))) 287 | (declare (type queue queue) 288 | (type task-designator task)) 289 | (assert (and (alive-p queue) 'push-self)) 290 | (let ((id *worker-id*)) 291 | (cond (id 292 | (assert (eql (aref (queue-threads queue) id) 293 | *current-thread*)) 294 | (work-stack:push (aref (queue-stacks queue) id) task *worker-hint*)) 295 | (t 296 | (enqueue task queue))))) 297 | 298 | (defun push-self-all (tasks &optional (queue (current-queue))) 299 | (declare (type queue queue)) 300 | (assert (and (alive-p queue) 'push-self-all)) 301 | (let ((id *worker-id*)) 302 | (cond (id 303 | (assert (eql (aref (queue-threads queue) id) 304 | *current-thread*)) 305 | (work-stack:push-all (aref (queue-stacks queue) id) tasks *worker-hint*)) 306 | (t 307 | (enqueue-all tasks queue))))) 308 | -------------------------------------------------------------------------------- /parallel-primitives.lisp: -------------------------------------------------------------------------------- 1 | (defpackage "PARALLEL" 2 | (:use) 3 | (:export "PROMISE" "PROMISE-VALUE" "PROMISE-VALUE*" "LET" 4 | "FUTURE" "FUTURE-VALUE" "FUTURE-VALUE*" "BIND" 5 | "DOTIMES" "MAP" "REDUCE" 6 | "MAP-GROUP-REDUCE")) 7 | 8 | (defpackage "PARALLEL-IMPL" 9 | (:use "CL" "SB-EXT") 10 | (:import-from "PARALLEL" "PROMISE" "PROMISE-VALUE" "PROMISE-VALUE*" 11 | "FUTURE" "FUTURE-VALUE" "FUTURE-VALUE*" 12 | "BIND" "MAP-GROUP-REDUCE")) 13 | 14 | (in-package "PARALLEL-IMPL") 15 | 16 | (deftype status () 17 | `(member :waiting :done)) 18 | 19 | (defstruct (promise 20 | (:constructor make-promise (function)) 21 | (:include work-stack:task)) 22 | %values 23 | (%status :waiting :type (or status promise-slow-status))) 24 | 25 | (status:define-status-type promise-slow-status 26 | (:fast-type promise 27 | :status-type status 28 | :default-status :waiting 29 | :final-states (:done)) 30 | promise-%status 31 | promise-status 32 | %promise-wait 33 | %promise-upgrade) 34 | 35 | (defun promise (thunk &rest args) 36 | (let ((promise 37 | (make-promise (lambda (promise) 38 | (declare (type promise promise)) 39 | (setf (promise-%values promise) 40 | (multiple-value-list (apply thunk args))) 41 | (%promise-upgrade promise :done :waiting))))) 42 | (work-queue:push-self promise (work-queue:current-queue 43 | parallel-future:*context*)) 44 | promise)) 45 | 46 | (defun promise-value (promise) 47 | (declare (type promise promise)) 48 | (when (work-queue:worker-id) 49 | (work-queue:progress-until 50 | (lambda () 51 | (eql :done (promise-status promise))))) 52 | (%promise-wait promise :done) 53 | (values-list (promise-%values promise))) 54 | 55 | (defun promise-value* (promise) 56 | (unless (promise-p promise) 57 | (return-from promise-value* promise)) 58 | (loop 59 | (multiple-value-call (lambda (&optional (value nil value-p) &rest args) 60 | (cond ((promise-p value) 61 | (setf promise value)) 62 | (value-p 63 | (return (multiple-value-call #'values 64 | value (values-list args)))) 65 | (t 66 | (return (values))))) 67 | (promise-value promise)))) 68 | 69 | (defmacro parallel:let ((&rest bindings) &body body) 70 | (let* ((parallelp t) 71 | (names '()) 72 | (values '()) 73 | (temporaries (loop for (name value) in bindings 74 | if (eql name :parallel) 75 | do (setf parallelp value) 76 | else collect 77 | (progn 78 | (push name names) 79 | (push value values) 80 | `(,(gensym "PROMISE") (promise 81 | (lambda () 82 | ,value)))))) 83 | (function (gensym "PARALLEL-LET-FUNCTION"))) 84 | (setf names (nreverse names) 85 | values (nreverse values)) 86 | `(flet ((,function (,@names) 87 | ,@body)) 88 | (if ,parallelp 89 | (let ,temporaries 90 | (,function ,@(loop for (temp) in temporaries 91 | collect `(promise-value ,temp)))) 92 | (,function ,@values))))) 93 | 94 | (defstruct (future 95 | (:include parallel-future:future)) 96 | %values) 97 | 98 | (defun call-with-future-values (function futures) 99 | (declare (type simple-vector futures)) 100 | (apply function (map 'list (lambda (x) 101 | (if (future-p x) 102 | (future-value x) 103 | x)) 104 | futures))) 105 | 106 | (defun future (dependencies callback &key subtasks cleanup) 107 | (declare (type simple-vector dependencies) 108 | (type (or null simple-vector) subtasks)) 109 | (let ((future (parallel-future:make 110 | (remove-if-not #'future-p dependencies) 111 | (lambda (self) 112 | (setf (future-%values self) 113 | (multiple-value-list 114 | (call-with-future-values 115 | callback dependencies)))) 116 | (or subtasks #()) 117 | (and cleanup 118 | (lambda (self) 119 | (setf (future-%values self) 120 | (multiple-value-list 121 | (call-with-future-values 122 | cleanup dependencies))))) 123 | #'make-future))) 124 | future)) 125 | 126 | (defun future-value (future) 127 | (declare (type future future)) 128 | (when (work-queue:worker-id) 129 | (work-queue:progress-until (lambda () 130 | (eql (future:status future) :done)))) 131 | (future:wait future :done) 132 | (values-list (future-%values future))) 133 | 134 | (defun future-value* (future) 135 | (unless (future-p future) 136 | (return-from future-value* future)) 137 | (loop 138 | (multiple-value-call 139 | (lambda (&optional (value nil value-p) &rest values) 140 | (cond ((future-p value) 141 | (setf future value)) 142 | (value-p 143 | (return (multiple-value-call #'values 144 | value (values-list values)))) 145 | (t 146 | (return (values))))) 147 | (future-value future)))) 148 | 149 | (defmacro parallel:bind ((&rest bindings) 150 | &body body) 151 | (let ((wait nil)) 152 | (when (eql :wait (car body)) 153 | (setf wait t) 154 | (pop body)) 155 | `(,(if wait 'future-value 'identity) 156 | (future (vector ,@(mapcar #'second bindings)) 157 | (lambda ,(mapcar #'first bindings) 158 | ,@body))))) 159 | 160 | (defun %call-n-times (count function cleanup) 161 | (let ((future 162 | (parallel-future:make 163 | #() 164 | nil 165 | (make-array count :initial-element 0) 166 | (and cleanup 167 | (lambda (self) 168 | (setf (future-%values self) 169 | (multiple-value-list (funcall cleanup))))) 170 | #'make-future 171 | :%values '(nil) 172 | :subtask-function (lambda (subtask self index) 173 | (declare (ignore subtask self)) 174 | (funcall function index))))) 175 | future)) 176 | 177 | (defun call-n-times (count function aggregate-function &optional cleanup) 178 | (let* ((worker-count (or (work-queue:worker-count 179 | (work-queue:current-queue 180 | parallel-future:*context*)) 181 | (error "No current queue"))) 182 | (max (expt worker-count 2))) 183 | (if (<= count max) 184 | (%call-n-times count function cleanup) 185 | (let ((step (truncate count max))) 186 | (%call-n-times (ceiling count step) 187 | (lambda (i) 188 | (let* ((begin (* i step)) 189 | (end (min (+ begin step) count))) 190 | (funcall aggregate-function begin end))) 191 | cleanup))))) 192 | 193 | (defmacro parallel:dotimes ((var count &optional result) &body body) 194 | (let ((begin (gensym "BEGIN")) 195 | (end (gensym "END")) 196 | (i (gensym "I")) 197 | (wait nil) 198 | (tid (gensym "TID"))) 199 | (when (eql (car body) :wait) 200 | (setf wait t) 201 | (pop body)) 202 | `(,(if wait 'future-value 'identity) 203 | (call-n-times ,count 204 | (lambda (,var) 205 | ,@body) 206 | (lambda (,begin ,end &aux (,tid (work-queue:worker-id))) 207 | (declare (type fixnum ,begin ,end ,tid)) 208 | (flet ((work-queue:worker-id () 209 | ,tid)) 210 | (declare (inline work-queue:worker-id) 211 | (ignorable #'work-queue:worker-id)) 212 | (loop for ,i of-type fixnum from ,begin below ,end 213 | do 214 | (let ((,var ,i)) 215 | ,@body)))) 216 | ,(and result 217 | `(lambda () 218 | (let ((,var nil)) 219 | (declare (ignorable ,var)) 220 | (progn ,result)))))))) 221 | 222 | (declaim (maybe-inline parallel:map parallel:reduce parallel:map-group-reduce)) 223 | (defun parallel:map (type function arg &key (wait t)) 224 | (let* ((arg (coerce arg 'simple-vector)) 225 | (function (if (functionp function) 226 | function 227 | (fdefinition function))) 228 | (future (if (eql nil type) 229 | (parallel:dotimes (i (length arg)) 230 | (funcall function (aref arg i))) 231 | (let ((destination (make-array (length arg)))) 232 | (parallel:dotimes (i (length arg) (coerce destination type)) 233 | (setf (aref destination i) 234 | (funcall function (aref arg i)))))))) 235 | (if wait 236 | (future-value future) 237 | future))) 238 | 239 | (defun parallel:reduce (function arg seed &key (wait t) key) 240 | (let* ((arg (coerce arg 'simple-vector)) 241 | (function (if (functionp function) 242 | function 243 | (fdefinition function))) 244 | (accumulators (make-array (* 16 (or (work-queue:worker-count 245 | (work-queue:current-queue 246 | parallel-future:*context*)) 247 | (error "No current queue"))) 248 | :initial-element seed)) 249 | (future 250 | (if key 251 | (let ((key (if (functionp key) key (fdefinition key)))) 252 | (parallel:dotimes (i (length arg) 253 | (reduce function accumulators 254 | :initial-value seed)) 255 | (let ((idx (* 16 (work-queue:worker-id)))) 256 | (setf (aref accumulators idx) 257 | (funcall function 258 | (aref accumulators idx) 259 | (funcall key (aref arg i))))))) 260 | (parallel:dotimes (i (length arg) 261 | (reduce function accumulators 262 | :initial-value seed)) 263 | (let ((idx (work-queue:worker-id))) 264 | (setf (aref accumulators idx) 265 | (funcall function 266 | (aref accumulators idx) 267 | (aref arg i)))))))) 268 | (if wait 269 | (future-value future) 270 | future))) 271 | 272 | (defun map-group-reduce (sequence map reduce 273 | &key (group-test #'eql) 274 | group-by 275 | (wait t) 276 | (master-table nil) 277 | fancy) 278 | (let* ((arg (coerce sequence 'simple-vector)) 279 | (nthread (work-queue:worker-count 280 | (work-queue:current-queue 281 | parallel-future:*context*))) 282 | (tables (map-into (make-array nthread) 283 | (lambda () (make-hash-table :test group-test)))) 284 | (map (if (functionp map) map (fdefinition map))) 285 | (reduce (if (functionp reduce) reduce (fdefinition reduce))) 286 | (group-by (and group-by 287 | (if (functionp group-by) group-by (fdefinition group-by))))) 288 | (declare (type (simple-array hash-table 1) tables)) 289 | (labels ((clean-table (table) 290 | (declare (type hash-table table)) 291 | (ecase master-table 292 | ((nil) nil) 293 | (:quick table) 294 | (t 295 | (maphash (lambda (k v) 296 | (setf (gethash k table) (cdr v))) 297 | table) 298 | table))) 299 | (aggregate-keys () 300 | (let* ((size (reduce #'max tables :key #'hash-table-count)) 301 | (master (make-hash-table :test group-test :size size)) 302 | (vector (make-array size :adjustable t :fill-pointer 0))) 303 | (map nil (lambda (table) 304 | (maphash (lambda (k v) 305 | (let ((cache (gethash k master))) 306 | (if cache 307 | (push v (cdr cache)) 308 | (let ((cache (cons k (list v)))) 309 | (vector-push-extend cache vector) 310 | (setf (gethash k master) cache))))) 311 | table)) 312 | tables) 313 | (let ((vector (coerce (shiftf vector nil) 'simple-vector))) 314 | (declare (type (simple-array cons 1) vector)) 315 | (parallel:dotimes (i (length vector) 316 | (values vector (clean-table master))) 317 | (let* ((cache (aref vector i)) 318 | (values (cdr cache))) 319 | (setf (cdr cache) (reduce reduce values))))))) 320 | (accumulate (table key val) 321 | (declare (type hash-table table)) 322 | (multiple-value-bind (acc foundp) 323 | (gethash key table) 324 | (setf (gethash key table) 325 | (if foundp 326 | (funcall reduce acc val) 327 | val))))) 328 | (declare (inline accumulate)) 329 | (let ((future (parallel:dotimes (i (length arg) (aggregate-keys)) 330 | (let* ((x (aref arg i)) 331 | (table (aref tables (work-queue:worker-id)))) 332 | (declare (type hash-table table)) 333 | (if fancy 334 | (funcall map x (lambda (key value) 335 | (accumulate table key value))) 336 | (multiple-value-bind (val key) 337 | (if group-by 338 | (values (funcall map x) 339 | (funcall group-by x)) 340 | (funcall map x)) 341 | (accumulate table key val))))))) 342 | (if wait 343 | (future-value* future) 344 | future))))) 345 | 346 | #|| 347 | 348 | (defun count-words (documents) 349 | (parallel:map-group-reduce documents 350 | (lambda (document accumulator) 351 | (map nil (lambda (word) 352 | (funcall accumulator word 1)) 353 | document)) 354 | #'+ 355 | :group-test #'equal 356 | :fancy t)) 357 | 358 | (deftype index () 359 | `(mod ,most-positive-fixnum)) 360 | 361 | ;; todo: three-way partition? 362 | 363 | (declaim (inline selection-sort partition find-pivot)) 364 | (defun partition (vec begin end pivot) 365 | (declare (type (simple-array fixnum 1) vec) 366 | (type index begin end) 367 | (type fixnum pivot) 368 | (optimize speed)) 369 | (loop while (> end begin) 370 | do (if (<= (aref vec begin) pivot) 371 | (incf begin) 372 | (rotatef (aref vec begin) 373 | (aref vec (decf end)))) 374 | finally (return begin))) 375 | 376 | (defun selection-sort (vec begin end) 377 | (declare (type (simple-array fixnum 1) vec) 378 | (type index begin end) 379 | (optimize speed)) 380 | (loop for dst from begin below end 381 | do 382 | (let ((min (aref vec dst)) 383 | (min-i dst)) 384 | (declare (type fixnum min) 385 | (type index min-i)) 386 | (loop for i from (1+ dst) below end 387 | do (let ((x (aref vec i))) 388 | (when (< x min) 389 | (setf min x 390 | min-i i)))) 391 | (rotatef (aref vec dst) (aref vec min-i))))) 392 | 393 | (defun find-pivot (vec begin end) 394 | (declare (type (simple-array fixnum 1) vec) 395 | (type index begin end) 396 | (optimize speed)) 397 | (let ((first (aref vec begin)) 398 | (last (aref vec (1- end))) 399 | (middle (aref vec (truncate (+ begin end) 2)))) 400 | (declare (type fixnum first last middle)) 401 | (when (> first last) 402 | (rotatef first last)) 403 | (cond ((< middle first) 404 | first 405 | (setf middle first)) 406 | ((> middle last) 407 | last) 408 | (t 409 | middle)))) 410 | 411 | (defun pqsort (vec) 412 | (declare (type (simple-array fixnum 1) vec) 413 | (optimize speed)) 414 | (labels ((rec (begin end) 415 | (declare (type index begin end)) 416 | (when (<= (- end begin) 8) 417 | (return-from rec (selection-sort vec begin end))) 418 | (let* ((pivot (find-pivot vec begin end)) 419 | (split (partition vec begin end pivot))) 420 | (declare (type fixnum pivot) 421 | (type index split)) 422 | (cond ((= split begin) 423 | (let ((next (position pivot vec 424 | :start begin 425 | :end end 426 | :test-not #'eql))) 427 | (assert (> next begin)) 428 | (rec next end))) 429 | ((= split end) 430 | (let ((last (position pivot vec 431 | :start begin 432 | :end end 433 | :from-end t 434 | :test-not #'eql))) 435 | (assert last) 436 | (rec begin last))) 437 | (t 438 | (parallel:let ((left (rec begin split)) 439 | (right (rec split end)) 440 | (:parallel (>= (- end begin) 512))) 441 | (declare (ignore left right)))))))) 442 | (rec 0 (length vec)) 443 | vec)) 444 | 445 | (defun shuffle (vector) 446 | (declare (type vector vector)) 447 | (let ((end (length vector))) 448 | (loop for i from (- end 1) downto 0 449 | do (rotatef (aref vector i) 450 | (aref vector (random (+ i 1))))) 451 | vector)) 452 | 453 | (defun test-partition (size) 454 | (let ((vec (shuffle (let ((i 0)) 455 | (map-into (make-array size 456 | :element-type 'fixnum) 457 | (lambda () 458 | (incf i))))))) 459 | (time (partition vec 0 size (truncate size 2))) 460 | nil)) 461 | 462 | (defun test-pqsort (nproc size) 463 | (let ((vec (shuffle (let ((i 0)) 464 | (map-into (make-array size 465 | :element-type 'fixnum) 466 | (lambda () 467 | (incf i))))))) 468 | (parallel-future:with-context (nproc) 469 | (time (pqsort vec))) 470 | (loop for i below (1- (length vec)) 471 | do (assert (<= (aref vec i) (aref vec (1+ i))))))) 472 | 473 | (defun test-sort (size) 474 | (let ((vec (shuffle (let ((i 0)) 475 | (map-into (make-array size :element-type 'fixnum) 476 | (lambda () 477 | (incf i))))))) 478 | (declare (type (simple-array fixnum 1) vec)) 479 | (time (locally (declare (optimize speed (space 0)) 480 | (inline sort)) 481 | (sort vec #'<))) 482 | (loop for i below (1- (length vec)) 483 | do (assert (<= (aref vec i) (aref vec (1+ i))))))) 484 | 485 | ;; SBCL sort (heap sort...) 486 | * (test-sort (ash 1 25)) 487 | 488 | Evaluation took: 489 | 15.870 seconds of real time 490 | 15.828989 seconds of total run time (15.828989 user, 0.000000 system) 491 | 99.74% CPU 492 | 44,325,352,312 processor cycles 493 | 0 bytes consed 494 | 495 | ;; without any parallelism machinery 496 | * (test-pqsort 1 (ash 1 25)) 497 | 498 | Evaluation took: 499 | 6.245 seconds of real time 500 | 6.236389 seconds of total run time (6.236389 user, 0.000000 system) 501 | 99.86% CPU 502 | 17,440,707,947 processor cycles 503 | 0 bytes consed 504 | 505 | ;; with parallelism 506 | * (test-pqsort 1 (ash 1 25)) 507 | 508 | Evaluation took: 509 | 6.420 seconds of real time 510 | 6.416401 seconds of total run time (6.416401 user, 0.000000 system) 511 | 99.94% CPU 512 | 17,930,818,675 processor cycles 513 | 45,655,456 bytes consed 514 | 515 | NIL 516 | * (test-pqsort 2 (ash 1 25)) 517 | 518 | Evaluation took: 519 | 3.374 seconds of real time 520 | 6.572410 seconds of total run time (6.572410 user, 0.000000 system) 521 | 194.78% CPU 522 | 9,422,768,541 processor cycles 523 | 45,555,680 bytes consed 524 | 525 | NIL 526 | * (test-pqsort 4 (ash 1 25)) 527 | 528 | Evaluation took: 529 | 1.794 seconds of real time 530 | 6.536409 seconds of total run time (6.532409 user, 0.004000 system) 531 | 364.33% CPU 532 | 5,010,358,913 processor cycles 533 | 45,502,272 bytes consed 534 | 535 | NIL 536 | * (test-pqsort 8 (ash 1 25)) 537 | 538 | Evaluation took: 539 | 1.263 seconds of real time 540 | 8.456529 seconds of total run time (8.452529 user, 0.004000 system) 541 | 669.60% CPU 542 | 3,525,995,357 processor cycles 543 | 45,649,552 bytes consed 544 | 545 | NIL 546 | * (test-pqsort 11 (ash 1 25)) 547 | 548 | Evaluation took: 549 | 1.153 seconds of real time 550 | 9.188575 seconds of total run time (9.184574 user, 0.004001 system) 551 | 796.96% CPU 552 | 3,219,159,980 processor cycles 553 | 45,678,192 bytes consed 554 | 555 | NIL 556 | ||# 557 | -------------------------------------------------------------------------------- /affine-arrays.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Bulk operations on multi-dimensional arrays 2 | ;; 3 | ;; Goal: exploit both SIMD and thread-level parallelism 4 | ;; 5 | ;; Strategy: evaluate expression DAGs naively, but make 6 | ;; each operation bulky enough to be executed efficiently. 7 | ;; 8 | ;; No loop fusion. Instead, chunk up work units to work on 9 | ;; small working sets at a time. 10 | ;; 11 | ;; 12 | ;; Pieces (bottom up) 13 | ;; 14 | ;; Dataflow work queue 15 | ;; Bulk operations on shaped vectors 16 | ;; Lazy expression graph 17 | ;; Struct of Array representation 18 | ;; 19 | ;; (irregular array shapes) 20 | 21 | 22 | ;;; Dataflow work queue 23 | ;; 24 | ;; Not Implemented (: 25 | ;; 26 | ;; Basic version: 27 | ;; 28 | ;; - FIFO for ready units (bounded circular buffer) 29 | ;; - Each unit has ref to waiting unit 30 | ;; - Scan set of waiting units to find ready ones 31 | ;; 32 | ;; Add locality-awareness 33 | ;; 34 | ;; - Each worker thread has its own sub *stack* 35 | ;; -> Hash to distribute units to workers, 36 | ;; -> Steal from bottom 37 | ;; - When unit completes, try to acquire newly ready units and push 38 | ;; to local stack. 39 | ;; - If there's a lot of newly ready units, distribute according to hash 40 | ;; 41 | ;; - Hash: middle bits of written range. 42 | 43 | ;;; Bulk operations on shaped vectors 44 | ;; 45 | ;; Prototyped below. 46 | ;; 47 | ;; 48 | ;; 49 | 50 | ;;; Keep graph implicit. 51 | ;; 52 | ;; Each backing vector is either a vector or a promise. 53 | ;; 54 | ;; Promise: 55 | ;; - vector not yet allocated 56 | ;; - Each actual work unit annotated with write range. 57 | ;; - Work units depend on promise or work unit. 58 | ;; -> must add self to dependent list; just CAS. 59 | 60 | 61 | 62 | ;;; SoA structured data 63 | ;; 64 | ;; 65 | 66 | ;; Next step 67 | ;; 68 | ;; Ref-counted CoW 69 | ;; Flatten representation operation. 70 | ;; More types 71 | ;; Laziness 72 | ;; Expression graph rewriting (only trivial copy-elim stuff) 73 | ;; Work queue 74 | ;; - recursive work queue for reduce/scan 75 | ;; - enable pipelining by tracking first/last index written/read 76 | ;; 77 | ;; Inner loop: call dispatch once and find right implementation 78 | ;; -> special case common strides (1, 0, same) 79 | ;; -> reduce/scan: transpose evaluation order 80 | ;; -> parallelise really long inner loops or wide middle loops 81 | 82 | ;; Then: safety checks. 83 | ;; Bulk sorting, gather/scatter, Sort-indices 84 | 85 | ;; SoA 86 | ;; 87 | ;; TODO: make sure all the selection/reshaping/permutation operations 88 | ;; implicitly map over all slots as appropriate 89 | 90 | (defstruct (xstruct 91 | (:constructor %make-xstruct (&optional (slots (make-hash-table))))) 92 | (slots nil :type hash-table :read-only t)) 93 | 94 | (defun make-xstruct (&rest name-and-content) 95 | (let ((slots (make-hash-table))) 96 | (loop for (slot content) on name-and-content by #'cddr 97 | do 98 | (assert (typep content '(or real xarray))) 99 | (setf (gethash slot slots) 100 | (if (realp content) 101 | (singleton content) 102 | content))))) 103 | 104 | (defun xslot (xstruct slot) 105 | (or (gethash slot (xstruct-slots xstruct)) 106 | (error "Unknown slot ~S" slot))) 107 | 108 | (defun (setf xslot) (value xstruct slot) 109 | (assert (typep value '(or real xarray))) 110 | (setf (gethash slot (xstruct-slots xstruct)) 111 | (if (realp value) 112 | (singleton value) 113 | value)) 114 | value) 115 | 116 | ;; Array as linear vector + affine transform 117 | 118 | (defvar *shape-table* (make-hash-table :test 'equalp 119 | :weakness :key-and-value)) 120 | (defun intern-shape (shape) 121 | (or (gethash shape *shape-table*) 122 | (setf (gethash shape *shape-table*) 123 | (make-array (length shape) 124 | :element-type '(and unsigned-byte fixnum) 125 | :initial-contents shape)))) 126 | 127 | (defvar *transform-table* (make-hash-table :test 'equalp 128 | :weakness :key-and-value)) 129 | (defun intern-transform (transform) 130 | (or (gethash transform *transform-table*) 131 | (setf (gethash transform *transform-table*) 132 | (make-array (length transform) 133 | :element-type 'fixnum 134 | :initial-contents transform)))) 135 | 136 | (defstruct (xarray 137 | (:constructor make-xarray 138 | (%shape %transform offset backing 139 | &aux 140 | (shape (intern-shape %shape)) 141 | (transform (intern-transform %transform))))) 142 | (shape nil :type (simple-array (and unsigned-byte fixnum) 1) :read-only t) 143 | (transform nil :type (simple-array fixnum 1) :read-only t) 144 | (offset 0 :read-only t) 145 | (backing nil :type (simple-array double-float 1) :read-only t)) 146 | 147 | (defun strides (shape) 148 | (let ((acc 1) 149 | (strides (make-array (length shape)))) 150 | (loop for i from (1- (length shape)) downto 0 151 | for size = (aref shape i) 152 | do (setf (aref strides i) acc 153 | acc (* acc size)) 154 | finally (return (values strides acc))))) 155 | 156 | (defun %xarray (shape) 157 | (declare (type vector shape)) 158 | (multiple-value-bind (strides total-size) 159 | (strides shape) 160 | (make-xarray shape strides 0 161 | (make-array total-size 162 | :element-type 'double-float)))) 163 | 164 | (defun xarray (&rest shape) 165 | (declare (dynamic-extent shape)) 166 | (%xarray (coerce shape 'simple-vector))) 167 | 168 | (defun xarray-from-vector (vector) 169 | (let ((result (xarray (length vector)))) 170 | (map-into (xarray-backing result) 171 | (lambda (x) 172 | (float x 1d0)) 173 | vector) 174 | result)) 175 | 176 | (defun singleton (x) 177 | (let ((result (xarray))) 178 | (setf (aref (xarray-backing result) 0) (float x 1d0)) 179 | result)) 180 | 181 | (defun normalize (x) 182 | (etypecase x 183 | (real (singleton x)) 184 | (xarray x))) 185 | 186 | (define-modify-macro %normalizef () normalize) 187 | 188 | (defmacro normalizef (&rest places) 189 | `(progn 190 | ,@(mapcar (lambda (x) 191 | `(%normalizef ,x)) 192 | places))) 193 | 194 | (defun denormalize (x) 195 | (if (and (xarray-p x) 196 | (zerop (length (xarray-shape x)))) 197 | (aref (xarray-backing x) 198 | (xarray-offset x)) 199 | x)) 200 | 201 | (defun iota (n) 202 | (let ((result (xarray n)) 203 | (index 0d0)) 204 | (declare (type double-float index)) 205 | (map-into (xarray-backing result) 206 | (lambda () 207 | (incf index))) 208 | result)) 209 | 210 | (defun shape (xarray &optional dimension) 211 | (if dimension 212 | (aref (xarray-shape xarray) dimension) 213 | (xarray-shape xarray))) 214 | 215 | (defun reshape (xarray shape &optional strides) 216 | (unless strides 217 | (setf strides (strides shape))) 218 | (make-xarray shape 219 | strides 220 | (xarray-offset xarray) 221 | (xarray-backing xarray))) 222 | 223 | ;; (defun stack-transform (xarray )) <- linear transform. 224 | 225 | (defun transpose (xarray &rest indices) 226 | (declare (dynamic-extent indices)) 227 | (let ((shape (xarray-shape xarray)) 228 | (new-shape (make-array (length indices))) 229 | (transform (xarray-transform xarray)) 230 | (new-transform (make-array (length indices)))) 231 | (loop for i upfrom 0 232 | for index in indices 233 | do (setf (aref new-shape i) (aref shape index) 234 | (aref new-transform i) (aref transform index))) 235 | (make-xarray new-shape 236 | new-transform 237 | (xarray-offset xarray) 238 | (xarray-backing xarray)))) 239 | 240 | (defun slice (xarray dimension begin &optional end step) 241 | ;; FIXME: -ve step 242 | (unless step 243 | (setf step 1)) 244 | (let* ((shape (copy-seq (xarray-shape xarray))) 245 | (transform (copy-seq (xarray-transform xarray))) 246 | (stride (aref transform dimension)) 247 | (offset (xarray-offset xarray))) 248 | (unless end 249 | (setf end (truncate (- (aref shape dimension) begin) 250 | step))) 251 | (incf offset (* begin stride)) 252 | (setf (aref shape dimension) end 253 | (aref transform dimension) (* step stride)) 254 | (make-xarray shape transform 255 | offset 256 | (xarray-backing xarray)))) 257 | 258 | (defun remove-index (vector index) 259 | (remove-if (constantly t) vector :start index :count 1)) 260 | 261 | (defun select (xarray dimension &optional value) 262 | (unless value 263 | (setf value 0)) 264 | (let ((shape (xarray-shape xarray)) 265 | (transform (xarray-transform xarray)) 266 | (offset (xarray-offset xarray))) 267 | (make-xarray (remove-index shape dimension) 268 | (remove-index transform dimension) 269 | (+ offset (* value (aref transform dimension))) 270 | (xarray-backing xarray)))) 271 | 272 | (defun vcons (x vector) 273 | (let ((r (make-array (1+ (length vector))))) 274 | (setf (aref r 0) x) 275 | (replace r vector :start1 1) 276 | r)) 277 | 278 | (defun replicate (xarray &rest counts) 279 | (declare (dynamic-extent counts)) 280 | (reshape xarray 281 | (concatenate 'simple-vector counts (xarray-shape xarray)) 282 | (concatenate 'simple-vector 283 | (make-array (length counts) :initial-element 0) 284 | (xarray-transform xarray)))) 285 | 286 | ;; actual operations 287 | 288 | (defun extend-shape-or-die (target-shape xarray) 289 | (let ((shape (xarray-shape xarray))) 290 | (assert (<= (length shape) (length target-shape))) 291 | (loop for i from (1- (length target-shape)) downto 0 292 | for j from (1- (length shape)) downto 0 293 | do (assert (= (aref target-shape i) 294 | (aref shape j)))) 295 | (apply 'replicate xarray 296 | (coerce (subseq target-shape 0 (- (length target-shape) 297 | (length shape))) 298 | 'list)))) 299 | 300 | ;; TODO: 301 | ;; Inline all of this 302 | ;; so that execute-*-into can have inline caches re pattern. 303 | 304 | (defun args-parameters (args) 305 | (mapcan (lambda (arg) 306 | (list (xarray-shape arg) 307 | (xarray-transform arg))) 308 | args)) 309 | 310 | (define-compiler-macro optimize-pattern (x y &rest args) 311 | `((lambda (args) 312 | (let* ((box (load-time-value (list nil))) 313 | (cache (car box)) 314 | (params (args-parameters args))) 315 | (if (and cache 316 | (every #'eql params (car cache))) 317 | (cdr cache) 318 | (let ((result (%optimize-pattern args))) 319 | (setf (car box) 320 | (cons params result)) 321 | result)))) 322 | (list ,x ,y ,@args))) 323 | 324 | (declaim (inline xscan-into scan 325 | xreduce-into xreduce)) 326 | 327 | (defun xmap-into (dst op x &rest xs) 328 | (declare (dynamic-extent xs)) 329 | (normalizef dst) 330 | (let ((xs (cons x xs))) 331 | (let ((dst-shape (xarray-shape dst))) 332 | (map-into xs (lambda (x) 333 | (extend-shape-or-die dst-shape (normalize x))) 334 | xs) 335 | (apply 'execute-map-into op 336 | (apply 'optimize-pattern dst xs) 337 | dst xs) 338 | (denormalize dst)))) 339 | 340 | (define-compiler-macro xmap-into (dst op x &rest xs) 341 | (let ((names (cons 'x (loop for x in xs collect (gensym "X"))))) 342 | `((lambda (dst op ,@names) 343 | (normalizef dst) 344 | (let ((dst-shape (xarray-shape dst))) 345 | ,@(loop for x in names 346 | collect `(setf ,x (extend-shape-or-die dst-shape (normalize ,x)))) 347 | (execute-map-into op 348 | (optimize-pattern dst ,@names) 349 | dst ,@names) 350 | (denormalize dst))) 351 | ,dst ,op ,x ,@xs))) 352 | 353 | (defun build-dst-for (x &rest xs) 354 | (let ((best-shape (xarray-shape x))) 355 | (dolist (x xs (%xarray best-shape)) 356 | (let ((shape (xarray-shape x))) 357 | (when (> (length shape) (length best-shape)) 358 | (setf best-shape shape)))))) 359 | 360 | (defun xmap (op x &rest xs) 361 | (let ((xs (cons x xs))) 362 | (apply 'xmap-into 363 | (apply 'build-dst-for (mapcar 'normalize xs)) 364 | op xs))) 365 | 366 | (define-compiler-macro xmap (op x &rest xs) 367 | (let ((names (cons 'x (loop for x in xs collect (gensym "X"))))) 368 | `((lambda (op ,@names) 369 | (normalizef ,@names) 370 | (xmap-into (build-dst-for ,@names) 371 | op ,@names)) 372 | ,op ,x ,@xs))) 373 | 374 | (defun xscan-into (dst op x) 375 | (normalizef dst x) 376 | (let* ((dst-shape (xarray-shape dst)) 377 | (x (extend-shape-or-die dst-shape x)) 378 | (spine-dim (aref dst-shape 0)) 379 | (dst-stride (aref (xarray-transform dst) 0)) 380 | (x-stride (aref (xarray-transform x) 0)) 381 | (dst (select dst 0 0)) 382 | (x (select x 0 0))) 383 | ;; dst-stride = 0 -> reduce... 384 | (execute-scan-into op (optimize-pattern dst x) 385 | dst-stride x-stride spine-dim 386 | dst x) 387 | (denormalize dst))) 388 | 389 | (defun xscan (op x) 390 | (normalizef x) 391 | (xscan-into (%xarray (xarray-shape x)) 392 | op x)) 393 | 394 | 395 | (defun shape= (x y) 396 | (let ((sx (xarray-shape x)) 397 | (sy (xarray-shape y))) 398 | (and (= (length sx) (length sy)) 399 | (every #'= sx sy)))) 400 | 401 | (defun xreduce-into (dst op x) 402 | (normalizef dst x) 403 | (let* ((src-shape (xarray-shape x)) 404 | (spine-dim (aref src-shape 0)) 405 | (spine-stride (aref (xarray-transform x) 0)) 406 | (x (extend-shape-or-die (xarray-shape dst) 407 | (select x 0 0)))) 408 | (execute-reduce-into op 409 | (optimize-pattern dst x) 410 | spine-stride spine-dim 411 | dst x) 412 | (denormalize dst))) 413 | 414 | (defun xreduce (op x) 415 | (normalizef x) 416 | (xreduce-into (%xarray (subseq (xarray-shape x) 1)) 417 | op x)) 418 | 419 | (defvar *map-into-routines* (make-hash-table :test #'equal)) 420 | 421 | (defmacro def-map-into/3 (op &optional (fun op)) 422 | (let ((name (intern (format nil "%EXECUTE-MAP-INTO/3-~A" op)))) 423 | `(progn 424 | (defun ,name (vecs offsets count increment) 425 | (declare (type (simple-vector 3) vecs increment) 426 | (type (simple-array (and unsigned-byte fixnum) (3)) offsets) 427 | (type (and unsigned-byte fixnum) count)) 428 | (let ((v0 (aref vecs 0)) 429 | (v1 (aref vecs 1)) 430 | (v2 (aref vecs 2)) 431 | (i0 (aref offsets 0)) 432 | (i1 (aref offsets 1)) 433 | (i2 (aref offsets 2)) 434 | (d0 (aref increment 0)) 435 | (d1 (aref increment 1)) 436 | (d2 (aref increment 2))) 437 | (declare (type (simple-array double-float 1) v0 v1 v2) 438 | (type (and unsigned-byte fixnum) i0 i1 i2) 439 | (type fixnum d0 d1 d2) 440 | (optimize speed (safety 0))) 441 | (loop repeat count 442 | do 443 | (setf (aref v0 i0) (,fun (aref v1 i1) (aref v2 i2))) 444 | (incf i0 d0) 445 | (incf i1 d1) 446 | (incf i2 d2)))) 447 | (setf (gethash '(,op . 3) *map-into-routines*) 448 | ',name) 449 | ,(let ((name (intern (format nil "V~A" op))) 450 | (name! (intern (format nil "V~A!" op)))) 451 | `(progn 452 | (declaim (inline ,name ,name!)) 453 | (defun ,name (x y) 454 | (xmap ',op x y)) 455 | (defun ,name! (dst x y) 456 | (xmap-into dst ',op x y))))))) 457 | 458 | (def-map-into/3 +) 459 | (def-map-into/3 minus -) 460 | (def-map-into/3 *) 461 | (def-map-into/3 /) 462 | 463 | (defmacro def-map-into/2 (op &optional (fun op)) 464 | (let ((name (intern (format nil "%EXECUTE-MAP-INTO/2-~A" op)))) 465 | `(progn 466 | (defun ,name (vecs offsets count increment) 467 | (declare (type (simple-vector 2) vecs increment) 468 | (type (simple-array (and unsigned-byte fixnum) (2)) offsets) 469 | (type (and unsigned-byte fixnum) count)) 470 | (let ((v0 (aref vecs 0)) 471 | (v1 (aref vecs 1)) 472 | (i0 (aref offsets 0)) 473 | (i1 (aref offsets 1)) 474 | (d0 (aref increment 0)) 475 | (d1 (aref increment 1))) 476 | (declare (type (simple-array double-float 1) v0 v1) 477 | (type (and unsigned-byte fixnum) i0 i1) 478 | (type fixnum d0 d1) 479 | (optimize speed (safety 0))) 480 | (loop repeat count 481 | do 482 | (setf (aref v0 i0) (,fun (aref v1 i1))) 483 | (incf i0 d0) 484 | (incf i1 d1)))) 485 | (setf (gethash '(,op . 2) *map-into-routines*) 486 | ',name) 487 | ,(let ((name (intern (format nil "V~A" op))) 488 | (name! (intern (format nil "V~A!" op)))) 489 | `(progn 490 | (declaim (inline ,name ,name!)) 491 | (defun ,name (x) 492 | (xmap ',op x)) 493 | (defun ,name! (dst x) 494 | (xmap-into dst ',op x))))))) 495 | 496 | (def-map-into/2 sqrt (lambda (x) 497 | (sqrt (truly-the (double-float 0d0) x)))) 498 | (def-map-into/2 neg (lambda (x) 499 | (- x))) 500 | (def-map-into/2 abs) 501 | 502 | (declaim (inline v- v-!)) 503 | (defun v- (x &optional y) 504 | (if y 505 | (vminus x y) 506 | (vneg x))) 507 | 508 | (defun v-! (dst x &optional y) 509 | (if y 510 | (vminus! dst x y) 511 | (vneg! dst x))) 512 | 513 | (defun find-map-into-routine (op arity) 514 | (symbol-function (or (gethash (cons op arity) *map-into-routines*) 515 | (error "Unknown map-into routine ~S/~A" op arity)))) 516 | 517 | #+nil 518 | (defun %execute-map-into (op vecs offsets count increment) 519 | (declare (type simple-vector vecs increment) 520 | (type (simple-array (and unsigned-byte fixnum) 1) offsets) 521 | (type (and unsigned-byte fixnum) count)) 522 | (assert (= 3 (length vecs))) 523 | (assert (eql '+ op)) 524 | (let ((v0 (aref vecs 0)) 525 | (v1 (aref vecs 1)) 526 | (v2 (aref vecs 2)) 527 | (i0 (aref offsets 0)) 528 | (i1 (aref offsets 1)) 529 | (i2 (aref offsets 2)) 530 | (d0 (aref increment 0)) 531 | (d1 (aref increment 1)) 532 | (d2 (aref increment 2))) 533 | (declare (type (simple-array double-float 1) v0 v1 v2) 534 | (type (and unsigned-byte fixnum) i0 i1 i2) 535 | (type fixnum d0 d1 d2) 536 | (optimize speed (safety 0))) 537 | (loop repeat count 538 | do 539 | (setf (aref v0 i0) (+ (aref v1 i1) (aref v2 i2))) 540 | (incf i0 d0) 541 | (incf i1 d1) 542 | (incf i2 d2)))) 543 | 544 | (defun execute-map-into (op pattern &rest args) 545 | (declare (dynamic-extent args)) 546 | (let* ((vectors (map 'simple-vector 'xarray-backing args)) 547 | (n (length args)) 548 | (fun (find-map-into-routine op n)) 549 | (offsets (make-array n 550 | :element-type '(and unsigned-byte fixnum) 551 | :initial-contents (car pattern))) 552 | (loops (cdr pattern)) 553 | (depth (length loops))) 554 | (declare (type simple-vector loops)) 555 | (labels ((rec (i) 556 | (let* ((spec (aref loops i)) 557 | (count (car spec)) 558 | (increment (cdr spec))) 559 | (if (= i (1- depth)) 560 | (funcall fun vectors 561 | offsets 562 | count 563 | increment) 564 | (loop repeat count 565 | do (rec (1+ i)) 566 | (map-into offsets #'+ offsets increment)))))) 567 | (rec 0) 568 | (values)))) 569 | 570 | (defvar *reduce-into-routines* (make-hash-table :test #'equal)) 571 | 572 | (defmacro def-reduce-into (op) 573 | (let ((name (intern (format nil "%EXECUTE-REDUCE-INTO/~A" op)))) 574 | `(progn 575 | (defun ,name (stride dim 576 | dst x 577 | dst-offset x-offset 578 | count 579 | dst-inc x-inc) 580 | ;; switch depending on which is smaller of stride or dst-offset 581 | (declare (type fixnum stride) 582 | (type (and unsigned-byte fixnum) 583 | dim 584 | dst-offset x-offset count 585 | dst-inc x-inc) 586 | (type (simple-array double-float 1) dst x)) 587 | (locally (declare (optimize speed (safety 0))) 588 | (loop repeat count 589 | do 590 | (let ((x-offset x-offset) 591 | (acc 0d0)) 592 | (declare (type (and unsigned-byte fixnum) x-offset) 593 | (type double-float acc)) 594 | (loop repeat dim 595 | do 596 | (setf acc (,op acc (aref x x-offset))) 597 | (incf x-offset stride)) 598 | (setf (aref dst dst-offset) acc)) 599 | (incf dst-offset dst-inc) 600 | (incf x-offset x-inc)))) 601 | (setf (gethash ',op *reduce-into-routines*) 602 | ',name) 603 | ,(let ((name (intern (format nil "R~A" op))) 604 | (name! (intern (format nil "R~A!" op)))) 605 | `(progn 606 | (declaim (inline ,name ,name!)) 607 | (defun ,name (x) 608 | (xreduce ',op x)) 609 | (defun ,name! (dst x) 610 | (xreduce-into dst ',op x))))))) 611 | 612 | (def-reduce-into +) 613 | (def-reduce-into *) 614 | (def-reduce-into min) 615 | (def-reduce-into max) 616 | 617 | (defun find-reduce-into-routine (op) 618 | (symbol-function (or (gethash op *reduce-into-routines*) 619 | (error "No reduce into routine for ~S" op)))) 620 | 621 | #+nil 622 | (defun %execute-reduce-into (op stride dim 623 | dst x 624 | dst-offset x-offset 625 | count 626 | dst-inc x-inc) 627 | ;; switch depending on which is smaller of stride or dst-offset 628 | (declare (type fixnum stride) 629 | (type (and unsigned-byte fixnum) 630 | dim 631 | dst-offset x-offset count 632 | dst-inc x-inc) 633 | (type (simple-array double-float 1) dst x) 634 | (type (eql +) op)) 635 | (locally (declare (optimize speed (safety 0))) 636 | (loop repeat count 637 | do 638 | (let ((x-offset x-offset) 639 | (acc 0d0)) 640 | (declare (type (and unsigned-byte fixnum) x-offset) 641 | (type double-float acc)) 642 | (loop repeat dim 643 | do 644 | (incf acc (aref x x-offset)) 645 | (incf x-offset stride)) 646 | (setf (aref dst dst-offset) acc)) 647 | (incf dst-offset dst-inc) 648 | (incf x-offset x-inc)))) 649 | 650 | (defun execute-reduce-into (op pattern stride dim dst x) 651 | (let* ((offsets (car pattern)) 652 | (loops (cdr pattern)) 653 | (depth (length loops)) 654 | (dst-offset (aref offsets 0)) 655 | (x-offset (aref offsets 1)) 656 | (dst (xarray-backing dst)) 657 | (x (xarray-backing x)) 658 | (fun (find-reduce-into-routine op))) 659 | (declare (type simple-vector loops) 660 | (type (and unsigned-byte fixnum) dst-offset x-offset)) 661 | (labels ((rec (i) 662 | (let* ((spec (aref loops i)) 663 | (count (car spec)) 664 | (increment (cdr spec)) 665 | (dst-inc (aref increment 0)) 666 | (x-inc (aref increment 1))) 667 | (if (= i (1- depth)) 668 | (funcall fun stride dim 669 | dst x 670 | dst-offset x-offset 671 | count 672 | dst-inc x-inc) 673 | (loop repeat count 674 | do (rec (1+ i)) 675 | (incf x-offset x-inc) 676 | (incf dst-offset dst-inc)))))) 677 | (rec 0) 678 | (values)))) 679 | 680 | (defvar *scan-into-routines* (make-hash-table :test #'equal)) 681 | 682 | (defmacro def-scan-into (op) 683 | (let ((name (intern (format nil "%EXECUTE-SCAN-INTO/~A" op)))) 684 | `(progn 685 | (defun ,name (dst-stride x-stride dim 686 | dst x 687 | dst-offset x-offset 688 | count 689 | dst-inc x-inc) 690 | (declare (type fixnum dst-stride x-stride) 691 | (type (and unsigned-byte fixnum) 692 | dim 693 | dst-offset x-offset count 694 | dst-inc x-inc) 695 | (type (simple-array double-float 1) dst x)) 696 | (locally (declare (optimize speed (safety 0))) 697 | (loop repeat count 698 | do 699 | (let ((dst-offset dst-offset) 700 | (x-offset x-offset) 701 | (acc 0d0)) 702 | (declare (type (and unsigned-byte fixnum) 703 | x-offset dst-offset) 704 | (type double-float acc)) 705 | (loop repeat dim 706 | do 707 | (setf acc 708 | (setf (aref dst dst-offset) 709 | (,op acc (aref x x-offset)))) 710 | (incf dst-offset dst-stride) 711 | (incf x-offset x-stride))) 712 | (incf dst-offset dst-inc) 713 | (incf x-offset x-inc)))) 714 | (setf (gethash ',op *scan-into-routines*) 715 | ',name) 716 | ,(let ((name (intern (format nil "S~A" op))) 717 | (name! (intern (format nil "S~A!" op)))) 718 | `(progn 719 | (declaim (inline ,name ,name!)) 720 | (defun ,name (x) 721 | (xscan ',op x)) 722 | (defun ,name! (dst x) 723 | (xscan-into dst ',op x))))))) 724 | 725 | (def-scan-into +) 726 | (def-scan-into *) 727 | (def-scan-into min) 728 | (def-scan-into max) 729 | 730 | (defun find-scan-into-routine (op) 731 | (symbol-function (or (gethash op *reduce-into-routines*) 732 | (error "No reduce into routine for ~S" op)))) 733 | 734 | #+nil 735 | (defun %execute-scan-into (op dst-stride x-stride dim 736 | dst x 737 | dst-offset x-offset 738 | count 739 | dst-inc x-inc) 740 | (declare (type fixnum dst-stride x-stride) 741 | (type (and unsigned-byte fixnum) 742 | dim 743 | dst-offset x-offset count 744 | dst-inc x-inc) 745 | (type (simple-array double-float 1) dst x) 746 | (type (eql +) op)) 747 | (locally (declare (optimize speed (safety 0))) 748 | (loop repeat count 749 | do 750 | (let ((dst-offset dst-offset) 751 | (x-offset x-offset) 752 | (acc 0d0)) 753 | (declare (type (and unsigned-byte fixnum) 754 | x-offset dst-offset) 755 | (type double-float acc)) 756 | (loop repeat dim 757 | do 758 | (incf acc (aref x x-offset)) 759 | (setf (aref dst dst-offset) acc) 760 | (incf dst-offset dst-stride) 761 | (incf x-offset x-stride))) 762 | (incf dst-offset dst-inc) 763 | (incf x-offset x-inc)))) 764 | 765 | (defun execute-scan-into (op pattern dst-stride x-stride dim dst x) 766 | (let* ((fun (find-scan-into-routine op)) 767 | (offsets (car pattern)) 768 | (loops (cdr pattern)) 769 | (depth (length loops)) 770 | (dst-offset (aref offsets 0)) 771 | (x-offset (aref offsets 1)) 772 | (dst (xarray-backing dst)) 773 | (x (xarray-backing x))) 774 | (declare (type simple-vector loops) 775 | (type (and unsigned-byte fixnum) dst-offset x-offset)) 776 | (labels ((rec (i) 777 | (let* ((spec (aref loops i)) 778 | (count (car spec)) 779 | (increment (cdr spec)) 780 | (dst-inc (aref increment 0)) 781 | (x-inc (aref increment 1))) 782 | (if (= i (1- depth)) 783 | (funcall fun dst-stride x-stride dim 784 | dst x 785 | dst-offset x-offset 786 | count 787 | dst-inc x-inc) 788 | (loop repeat count 789 | do (rec (1+ i)) 790 | (incf x-offset x-inc) 791 | (incf dst-offset dst-inc)))))) 792 | (rec 0) 793 | (values)))) 794 | 795 | ;; machinery for optimize-pattern 796 | ;; Reorder perfect constant loop nests for locality, while 797 | ;; trying to ensure a minimal trip count for the inner loop 798 | 799 | (defvar *iterator-object-count* 0) 800 | 801 | (defstruct (iterator 802 | (:constructor make-iterator (limit &optional name))) 803 | (name (incf *iterator-object-count*) :read-only t) 804 | (limit nil :read-only t)) 805 | 806 | (defstruct (index-seq 807 | (:constructor make-index-seq (offset plane))) 808 | (offset 0 :read-only t :type fixnum) 809 | (plane nil :read-only t :type simple-vector)) 810 | 811 | (defstruct (fused-index-seq 812 | (:constructor make-fused-index-seq (offsets planes))) 813 | (offsets nil :read-only t :type simple-vector) 814 | (planes nil :read-only t :type simple-vector)) 815 | 816 | (defun %make-expression (offset iters strides) 817 | (let ((rest (loop for iter across iters 818 | for stride across strides 819 | do (when (minusp stride) 820 | (setf stride (- stride)) 821 | (decf offset (* stride (cdr iter)))) 822 | when iter 823 | collect (cons iter stride)))) 824 | (make-index-seq offset (coerce rest 'simple-vector)))) 825 | 826 | (defun make-expression (offset &rest iter-and-stride) 827 | (let ((rest (loop for (iter stride) on iter-and-stride by #'cddr 828 | do (when (minusp stride) 829 | (setf stride (- stride)) 830 | (decf offset (* stride (cdr iter)))) 831 | collect (cons iter stride)))) 832 | (make-index-seq offset (coerce rest 'simple-vector)))) 833 | 834 | (defun lex> (x y) 835 | (assert (vectorp x)) 836 | (assert (vectorp y)) 837 | (assert (= (length x) (length y))) 838 | (loop for xi across x 839 | for yi across y 840 | do (cond ((< xi yi) (return nil)) 841 | ((> xi yi) (return t))) 842 | finally (return nil))) 843 | 844 | (defun fuse-expressions (&rest expressions) 845 | (let ((iters (make-hash-table)) 846 | (transposed '()) 847 | (offsets (map 'simple-vector #'index-seq-offset expressions)) 848 | (count (length expressions)) 849 | (expressions (mapcar #'index-seq-plane expressions))) 850 | (loop for i upfrom 0 851 | for expression in expressions 852 | do 853 | (loop for (iter . stride) across expression 854 | do 855 | (let ((strides 856 | (or (gethash iter iters) 857 | (let ((values (make-array count :initial-element 0))) 858 | (push (cons iter values) transposed) 859 | (setf (gethash iter iters) 860 | values))))) 861 | (setf (aref strides i) stride)))) 862 | (make-fused-index-seq 863 | offsets 864 | (sort (coerce transposed 'simple-vector) 865 | (lambda (x y) 866 | (let ((xx (cdr x)) 867 | (yy (cdr y))) 868 | (if (every #'= xx yy) 869 | (> (cdar x) (cdar y)) 870 | (lex> xx yy)))))))) 871 | 872 | (defun %merge-iterators (strides) 873 | (loop for i from (1- (length strides)) downto 0 874 | for (iter . stride) = (aref strides i) 875 | for repeat = (iterator-limit iter) do 876 | (loop for j below i 877 | for (j-iter . j-stride) = (aref strides j) 878 | for j-repeat = (iterator-limit j-iter) 879 | do 880 | (when (every (lambda (i j) 881 | (= (* repeat i) j)) 882 | stride j-stride) 883 | (setf (car (aref strides i)) 884 | (make-iterator (* repeat j-repeat))) 885 | (return-from %merge-iterators 886 | (remove-index strides j)))))) 887 | 888 | (defun merge-iterators (fused) 889 | (let ((offsets (fused-index-seq-offsets fused)) 890 | (strides (fused-index-seq-planes fused))) 891 | (loop for result = (%merge-iterators strides) 892 | do (if result 893 | (setf strides result) 894 | (return (make-fused-index-seq offsets strides)))))) 895 | 896 | (defun reorder-iterators (fused &key (goal-trip-count 32)) 897 | (let ((strides (copy-seq (fused-index-seq-planes fused))) 898 | (best-idx nil) 899 | (best-trip 0)) 900 | (when (zerop (length strides)) 901 | (let* ((offsets (fused-index-seq-offsets fused)) 902 | (n (length offsets))) 903 | (return-from 904 | reorder-iterators 905 | (make-fused-index-seq 906 | offsets (vector (cons (make-iterator 1) 907 | (make-array n :initial-element 0))))))) 908 | (loop for i upfrom 0 909 | for (iter) across strides 910 | for capped-count = (min (iterator-limit iter) goal-trip-count) 911 | do 912 | (when (>= capped-count best-trip) 913 | (setf best-idx i 914 | best-trip capped-count))) 915 | (let ((bottom (aref strides best-idx))) 916 | (loop for i from (1+ best-idx) below (length strides) 917 | do (setf (aref strides (1- i)) 918 | (aref strides i))) 919 | (setf (aref strides (1- (length strides))) bottom)) 920 | (make-fused-index-seq (fused-index-seq-offsets fused) 921 | strides))) 922 | 923 | #|| 924 | CL-USER> (let ((i (make-iterator 4 'i)) 925 | (j (make-iterator 3 'j)) 926 | (k (make-iterator 20 'k))) 927 | (reorder-iterators 928 | (merge-iterators (fuse-expressions 929 | (make-expression 0 i 1 j 4 k 12) 930 | (make-expression 1 i 2 j 8 k 24))))) 931 | (#(0 1) . #(((I . 240) . #(1 2)))) 932 | ||# 933 | 934 | (defun %optimize-pattern (args) 935 | (format t "optimizing pattern ...~%") 936 | (let* ((*iterator-object-count* 0) 937 | (shape (xarray-shape (first args))) 938 | (iterators (map 'simple-vector 939 | (lambda (dim) 940 | (and (> dim 1) 941 | (make-iterator dim))) 942 | shape)) 943 | (expressions (mapcar (lambda (arg) 944 | (%make-expression (xarray-offset arg) 945 | iterators 946 | (xarray-transform arg))) 947 | args)) 948 | (pattern (reorder-iterators 949 | (merge-iterators 950 | (apply 'fuse-expressions expressions)))) 951 | (result (cons (fused-index-seq-offsets pattern) 952 | (map 'simple-vector 953 | (lambda (x) 954 | (cons (iterator-limit (car x)) 955 | (cdr x))) 956 | (fused-index-seq-planes pattern))))) 957 | (format t " ~A~%" result) 958 | result)) 959 | 960 | (defun optimize-pattern (x y &rest arguments) 961 | (%optimize-pattern (list* x y arguments))) 962 | ;; test 963 | 964 | (defun avg (values) 965 | (v/ (r+ values) 966 | (shape values 0))) 967 | 968 | (defun sd (values) 969 | (vsqrt (avg (let ((delta (v- values (avg values)))) 970 | (v* delta delta))))) 971 | 972 | (defun fast-sd (values) 973 | (declare (type (simple-array double-float 1) values) 974 | (optimize speed (safety 0))) 975 | (let ((avg 0d0)) 976 | (declare (type double-float avg)) 977 | (map nil (lambda (x) 978 | (incf avg x)) 979 | values) 980 | (setf avg (/ avg (length values))) 981 | (let ((deltas (make-array (length values) 982 | :element-type 'double-float))) 983 | (declare (type (simple-array double-float 1) deltas)) 984 | (map-into deltas 985 | (lambda (x) 986 | (- x avg)) 987 | values) 988 | (loop for i below (length deltas) 989 | do (setf (aref deltas i) 990 | (expt (aref deltas i) 2))) 991 | (let ((sum 0d0)) 992 | (declare (type double-float sum)) 993 | (map nil (lambda (x) 994 | (incf sum x)) 995 | deltas) 996 | (sqrt (truly-the (double-float 0d0) 997 | (/ sum (length values)))))))) 998 | (defun %sd (values) 999 | (vsqrt (v- (avg (v* values values)) 1000 | (let ((avg (avg values))) 1001 | (v* avg avg))))) 1002 | --------------------------------------------------------------------------------