├── pcall-queue.asd ├── package.lisp ├── pcall.asd ├── LICENSE ├── pcall.lisp ├── doc ├── style.css ├── background.html └── index.html ├── queue.lisp ├── task.lisp ├── pool.lisp └── tests.lisp /pcall-queue.asd: -------------------------------------------------------------------------------- 1 | (asdf:defsystem :pcall-queue 2 | :depends-on (:bordeaux-threads) 3 | :components ((:file "queue"))) 4 | -------------------------------------------------------------------------------- /package.lisp: -------------------------------------------------------------------------------- 1 | (cl:defpackage :pcall 2 | (:use :cl :bordeaux-threads :pcall-queue) 3 | (:export #:pcall #:pexec #:plet #:join #:select-one #:done-p 4 | #:thread-pool-size #:finish-tasks 5 | #:set-worker-environment #:with-local-thread-pool)) 6 | -------------------------------------------------------------------------------- /pcall.asd: -------------------------------------------------------------------------------- 1 | (asdf:defsystem :pcall 2 | :depends-on (:bordeaux-threads :pcall-queue) 3 | :components ((:file "package") 4 | (:file "task" :depends-on ("package")) 5 | (:file "pool" :depends-on ("task")) 6 | (:file "pcall" :depends-on ("task" "pool")))) 7 | 8 | (asdf:defsystem :pcall-tests 9 | :depends-on (:pcall :fiveam) 10 | :components ((:file "tests"))) 11 | 12 | (defmethod asdf:perform ((op asdf:test-op) (system (eql (find-system :pcall)))) 13 | (asdf:oos 'asdf:load-op :pcall) 14 | (asdf:oos 'asdf:load-op :pcall-tests) 15 | (funcall (intern (string :run!) (string :it.bese.FiveAM)) :pcall)) 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Marijn Haverbeke, marijnh@gmail.com 2 | 3 | This software is provided 'as-is', without any express or implied 4 | warranty. In no event will the authors be held liable for any 5 | damages arising from the use of this software. 6 | 7 | Permission is granted to anyone to use this software for any 8 | purpose, including commercial applications, and to alter it and 9 | redistribute it freely, subject to the following restrictions: 10 | 11 | 1. The origin of this software must not be misrepresented; you must 12 | not claim that you wrote the original software. If you use this 13 | software in a product, an acknowledgment in the product 14 | documentation would be appreciated but is not required. 15 | 16 | 2. Altered source versions must be plainly marked as such, and must 17 | not be misrepresented as being the original software. 18 | 19 | 3. This notice may not be removed or altered from any source 20 | distribution. 21 | -------------------------------------------------------------------------------- /pcall.lisp: -------------------------------------------------------------------------------- 1 | (in-package :pcall) 2 | 3 | (defun pcall (thunk) 4 | "Call a thunk in parallel. Returns a task that can be joined. When 5 | an exclusive is given, the task will only run when no other tasks with 6 | that exclusive are running." 7 | (let ((task (make-instance 'task :thunk thunk))) 8 | (queue-push task (pool-queue *pool*)) 9 | (unless (pool-threads *pool*) 10 | (audit-thread-pool)) 11 | task)) 12 | 13 | (defmacro pexec (&body body) 14 | "Shorthand for pcall." 
15 | `(pcall (lambda () ,@body))) 16 | 17 | (defmacro plet ((&rest bindings) &body body) 18 | (let ((syms (mapcar (lambda (x) (gensym (string (car x)))) bindings))) 19 | `(let ,(loop :for (nil val) :in bindings 20 | :for sym :in syms 21 | :collect `(,sym (pexec ,val))) 22 | (symbol-macrolet ,(loop :for (var nil) :in bindings 23 | :for sym :in syms 24 | :collect `(,var (join ,sym))) 25 | ,@body)))) 26 | -------------------------------------------------------------------------------- /doc/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | font-family: tahoma, arial, sans-serif; 4 | margin: 60px 120px; 5 | color: black; 6 | max-width: 50em; 7 | } 8 | 9 | h1 { 10 | font-size: 250%; 11 | border-bottom: 3px solid #C42; 12 | } 13 | 14 | h2 { 15 | font-size: 140%; 16 | border-bottom: 1px solid #C42; 17 | } 18 | 19 | h3 { 20 | font-size: 110%; 21 | } 22 | 23 | code { 24 | font-size: 1.2em; 25 | } 26 | 27 | p.news { 28 | text-indent: -3em; 29 | padding-left: 3em; 30 | } 31 | 32 | pre.code { 33 | margin: 0 16px; 34 | padding: 7px; 35 | border: 1px solid #DA8; 36 | } 37 | 38 | p.def { 39 | margin-top: 1.5em; 40 | font-family: courier; 41 | } 42 | 43 | p.def span { 44 | color: #555555; 45 | font-weight: bold; 46 | font-family: tahoma, arial, sans-serif; 47 | font-size: .8em; 48 | } 49 | 50 | .desc { 51 | margin-left: 1em; 52 | } 53 | 54 | thead { 55 | font-weight: bold; 56 | } 57 | 58 | table { 59 | border-collapse: collapse; 60 | } 61 | 62 | tr + tr { 63 | border-top: 1px solid #88BB99; 64 | } 65 | 66 | thead tr { 67 | border-bottom: 2px solid #88BB99; 68 | } 69 | 70 | td + td, th + th { 71 | border-left: 2px solid #88BB99; 72 | } 73 | 74 | th { 75 | text-align: left; 76 | padding: 2px 5px; 77 | } 78 | 79 | td { 80 | padding: 2px 5px; 81 | vertical-align: top; 82 | } 83 | 84 | a:link { 85 | color: #3333AA; 86 | text-decoration: none; 87 | } 88 | 89 | a:visited { 90 | color: #773377; 91 | text-decoration: none; 92 | } 93 | 94 | a:hover { 95 | text-decoration: underline; 96 | } 97 | 98 | ul.symbol-index { 99 | font-family: monospace; 100 | font-size: 1.2em; 101 | } 102 | 103 | blockquote { 104 | font-style: italic; 105 | } 106 | -------------------------------------------------------------------------------- /queue.lisp: -------------------------------------------------------------------------------- 1 | (cl:defpackage :pcall-queue 2 | (:use :cl :bordeaux-threads) 3 | (:export #:make-queue 4 | #:queue-push 5 | #:queue-pop #:queue-wait 6 | #:queue-length #:queue-empty-p)) 7 | 8 | (cl:in-package :pcall-queue) 9 | 10 | ;;; A thread-safe wait queue. 11 | 12 | (defclass queue () 13 | ((lock :initform (make-lock) :reader queue-lock) 14 | (condition :initform (make-condition-variable) :reader queue-condition) 15 | (front :initform nil :accessor queue-front) 16 | (back :initform nil :accessor queue-back))) 17 | 18 | (defstruct node val next prev) 19 | 20 | (defun make-queue () 21 | "Create an empty queue." 22 | (make-instance 'queue)) 23 | 24 | (defun queue-push (elt queue) 25 | "Push an element onto the back of a queue." 
26 | (with-lock-held ((queue-lock queue)) 27 | (let* ((back (queue-back queue)) 28 | (node (make-node :val elt :prev back :next nil))) 29 | (setf (queue-back queue) node) 30 | (cond (back (setf (node-next back) node)) 31 | (t (setf (queue-front queue) node)))) 32 | (condition-notify (queue-condition queue))) 33 | (values)) 34 | 35 | (defun queue-do-pop (queue) 36 | (let ((node (queue-front queue))) 37 | (if node 38 | (progn 39 | (setf (queue-front queue) (node-next node)) 40 | (unless (node-next node) 41 | (setf (queue-back queue) nil)) 42 | (values (node-val node) t)) 43 | (values nil nil)))) 44 | 45 | (defun queue-pop (queue) 46 | "Pop an element from the front of a queue. Returns immediately, 47 | returning nil if the queue is empty, and a second value indicating 48 | whether anything was popped." 49 | (with-lock-held ((queue-lock queue)) 50 | (queue-do-pop queue))) 51 | 52 | (defun queue-wait (queue) 53 | "Pop an element from the front of a queue. Causes a blocking wait 54 | when no elements are available." 55 | (with-lock-held ((queue-lock queue)) 56 | (loop (multiple-value-bind (elt found) (queue-do-pop queue) 57 | (when found (return elt))) 58 | (condition-wait (queue-condition queue) (queue-lock queue))))) 59 | 60 | (defun queue-length (queue) 61 | "Find the length of a queue." 62 | (with-lock-held ((queue-lock queue)) 63 | (loop :for node := (queue-front queue) :then (node-next node) 64 | :for l :from 0 65 | :while node 66 | :finally (return l)))) 67 | 68 | (defun queue-empty-p (queue) 69 | "Test whether a queue is empty." 70 | (null (queue-front queue))) 71 | -------------------------------------------------------------------------------- /task.lisp: -------------------------------------------------------------------------------- 1 | (in-package :pcall) 2 | 3 | (defclass task () 4 | ((thunk :initarg :thunk :reader task-thunk) 5 | (error :initform nil :accessor task-error) 6 | (values :accessor task-values) 7 | (lock :initform (make-lock) :reader task-lock) 8 | (wait-locks :initform nil :accessor task-wait-locks) 9 | (status :initform :free :accessor task-status) 10 | (waiting :initform nil :accessor task-waiting)) 11 | (:documentation "A task is a piece of code that is scheduled to run 12 | in the thread pool.")) 13 | 14 | (defun with-locks (locks thunk) 15 | (if locks 16 | (with-lock-held ((car locks)) (with-locks (cdr locks) thunk)) 17 | (funcall thunk))) 18 | 19 | (defun execute-task (task) 20 | "Execute a task, and store the result or error in the task object. 21 | When a task's status is not :free, that means the joiner has already 22 | started executing it, so this thread should leave it alone." 23 | (handler-case 24 | (setf (task-values task) (multiple-value-list (funcall (task-thunk task)))) 25 | (error (e) (setf (task-error task) e))) 26 | (with-lock-held ((task-lock task)) 27 | (setf (task-status task) :done) 28 | (with-locks (task-wait-locks task) 29 | (lambda () (mapc #'condition-notify (task-waiting task)))))) 30 | 31 | (defun join (task) 32 | "Join a task, meaning stop execution of the current thread until the 33 | result of the task is available, and then return this result. When 34 | this is called on a task that no thread is currently working on, the 35 | current thread executes the task directly." 
36 | (let ((mine nil)) 37 | (with-lock-held ((task-lock task)) 38 | (ecase (task-status task) 39 | (:free (setf mine t (task-status task) :running)) 40 | (:running (let ((wait (make-condition-variable))) 41 | (push wait (task-waiting task)) 42 | (loop :until (eq (task-status task) :done) 43 | :do (condition-wait wait (task-lock task))))) 44 | (:done nil))) 45 | (when mine (execute-task task)) 46 | (if (task-error task) 47 | (error (task-error task)) 48 | (values-list (task-values task))))) 49 | 50 | (defun select-one (&rest tasks) 51 | "Returns the first task that can be joined without blocking." 52 | (let ((notifier (make-condition-variable)) 53 | (our-lock (make-lock))) 54 | (with-lock-held (our-lock) 55 | (dolist (task tasks) ;; ensure that tasks can't finish unless they have our lock 56 | (with-lock-held ((task-lock task)) 57 | (when (eq (task-status task) :done) 58 | (return-from select-one task)) 59 | (push notifier (task-waiting task)) 60 | (push our-lock (task-wait-locks task)))) 61 | (loop (condition-wait notifier our-lock) 62 | (dolist (task tasks) 63 | (when (done-p task) 64 | (return-from select-one task))))))) 65 | 66 | (defun done-p (task) 67 | (eq (task-status task) :running)) 68 | -------------------------------------------------------------------------------- /pool.lisp: -------------------------------------------------------------------------------- 1 | (in-package :pcall) 2 | 3 | (defstruct pool 4 | (threads ()) 5 | (size 3) 6 | (lock (make-lock)) 7 | (env nil) 8 | (queue (make-queue))) 9 | 10 | (defvar *pool* (make-pool)) 11 | 12 | (define-condition stop-running () 13 | ((at-once-p :initarg :at-once-p :reader stop-at-once-p))) 14 | 15 | (defun audit-thread-pool () 16 | "Make sure the thread pool holds the correct amount of live threads." 17 | (with-lock-held ((pool-lock *pool*)) 18 | (setf (pool-threads *pool*) (remove-if-not #'thread-alive-p (pool-threads *pool*))) 19 | (let ((threads (length (pool-threads *pool*))) 20 | (size (pool-size *pool*))) 21 | (cond ((< threads size) 22 | (dotimes (i (- size threads)) (spawn-thread *pool*))) 23 | ((> threads size) 24 | (loop :for i :from 0 :below (- threads size) 25 | :for thread :in (pool-threads *pool*) 26 | :do (stop-thread thread t))))))) 27 | 28 | (let ((counter 0)) 29 | (defun spawn-thread (pool) 30 | (flet ((run () 31 | (if (pool-env pool) 32 | (funcall (pool-env pool) (lambda () (worker-thread (pool-queue pool)))) 33 | (worker-thread (pool-queue pool))))) 34 | (push (make-thread #'run :name (format nil "pcall-worker-~a" (incf counter))) 35 | (pool-threads pool))))) 36 | 37 | (defun stop-thread (thread at-once-p) 38 | (when (thread-alive-p thread) 39 | (interrupt-thread thread (lambda () (signal 'stop-running :at-once-p at-once-p))))) 40 | 41 | (defun finish-tasks () 42 | (let (old-pool) 43 | (with-lock-held ((pool-lock *pool*)) 44 | (setf old-pool (pool-threads *pool*) 45 | (pool-threads *pool*) nil)) 46 | (dolist (th old-pool) (stop-thread th nil)) 47 | (loop :while (some #'thread-alive-p old-pool) 48 | :do (sleep .05)))) 49 | 50 | (defun thread-pool-size () 51 | (pool-size *pool*)) 52 | 53 | (defun (setf thread-pool-size) (size) 54 | (setf (pool-size *pool*) size) 55 | (audit-thread-pool)) 56 | 57 | (defun set-worker-environment (wrapper) 58 | (setf (pool-env *pool*) wrapper) 59 | (finish-tasks)) 60 | 61 | (defun worker-thread (queue) 62 | "The code running inside the pooled threads. Repeatedly tries to 63 | take a task from the queue, and handles it." 
64 | (let ((stop nil)) 65 | (flet ((stop-running (condition) 66 | (unless (eq stop :now) 67 | (setf stop (if (stop-at-once-p condition) :now :when-empty))))) 68 | (handler-bind ((stop-running #'stop-running)) 69 | (loop :until (or (eq stop :now) 70 | (and (eq stop :when-empty) (queue-empty-p queue))) 71 | :do (let ((task (handler-case (queue-wait queue) 72 | (stop-running (c) (stop-running c) nil)))) 73 | (when task 74 | (with-lock-held ((task-lock task)) 75 | (if (eq (task-status task) :free) 76 | (setf (task-status task) :running) 77 | (setf task nil)))) 78 | (when task (execute-task task)))))))) 79 | 80 | (defmacro with-local-thread-pool ((&key (size '(pool-size *pool*)) (on-unwind :wait) 81 | (worker-environment '(pool-env *pool*))) &body body) 82 | "Run body with a fresh thread pool. If on-unwind is :wait, it will 83 | wait for all tasks to finish before returning. If it is :leave, the 84 | form will return while threads are still working. If it is :stop 85 | or :destroy, the threads will be stopped at the end of the body. 86 | With :stop, they will first finish their current task (if any), 87 | with :destroy, they will be brutally destroyed and might leak 88 | resources, leave stuff in inconsistent state, etc." 89 | `(let ((*pool* (make-pool :size ,size :env ,worker-environment))) 90 | (unwind-protect (progn ,@body) 91 | ,(ecase on-unwind 92 | (:wait '(finish-tasks)) 93 | ((:leave :stop) `(dolist (th *thread-pool*) (stop-thread th ,(eq on-unwind :stop)))) 94 | (:destroy '(dolist (th *thread-pool*) (destroy-thread th))))))) 95 | -------------------------------------------------------------------------------- /tests.lisp: -------------------------------------------------------------------------------- 1 | (cl:defpackage :pcall-tests 2 | (:use :cl :fiveam :pcall :pcall-queue :bordeaux-threads)) 3 | 4 | (cl:in-package :pcall-tests) 5 | 6 | (def-suite :pcall) 7 | (in-suite :pcall) 8 | 9 | ;; Queue-related tests 10 | 11 | (test queue-sanity 12 | (let ((q (make-queue))) 13 | (queue-push 5 q) 14 | (queue-push 10 q) 15 | (is (= 2 (queue-length q))) 16 | (is (equal 5 (queue-pop q))) 17 | (multiple-value-bind (e f) (queue-pop q) 18 | (is (equal 10 e)) 19 | (is (eq t f))) 20 | (is (eq nil (nth-value 1 (queue-pop q)))))) 21 | 22 | (defun wait-until-queue-empty (q) 23 | (loop :until (queue-empty-p q) :do (sleep .01)) 24 | (sleep .05)) ; <- crummy, still a possible race condition 25 | 26 | (test queue-wait 27 | (let ((q (make-queue)) 28 | (out nil)) 29 | (make-thread (lambda () (push (queue-wait q) out))) 30 | (is (eq nil out)) 31 | (queue-push 10 q) 32 | (wait-until-queue-empty q) 33 | (is (equal '(10) out)))) 34 | 35 | (test queue-concurrency 36 | (let ((q (make-queue)) 37 | (l (make-lock)) 38 | (out nil)) 39 | (dotimes (i 1000) 40 | (queue-push i q)) 41 | (dotimes (i 10) 42 | (make-thread (lambda () 43 | (loop (multiple-value-bind (val found) (queue-pop q) 44 | (unless found (return)) 45 | (with-lock-held (l) (push val out)) 46 | (sleep .001)))))) 47 | (wait-until-queue-empty q) 48 | (is (= 1000 (length out))) 49 | (is (every (lambda (x) (member x out)) (loop :for i :from 0 :below 1000 :collect i))))) 50 | 51 | ;; PCall tests 52 | 53 | (defmacro with-thread-pool (&body body) 54 | `(unwind-protect (progn ,@body) 55 | (finish-tasks))) 56 | 57 | (test sanity 58 | (with-thread-pool 59 | (is (equal '(1 2 3) (join (pexec (list 1 2 3))))) 60 | (let ((task (pexec (+ 4 2)))) 61 | (sleep .01) 62 | (is (= 6 (join task)))))) 63 | 64 | (test stress 65 | (flet ((compute () 66 | (loop :for i :from 0 :below 
100000 67 | :sum (* i i)))) 68 | (with-thread-pool 69 | (let ((tasks (loop :for i :from 0 :below 1000 :collect (pcall #'compute))) 70 | (answer (compute))) 71 | (sleep .05) 72 | (is (every (lambda (tsk) (= (join tsk) answer)) tasks)))))) 73 | 74 | (test multi-join 75 | (with-thread-pool 76 | (let* ((task (pexec (sleep .1) :ok)) 77 | (joiners (loop :for i :from 0 :below 10 78 | :collect (pexec (join task))))) 79 | (sleep .01) 80 | (is (every (lambda (tsk) (eq (join tsk) :ok)) joiners))))) 81 | 82 | (test plet 83 | (with-thread-pool 84 | (plet ((x (list 1 2)) 85 | (y (list 3 4))) 86 | (sleep .01) 87 | (is (equal '(1 2 3 4) (append x y)))))) 88 | 89 | (test delayed-signal 90 | (with-thread-pool 91 | (let ((task (pexec (error "Wrong!")))) 92 | (sleep .01) 93 | (signals simple-error (join task))))) 94 | 95 | (test local-pool 96 | (let ((outer-size (thread-pool-size)) 97 | (switch :off)) 98 | (with-local-thread-pool (:size 5 :on-unwind :wait) 99 | (is (= 5 (thread-pool-size))) 100 | (pexec (sleep .2) (setf switch :on))) 101 | (is (eq :on switch)) 102 | (is (= outer-size (thread-pool-size))))) 103 | 104 | (defvar *x*) 105 | 106 | (test enviroment 107 | (let ((*x* :a)) 108 | (set-worker-environment (lambda (f) (let ((*x* :b)) (funcall f)))) 109 | (plet ((x *x*)) 110 | (sleep .01) 111 | (is (equal '(:a . :b) (cons *x* x)))) 112 | (set-worker-environment nil))) 113 | 114 | (test local-environment 115 | (let ((*x* :c)) 116 | (with-local-thread-pool (:worker-environment (lambda (f) (let ((*x* :d)) (funcall f)))) 117 | (plet ((x *x*)) 118 | (sleep .01) 119 | (is (equal '(:c . :d) (cons *x* x))))))) 120 | 121 | (test select-one-random 122 | (is (member (join (select-one (pexec (sleep (random 0.2)) 1) 123 | (pexec (sleep (random 0.2)) 2) 124 | (pexec (sleep (random 0.2)) 3))) '(1 2 3)))) 125 | 126 | (test select-one-always 127 | (is (= 2 (join (select-one (pexec (sleep 0.05) 1) (pexec 2)))))) 128 | 129 | (test select-one-all-tasks-done 130 | (flet ((make-done-task (val) 131 | (let ((task (pexec val))) 132 | (join task) 133 | task))) 134 | (is (= 1 (join (apply #'select-one (mapcar #'make-done-task '(1 2 3)))))))) 135 | 136 | (test select-one-error 137 | (signals simple-error (join (select-one (pexec (sleep 0.01) 1) 138 | (pexec (error "Error")))))) 139 | 140 | -------------------------------------------------------------------------------- /doc/background.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Idle cores to the left 6 | 7 | 8 | 9 | 10 | 11 |

12 | Idle cores to the left of me, race conditions to the right 13 |

14 |

15 | (or how I learned to stop worrying and embrace the deadlock) 16 |

17 | 18 |

19 | Topics: concurrency, multicore processors, Common Lisp
20 | Author: Marijn Haverbeke
21 | Date: June 5th 2008 22 |

23 | 24 |

I think that, over the past year, I've read some thirty 25 | articles that start with the solemn or anxious announcement that 26 | the future of programming will be multicore, and that one way or 27 | another, we will have to get used to it. I'll try not to write 28 | another one of those. In fact, it appears that multicore 29 | processors are also pretty good single-core processors, and most 30 | applications are, for the foreseeable future, comfortably 31 | single-threaded. And in a lot of cases, they should be. So is 32 | there any pressing reason to take notice of this new-fangled 33 | hardware?

34 | 35 |
[I]t looks more or less like the hardware designers 36 | have run out of ideas, and that they’re trying to pass the blame 37 | for the future demise of Moore’s Law to the software writers by 38 | giving us machines that work faster only on a few key 39 | benchmarks!
40 | 41 |

That is from no less than Donald Knuth, in a recent interview. Nonetheless, I have (belatedly, I'll admit) started to get excited about incorporating the kind of 'multi-threading for the sake of multi-threading' that multiple cores encourage into my programs. Why? This might not be a very convincing reason for the serious, result-oriented programmer, but parallel programming, it turns out, is very amusing. Sure, there are deadlock death-traps, race conditions, and a general, dangerous sense of non-determinism. But there is a logic to all of it, and a working parallel program can be a thing of beauty. That kind of beauty, and the complexity that tends to go with it, is what got me into this business in the first place. (It wasn't my love for hunching over keyboards or a deep dislike of sunlight, in any case.)

56 | 57 |

This infatuation started ― predictably enough ― 58 | with Erlang. Erlang is kind of hip, makes you think about 59 | concurrency in a whole new way, and is entirely dissatisfactory as 60 | a programming language. I won't go too deeply into that last 61 | point, since taking cheap shots at other people's work tends to 62 | hurt one's credibility, but the un-polished and limited feel of 63 | the language offended my delicate linguistic sensibilities, and 64 | prevented me from investing too deeply into it, with the result 65 | that I am still (also somewhat grudgingly, perhaps) doing most of 66 | my work in Common Lisp.

67 | 68 |

Common Lisp has no integrated, light-weight, multicore-friendly threads. In fact, it only recently started having widespread support for OS-level threads at all. This support comes, invariably, in the form of shared memory, locks, and semaphores (bordeaux-threads provides a pleasant portable wrapper). While these allow just about any kind of concurrent program to be written, they are fairly tedious and error-prone to work with. CL-MUPROC builds a message-passing system on top of them, which allows a more pleasant programming style. Every CL-MUPROC process is an OS thread though, so you won't want to create thousands of them.

81 | 82 |

Another approach for getting away from mutexes, one that has 83 | been generating a lot of noise recently, is software transactional 84 | memory. If that doesn't mean anything to you, I'd recommend 85 | watching this video of 86 | Simon Peyton Jones explaining the idea. I was surprised to find 87 | that there apparently exists a CL-STM (aren't we 89 | CLers creative when it comes to naming projects). Seems to have 90 | been inactive since early 2007, and as I understand it, STM is 91 | rather hard to get right, so I'm not sure I'd risk building 92 | something serious on this... but it appears to work. The 93 | Clojure people seem 94 | to be making good use of STM in any case.

95 | 96 |

On a related note, SBCL 97 | exports a symbol sb-ext:compare-and-swap, which can 98 | be used to do small-scale transactional tricks, and is often an 99 | order of magnitude faster than the equivalent locking solution. 100 | Here is a very simple concurrency-safe stack. The macro is a 101 | convenient wrapper that helps use compare-and-swap in 102 | a 'transactional' way.

103 | 104 |
105 | (defmacro concurrent-update (place var &body body)
106 |   `(flet ((action (,var) ,@body))
107 |      (let ((prev ,place))
108 |        (loop :until (eq (sb-ext:compare-and-swap ,place prev (action prev)) prev)
109 |              :do (setf prev ,place))
110 |        prev)))
111 | 
112 | (defun make-cstack (&rest elements)
113 |   (cons :stack elements))
114 | (defun push-cstack (element stack)
115 |   (concurrent-update (cdr stack) elts
116 |     (cons element elts))
117 |   (values))
118 | (defun pop-cstack (stack)
119 |   (car (concurrent-update (cdr stack) elts
120 |          (cdr elts))))
121 | 122 |
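For illustration, the stack above might be exercised like this (the *stack* name is just an example, and this only works on SBCL, since it relies on sb-ext:compare-and-swap):

(defvar *stack* (make-cstack))   ; fresh stack: (:stack)
(push-cstack 1 *stack*)          ; *stack* is now (:stack 1)
(push-cstack 2 *stack*)          ; *stack* is now (:stack 2 1)
(pop-cstack *stack*)             ; => 2, leaving (:stack 1)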

Stacks are represented as cons cells, since 123 | compare-and-swap can only be used on a limited set of 124 | 'places' (cars, cdrs, svrefs, symbol-values), and not on things 125 | like instance or struct slots. Writing a queue is only slightly 126 | more complicated. Making popping threads block when there are no 127 | elements available, on the other hand, is decidedly tricky. (I 128 | think I have a working implementation... it is much faster than 129 | the locking variant, but it is so complicated that I'm not 130 | convinced it is correct. If you can recommend any good books or 131 | papers on low-level concurrency techniques, please drop me an e-mail.)

133 | 134 |

In the book How to 135 | Write Parallel Programs, which is old (1992) but still 136 | relevant, the authors distinguish three models of concurrency: 137 | Message passing (as in Erlang), shared or distributed data 138 | structures (as in Java), and 'live' data structures, where 139 | computations turn into values after they finish running. These are 140 | all fundamentally equivalent, in that a program using one of them 141 | can always be transformed to use another, but some programs are 142 | much more natural when expressed in the right model. STM, which 143 | had not been invented at the time the book was written, would 144 | probably qualify as a special case of shared data structures.

145 | 146 |

One of these models, live data structures, has never been very 147 | popular. This might be related to the fact that they offer none of 148 | the control-flow advantages that the other models have ― 149 | you spawn a process, and then later you can read the value it 150 | produced, but this only buys you something when there is a direct 151 | advantage to computing values in parallel. On single-core 152 | machines, there isn't, but on multicores, this might be a very 153 | pleasant way to speed up some CPU-bound programs. On first seeing 154 | Haskell's par operator, which implements something 155 | like this, I was duly impressed. You just pass two computations to 156 | a little operator and, under the right circumstances, you get your 157 | results twice as fast.

158 | 159 |

Now, without further ado, I'd like to present my new library: 160 | PCall (for parallel call). It implements a thread pool and a few 161 | simple operators to get behaviour not unlike that of Haskell's 162 | par. As a silly example, it can get two seconds worth 163 | of sleep in only a single second!

164 | 165 |
166 | (time (let ((a (pexec (sleep 1)))
167 |             (b (pexec (sleep 1))))
168 |         (join a)
169 |         (join b)))
170 | 171 |

The code behind this is rather humble and simple, but it seems 172 | to work well. See the project page for more 174 | details and examples.

175 | 176 |

This kind of 'local parallelism' makes it relatively easy to 177 | verify that the tasks are using their data in a safe way ― 178 | you typically just make sure that no tasks write to the same 179 | location or read anything another task might write.

180 | 181 |

One issue that keeps coming up with this style of parallelism, 182 | an issue that the book I mentioned above also discusses, is 183 | finding the right granularity when splitting up tasks. Wrapping up 184 | a computation in a function, putting it into a queue, having some 185 | thread find it and execute it, and then reading out the results... 186 | that is obviously more work than just directly executing the 187 | computation. Thus, if the computation is small enough, you are 188 | making your program slower by parallelising it.

189 | 190 |

The solution suggested by the book is to provide a 'granularity knob' ― some setting that controls the size of the computation chunks that get delegated off to other processes. When mapping some function over a list, you could for example provide a way to control the number of elements each task takes care of, and vary that based on the number of available processors and the amount of computation the given function requires. In some cases, the granularity adjustment could be done automatically by profiling the running code. I guess that is usually more trouble than it is worth, though.
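As a rough sketch of such a knob, built on PCall's pexec and join: the function pmapcar-chunked and its chunk-size parameter are example names of my own, not part of any library, and chunk-size (a positive integer) is the granularity control.

;; A parallel map with a granularity knob: each task handles CHUNK-SIZE
;; elements. Larger chunks mean less task overhead, smaller chunks
;; spread the load more evenly over the worker threads.
(defun pmapcar-chunked (f list chunk-size)
  (let ((tasks (loop :for start :from 0 :below (length list) :by chunk-size
                     :collect (let ((chunk (subseq list start
                                                   (min (+ start chunk-size)
                                                        (length list)))))
                                (pexec (mapcar f chunk))))))
    ;; Join the chunk tasks in order and splice the results back together.
    (loop :for task :in tasks :append (join task))))

;; e.g. (pmapcar-chunked (lambda (x) (* x x)) (loop :for i :from 0 :below 100 :collect i) 10)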

200 | 201 |

There are also situations where different environments call for completely different approaches. Having a central lock on something works well when there are ten threads occasionally accessing it, but becomes a bottleneck when there are a thousand. Optimistic transactions that fail when another thread interferes with their data have the same problem: They fall apart when there are too many processes. On the other hand, complex schemes that minimise the amount of conflict between threads also carry an overhead, and are often counterproductive when there are only a handful of threads. This is a less pleasant side of concurrent programming: There often is no right solution. In sequential programs, you can weigh program complexity against efficiency, and often feel you've hit some sweet spot. In concurrent programs, the sweet spot on my dual-core system might be completely stupid on both an old single-core system and a flashy sixteen-core server.

217 | 218 |

Despite this, and the claims by various people that threading is 'just too hard', I rather suspect we'll be able to work it out. As in other complicated fields, it is mostly a matter of finding better abstractions. I hope PCall can provide an abstraction suitable for some problems (though, at this point, I'm not even a hundred percent convinced there are no race conditions left in the library... do help me test it).

226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /doc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | PCall 6 | 7 | 8 | 9 | 10 | 11 | 12 |

PCall

13 | 14 |

PCall, or parallel call, is a Common Lisp library intended to simplify 'result-oriented' parallelism. It uses a thread pool to concurrently run small computations without the overhead of spawning a new thread for each one. This makes it possible to exploit multiple cores without much extra fuss.

19 | 20 |

Note that there exists a fork of PCall, Eager 22 | Future, which is (at this time) more actively developed.

23 | 24 |

Contents

25 | 26 |
    27 |
  1. News
  2. License
  3. Download and installation
  4. Support and mailing lists
  5. Quickstart
  6. Reference
34 | 35 |

News

36 | 37 |

03-09-2009: Version 39 | 0.3: Release some changes that have been sitting in the 40 | repository for months now. select-one and worker environments have been 43 | added.

44 | 45 |

26-01-2009: Version 47 | 0.2: Since there suddenly is some increased attention for the 48 | library, I'm releasing the current code as 0.2. Still beta-ish, 49 | but seems to work. This version adds with-local-thread-pool.

51 | 52 |

06-06-2008: Version 54 | 0.1: The first release. Should be considered beta. Any testing 55 | and feedback is appreciated.

56 | 57 |

05-06-2008: Added a background article with some related 59 | thoughts.

60 | 61 |

License

62 | 63 |

PCall is released under a zlib-like license, which 64 | approximately means you can use the code in whatever way you like, 65 | except for passing it off as your own or releasing a modified 66 | version without indication that it is not the original. See the 67 | LICENSE file in the distribution.

68 | 69 |

Download and installation

70 | 71 |

PCall depends on bordeaux-threads, 73 | and on fiveam 75 | for the test suite.

76 | 77 |

The latest release of PCall can be downloaded from http://marijnhaverbeke.nl/pcall/pcall.tgz, 79 | or installed with asdf-install.

81 | 82 |

A git repository with the 83 | most recent changes can be checked out with:

84 | 85 |
> git clone http://marijnhaverbeke.nl/git/pcall
86 | 87 |

The code is also on github.

88 | 89 |

Support and mailing lists

90 | 91 |

Feel free to drop me an e-mail directly: Marijn Haverbeke. (There used 93 | to be a Google-group, but that got overrun by spammers, and I've 94 | closed it down.)

95 | 96 |

Quickstart

97 | 98 |

PCall is a rather simple library. There are only three basic 99 | concepts that you have to come to grips with:

  1. Tasks: computations handed to the library with pcall or pexec, to be run by the thread pool.
  2. Joining: join waits for a task's result (or runs the task itself if no worker has started it yet) and returns its values.
  3. The thread pool: the set of worker threads that execute queued tasks; its size can be adjusted with thread-pool-size.

Imagine that we have this wonderful algorithm for computing (once again) Fibonacci numbers:

124 | 125 |
126 | (defun fib (n)
127 |   (if (> n 2)
128 |       (+ (fib (- n 1)) (fib (- n 2)))
129 |       1))
130 | 
131 | (time (fib 40))
132 | 133 |

Depending on your machine, this might take some 2 to 10 134 | seconds. We don't have that kind of patience. You can see that 135 | this algorithm is entirely optimal, so our only option, it seems, 136 | is to use more hardware ― or make better use of the 137 | hardware we have:

138 | 139 |
140 | (time (let ((f39 (pexec (fib 39)))
141 |             (f38 (pexec (fib 38))))
142 |         (+ (join f39) (join f38))))
143 | 144 |

On my 2-core machine, that speeds things up by about a third 145 | ― which makes sense, since computing fib(39) is about twice 146 | as much work as computing fib(38). A nicer way to write the same 147 | thing is:

148 | 149 |
150 | (time (plet ((f39 (fib 39))
151 |              (f38 (fib 38)))
152 |         (+ f39 f38)))
153 | 154 |

plet takes care of the 155 | wrapping and joining in cases like this. Why do we need the let 156 | anyway? You could try this:

157 | 158 |
159 | (time (+ (join (pexec (fib 39))) (join (pexec (fib 38)))))
160 | 161 |

... but that won't buy you anything. The tasks have to both be 162 | created before you join the first one, or the second task will not 163 | exist when the first one runs, and thus won't be able to run 164 | concurrently with it.

165 | 166 |

You might be tempted to write something like this:

167 | 168 |
169 | (defun pfib (n)
170 |   (if (> n 2)
171 |       (plet ((a (pfib (- n 1)))
172 |              (b (pfib (- n 2))))
173 |         (+ a b))
174 |       1))
175 | 176 |

... but don't. There is some overhead associated with creating 177 | and executing tasks, and for a function like naive-fibonacci, 178 | which recurses a zillion times even for small inputs, this will 179 | radically slow your algorithm down. A parallel mapping function, 180 | as shown below, works great for mapping a relatively heavy 181 | function over a list of limited length, but is no use for mapping 182 | 1+ over a million elements.

183 | 184 |
185 | (defun pmapcar (f list)
186 |   (let ((result (mapcar (lambda (n) (pexec (funcall f n))) list)))
187 |     (map-into result #'join result)))
188 | 
189 | (defvar *numbers* (loop :for i :from 0 :below 30 :collect i))
190 | (time (mapcar #'fib *numbers*))
191 | (time (pmapcar #'fib *numbers*))
192 | 193 |

Note that joining tasks is not required. When you do not care 194 | about the result of a computation, you can just spawn the task and 195 | leave it at that.

196 | 197 |

As a final note, PCall can also be used when a program is not 198 | CPU-bound, but needs to do some tasks that are hampered by other 199 | bottlenecks (network latency, disk speed). If they can be executed 200 | in parallel, you can have the thread pool run them. In the 201 | following example, the second version runs three times faster on 202 | my machine:

203 | 204 |
205 | (defvar *urls* '("http://marijnhaverbeke.nl/pcall" "http://common-lisp.net"
206 |                  "http://eloquentjavascript.net" "http://xkcd.com"))
207 | 
208 | (time (mapc 'drakma:http-request *urls*))
209 | (time (mapc 'join (mapcar (lambda (url) (pexec (drakma:http-request url))) *urls*)))
210 | 211 |

In some applications, doing multiple database queries at the 212 | same time could really help. You might need to increase the size 213 | of the thread pool in such a situation, since some threads will be 214 | sitting idle, waiting on a socket.
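For instance, before firing off a batch of such I/O-bound tasks you could enlarge the pool (thread-pool-size is setf-able, see the reference below); the 25 here is just an arbitrary example value:

(setf (thread-pool-size) 25)   ; extra workers, since most will be blocked on sockets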

215 | 216 |

Reference

217 | 218 |

219 | function 220 | pcall (thunk) 221 |
→ task 222 |

223 | 224 |

Create a task that will call the given 225 | argumentless function.

226 | 227 |

228 | macro 229 | pexec (&body body) 230 |
→ task 231 |

232 | 233 |

A shorthand for (pcall 234 | (lambda () ...)).

235 | 236 |

237 | macro 238 | plet ((bindings) &body body) 239 |

240 | 241 |

Follows the behaviour of let, but 242 | wraps every bound value into a pexec form, and automatically adds 244 | join calls around uses of the 245 | bound variables.

246 | 247 |

248 | function 249 | join (task) 250 |
→ result 251 |

252 | 253 |

Waits for the given task to finish, and then 254 | returns any values the task produced. When executing the task 255 | raised an uncaught error, this error will be raised when joining 256 | the task. (Note that this re-raising might cause problems with 257 | condition handlers, which might not be active in the worker 258 | threads. If you are using handlers, take extra care, and look at 259 | set-worker-environment.) 261 | When, at the moment join is called, the task has not 262 | been assigned to any thread, the joining thread will execute the 263 | task itself. (Note that this makes the dynamic environment in 264 | which the task runs unpredictable.) A task may be joined multiple 265 | times. Subsequent joins will again return the values, without 266 | re-executing the task.
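For that reason, the usual pattern is to handle such errors at the join point, in the calling thread, where your handlers are active; a minimal sketch:

(let ((task (pexec (parse-integer "not a number"))))
  (handler-case (join task)
    (error (e) (format t "task failed: ~a~%" e))))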

267 | 268 |

269 | function 270 | select-one (&rest tasks) 271 |

272 | 273 |

Waits until at least one of the given tasks is 274 | finished and then returns that task.
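A small illustration, where the sleeps stand in for work of unpredictable duration:

(join (select-one (pexec (sleep .5) :slow)
                  (pexec (sleep .1) :fast)))
;; => :FAST (whichever of the two tasks happens to finish first)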

275 | 276 |

277 | function 278 | done-p (task) 279 |
→ boolean 280 |

281 | 282 |

Tests whether a task has been executed.

283 | 284 |

285 | function 286 | thread-pool-size () 287 |

288 | 289 |

Returns the current size of the thread pool. Also 290 | supports setf to change this size. The default value 291 | is 3.

292 | 293 |

294 | function 295 | set-worker-environment (wrapper) 296 |

297 | 298 |

This can be used to make dynamic variables or bound handlers available in the worker threads. wrapper should be either nil, for no wrapping, or a function that, given a function argument, calls its argument in the new environment. Works best with local thread pools.
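A sketch of such a wrapper; *context* is a made-up special variable, not something PCall provides:

(defvar *context* :outside)

;; Workers now run every task with *context* rebound to :inside.
(set-worker-environment (lambda (f) (let ((*context* :inside)) (funcall f))))

(let ((task (pexec *context*)))
  (sleep .1)      ; give a worker thread a chance to pick the task up
  (join task))    ; => :INSIDE
(set-worker-environment nil)   ; remove the wrapper again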

304 | 305 |

306 | function 307 | finish-tasks () 308 |

309 | 310 |

Takes the current threads out of the pool, waits 311 | for the task queue to empty, and then for the threads to finish 312 | executing any tasks they might be busy with. This is intended to 313 | be called when shutting down ― killing the threads might 314 | cause some tasks to be aborted, which could result in data loss. 315 | If you join every task you create, this should not be necessary. 316 | Note that if you call this function while other threads are still 317 | creating tasks, it might never return.

318 | 319 |

320 | macro 321 | with-local-thread-pool (&key size on-unwind worker-environment) 322 |

323 | 324 |

Run body with a fresh thread pool. If 325 | on-unwind is :wait (the default), the 326 | form will wait for all local tasks to finish before returning. If 327 | it is :leave, it will return while threads are still 328 | working. Given :stop or :destroy, the 329 | threads will be stopped at the end of the body. With 330 | :stop, they will first finish their current task (if 331 | any), with :destroy, they will be brutally destroyed 332 | and might leak resources, leave stuff in inconsistent state, 333 | etcetera. worker-environment can be used to give the 334 | workers in the local pool a specific dynamic environment.
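For example, where process-file and *files* are placeholders for your own work function and data:

(with-local-thread-pool (:size 10 :on-unwind :wait)
  (dolist (file *files*)
    (pexec (process-file file))))
;; With :wait, the form returns only after every queued task has finished.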

336 | 337 | 338 | 339 | 340 | --------------------------------------------------------------------------------