├── .gitignore
├── LICENSE
├── README.md
├── RELEASES.md
├── benchmarks
    ├── bamap.clj
    ├── barmap.clj
    ├── bcount.clj
    ├── bdistinct.clj
    ├── bfrequencies.clj
    ├── bidentity.clj
    ├── binterleave.clj
    ├── bminmax.clj
    ├── bpmap.clj
    ├── bslurp.clj
    ├── bsort.clj
    ├── bupdate_vals.clj
    ├── groupby.clj
    └── plet.clj
├── examples
    └── lastfm
    │   ├── .gitignore
    │   ├── README.md
    │   ├── project.clj
    │   ├── src
    │       └── lastfm
    │       │   ├── version00.clj
    │       │   └── version01.clj
    │   └── test
    │       └── lastfm
    │           └── core_test.clj
├── java
    └── clojure
    │   └── lang
    │       └── Get.java
├── project.clj
├── src
    └── parallel
    │   ├── core.clj
    │   ├── foldmap.clj
    │   ├── fork_middle.clj
    │   ├── map_combine.clj
    │   ├── merge_sort.clj
    │   └── xf.clj
└── test
    ├── core_test.clj
    ├── parallel
        └── merge_sort_test.clj
    ├── words
    └── xf_test.clj


/.gitignore:
--------------------------------------------------------------------------------
 1 | /target
 2 | /classes
 3 | /checkouts
 4 | pom.xml
 5 | pom.xml.asc
 6 | *.jar
 7 | *.class
 8 | /.lein-*
 9 | /.nrepl-port
10 | .hgignore
11 | .hg/
12 | playground
13 | doc
14 | appcds.cache
15 | appcds.classlist
16 | .java-version
17 | .idea
18 | *.iml
19 | .DS_Store
20 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
  2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
  3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
  4 | 
  5 | 1. DEFINITIONS
  6 | 
  7 | "Contribution" means:
  8 | 
  9 | a) in the case of the initial Contributor, the initial code and
 10 | documentation distributed under this Agreement, and
 11 | 
 12 | b) in the case of each subsequent Contributor:
 13 | 
 14 | i) changes to the Program, and
 15 | 
 16 | ii) additions to the Program;
 17 | 
 18 | where such changes and/or additions to the Program originate from and are
 19 | distributed by that particular Contributor. A Contribution 'originates' from
 20 | a Contributor if it was added to the Program by such Contributor itself or
 21 | anyone acting on such Contributor's behalf. Contributions do not include
 22 | additions to the Program which: (i) are separate modules of software
 23 | distributed in conjunction with the Program under their own license
 24 | agreement, and (ii) are not derivative works of the Program.
 25 | 
 26 | "Contributor" means any person or entity that distributes the Program.
 27 | 
 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
 29 | necessarily infringed by the use or sale of its Contribution alone or when
 30 | combined with the Program.
 31 | 
 32 | "Program" means the Contributions distributed in accordance with this
 33 | Agreement.
 34 | 
 35 | "Recipient" means anyone who receives the Program under this Agreement,
 36 | including all Contributors.
 37 | 
 38 | 2. GRANT OF RIGHTS
 39 | 
 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
 42 | reproduce, prepare derivative works of, publicly display, publicly perform,
 43 | distribute and sublicense the Contribution of such Contributor, if any, and
 44 | such derivative works, in source code and object code form.
 45 | 
 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
 49 | transfer the Contribution of such Contributor, if any, in source code and
 50 | object code form.  This patent license shall apply to the combination of the
 51 | Contribution and the Program if, at the time the Contribution is added by the
 52 | Contributor, such addition of the Contribution causes such combination to be
 53 | covered by the Licensed Patents. The patent license shall not apply to any
 54 | other combinations which include the Contribution. No hardware per se is
 55 | licensed hereunder.
 56 | 
 57 | c) Recipient understands that although each Contributor grants the licenses
 58 | to its Contributions set forth herein, no assurances are provided by any
 59 | Contributor that the Program does not infringe the patent or other
 60 | intellectual property rights of any other entity. Each Contributor disclaims
 61 | any liability to Recipient for claims brought by any other entity based on
 62 | infringement of intellectual property rights or otherwise. As a condition to
 63 | exercising the rights and licenses granted hereunder, each Recipient hereby
 64 | assumes sole responsibility to secure any other intellectual property rights
 65 | needed, if any. For example, if a third party patent license is required to
 66 | allow Recipient to distribute the Program, it is Recipient's responsibility
 67 | to acquire that license before distributing the Program.
 68 | 
 69 | d) Each Contributor represents that to its knowledge it has sufficient
 70 | copyright rights in its Contribution, if any, to grant the copyright license
 71 | set forth in this Agreement.
 72 | 
 73 | 3. REQUIREMENTS
 74 | 
 75 | A Contributor may choose to distribute the Program in object code form under
 76 | its own license agreement, provided that:
 77 | 
 78 | a) it complies with the terms and conditions of this Agreement; and
 79 | 
 80 | b) its license agreement:
 81 | 
 82 | i) effectively disclaims on behalf of all Contributors all warranties and
 83 | conditions, express and implied, including warranties or conditions of title
 84 | and non-infringement, and implied warranties or conditions of merchantability
 85 | and fitness for a particular purpose;
 86 | 
 87 | ii) effectively excludes on behalf of all Contributors all liability for
 88 | damages, including direct, indirect, special, incidental and consequential
 89 | damages, such as lost profits;
 90 | 
 91 | iii) states that any provisions which differ from this Agreement are offered
 92 | by that Contributor alone and not by any other party; and
 93 | 
 94 | iv) states that source code for the Program is available from such
 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
 96 | or through a medium customarily used for software exchange.
 97 | 
 98 | When the Program is made available in source code form:
 99 | 
100 | a) it must be made available under this Agreement; and
101 | 
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 | 
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 | 
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 | 
111 | 4. COMMERCIAL DISTRIBUTION
112 | 
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering.  The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor to control, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 | 
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 | 
144 | 5. NO WARRANTY
145 | 
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement , including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 | 
157 | 6. DISCLAIMER OF LIABILITY
158 | 
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 | 
168 | 7. GENERAL
169 | 
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 | 
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 | 
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 | 
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 | 
210 | This Agreement is governed by the laws of the State of New York and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## parallel
  2 | 
  3 | `parallel` is a library of parallel-enabled (not distributed) Clojure functions. Some are designed to emulate existing functions in the standard library, sometimes as drop-in replacements, sometimes with very different semantics. If you see a function listed below in your project or if you use transducers, chances are you can speed up your application using parallel. As with any library claiming to speed up your code, there are too many variables influencing performance that cannot be tested in isolation: **please keep a benchmarking tool ready and measure each of the changes**.
  4 | 
  5 | The library also provides additional transducers (not necessarily for parallel use) and supporting utilities. The functions documented below have been tested and benchmarked and are ready to use. Please report any issue or ideas for improvements, I'll be happy to help.
  6 | 
  7 | Functions and macros:
  8 | 
  9 | | Name                                    | Description
 10 | |-----------------------------------------| ---------------------------------------------------
 11 | | [`p/let`](#plet)                        | Parallel `let` bindings.
 12 | | [`p/args`](#pargs)                      | Invoke a function with arguments evaluated in parallel.
 13 | | [`p/and`](#pand)                        | Like `core/and` with expressions evaluated in parallel.
 14 | | [`p/or`](#por)                          | Like `core/or` with arguments evaluated in parallel.
 15 | | [`p/do`](#pdo)                          | Parallel `do` forms.
 16 | | [`p/doto`](#pdoto)                      | Parallel `doto` forms.
 17 | | [`p/slurp`](#pslurp)                    | Parallel slurping files.
 18 | | [`p/count`](#pcount)                    | Transducer-aware parallel `core/count`.
 19 | | [`p/frequencies`](#pfrequencies)        | Parallel `core/frequencies`
 20 | | [`p/group-by`](#pgroup-by)              | Parallel `core/group-by`
 21 | | [`p/update-vals`](#pupdate-vals)        | Updates values in a map in parallel.
 22 | | [`p/sort`](#psort)                      | Parallel `core/sort`.
 23 | | [`p/external-sort`](#pexternal-sort)    | Memory efficient, file-based, parallel merge-sort.
 24 | | [`p/fold`](#pfold-pxrf-and-pfolder)     | Transducer-aware `r/fold`.
 25 | | [`p/transduce`](#ptransduce)            | Parallel version of `transduce` based on `p/fold`.
 26 | | [`p/process-folder`](#pprocess-folder)  | Process the files in a folder in parallel.
 27 | | [`p/min` and `p/max`](#pmin-and-pmax)   | Parallel `core/min` and `core/max` functions.
 28 | | [`p/distinct`](#pdistinct)              | Parallel version of `core/distinct`
 29 | | [`p/pmap`](#ppmap)                      | Like `core/pmap` but running on given n of threads.
 30 | | [`p/amap`](#pamap)                      | Parallel array transformation.
 31 | | [`p/armap`](#parmap)                    | Parallel array reversal with transformation.
 32 | 
 33 | Transducers:
 34 | 
 35 | | Name                                    | Description
 36 | |-----------------------------------------| ---------------------------------------------------
 37 | | [`xf/interleave`](#xfinterleave)        | Like `core/interleave`, transducer version.
 38 | | [`xf/pmap`](#xfpmap)                    | Like `core/pmap`, transducer version.
 39 | | [`xf/identity`](#xfidentity)            | Alternative identity transducer to `core/identity`
 40 | 
 41 | In the pipeline:
 42 | 
 43 | | Name                                    | Description
 44 | |-----------------------------------------| ---------------------------------------------------
 45 | | `p/split-by`                            | Splitting transducer based on contiguous elements.
 46 | 
 47 | ### How to use the library
 48 | 
 49 | All functions are available through the `parallel.core` namespace. Pure transducers are in `parallel.xf`.  Add the following to your project dependencies:
 50 | 
 51 | ```clojure
 52 | [parallel "0.10"]
 53 | ```
 54 | 
 55 | Require at the REPL with:
 56 | 
 57 | ```clojure
 58 | (require '[parallel.core :as p]
 59 |          '[parallel.xf :as xf])
 60 | ```
 61 | 
 62 | Or in your namespace as:
 63 | 
 64 | ```clojure
 65 | (ns mynamespace
 66 |   (:require [parallel.core :as p]
 67 |             [parallel.xf :as xf]))
 68 | ```
 69 | 
 70 | ## API Docs
 71 | 
 72 | ### `p/let`
 73 | 
 74 | `p/let` works like `clojure.core/let` but evaluates its binding expressions in parallel:
 75 | 
 76 | ```clojure
 77 | (time
 78 |   (p/let [a (Thread/sleep 1000)
 79 |           b (Thread/sleep 1000)
 80 |           c (Thread/sleep 1000)]
 81 |     (= a b c)))
 82 | ;; "Elapsed time: 1002.519823 msecs"
 83 | ```
 84 | 
 85 | Don't use `p/let` if:
 86 | 
 87 | * The expressions have dependencies. `p/let` cannot resolve cross references between expressions and will throw an exception.
 88 | * The expressions are trivial. In this case the thread orchestration outweighs the benefits of executing in parallel. Good expressions to parallelize are for example independent networked API calls, file system calls or other non trivial computations.
 89 | 
 90 | ### `p/args`
 91 | 
 92 | `p/args` calls a function with arguments that are evaluated in parallel:
 93 | 
 94 | ```clojure
 95 | (time
 96 |   (p/args +
 97 |     (do (Thread/sleep 1000) 1)
 98 |     (do (Thread/sleep 1000) 2)
 99 |     (do (Thread/sleep 1000) 3)))
100 | ;; "Elapsed time: 1000.613791 msecs"
101 | ;; 6
102 | ```
103 | 
104 | `p/args` improves performance when the arguments to a function require some kind of non-trivial evaluation, for example if they have side effects requiring input/output. Restrictions to the use of `p/args` include any dependency between the arguments (which can happen as a side effect of their evaluation).
105 | 
106 | ### `p/and`
107 | 
108 | `p/and` works similarly to `core/and` but the expressions in the body are evaluated in parallel:
109 | 
110 | ```clojure
111 | (let [x 11]
112 |   (if (p/and
113 |         (odd? x)
114 |         (number? x)
115 |         (even? (count (str x))))
116 |     "true"
117 |     "false"))
118 | ;; true
119 | ```
120 | 
121 | There are a couple of important differences to consider:
122 | 
123 | * Differently from `core/and`, `p/and` does not short-circuit. This means that even if the first expression is false, `p/and` is going to evaluate all other expressions. `p/and` could result in worse performance if the first expression is most certainly false and it evaluates faster than the others:
124 | 
125 | ```clojure
126 | (time (let [x 11]
127 |   (if (and
128 |         (do (Thread/sleep 100) (even? x))
129 |         (do (Thread/sleep 1000) (number? x))
130 |         (do (Thread/sleep 1000) (even? (count (str x)))))
131 |     "true"
132 |     "false")))
133 | ;; "Elapsed time: 104.481973 msecs"
134 | ;; false
135 | 
136 | (time (let [x 11]
137 |   (if (p/and
138 |         (do (Thread/sleep 100) (even? x))
139 |         (do (Thread/sleep 1000) (number? x))
140 |         (do (Thread/sleep 1000) (even? (count (str x)))))
141 |     "true"
142 |     "false")))
143 | ;; "Elapsed time: 1001.878881 msecs"
144 | ;; false
145 | ```
146 | 
147 | * You should not rely on evaluation order of the expressions. The following idiomatic use of `core/and` for instance, might not work with `p/and`:
148 | 
149 | ```clojure
150 | (require '[clojure.java.io :as io])
151 | 
152 | (def file 1)
153 | 
154 | (p/and
155 |   (instance? java.io.File file)
156 |   (.exists file)
157 |   (.isDirectory file))
158 | ;; IllegalArgumentException No matching field found: exists for class java.lang.Long
159 | ```
160 | 
161 | ### `p/or`
162 | 
163 | `p/or` works similarly to `core/or` but the expressions in the body are evaluated in parallel:
164 | 
165 | ```clojure
166 | (let [x 11]
167 |   (if (p/or
168 |         (odd? x)
169 |         (string? x)
170 |         (double? x))
171 |     "true"
172 |     "false"))
173 | ;; true
174 | ```
175 | 
176 | Like `p/and`, `p/or` does not short-circuit, potentially taking more time than sequential `core/or`. This happens for example when the first expression is true but `p/or` cannot return until all other expressions are evaluated. Also similarly to `p/and`, `p/or` should not be used if there is an implicit order between expressions (for example `(let [string-length (p/or s (.length s))] string-length)` might result in `NullPointerException` if `s` is `nil`).
177 | 
178 | ### `p/do`
179 | 
180 | `p/do` works like normal `core/do` to encapsulate evaluation of multiple forms (presumably for side effects). It returns the last evaluated form:
181 | 
182 | ```clojure
183 | (def counter (atom 0))
184 | 
185 | (p/do
186 |   (swap! counter inc)
187 |   (println "counter incremented" @counter)
188 |   (map inc (range 1000))
189 |   (println "more stuff to do"))
190 | ;; counter incremented more stuff to do0
191 | ```
192 | 
193 | As demonstrated by the output, there is no guarantee about the order in which the forms are evaluated, so the use of `p/do` should be restricted to side effecting forms without an ordering requirement.
194 | 
195 | ### `p/doto`
196 | 
197 | Similarly to `core/doto`, `p/doto` threads an expression into the following forms (presumably for side effects) and returns the initial expression at the end. Threading through forms happens in parallel so:
198 | 
199 | * Side effects can happen in any order (the forms following the threaded expression).
200 | * If side effects are against a shared collection, the collection has to be thread safe (or one of the Clojure ref types).
201 | 
202 | The following example uses a `ConcurrentLinkedQueue` to add items concurrently:
203 | 
204 | ```clojure
205 | (import 'java.util.concurrent.ConcurrentLinkedQueue)
206 | 
207 | (p/doto
208 |   (ConcurrentLinkedQueue.)
209 |   (.add 1)
210 |   (.add 2))
211 | 
212 | ;; #object[java.util.concurrent.ConcurrentLinkedQueue 0x5fbc5177 "[1, 2]"]
213 | ```
214 | 
215 | Like other parallel macros, `p/doto` is effective when the performed operations are not trivial. The following expression, for example, executes in 1/4 of the time:
216 | 
217 | ```clojure
218 | (require '[clojure.xml :as xml])
219 | (import 'java.util.concurrent.ConcurrentHashMap)
220 | 
221 | (defn heavy-stuff [n] (Thread/sleep 1000) n)
222 | 
223 | (time
224 |   (doto (ConcurrentHashMap.)
225 |     (.put :a (heavy-stuff 1))
226 |     (.put :b (heavy-stuff 2))
227 |     (.put :c (heavy-stuff 3))
228 |     (.put :d (heavy-stuff 4))))
229 | ;; "Elapsed time: 4009.656834 msecs"
230 | ;; {:d 4, :b 2, :c 3, :a 1}
231 | 
232 | (time
233 |   (p/doto (ConcurrentHashMap.)
234 |     (.put :a (heavy-stuff 1))
235 |     (.put :b (heavy-stuff 2))
236 |     (.put :c (heavy-stuff 3))
237 |     (.put :d (heavy-stuff 4))))
238 | ;; "Elapsed time: 1006.563343 msecs"
239 | ;; {:d 4, :b 2, :c 3, :a 1}
240 | ```
241 | 
242 | ### `p/slurp`
243 | 
244 | `p/slurp` loads the content of a file in parallel. Compared to `core/slurp`, it only supports local files (no URLs or other input streams):
245 | 
246 | ```clojure
247 | (import 'java.io.File)
248 | (take 10 (.split (p/slurp (File. "test/words")) "\n"))
249 | ;; ("A" "a" "aa" "aal" "aalii" "aam" "Aani" "aardvark" "aardwolf" "Aaron")
250 | ```
251 | 
252 | `p/slurp` offers a way to interpret the loaded byte array differently from a string, for example to load an entry from a zipped file:
253 | 
254 | ```clojure
255 | (import '[java.io File ByteArrayInputStream]
256 |         '[java.util.zip ZipFile ZipInputStream])
257 | 
258 | (defn filenames-in-zip [bytes]
259 |   (let [z (ZipInputStream. (ByteArrayInputStream. bytes))]
260 |     (.getName (.getNextEntry z))))
261 | 
262 | (p/slurp (File. "target/parallel-0.7.jar") filenames-in-zip)
263 | ;; "META-INF/MANIFEST.MF"
264 | ```
265 | 
266 | When `*mutable*` is set to `true` the transformation step is skipped altogether and the raw byte array is returned:
267 | 
268 | ```clojure
269 | (import 'java.io.File)
270 | (binding [p/*mutable* true] (p/slurp (File. "test/words")))
271 | ;; #object["[B" 0x705709a4 "[B@705709a4"]
272 | ```
273 | 
274 | `p/slurp` performs better than `core/slurp` on large files (> 500K). Here's for example a comparison benchmark to load a 2.4MB file:
275 | 
276 | ```clojure
277 | (import 'java.io.File)
278 | (let [fname "test/words" file (File. fname)] (bench (slurp file))) ; 8.84ms
279 | (let [fname "test/words" file (File. fname)] (bench (p/slurp file))) ; 2.87ms
280 | ```
281 | 
282 | ### `p/count`
283 | 
284 | `p/count` can speed up counting on collections when non-trivial transformations are involved. It takes a composition of transducers and the collection to count. It applies the transducers to coll and produces a count of the resulting elements (in this case 1.2M):
285 | 
286 | ```clojure
287 | (def xform
288 |   (comp
289 |     (filter odd?)
290 |     (map inc)
291 |     (map #(mod % 50))
292 |     (mapcat range)
293 |     (map str)))
294 | 
295 | (p/count xform (range 100000))
296 | ;; 1200000
297 | ```
298 | 
299 | `p/count` is eager, transforming "coll" into a vector if it's not already a foldable collection (vectors, maps or reducers/Cat objects). Use `p/count` only if the transformations alter the number of elements in the input collection, otherwise `core/count` would likely outperform `p/count` in most situations. `p/count` supports stateful transducers. In this example we are dropping 6250 elements from each of the 32 chunks (32 is the default number of chunks `p/count` operates on, so 32x6250=200k elements will be removed):
300 | 
301 | ```clojure
302 | (def xform
303 |   (comp
304 |     (filter odd?)
305 |     (map inc)
306 |     (map #(mod % 50))
307 |     (mapcat range)
308 |     (map str)
309 |     (drop 6250)))
310 | 
311 | (p/count xform (range 100000))
312 | ;; 1000000
313 | ```
314 | 
315 | See [bcount.clj](https://github.com/reborg/parallel/blob/master/benchmarks/bcount.clj) for additional benchmarks.
316 | 
317 | ### `p/frequencies`
318 | 
319 | Like `core/frequencies`, but executes in parallel. It takes an optional composition of transducers (stateless or stateful) to apply to coll before the frequency is calculated. It does not support nil values. The following is the typical word frequencies example:
320 | 
321 | ```clojure
322 | (require '[clojure.string :as s])
323 | (def war-and-peace "http://www.gutenberg.org/files/2600/2600-0.txt")
324 | (def book (slurp war-and-peace))
325 | (let [freqs (p/frequencies (re-seq #"\S+" book) (map s/lower-case))]
326 |   (take 5 (sort-by last > freqs)))
327 | ;; (["the" 34258] ["and" 21396] ["to" 16500] ["of" 14904] ["a" 10388])
328 | 
329 | (quick-bench (p/frequencies (re-seq #"\S+" book) (map s/lower-case))) ;; 111ms
330 | (quick-bench (frequencies (map s/lower-case (re-seq #"\S+" book)))) ;; 349ms
331 | ```
332 | 
333 | ### `p/group-by`
334 | 
335 | `p/group-by` is similar to `clojure.core/group-by`, but the grouping happens in parallel. Here's an example about searching most frequent anagrams in a large text:
336 | 
337 | ```clojure
338 | (require '[clojure.string :as s])
339 | 
340 | (def war-and-peace
341 |   (s/split (slurp "http://gutenberg.org/files/2600/2600-0.txt") #"\W+"))
342 | 
343 | (def anagrams
344 |   (p/group-by sort war-and-peace (map s/lower-case)))
345 | 
346 | (->> anagrams
347 |   (map (comp distinct second))
348 |   (sort-by count >)
349 |   first)
350 | 
351 | ;; ("stop" "post" "spot" "pots" "tops")
352 | ```
353 | 
354 | `p/group-by` takes an optional list of transducers to apply to the items in coll before generating the groups. It has been used in the example to lower-case each word. Note that differently from `clojure.core/group-by`:
355 | 
356 | * The order of the items in each value vector can change between runs (this can be a problem or not, depending on your use case).
357 | * It does not support nil values in the input collection.
358 | 
359 | `p/group-by` is generally 2x-5x faster than `clojure.core/group-by`:
360 | 
361 | ```clojure
362 | (require '[criterium.core :refer [quick-bench]])
363 | 
364 | ;; with transformation (which boosts p/group-by even further)
365 | (quick-bench (group-by sort (map s/lower-case war-and-peace)))   ;; 957ms
366 | (quick-bench (p/group-by sort war-and-peace (map s/lower-case))) ;; 259ms
367 | 
368 | ;; fair comparison without transformations
369 | (quick-bench (group-by sort war-and-peace))   ;; 936ms
370 | (quick-bench (p/group-by sort war-and-peace)) ;; 239ms
371 | ```
372 | 
373 | A further boost can be achieved by avoiding conversion back to immutable data structures:
374 | 
375 | ```clojure
376 | (quick-bench
377 |   (binding [p/*mutable* true]
378 |     (p/group-by sort war-and-peace (map s/lower-case)))) ;; 168ms
379 | ```
380 | 
381 | When invoked with `p/*mutable*`, `p/group-by` returns a Java ConcurrentHashMap with ConcurrentLinkedQueue as values. They are both easy to deal with from Clojure.
382 | 
383 | ```clojure
384 | (def anagrams
385 |   (binding [p/*mutable* true]
386 |     (p/group-by sort war-and-peace (map s/lower-case))))
387 | 
388 | (distinct (into [] (.get anagrams (sort "stop"))))
389 | ;; ("post" "spot" "stop" "tops" "pots")
390 | ```
391 | 
392 | ### `p/update-vals`
393 | 
394 | `p/update-vals` updates the values of a map in parallel. With reference to the [`p/group-by`](#pgroup-by) example of the most frequent anagrams, we could apply the step to calculate the distinct words for each key on the map in parallel ("anagrams" is the map resulting from applying `p/group-by` to a large text):
395 | 
396 | ```clojure
397 | 
398 | (first anagrams)
399 | ;; [(\a \d \e \e \h \t) ["heated" "heated" "heated" "heated" "heated" "heated" "heated" "heated"]]
400 | 
401 | (first (p/update-vals anagrams distinct))
402 | ;; [(\a \d \e \e \h \t) ("heated")]
403 | ```
404 | 
405 | Like other functions in the library, `p/update-vals` speed can be improved by removing the conversion back into an immutable data structure:
406 | 
407 | ```clojure
408 | (time (dorun (p/update-vals anagrams distinct)))
409 | ;; "Elapsed time: 18.462031 msecs"
410 | (time (dorun (binding [p/*mutable* true] (p/update-vals anagrams distinct))))
411 | ;; "Elapsed time: 9.908815 msecs"
412 | ```
413 | 
414 | In the context of the previous computation of the most frequent anagrams, we could operate using a combination of mutable `p/sort` and `p/update-vals` and compare it with the previous solution:
415 | 
416 | ```clojure
417 | (import '[java.util Map$Entry])
418 | 
419 | (defn cmp [^Map$Entry e1 ^Map$Entry e2]
420 |   (> (count (.getValue e1))
421 |      (count (.getValue e2))))
422 | 
423 | (time (binding [p/*mutable* true]
424 |   (let [a (p/sort cmp (p/update-vals anagrams distinct))]
425 |     (.getValue ^Map$Entry (aget ^objects a 0)))))
426 | ;; "Elapsed time: 128.422734 msecs"
427 | ;; ("post" "spot" "stop" "tops" "pots")
428 | 
429 | (time (->> anagrams
430 |   (map (comp distinct second))
431 |   (sort-by count >)
432 |   first))
433 | ;; "Elapsed time: 251.277616 msecs"
434 | ;; ("post" "spot" "stop" "tops" "pots")
435 | ```
436 | 
437 | The mutable version is roughly 50% faster, but it's verbose and requires type annotations.
438 | 
439 | ### `p/sort`
440 | 
441 | `p/sort` is a parallel merge-sort implementation that works by splitting the input into smaller chunks which are ordered sequentially below a certain threshold (8192 is the default). `p/sort` offers similar features to `clojure.core/sort` and it's not lazy. The following uses the default comparator `<` to sort a collection of 2M numbers (and by comparison doing the same with `core/sort`):
442 | 
443 | ```clojure
444 | (let [coll (shuffle (range 2e6))] (time (dorun (p/sort coll))))
445 | ;; "Elapsed time: 1335.769356 msecs"
446 | 
447 | (let [coll (shuffle (range 2e6))] (time (dorun (sort coll))))
448 | ;; "Elapsed time: 2098.151666 msecs"
449 | ```
450 | 
451 | Or reverse sorting strings:
452 | 
453 | ```clojure
454 | (let [coll (shuffle (map str (range 2e6)))] (time (dorun (p/sort #(compare %2 %1) coll))))
455 | ;; "Elapsed time: 1954.57439 msecs"
456 | 
457 | (let [coll (shuffle (map str (range 2e6)))] (time (dorun (sort #(compare %2 %1) coll))))
458 | ;; "Elapsed time: 2540.829781 msecs"
459 | ```
460 | 
461 | `p/sort` is implemented on top of mutable native arrays, converting both input/output into immutable vectors as a default. There are a few ways to speed-up sorting with `p/sort`:
462 | 
463 | * Vector inputs are preferable to sequences.
464 | * Shave additional milliseconds by using the raw array output, by enclosing `p/sort` in a binding like `(binding [p/*mutable* true] (p/sort coll))`. `p/sort` returns an object array in this case, instead of a vector.
465 | * If you happen to be working natively with arrays, be sure to feed `p/sort` with the native array to avoid conversion.
466 | 
467 | In order of increasing speed:
468 | 
469 | ```clojure
470 | (require '[criterium.core :refer [quick-bench]])
471 | 
472 | (let [c (into [] (shuffle (range 2e6)))
473 |       a (to-array c)]
474 |   (quick-bench (p/sort c))
475 |   (quick-bench (binding [p/*mutable* true] (p/sort c)))
476 |   (quick-bench (binding [p/*mutable* true] (p/sort a))))
477 | 
478 | ;; 1185ms
479 | ;; 1052ms
480 | ;; 46ms
481 | ```
482 | 
483 | As you can see, the conversion into array is responsible for most of the sorting time. If you are lucky to work with arrays, sorting is one order of magnitude faster and more memory efficient.
484 | 
485 | ### `p/external-sort`
486 | 
487 | `merge-sort` is a well known example of a parallelizable sorting algorithm. There was a time when machines were forced to use tapes to process large amounts of data, loading smaller chunks into memory one at a time. The `merge-sort` sorting algorithm, for example, is suitable for this kind of processing. Today we have bigger memories, but also big-data. File-based merge-sort implementations could still be useful to work with external storage such as S3.
488 | 
489 | `p/external-sort` can be used to fetch large amounts of data from slow storage, order them by some attribute and consume only the part that is actually needed (e.g. "find the top most" kind of problems). A working but not very useful `p/external-sort` example is the following:
490 | 
491 | ```clojure
492 | (let [fetchf (fn [ids] ids)
493 |       v (into [] (reverse (range 10000)))]
494 |   (take 5 (p/external-sort 1000 compare fetchf v)))
495 | ;; [0 1 2 3 4]
496 | ```
497 | 
498 | `p/external-sort` accepts a vector "v" of IDs as input. The unique identifiers are used to fetch data objects from remote storage. "fetchf" is the way to tell `p/external-sort` how to retrieve the object given a group of ids (in this example, fetching the id has been simulated by returning the ids themselves). Input IDs are split into chunks not bigger than "1000" (with 512 the default).
499 | 
500 | Once all data is retrieved for a chunk, it is sorted using the given comparator ("core/compare" by default) and the result is stored in a temporary file on disk. The above example creates 16 files, as the number of files needs to be a power of two and `(/ 10000 16) = 625` is the first split that generates chunks smaller than 1000 in size.
501 | 
502 | Once all chunks are retrieved and sorted on disk, the result is available as a lazy sequence, which is the type returned by `p/external-sort`. If the lazy sequence is not fully consumed, the related files are never loaded in memory. In the example above, some files are never loaded in memory. A call to `last` (instead of `take 5`) would load all files. If the head of the sequence is not retained, the content of the files is garbage collected from memory accordingly.
503 | 
504 | The next example verifies these assumptions with a large dataset of around 20M played songs. Each song contains userid, track title, time it was played and other information. We want to print the most recently played songs but we can't load the 2.5 GB file in memory to sort it without blowing the heap (on a normal laptop).
505 | 
506 | You can download the dataset from this page: http://www.dtic.upf.edu/~ocelma/MusicRecommendationDataset/lastfm-1K.html. We are then going to split the file on disk into smaller but still unordered files with:
507 | 
508 | ```bash
509 | split -a 4 -l 18702 userid-timestamp-artid-artname-traid-traname.tsv
510 | num=0; for i in *; do mv "$i" "$num"; ((num++)); done
511 | ```
512 | 
513 | The big tsv file contains exactly 19150868 played songs. We pick a split size that is the closest to `(Math/pow 2 10)`, which creates 1024 files of a reasonable size (18702 lines) plus a last one containing the remaining 20. We also rename the files using an incremental number, so we can quickly know which file contains what. You should now have a folder with 1025 files named 0 to 1024 (no extension). Here's how to use `p/external-sort` to retrieve the top 3 most recently played tracks:
514 | 
515 | ```clojure
516 | (require '[clojure.string :as s])
517 | 
518 | (let [lines 19150868
519 |       chunk-size 18702
520 |       chunk-folder "../resources/lastfm-dataset-1K/splits/"
521 |       fetchf (fn [ids]
522 |                (->> (quot (last ids) chunk-size)
523 |                     (str chunk-folder)
524 |                     slurp
525 |                     s/split-lines
526 |                     (mapv #(s/split % #"\t"))))]
527 |   (pprint (time (take 3
528 |     (p/external-sort
529 |       chunk-size
530 |       #(compare (nth %2 1) (nth %1 1))
531 |       fetchf
532 |       (range lines))))))
533 | ```
534 | 
535 | The degree of parallelism with which "fetchf" is invoked is equal to the number of cores (physical or virtual) available on the running system. If the collection of IDs is not a vector, it is converted into one. `fetchf` is provided with a group of ids and we can calculate which file contains those IDs because we know their name and size. The custom comparator uses the timestamp found at index 1 after each line is split by tabs (the format of the file). After about 1 minute (my machine) we get:
536 | 
537 | ```
538 | (["user_000762"
539 |   "2013-09-29T18:32:04Z"
540 |   "d8354b38-e942-4c89-ba93-29323432abc3"
541 |   "30 Seconds To Mars"
542 |   "b5b40605-5a81-46b4-a51e-2b1ec7964c1a"
543 |   "A Beautiful Lie"]
544 |  ["user_000762"
545 |   "2009-05-02T02:01:47Z"
546 |   "91f7a868-d82e-4cfb-9cd9-a2ffd7faac25"
547 |   "The Cab"
548 |   "7ede8578-bf9c-4e68-a060-56924202cdf0"
549 |   "This City Is Contagious"]
550 |  ["user_000762"
551 |   "2009-05-02T01:58:09Z"
552 |   "91f7a868-d82e-4cfb-9cd9-a2ffd7faac25"
553 |   "The Cab"
554 |   "14298942-7452-444f-9fb7-3199464957d6"
555 |   "Can You Keep A Secret?"])
556 | ```
557 | 
558 | By taking more results instead of just the top 3, more files will need to be loaded into memory. If you don't hold on to the head of the sequence, you can access any other part of the ordered sequence, including the last element, without running out of memory (about 2 minutes later on my machine).
559 | 
560 | ### `p/fold`, `p/xrf` and `p/folder`
561 | 
562 | `p/fold` is modeled similar to `clojure.core.reducers/fold` function, the entry point into the Clojure reduce-combine (Java fork-join) parallel computation framework. It can be used with transducers like you would with normal `r/fold`:
563 | 
564 | ```clojure
565 | (def v (vec (range 1000)))
566 | (p/fold + ((comp (map inc) (filter odd?)) +) v)
567 | ;; 250000
568 | ```
569 | 
570 | And exactly like with normal `r/fold` this would give you inconsistent results when a stateful transducer like `(drop 1)` is introduced:
571 | 
572 | ```clojure
573 | (distinct (for [i (range 1000)]
574 |   (p/fold + ((comp (map inc) (drop 1) (filter odd?)) +) v)))
575 | ;; (249999 249498 249499)
576 | ```
577 | 
578 | This is what `p/xrf` is designed for. `p/xrf` is a wrapping utility that hides the way the transducers are combined with the reducing function. More importantly, it takes care of the potential presence of stateful transducers in the chain (like `drop`, `take`, `partition` and so on).
579 | 
580 | ```clojure
581 | (distinct (for [i (range 1000)]
582 |   (p/fold (p/xrf + (map inc) (drop 1) (filter odd?)) v)))
583 | ;; (242240)
584 | ```
585 | 
586 | `p/xrf` makes sure that stateful transducer state is allocated at each chunk instead of each thread (the "chunk" is the portion of the initial collection that is not subject to any further splitting). This is a drastic departure from the semantic of the same transducers when used sequentially on the whole input. The first practical implication is that operations like `take`, `drop`, `partition` etc. are isolated in their own chunk and don't see each other state (for example, `(drop 1)` would remove the first element from each chunk, not just the first element from the whole input). The second consequence is that the result is now dependent (consistently) on the number of chunks.
587 | 
588 | To enable easier design of parallel algorithms, you can pass `p/fold` a number "n" of desired chunks for the parallel computation (n has to be a power of 2 and it defaults to 32). **Note the difference: with `(r/fold)` the computation is chunk-size driven by "n", the desired chunk size (default to 512). With `(p/fold)` the computation is chunk-number driven by "n", the number of desired chunks to compute in parallel**:
589 | 
590 | ```clojure
591 | (p/fold 4 + (p/xrf + (map inc) (drop 1) (filter odd?)) v)
592 | ;; 248496
593 | ```
594 | 
595 | Assuming there are 4 cores available, the example above executes on 4 parallel threads. Let's dissect it chunk by chunk:
596 | 
597 | * We are asking `(p/fold)` to create 4 chunks of the initial vector "v" of 1000 elements. Each chunk ends up having 250 items.
598 | * The content of each chunk can be expressed by the following ranges (the actual type is a subvec not a range but the content is the same): `(range 0 250)`, `(range 250 500)`, `(range 500 750)`, `(range 750 1000)`
599 | * Transducers transform each chunk (composition reads backward like normal transducers): `(filter odd? (drop 1 (map inc (range 0 250))))`, `(filter odd? (drop 1 (map inc (range 250 500))))`, `(filter odd? (drop 1 (map inc (range 500 750))))`, `(filter odd? (drop 1 (map inc (range 750 1000))))`
600 | * The reducing function "+" is applied on the items on each chunk: 15624, 46624, 77624, 108624
601 | * The combining function is again "+", resulting in the final sums: (+ (+ 15624 46624) (+ 77624 108624)) which is 248496.
602 | 
603 | It can be tricky for arbitrary collection sizes to see what is the best strategy in terms of chunk size or number. The utility function `p/show-chunks` can be used to predict the splitting for a parallel calculation. `p/fold` parameters can be adjusted accordingly. Here's what happens if you have a vector of 9629 items and you'd like 8 chunks to be created. Some of them will be bigger, others will be smaller:
604 | 
605 | ```clojure
606 | (p/show-chunks (vec (range 9629)) 8)
607 | ;; (1203 1204 1203 1204 1203 1204 1204 1204)
608 | ```
609 | 
610 | `p/fold` also allows transducers on hash-maps, not just vectors. A hash-map can be folded with transducers (in parallel) like this:
611 | 
612 | ```clojure
613 | (require '[clojure.core.reducers :refer [monoid]])
614 | (def input (zipmap (range 10000) (range 10000)))
615 | 
616 | (def output
617 |  (p/fold
618 |   (monoid merge (constantly {}))
619 |   (p/xrf conj
620 |     (filter (fn [[k v]] (even? k)))
621 |     (map (fn [[k v]] [k (inc v)]))
622 |     (map (fn [[k v]] [(str k) v])))
623 |   input))
624 | (output "664")
625 | ;; 665
626 | ```
627 | 
628 | The single argument for transducers is a vector pair containing a key and a value. In this case each transducer returns another pair to build another map (but that's not required).
629 | 
630 | Caveats and known problems:
631 | 
632 | * Stateful transducers like `dedupe` and `distinct`, which operate correctly at the chunk level, can bring back duplicates once combined back into the final result. Keep that in mind if absolute uniqueness is a requirement: you might need an additional step outside `p/fold` to ensure final elimination of duplicates. I'm thinking what else can be done to avoid the problem in the meanwhile.
633 | 
634 | ### `p/transduce`
635 | 
636 | `p/transduce` is a parallel version of the same function present in core:
637 | 
638 | ```clojure
639 | (p/transduce (comp (filter odd?) (map inc)) + (vec (range 1000)))
640 | ;; 250500
641 | ```
642 | 
643 | Similarly to `p/fold`, you can use stateful transducers with `p/transduce`. When you do, it's better to design your computation around the number of chunks that are processed in parallel. `p/transduce` accepts the number of desired chunks and an additional "combinef" to know how to merge chunks back together.
644 | 
645 | The example below takes 1000 items and operates in 4 parallel chunks of 250 each, dropping 240 items each chunk, and partitioning the remaining 10 into groups of 5. The results from each parallel thread is combined back with `into`:
646 | 
647 | ```clojure
648 | (p/transduce 4 (comp (drop 240) (partition-all 5)) conj into (vec (range 1000)))
649 | ;; [[240 241 242 243 244]
650 | ;;  [245 246 247 248 249]
651 | ;;  [490 491 492 493 494]
652 | ;;  [495 496 497 498 499]
653 | ;;  [740 741 742 743 744]
654 | ;;  [745 746 747 748 749]
655 | ;;  [990 991 992 993 994]
656 | ;;  [995 996 997 998 999]]
657 | ```
658 | 
659 | The equivalent operation attempted on `reducers/fold` would give inconsistent results (the result is different each run or throws an exception because the state in stateful transducers is shared across concurrent threads):
660 | 
661 | ```clojure
662 | (require '[clojure.core.reducers :as r])
663 | 
664 | (r/fold
665 |   250
666 |   (r/monoid into conj)
667 |   ((comp (drop 240) (partition-all 5)) conj)
668 |   (vec (range 1000)))
669 | ;; Sometimes ArrayOutOfBound, sometimes a bunch of random partitions.
670 | ```
671 | 
672 | ### `p/process-folder`
673 | 
674 | `p/process-folder` applies a composition of transducers to all files in a folder in parallel. The first transducer in the pipeline should expect a line of text. You can use something like `split -l 10000 -a 4 <filename> segment-` to split a large file into many smaller ones of 10k lines each. After you move them into a folder (please be sure it contains only the files that need processing) you're good to go, for example:
675 | 
676 | ```clojure
677 | (p/process-folder
678 |   "folder-name-as-string"
679 |   (comp (map s/trim)
680 |         (remove s/blank?)
681 |         (map #(s/split % #"\,"))
682 |         (map peek)))
683 | ```
684 | 
685 | The snippet above takes the last value for each line of each CSV file in a folder. `p/process-folder` is eager: if the files are many or lines are big, there is nothing `p/process-folder` can do to avoid out of memory. Try to compose your transducers so they process and aggregate data as needed returning a result that can fit into memory.
686 | 
687 | ### `p/min` and `p/max`
688 | 
689 | `p/min` and `p/max` find the minimum or maximum in a vector of numbers in parallel (the input collection is converted into a vector if it's not already):
690 | 
691 | ```clojure
692 | (let [c (shuffle (conj (range 100000) -9))]
693 |   (p/min c))
694 | ;; -9
695 | ```
696 | 
697 | They also allow any combination of transducers (stateless or stateful) to be passed in as arguments:
698 | 
699 | ```clojure
700 | (let [c (into [] (range 100000))]
701 |   (p/min c
702 |     (map dec)
703 |     (drop 20)
704 |     (partition-all 30)
705 |     (map last)
706 |     (filter odd?))) ;; 3173
707 | ```
708 | 
709 | `p/min` and `p/max` outperform sequential `core/min` and `core/max` starting at 10k items and up (depending on hardware configuration). For a 4 cores machine, the speed increase is roughly 50%:
710 | 
711 | ```clojure
712 | (require '[criterium.core :refer [bench]])
713 | (require '[parallel.core :as p])
714 | 
715 | (def 1M (shuffle (range 1000000)))
716 | 
717 | (bench (reduce min 1M)) ;; 9.963971 ms
718 | (bench (p/min 1M))      ;; 5.474384 ms
719 | 
720 | (bench (transduce (comp (map inc) (filter odd?)) min ##Inf 1M)) ;; 22.701385 ms
721 | (bench (p/min 1M (map inc) (filter odd?)))                      ;; 12.085497 ms
722 | ```
723 | 
724 | ### `p/distinct`
725 | 
726 | `p/distinct` returns a sequence of the distinct items in "coll":
727 | 
728 | ```clojure
729 | (let [c (apply concat (repeat 20 (range 100)))]
730 |   (take 10 (p/distinct c)))
731 | ;; (0 1 2 3 4 5 6 7 8 9)
732 | ```
733 | 
734 | The sequence is not-lazy and can return in any order. We can see this by supplying a transducer list (without using `comp`) to change from integers to keywords:
735 | 
736 | ```clojure
737 | (let [c (apply concat (repeat 20 (range 100)))]
738 |   (take 10 (p/distinct c (map str) (map keyword))))
739 | ;; (:59 :16 :39 :47 :28 :58 :36 :15 :25 :18)
740 | ```
741 | 
742 | `p/distinct` does not support `nil`, which needs to be removed (you can pass `(remove nil?)` as a transducer to the argument list). Performance of `p/distinct` is quite good on both small and large collections:
743 | 
744 | ```clojure
745 | (require '[criterium.core :refer [quick-bench]])
746 | 
747 | (let [small (apply concat (repeat 20 (range 100)))
748 |       large (apply concat (repeat 200 (range 10000)))]
749 |   (quick-bench (p/distinct small))
750 |   (quick-bench (p/distinct large)))
751 | ;; Execution time mean : 160.949448 µs
752 | ;; Execution time mean : 77.772233 ms
753 | 
754 | (let [small (apply concat (repeat 20 (range 100)))
755 |       large (apply concat (repeat 200 (range 10000)))]
756 |   (quick-bench (doall (distinct small)))
757 |   (quick-bench (doall (distinct large))))
758 | ;; Execution time mean : 565.503835 µs
759 | ;; Execution time mean : 862.702828 ms
760 | ```
761 | 
762 | You can additionally increase `p/distinct` speed by using a vector input and forcing mutable output (in this case `p/distinct` returns a `java.util.Set`):
763 | 
764 | 
765 | ```clojure
766 | (let [large (into [] (apply concat (repeat 200 (range 10000))))]
767 |   (quick-bench (binding [p/*mutable* true] (p/distinct large))))
768 | ;; Execution time mean : 37.703288 ms
769 | ```
770 | 
771 | ### `p/pmap`
772 | 
773 | `p/pmap` has a similar interface as `core/pmap`:
774 | 
775 | ```clojure
776 | (p/pmap inc (range 10))
777 | ;; [1 3 2 6 4 5 7 8 10 9]
778 | ```
779 | 
780 | But as you can see the output is a vector of results in any order. Additionally `p/pmap` differs from `core/pmap` in the following:
781 | 
782 | * It executes on n parallel threads (default 100) independently from the input collection chunk size or the number of available cores.
783 | * It is not lazy.
784 | * It does not support multiple collections as input.
785 | 
786 | `p/pmap` is useful when you want to control the amount of parallelism executing the same task over a collection of inputs. If you are making requests to a highly scalable service, for example, you could take advantage of the higher level of parallelism of `p/pmap` compared to `core/pmap` throwing up to 100 (or more) threads at the problem (instead of `core/pmap` which is bound to the chunk size 32, plus the number of cores, plus 2). To change the number of threads, you can use the optional "n" parameter, for example setting it to 200 threads. In the following example, up to 200 threads are working on "heavyf":
787 | 
788 | ```clojure
789 | (defn heavyf [x] (Thread/sleep 500) (inc x))
790 | 
791 | (time (count (p/pmap heavyf (range 1000) 200)))
792 | ;; "Elapsed time: 2552.601996 msecs"
793 | 
794 | (time (count (pmap heavyf (range 1000))))
795 | ;; "Elapsed time: 16115.643296 msecs"
796 | ```
797 | 
798 | ### `p/amap`
799 | 
800 | `p/amap` is a parallel version of `core/amap`. It takes an array of objects and a transformation "f" and it mutates the input to produce the transformed version of the output:
801 | 
802 | ```clojure
803 | 
804 | (def c (range 2e6))
805 | (defn f [x] (if (zero? (rem x 2)) (* 0.3 x) (Math/sqrt x)))
806 | 
807 | (let [a (to-array c)] (time (p/amap f a)))
808 | ;; "Elapsed time: 34.955138 msecs"
809 | 
810 | (let [^objects a (to-array c)] (time (amap a idx ret (f (aget a idx)))))
811 | ;; "Elapsed time: 53.058256 msecs"
812 | ```
813 | 
814 | `p/amap` uses the fork-join framework to update the array in parallel and it performs better than sequential for non-trivial transformations, otherwise the thread orchestration dominates the computational cost. You can optionally pass in a "threshold" which indicates how small the chunk of computation should be before going sequential, otherwise the number is chosen to be `(/ alength (* 2 ncores))`.
815 | 
816 | ### `p/armap`
817 | 
818 | `p/armap` is similar to `p/amap` but it also inverts the array. It takes an array of objects and a transformation "f" and it mutates the input to produce the transformed-reverse version of the output.
819 | 
820 | ```clojure
821 | (let [a (object-array [0 9 8 2 0 9 2 2 90 1 2])]
822 |   (p/armap (comp keyword str) a)
823 |   (into [] a))
824 | ;; [:2 :1 :90 :2 :2 :9 :0 :2 :8 :9 :0]
825 | ```
826 | 
827 | `p/armap` performs better than sequential for non-trivial transformations, otherwise the thread orchestration dominates the computational cost. Here's for example a reverse-complement of some random DNA strand:
828 | 
829 | ```clojure
830 | (require '[criterium.core :refer [quick-bench]])
831 | 
832 | (defn random-dna [n] (repeatedly n #(rand-nth [\a \c \g \t])))
833 | (def compl {\a \t \t \a \c \g \g \c})
834 | 
835 | (defn armap
836 |   "A fair sequential comparison"
837 |   [f ^objects a]
838 |   (loop [i 0]
839 |     (when (< i (quot (alength a) 2))
840 |       (let [tmp (f (aget a i))
841 |             j (- (alength a) i 1)]
842 |         (aset a i (f (aget a j)))
843 |         (aset a j tmp))
844 |       (recur (unchecked-inc i)))))
845 | 
846 | (let [a (to-array (random-dna 1e6))]
847 |   (quick-bench (p/armap compl a)))
848 | ;; "Elapsed time: 39.195143 msecs"
849 | 
850 | (let [a (to-array (random-dna 1e6))]
851 |   (quick-bench (armap compl a)))
852 | ;; "Elapsed time: 70.286622 msecs"
853 | ```
854 | 
855 | You can optionally pass in a "threshold" which indicates how small the chunk of computation should be before going sequential, otherwise the number is chosen to be `(/ alength (* 2 ncores))`.
856 | 
857 | ### `xf/interleave`
858 | 
859 | Like `clojure.core/interleave` but in transducer version. When `xf/interleave` is instantiated, it takes a "filler" collection. The items from the collection are used to interleave the other items coming from the main transducing sequence:
860 | 
861 | ```clojure
862 | (sequence
863 |   (comp
864 |     (map inc)
865 |     (xf/interleave [100 101 102 103 104 105])
866 |     (filter odd?)
867 |     (map str))
868 |   [3 6 9 12 15 18 21 24 37 30])
869 | ;; ("7" "101" "13" "103" "19" "105")
870 | ```
871 | 
872 | The main transducing process runs as long as there are items in the filler sequence (those starting at 100 in the example). You can provide an infinite sequence to be sure all results are interleaved:
873 | 
874 | ```clojure
875 | (sequence
876 |   (comp
877 |     (map inc)
878 |     (xf/interleave (range))
879 |     (filter odd?)
880 |     (map str))
881 |   [3 6 9 12 15 18 21 24 37 30])
882 | ;; ("7" "1" "13" "3" "19" "5" "25" "7" "31" "9")
883 | ```
884 | 
885 | ### `xf/pmap`
886 | 
887 | `xf/pmap` is a transducer version of `core/pmap`. When added to a transducer chain, it works like the `clojure.core/map` transducer applying the function "f" to all the items passing through the transducer. Different from `clojure.core/map`, `xf/pmap` processes a fixed number of items in parallel (competing for the actual number of physical cores). So if you have 12 cores and you're transducing a Clojure collection (big majority of them have a chunk size of 32), then you can achieve a max of 12+32+2 threads working in parallel:
888 | 
889 | ```clojure
890 | (defn heavyf [x] (Thread/sleep 1000) (inc x))
891 | 
892 | (time (transduce (comp (map heavyf) (filter odd?)) + (range 10)))
893 | ;; 10025ms
894 | (time (transduce (comp (xf/pmap heavyf) (filter odd?)) + (range 10)))
895 | ;; 1006ms
896 | ```
897 | 
898 | `xf/pmap` has similar limitations to `clojure.core/pmap`. It works great when "f" is non trivial and the average elapsed of "f" is uniform across the input. If one `(f item)` takes much more than the others, the current N-chunk is kept busy with parallelism=1 before moving to the next chunk, wasting resources. Use `xf/pmap` if your transducing transformation is reasonably big and complex. Apart from `transduce` you can use it with `sequence`:
899 | 
900 | ```clojure
901 | (time (doall (pmap heavyf (range 10))))
902 | ;; "Elapsed time: 1005.330409 msecs"
903 | 
904 | (time (doall (sequence (xf/pmap heavyf) (range 10))))
905 | ;; "Elapsed time: 1002.868326 msecs"
906 | ```
907 | 
908 | ### `xf/identity`
909 | 
910 | `xf/identity` works similarly to `(map identity)` or just `identity` as identity transducer:
911 | 
912 | 
913 | ```clojure
914 | (sequence (map identity) (range 10))
915 | (sequence clojure.core/identity (range 10))
916 | (sequence xf/identity (range 10))
917 | ;; All printing (0 1 2 3 4 5 6 7 8 9)
918 | ```
919 | 
920 | The identity transducer works as a placeholder for those cases in which a transformation is not requested, for example:
921 | 
922 | ```clojure
923 | (def config false)
924 | 
925 | (defn build-massive-xform []
926 |   (when config
927 |     (comp (map inc) (filter odd?))))
928 | 
929 | (sequence (or (build-massive-xform) identity) (range 5))
930 | ;; (0 1 2 3 4)
931 | ```
932 | 
933 | `core/identity` works fine as a transducer in most cases, except when it comes to multiple inputs, for which it requires a definition of what "identity" means. We could for example agree that if you want to use `core/identity` with multiple inputs you need to use it in pair with another transducer, for example `(map list)`:
934 | 
935 | ```clojure
936 | (sequence (or (build-massive-xform) identity) (range 5) (range 5))
937 | ;; Throws exception
938 | 
939 | (sequence (or (build-massive-xform) (comp (map list) identity)) (range 5) (range 5))
940 | ;; ((0 0) (1 1) (2 2) (3 3) (4 4))
941 | ```
942 | 
943 | `xf/identity` is a simple transducer that takes care of this case, assuming "identity" means "wrap around" in case of multiple inputs:
944 | 
945 | ```clojure
946 | (sequence (or (build-massive-xform) xf/identity) (range 5))
947 | ;; (0 1 2 3 4)
948 | 
949 | (sequence (or (build-massive-xform) xf/identity) (range 5) (range 5))
950 | ;; ((0 0) (1 1) (2 2) (3 3) (4 4))
951 | 
952 | (sequence (or (build-massive-xform) xf/identity) (range 5) (range 5) (range 5))
953 | ;; ((0 0 0) (1 1 1) (2 2 2) (3 3 3) (4 4 4))
954 | ```
955 | 
956 | Compared to `(comp (map list) identity)`, the custom `xf/identity` transducer also has a positive effect on performance:
957 | 
958 | ```clojure
959 | (let [items (range 10000)
960 |       xform (comp (map list) identity)]
961 |   (quick-bench
962 |     (dorun
963 |       (sequence xform items items))))
964 | ;; 4.09ms
965 | 
966 | (let [items (range 10000)]
967 |   (quick-bench
968 |     (dorun
969 |       (sequence xf/identity items items))))
970 | ;; 2.67ms
971 | ```
972 | 
973 | ## Development
974 | 
975 | There are no dependencies other than Java and Clojure.
976 | 
977 | * `lein test` to run the test suite.
978 | 
979 | #### misc todo
980 | 
981 | * [ ] `p/fold` Enable extend to (thread-safe) Java collections
982 | * [ ] `p/fold` Enable extend on Cat objects
983 | * [ ] `p/fold` operates on a group of keys for hash-maps.
984 | * [ ] A foldable reader of some sort for large files.
985 | 
986 | ## License
987 | 
988 | Copyright © 2018 Renzo Borgatti @reborg http://reborg.net
989 | Distributed under the Eclipse Public License either version 1.0 or (at your option) any later version.
990 | 


--------------------------------------------------------------------------------
/RELEASES.md:
--------------------------------------------------------------------------------
 1 | ## Release History
 2 | 
 3 | ### 0.11 (WIP)
 4 | 
 5 | ### 0.10
 6 | 
 7 | * Removed the benchmark ns from source paths causing trouble in the jar (thanks @benedekfazekas)
 8 | 
 9 | ### 0.9
10 | 
11 | * `p/frequencies` does not need a special keyfn.
12 | * Added `p/pmap`
13 | * Added `p/args`, `p/or` and `p/and`.
14 | 
15 | ### 0.8
16 | 
17 | * Using `p/transduce` to implement frequencies.
18 | * Added ./examples
19 | 
20 | ### 0.7
21 | 
22 | * Fix bug in `p/armap` for the sequential case.
23 | * Fixed missing laziness in external-sort
24 | * Added `p/do` and `p/doto`
25 | * Added `p/transduce`
26 | * Added `p/process-folder`
27 | 
28 | ### 0.6
29 | 
30 | * Added `p/slurp`
31 | * Consolidated and documented `p/min` and `p/max`
32 | 
33 | ### 0.5
34 | 
35 | * Added `xf/identity` transducer.
36 | * Added `p/let` parallel let bindings.
37 | 
38 | ### 0.4
39 | 
40 | * Added `p/armap`, parallel array reverse.
41 | * Added `xf/pmap`, transducer version of `core/pmap`.
42 | 
43 | ### 0.3
44 | 
45 | * Added `p/distinct`
46 | 
47 | ### 0.2
48 | 
49 | * Moved `parallel` namespace to `core` to avoid potential Java interop problems (see #3).
50 | 
51 | ### 0.1
52 | 
53 | First batch of functions.
54 | 


--------------------------------------------------------------------------------
/benchmarks/bamap.clj:
--------------------------------------------------------------------------------
 1 | (ns bamap)
 2 | ;; Benchmark script: sequential `map` vs `clojure.core/amap` vs `p/amap`, all applying the same function.
 3 | (require '[criterium.core :refer [quick-bench]])
 4 | (require '[parallel.core :as p] :reload)
 5 | ;; Cheap function (`inc`) over inputs of growing size; trailing comments are the recorded timings (presumably msecs -- TODO confirm).
 6 | (let [c (range 50000)] (quick-bench (doall (map inc c))))                             ;; 1.9
 7 | (let [c (to-array (range 50000))] (quick-bench (amap c idx ret (inc (aget c idx)))))  ;; 0.4
 8 | (let [c (to-array (range 50000))] (quick-bench (p/amap inc c)))                       ;; 0.6
 9 | 
10 | (let [c (range 500000)] (quick-bench (doall (map inc c))))                            ;; 18
11 | (let [c (to-array (range 500000))] (quick-bench (amap c idx ret (inc (aget c idx))))) ;; 4.9
12 | (let [c (to-array (range 500000))] (quick-bench (p/amap inc c)))                      ;; 5.9
13 | 
14 | (let [c (range 2e6)] (quick-bench (doall (map inc c))))                               ;; 80
15 | (let [c (to-array (range 2e6))] (quick-bench (amap c idx ret (inc (aget c idx)))))    ;; 20
16 | (let [c (to-array (range 2e6))] (quick-bench (p/amap inc c)))                         ;; 18
17 | 
18 | (let [c (range 5e6)] (quick-bench (doall (map inc c))))                               ;; 201
19 | (let [c (to-array (range 5e6))] (quick-bench (amap c idx ret (inc (aget c idx)))))    ;; 44
20 | (let [c (to-array (range 5e6))] (quick-bench (p/amap inc c)))                         ;; 58
21 | 
22 | ;; demanding f: a costly function per element, measured with a single `time` run instead of `quick-bench`
23 | ;; `pi` sums the first n terms of 1 - 1/3 + 1/5 - ... and multiplies by 4.0 (approximation of pi).
24 | (defn pi [n] (->> (range) (filter odd?) (take n) (map / (cycle [1 -1])) (reduce +) (* 4.0)))
25 | (def pis (shuffle (range 400 800)))
26 | 
27 | (let [c pis] (time (dorun (map pi c))))                                               ;; 12178
28 | (let [c (to-array pis)] (time (amap c idx ret (pi (aget c idx)))))                    ;; 11901
29 | (let [c (to-array pis)] (time (p/amap pi c)))                                         ;; 6991
30 | 


--------------------------------------------------------------------------------
/benchmarks/barmap.clj:
--------------------------------------------------------------------------------
 1 | (ns barmap)
 2 | 
 3 | (require '[criterium.core :refer [quick-benchmark quick-bench]])
 4 | (defmacro b [expr] `(* 1000. (first (:mean (quick-benchmark ~expr {}))))) ;; msecs
 5 | (require '[parallel.core :as p] :reload)
 6 | 
 7 | (defn armap
 8 |   "Fair sequential comparison: reverses object array `a` in place, applying `f` to every element. Returns nil."
 9 |   [f ^objects a]
10 |   (let [n (alength a), half (quot n 2)]
11 |     (loop [i 0]
12 |       (when (< i half)
13 |         (let [tmp (f (aget a i)), j (- n i 1)]
14 |           (aset a i (f (aget a j))) (aset a j tmp))
15 |         (recur (unchecked-inc i))))
16 |     (when (odd? n) (aset a half (f (aget a half))) nil))) ; the middle slot has no swap partner but still needs f
17 | 
18 | (def coll (range 1e6))
19 | 
20 | ;; sequential identity
21 | (let [c (object-array coll)] (b (armap identity c)))    ; 1.28
22 | (let [c (object-array coll)] (b (p/armap identity c)))  ; 11.14 (10x slow)
23 | 
24 | ;; reverse-complement example
25 | (defn random-dna [n] (repeatedly n #(rand-nth [\a \c \g \t])))
26 | (def compl {\a \t \t \a \c \g \g \c})
27 | (let [c (random-dna 1e6)
28 |       a1 (object-array c)
29 |       a2 (object-array c)]
30 |   [(b (armap compl a1))
31 |    (b (p/armap compl a2))])
32 | 
33 | ;; [70.55341358333335 39.12026016666667] (~1.5x faster)
34 | 
35 | ;; even more demanding f
36 | (defn pi [n] (->> (range) (filter odd?) (take n) (map / (cycle [1 -1])) (reduce +) (* 4.0)))
37 | 
38 | (let [ps (shuffle (range 400 800))
39 |       a1 (object-array ps)
40 |       a2 (object-array ps)]
41 |   (quick-bench (armap pi a1))   ; 1.246923 ms
42 |   (quick-bench (p/armap pi a2)) ; 0.866139 ms
43 |   )
44 | 


--------------------------------------------------------------------------------
/benchmarks/bcount.clj:
--------------------------------------------------------------------------------
 1 | (ns bcount)
 2 | 
 3 | (require '[criterium.core :refer [bench quick-bench]])
 4 | (require '[parallel.core :as p])
 5 | 
 6 | ;; core/count
 7 | 
 8 | (let [coll (range 100000)] (quick-bench (clojure.core/count (filter odd? (map inc coll))))) ;; 4.74ms
 9 | (let [coll (into [] (range 100000))] (quick-bench (clojure.core/count coll))) ;; 8.58ns
10 | 
11 | ;; p/count
12 | 
13 | (let [coll (range 100000)] (quick-bench (p/count coll (filter odd?) (map inc)))) ;; 3.58ms
14 | ;; no transforms, falls back on normal count, with some added timing.
15 | (let [coll (into [] (range 100000))] (quick-bench (p/count coll))) ;; 14.21ns
16 | 
17 | (def xform
18 |   (comp
19 |     (filter odd?)
20 |     (map inc)
21 |     (map #(mod % 50))
22 |     (mapcat range)
23 |     (map str)))
24 | 
25 | ;; to see some speedup we need non-trivial transforms and larger colls.
26 | (let [coll (into [] (range 1000000))] (quick-bench (p/count xform coll))) ;; 408ms
27 | 
28 | ;; here's the same transform with a sequential transduce
29 | (let [coll (into [] (range 1000000))] (quick-bench (transduce xform (completing (fn [sum _] (inc sum))) 0 coll))) ;; 524ms
30 | 


--------------------------------------------------------------------------------
/benchmarks/bdistinct.clj:
--------------------------------------------------------------------------------
 1 | (ns bdistinct)
 2 | 
 3 | (require '[criterium.core :refer [quick-benchmark quick-bench]]) ; quick-bench is called by every benchmark below
 4 | (defmacro b [expr] `(* 1000. (first (:mean (quick-benchmark ~expr {}))))) ;; mean time in msecs
 5 | (require '[parallel.core :as p] :reload)
 6 | 
 7 | (defn create-with-uniques [percent n] ; shuffled repetitions of (range (* n percent/100)); 0 and 100 are special-cased
 8 |   (cond
 9 |     (== 0 percent) (take n (repeat 1)) ; no uniques: n copies of the same value
10 |     (== 100 percent) (shuffle (range n)) ; all unique
11 |     :else (shuffle (apply concat (take (/ 1 (/ percent 100.)) (repeat (range (* n (/ percent 100.)))))))))
12 | 
13 | ;; ballpark at 100k
14 | (def coll 1e5)
15 | (def c100 (create-with-uniques 100 coll))
16 | (def c75 (create-with-uniques 75 coll))
17 | (def c50 (create-with-uniques 50 coll))
18 | (def c25 (create-with-uniques 25 coll))
19 | (def c0 (create-with-uniques 0 coll))
20 | 
21 | ;; normal core
22 | (let [c (into [] c100)] (quick-bench (doall (distinct c)))) ; 76.321408 ms
23 | (let [c (into [] c75)]  (quick-bench (doall (distinct c)))) ; 95.102771 ms
24 | (let [c (into [] c50)]  (quick-bench (doall (distinct c)))) ; 59.967416 ms
25 | (let [c (into [] c25)]  (quick-bench (doall (distinct c)))) ; 47.372695 ms
26 | (let [c (into [] c0)]   (quick-bench (doall (distinct c)))) ; 26.161685 ms
27 | 
28 | ;; normal core on sequences
29 | (let [c c100] (quick-bench (doall (distinct c)))) ; 74.756156 ms
30 | (let [c c75]  (quick-bench (doall (distinct c)))) ; 98.587782 ms
31 | (let [c c50]  (quick-bench (doall (distinct c)))) ; 63.899022 ms
32 | (let [c c25]  (quick-bench (doall (distinct c)))) ; 56.241547 ms
33 | (let [c c0]   (quick-bench (doall (distinct c)))) ; 19.684880 ms
34 | 
35 | ;; transducers core
36 | (let [c (into [] c100)] (quick-bench (doall (sequence (distinct) c)))) ; 65.090661 ms
37 | (let [c (into [] c75)]  (quick-bench (doall (sequence (distinct) c)))) ; 77.059407 ms
38 | (let [c (into [] c50)]  (quick-bench (doall (sequence (distinct) c)))) ; 44.620541 ms
39 | (let [c (into [] c25)]  (quick-bench (doall (sequence (distinct) c)))) ; 32.205828 ms
40 | (let [c (into [] c0)]   (quick-bench (doall (sequence (distinct) c)))) ; 7.455225 ms
41 | 
42 | ;; parallel on sequences
43 | (let [c c100] (quick-bench (doall (p/distinct c)))) ; 7.677920 ms
44 | (let [c c75]  (quick-bench (doall (p/distinct c)))) ; 8.686195 ms
45 | (let [c c50]  (quick-bench (doall (p/distinct c)))) ; 4.875998 ms
46 | (let [c c25]  (quick-bench (doall (p/distinct c)))) ; 4.980696 ms
47 | (let [c c0]   (quick-bench (doall (p/distinct c)))) ; 11.416917 ms
48 | 
49 | ;; parallel on vectors
50 | (let [c (into [] c100)] (quick-bench (doall (p/distinct c)))) ; 7.391681 ms
51 | (let [c (into [] c75)]  (quick-bench (doall (p/distinct c)))) ; 7.802467 ms
52 | (let [c (into [] c50)]  (quick-bench (doall (p/distinct c)))) ; 4.966004 ms
53 | (let [c (into [] c25)]  (quick-bench (doall (p/distinct c)))) ; 4.208700 ms
54 | (let [c (into [] c0)]   (quick-bench (doall (p/distinct c)))) ; 8.037075 ms
55 | 
56 | ;; parallel mutable
57 | (binding [p/*mutable* true]
58 | (let [c (into [] c100)] (quick-bench (p/distinct c)))  ; 2.739602 ms
59 | (let [c (into [] c75)]  (quick-bench (p/distinct c)))  ; 6.188239 ms
60 | (let [c (into [] c50)]  (quick-bench (p/distinct c)))  ; 3.679788 ms
61 | (let [c (into [] c25)]  (quick-bench (p/distinct c)))  ; 2.713920 ms
62 | (let [c (into [] c0)]   (quick-bench (p/distinct c)))) ; 7.802422 ms
63 | 
64 | 


--------------------------------------------------------------------------------
/benchmarks/bfrequencies.clj:
--------------------------------------------------------------------------------
 1 | (ns bfrequencies)
 2 | 
 3 | (require '[parallel.core :as p])
 4 | (require '[criterium.core :refer [bench quick-bench]])
 5 | (require '[clojure.core.reducers :as r])
 6 | 
 7 | (import 'java.util.concurrent.atomic.AtomicInteger
 8 |         'java.util.concurrent.ConcurrentHashMap
 9 |         '[java.util HashMap Collections Map])
10 | 
11 | (def small-overlapping
12 |   (into [] (map hash-map
13 |      (repeat :samplevalue)
14 |      (concat
15 |        (shuffle (range 0. 1e5))
16 |        (shuffle (range 0. 1e5))
17 |        (shuffle (range 0. 1e5))
18 |        (shuffle (range 0. 1e5))
19 |        (shuffle (range 0. 1e5))))))
20 | 
21 | (def big-overlapping
22 |   (into [] (map hash-map
23 |      (repeat :samplevalue)
24 |      (concat
25 |        (shuffle (range 6e4 1e5))
26 |        (shuffle (range 6e4 1e5))
27 |        (shuffle (range 6e4 1e5))
28 |        (shuffle (range 6e4 1e5))
29 |        (shuffle (range 6e4 1e5))))))
30 | 
31 | (def no-overlapping (into [] (range 1000)))
32 | 
33 | (def bigger-data
34 |   (into [] (map hash-map
35 |      (repeat :samplevalue)
36 |      (concat
37 |        (shuffle (range 0. 7e5))
38 |        (shuffle (range 0. 7e5))
39 |        (shuffle (range 0. 7e5))
40 |        (shuffle (range 0. 7e5))
41 |        (shuffle (range 0. 7e5))))))
42 | 
43 | 
44 | 
45 | ;; small overlapping
46 | (quick-bench (frequencies small-overlapping))
47 | ;; 441 ms
48 | (quick-bench (p/frequencies small-overlapping))
49 | ;; 190 ms
50 | (binding [p/*mutable* true] (quick-bench (p/frequencies small-overlapping)))
51 | ;; 92ms
52 | 
53 | 
54 | ;; bigger overlapping
55 | (quick-bench (frequencies big-overlapping))
56 | ;; 172ms
57 | (quick-bench (p/frequencies big-overlapping))
58 | ;; 52ms
59 | (binding [p/*mutable* true] (quick-bench (p/frequencies big-overlapping)))
60 | ;; 28ms
61 | 
62 | 
63 | 
64 | ;; with xforms
65 | 
66 | (quick-bench (frequencies (eduction (keep :samplevalue) (map int) small-overlapping)))
67 | ;; 238 ms
68 | (quick-bench (p/frequencies small-overlapping (keep :samplevalue) (map int)))
69 | ;; 91 ms
70 | (binding [p/*mutable* true] (quick-bench (p/frequencies small-overlapping (keep :samplevalue) (map int))))
71 | ;; 50 ms
72 | 
73 | (quick-bench (frequencies no-overlapping))
74 | ;; 335 µs
75 | (quick-bench (p/frequencies no-overlapping))
76 | ;; 299 µs
77 | 
78 | (time (dorun (frequencies bigger-data)))
79 | ;; 4320.984379 ms
80 | (time (dorun (p/frequencies bigger-data)))
81 | ;; 1980.512017 ms
82 | 


--------------------------------------------------------------------------------
/benchmarks/bidentity.clj:
--------------------------------------------------------------------------------
 1 | (ns bidentity) ; namespace named after this file (bidentity.clj) so it does not share a namespace with bpmap.clj
 2 | 
 3 | (require '[parallel.xf :as xf])
 4 | (require '[criterium.core :refer [bench quick-bench]])
 5 | 
 6 | (let [items (range 10000)]
 7 |   (quick-bench
 8 |     (dorun
 9 |       (sequence (map identity) items))))
10 | ;; 914.020710 µs
11 | 
12 | (let [items (range 10000)]
13 |   (quick-bench
14 |     (dorun
15 |       (sequence xf/identity items)))) ; no `xf-identity` var exists in this namespace; use the `xf`-aliased parallel.xf/identity
16 | ;; 892.697959 µs
17 | 
18 | (let [items (range 10000)]
19 |   (quick-bench
20 |     (dorun
21 |       (sequence identity items))))
22 | ;; 926.697959 µs
23 | 
24 | (let [items (range 10000)
25 |       xform (comp (map list) identity)]
26 |   (quick-bench
27 |     (dorun
28 |       (sequence xform items items))))
29 | ;; 4.09ms
30 | 
31 | (let [items (range 10000)]
32 |   (quick-bench
33 |     (dorun
34 |       (sequence xf/identity items items))))
35 | ;; 2.67ms
36 | 


--------------------------------------------------------------------------------
/benchmarks/binterleave.clj:
--------------------------------------------------------------------------------
 1 | (ns binterleave)
 2 | 
 3 | (require '[parallel.xf :as xf])
 4 | (require '[criterium.core :refer [bench quick-bench]])
 5 | 
 6 | (let [coll (range 1e5)]
 7 |   (quick-bench (doall (interleave (map inc coll) (range)))))
 8 | ;; 14ms
 9 | 
10 | (let [coll (range 1e5)]
11 |   (quick-bench (doall (sequence (comp (map inc) (xf/interleave (range))) coll))))
12 | ;; 40ms
13 | 
14 | (let [coll (range 1e5)]
15 |   (quick-bench (doall (map str (filter odd? (interleave (map inc coll) (range)))))))
16 | ;; 37ms
17 | 
18 | (let [coll (range 1e5)]
19 |   (quick-bench (doall (sequence (comp (map inc) (xf/interleave (range)) (filter odd?) (map str)) coll))))
20 | ;; 40ms
21 | 


--------------------------------------------------------------------------------
/benchmarks/bminmax.clj:
--------------------------------------------------------------------------------
 1 | (ns bminmax)
 2 | 
 3 | (require '[criterium.core :refer [bench]] '[clojure.core.reducers :as r]) ; r/fold and r/CollFold are used by the experiments below
 4 | (require '[parallel.core :as p] :reload)
 5 | 
 6 | (def v10k   (conj (shuffle (range 10000)) -9))
 7 | (def v100k  (conj (shuffle (range 100000)) -9))
 8 | (def v1m    (conj (shuffle (range 1000000)) -9))
 9 | 
10 | ;; core reduce
11 | (let [c v10k]   (bench (reduce min c))) ;; 98.237074 µs
12 | (let [c v100k]  (bench (reduce min c))) ;; 1.139608 ms
13 | (let [c v1m]    (bench (reduce min c))) ;; 9.963971 ms
14 | 
15 | ;; core apply (slower than reduce)
16 | (let [c v10k]   (bench (apply min c))) ;; 105.267586 µs
17 | (let [c v1m]    (bench (apply min c))) ;; 8.764973 ms
18 | 
19 | ;; parallel
20 | (let [c v10k]   (bench (p/min c))) ;; 83.043014 µs
21 | (let [c v100k]  (bench (p/min c))) ;; 665.367802 µs
22 | (let [c v1m]    (bench (p/min c))) ;; 5.474384 ms
23 | 
24 | ;; parallel xforms
25 | (let [c v10k]   (bench (transduce (comp (map inc) (filter odd?)) min ##Inf c))) ;; 219.782220 µs
26 | (let [c v100k]  (bench (transduce (comp (map inc) (filter odd?)) min ##Inf c))) ;; 2.722521 ms
27 | (let [c v1m]    (bench (transduce (comp (map inc) (filter odd?)) min ##Inf c))) ;; 22.701385 ms
28 | (let [c v10k]   (bench (p/min c (map inc) (filter odd?)))) ;; 168.950187 µs
29 | (let [c v100k]  (bench (p/min c (map inc) (filter odd?)))) ;; 1.361213 ms
30 | (let [c v1m]    (bench (p/min c (map inc) (filter odd?)))) ;; 12.085497 ms
31 | 
32 | ;; experiments...
33 | (let [c v1m]
34 |   (bench
35 |     (r/fold
36 |       8000
37 |       min
38 |       (fn [v] (nth (sort v) 0))
39 |       (reify r/CollFold
40 |         (coll-fold [this n combinef f]
41 |           (p/foldvec c n combinef f))))))
42 | ;; 647ms
43 | 


--------------------------------------------------------------------------------
/benchmarks/bpmap.clj:
--------------------------------------------------------------------------------
 1 | (ns bpmap)
 2 | 
 3 | (require '[parallel.xf :as xf])
 4 | (require '[criterium.core :refer [bench quick-bench]])
 5 | 
 6 | (defn pi
 7 |   "Returns 4.0 times the sum of the first n terms 1/1, -1/3, 1/5, ... (exact ratios until the final multiply)."
 8 |   [n]
 9 |   (let [odds  (take n (filter odd? (range)))
10 |         signs (cycle [1 -1])
11 |         terms (map / signs odds)]
12 |     (* 4.0 (reduce + terms))))
13 | 
14 | (let [items (range 1000000)] (time (dorun (sequence (map inc) items)))) ;; 141ms
15 | (let [items (range 1000000)] (time (dorun (sequence (xf/pmap inc) items)))) ;; 2563ms ok
16 | 
17 | (let [items (range 400 800)] (time (dorun (sequence (map pi) items)))) ;; 11876ms
18 | (let [items (range 400 800)] (time (dorun (sequence (xf/pmap pi) items)))) ;; 418ms ok ok
19 | 
20 | (let [items (range 400 800)] (time (transduce (map pi) + items))) ;; 11876ms
21 | (let [items (range 400 800)] (time (transduce (xf/pmap pi) + items))) ;; 1256ms
22 | 


--------------------------------------------------------------------------------
/benchmarks/bslurp.clj:
--------------------------------------------------------------------------------
 1 | (ns bslurp)
 2 | 
 3 | (require '[criterium.core :refer [bench quick-bench]])
 4 | (require '[parallel.core :as p] :reload)
 5 | (import '(java.nio ByteBuffer CharBuffer)
 6 |         '(java.io File PushbackReader InputStream InputStreamReader FileInputStream))
 7 | 
 8 | (set! *warn-on-reflection* true)
 9 | 
10 | (def ^:private READ_ONLY ; metadata must sit on the var symbol to take effect; on the value form it is ignored
11 |   java.nio.channels.FileChannel$MapMode/READ_ONLY)
12 | 
13 | (defn mmap [f] ; f: path String or java.io.File (coerced with str); maps the whole file read-only
14 |   (with-open [channel (.getChannel (FileInputStream. (str f)))] ; the channel is closed on exit; the mapping is returned to the caller
15 |     (.map channel READ_ONLY 0 (.size channel))))
16 | 
17 | (defn mslurp
18 |   "Decodes the memory-mapped contents of `f` as UTF-8; the mapping itself is part of what gets benchmarked."
19 |   [^String f]
20 |   (let [decoder (.newDecoder (java.nio.charset.Charset/forName "UTF-8"))]
21 |     (.decode decoder (mmap f))))
22 | 
23 | ;; lot of lines, 2.4M
24 | (let [fname "test/words" file (File. fname)] (bench (slurp file))) ; 8.84ms
25 | (let [fname "test/words" file (File. fname)] (bench (p/slurp file))) ; 2.87ms
26 | (let [fname "test/words" file (File. fname)] (binding [p/*mutable* true] (bench (p/slurp file)))) ; 1.40ms
27 | (let [fname "test/words" file (File. fname)] (bench (mslurp file))) ; 18.67ms
28 | 
29 | ;; fewer lines, 3.1M
30 | (let [fname (File. "/Users/reborg/prj/my/pwc/test/war-and-peace.txt")] (bench (slurp fname))) ; 14.67 ms
31 | (let [fname (File. "/Users/reborg/prj/my/pwc/test/war-and-peace.txt")] (bench (p/slurp fname))) ; 7.67ms
32 | (let [fname (File. "/Users/reborg/prj/my/pwc/test/war-and-peace.txt")] (bench (mslurp fname))) ; 8.67ms
33 | 
34 | ;; small file, no no.
35 | (let [fname (File. "project.clj")] (bench (slurp fname))) ; 35.13 µs
36 | (let [fname (File. "project.clj")] (bench (p/slurp fname))) ; 213.517530 µs
37 | 


--------------------------------------------------------------------------------
/benchmarks/bsort.clj:
--------------------------------------------------------------------------------
 1 | (ns bsort)
 2 | 
 3 | (require '[criterium.core :refer [quick-benchmark]])
 4 | (defmacro b [expr] `(* 1000. (first (:mean (quick-benchmark ~expr {}))))) ;; msecs
 5 | (require '[parallel.core :as p] :reload)
 6 | (import '[java.util Arrays])
 7 | 
 8 | (defn sort-some [percent coll] ; 100: coll as given; 0: front half interleaved with the reversed back half; else: shuffled, with chunks of 20 sorted at random
 9 |   (cond
10 |     (== percent 100) coll
11 |     (== percent 0) (let [[front back] (split-at (quot (count coll) 2) coll)] (interleave front (reverse back)))
12 |     :else (mapcat (fn [chunk] (if (< (rand) (/ percent 100.)) (sort chunk) chunk)) (partition-all 20 (shuffle coll)))))
13 | 
14 | ;; ballpark at 1M
15 | (def coll (range 1e6))
16 | 
17 | (let [c (into [] (sort-some 100 coll))] (b (sort c))) ;  25
18 | (let [c (into [] (sort-some 95  coll))] (b (sort c))) ; 537
19 | (let [c (into [] (sort-some 50  coll))] (b (sort c))) ; 781
20 | (let [c (into [] (sort-some 10  coll))] (b (sort c))) ; 801
21 | (let [c (into [] (sort-some 0   coll))] (b (sort c))) ; 132
22 | 
23 | (let [c (into [] (sort-some 100 coll))] (b (p/sort c))) ;  44
24 | (let [c (into [] (sort-some 95  coll))] (b (p/sort c))) ; 502
25 | (let [c (into [] (sort-some 50  coll))] (b (p/sort c))) ; 707
26 | (let [c (into [] (sort-some 10  coll))] (b (p/sort c))) ; 675
27 | (let [c (into [] (sort-some 0   coll))] (b (p/sort c))) ; 376
28 | 
29 | (let [c (into [] (sort-some 100 coll))] (binding [p/*mutable* true] (b (p/sort c)))) ; 19
30 | (let [c (into [] (sort-some 95  coll))] (binding [p/*mutable* true] (b (p/sort c)))) ; 562
31 | (let [c (into [] (sort-some 50  coll))] (binding [p/*mutable* true] (b (p/sort c)))) ; 548
32 | (let [c (into [] (sort-some 10  coll))] (binding [p/*mutable* true] (b (p/sort c)))) ; 571
33 | (let [c (into [] (sort-some 0   coll))] (binding [p/*mutable* true] (b (p/sort c)))) ; 292
34 | 
35 | ;; heavier comparator, just vaguely faster than sequential.
36 | 
37 | (let [c (into [] (sort-some 100 (map str coll)))] (b (sort compare c))) ; 59
38 | (let [c (into [] (sort-some 95  (map str coll)))] (b (sort compare c))) ; 760
39 | (let [c (into [] (sort-some 50  (map str coll)))] (b (sort compare c))) ; 760
40 | (let [c (into [] (sort-some 10  (map str coll)))] (b (sort compare c))) ; 802
41 | (let [c (into [] (sort-some 0   (map str coll)))] (b (sort compare c))) ; 136
42 | 
43 | (let [c (into [] (sort-some 100 (map str coll)))] (b (p/sort compare c))) ; 136
44 | (let [c (into [] (sort-some 95  (map str coll)))] (b (p/sort compare c))) ; 689
45 | (let [c (into [] (sort-some 50  (map str coll)))] (b (p/sort compare c))) ; 740
46 | (let [c (into [] (sort-some 10  (map str coll)))] (b (p/sort compare c))) ; 664
47 | (let [c (into [] (sort-some 0   (map str coll)))] (b (p/sort compare c))) ; 258
48 | 
49 | ;; Even heavier comparator
50 | (def cmp #(compare (last %1) (last %2)))
51 | 
52 | (let [c (into [] (sort-some 100 (map-indexed vector coll)))] (b (sort cmp c))) ; 325
53 | (let [c (into [] (sort-some 95  (map-indexed vector coll)))] (b (sort cmp c))) ; 6475
54 | (let [c (into [] (sort-some 50  (map-indexed vector coll)))] (b (sort cmp c))) ; 6801
55 | (let [c (into [] (sort-some 10  (map-indexed vector coll)))] (b (sort cmp c))) ; 6566
56 | (let [c (into [] (sort-some 0   (map-indexed vector coll)))] (b (sort cmp c))) ; 1261
57 | 
58 | (let [c (into [] (sort-some 100 (map-indexed vector coll)))] (b (p/sort cmp c))) ; 182
59 | (let [c (into [] (sort-some 95  (map-indexed vector coll)))] (b (p/sort cmp c))) ; 3589
60 | (let [c (into [] (sort-some 50  (map-indexed vector coll)))] (b (p/sort cmp c))) ; 3371
61 | (let [c (into [] (sort-some 10  (map-indexed vector coll)))] (b (p/sort cmp c))) ; 3422
62 | (let [c (into [] (sort-some 0   (map-indexed vector coll)))] (b (p/sort cmp c))) ; 615
63 | 
64 | (set! *warn-on-reflection* true)
65 | (let [c (int-array (sort-some 100 coll))] (b (do (Arrays/parallelSort c) (into [] c))))
66 | (let [c (int-array (sort-some 95  coll))] (b (do (Arrays/parallelSort c) (into [] c))))
67 | (let [c (int-array (sort-some 50  coll))] (b (do (Arrays/parallelSort c) (into [] c))))
68 | (let [c (int-array (sort-some 10  coll))] (b (do (Arrays/parallelSort c) (into [] c))))
69 | (let [c (int-array (sort-some 0   coll))] (b (do (Arrays/parallelSort c) (into [] c))))
70 | 
71 | ;; 39.43213305555556
72 | ;; 38.128529944444445
73 | ;; 38.26176866666667
74 | ;; 42.11502133333334
75 | ;; 39.757541388888896
76 | 
77 | (let [c (into [] (sort-some 50 coll))] (b (p/sort 5000 compare c)))
78 | (let [c (into [] (sort-some 50 coll))] (b (p/sort 10000 compare c)))
79 | (let [c (into [] (sort-some 50 coll))] (b (p/sort 15000 compare c)))
80 | 


--------------------------------------------------------------------------------
/benchmarks/bupdate_vals.clj:
--------------------------------------------------------------------------------
 1 | (ns bupdate-vals)
 2 | 
 3 | (require '[parallel.core :as p])
 4 | (require '[criterium.core :refer [bench quick-bench]])
 5 | 
 6 | (defn large-map [i] (let [ks (range i)] (zipmap ks ks))) ; maps every element of (range i) to itself
 7 | 
 8 | (defn update-vals [m f] ; persistent baseline: rebuilds the map from {} with f applied to each value
 9 |   (reduce-kv #(assoc %1 %2 (f %3)) {} m))
10 | 
11 | (defn update-vals-transients [m f] ; applies f to each value, accumulating into a transient map that is made persistent at the end
12 |   (->> m (reduce-kv #(assoc! %1 %2 (f %3)) (transient {})) persistent!))
13 | 
14 | ;; sanity
15 | (def m (large-map 1e5))
16 | (for [i (range 20)]
17 |   (= (sort (vals (update-vals m inc)))
18 |    (sort (vals (p/update-vals m inc)))))
19 | 
20 | (let [m (large-map 1e5)] (quick-bench (update-vals m inc))) ;; 22ms
21 | (let [m (large-map 1e5)] (quick-bench (update-vals-transients m inc))) ;; 15ms
22 | (let [m (large-map 1e5)] (binding [p/*mutable* true] (quick-bench (p/update-vals m inc)))) ;; 16ms
23 | (let [m (large-map 1e5)] (binding [p/*mutable* false] (quick-bench (p/update-vals m inc)))) ;; 56ms
24 | 
25 | (let [m (large-map 1e6)] (quick-bench (update-vals m inc))) ;; 551ms
26 | (let [m (large-map 1e6)] (quick-bench (update-vals-transients m inc))) ;; 241ms
27 | (let [m (large-map 1e6)] (binding [p/*mutable* true] (quick-bench (p/update-vals m inc)))) ;; 215ms
28 | (let [m (large-map 1e6)] (binding [p/*mutable* false] (quick-bench (p/update-vals m inc)))) ;; 1.09secs
29 | 
30 | ;; heavy f calculating pi approx. never going beyond 50 iterations here.
31 | (defn f [n] (->> (range) (filter odd?) (take (rem n 50)) (map / (cycle [1 -1])) (reduce +) (* 4.0)))
32 | (quick-bench (f 50)) ;; 175ns
33 | 
34 | (let [m (large-map 1e5)] (quick-bench (update-vals m f))) ;; 3.5secs
35 | (let [m (large-map 1e5)] (quick-bench (update-vals-transients m f))) ;; 3.3secs
36 | (let [m (large-map 1e5)] (binding [p/*mutable* false] (quick-bench (p/update-vals m f)))) ;; 1.8secs
37 | (let [m (large-map 1e5)] (binding [p/*mutable* true] (quick-bench (p/update-vals m f)))) ;; 1.6secs
38 | 


--------------------------------------------------------------------------------
/benchmarks/groupby.clj:
--------------------------------------------------------------------------------
 1 | (ns groupby)
 2 | 
 3 | (require '[parallel.core :as p])
 4 | (require '[criterium.core :refer [bench quick-bench]])
 5 | 
 6 | (def ^:const magnitude 1e5)
 7 | (def ^:const repetition 5)
 8 | (def ^:const sqrt (Math/sqrt (* repetition magnitude)))
 9 | 
10 | (def v1 ;;all-keys-no-repeat
11 |   (into [] (range (* repetition magnitude))))
12 | 
13 | (def v2 ;;many-keys-small-repeat
14 |   (into [] (apply concat (for [i (range repetition)] (shuffle (range magnitude))))))
15 | 
16 | (def v3 ;;medium-keys-medium-repeat
17 |   (into [] (apply concat (for [i (range sqrt)] (range sqrt)))))
18 | 
19 | (def v4 ;;small-keys-many-repeat
20 |   (into [] (apply concat (for [i (range magnitude)] (range repetition)))))
21 | 
22 | ;; ************* Normal Group-By **************
23 | 
24 | (quick-bench (clojure.core/group-by identity v1)) ;; 229ms
25 | (quick-bench (clojure.core/group-by identity v2)) ;; 268ms
26 | (quick-bench (clojure.core/group-by identity v3)) ;; 127ms
27 | (quick-bench (clojure.core/group-by identity v4)) ;; 95ms
28 | 
29 | ;; ************* Parallel Group-By **************
30 | 
31 | (quick-bench (p/group-by identity v1)) ;; 441ms
32 | (quick-bench (p/group-by identity v2)) ;; 168ms
33 | (quick-bench (p/group-by identity v3)) ;; 29ms
34 | (quick-bench (p/group-by identity v4)) ;; 32ms
35 | 
36 | ;; ************* Parallel Group-By Mutable Result **************
37 | 
38 | (binding [p/*mutable* true] (quick-bench (p/group-by identity v1))) ;; 21ms
39 | (binding [p/*mutable* true] (quick-bench (p/group-by identity v2))) ;; 48ms
40 | (binding [p/*mutable* true] (quick-bench (p/group-by identity v3))) ;; 13ms
41 | (binding [p/*mutable* true] (quick-bench (p/group-by identity v4))) ;; 18ms
42 | 


--------------------------------------------------------------------------------
/benchmarks/plet.clj:
--------------------------------------------------------------------------------
1 | (ns plet)
2 | 
3 | (require '[criterium.core :refer [quick-bench]])
4 | (require '[parallel.core :as p] :reload)
5 | 
6 | (quick-bench (let [a (+ 1 2) b (*  4 3)] (+ a b))) ;; 1.43ns
7 | (quick-bench (p/let [a (+ 1 2) b (*  4 3)] (+ a b))) ;; 15us
8 | 


--------------------------------------------------------------------------------
/examples/lastfm/.gitignore:
--------------------------------------------------------------------------------
 1 | /target
 2 | /classes
 3 | /checkouts
 4 | pom.xml
 5 | pom.xml.asc
 6 | *.jar
 7 | *.class
 8 | /.lein-*
 9 | /.nrepl-port
10 | .hgignore
11 | .hg/
12 | data
13 | 


--------------------------------------------------------------------------------
/examples/lastfm/README.md:
--------------------------------------------------------------------------------
 1 | # Lastfm dataset processing example
 2 | 
 3 | This example project is designed to showcase Parallel mainly in the context of intensive data IO.
 4 | 
 5 | ## Introduction
 6 | 
 7 | To run the example, you need to download the Last.fm dataset. This is an (old but good) version of the Last.fm data kindly hosted by [Oscar Celma](http://ocelma.net). If you are interested in music recommendation in general, have a look around the web site or [buy his book](http://ocelma.net/MusicRecommendationBook/index.html).
 8 | 
 9 | The project answers a list of interesting questions about music using the Last.fm dataset. It first shows how to retrieve the answers with plain Clojure (`src/lastfm/version00.clj`) and then how we could speed up processing using the Parallel library (`src/lastfm/version01.clj`).
10 | 
11 | ### Download the data
12 | 
13 | Sizes: lastfm-dataset-1K.tar.gz (~641Mb), lastfm-dataset-360K.tar.gz (~543Mb); both archives expand into much larger files (2.4G and 1.6G respectively).
14 | 
15 | ```bash
16 | mkdir data; cd data
17 | curl -O http://mtg.upf.edu/static/datasets/last.fm/lastfm-dataset-1K.tar.gz
18 | curl -O http://mtg.upf.edu/static/datasets/last.fm/lastfm-dataset-360K.tar.gz
19 | tar xvfz lastfm-dataset-1K.tar.gz
20 | tar xvfz lastfm-dataset-360K.tar.gz
21 | cd lastfm-dataset-1K
22 | head -n 1000 userid-timestamp-artid-artname-traid-traname.tsv > small.tsv
23 | cd ..
24 | cd lastfm-dataset-360K
25 | head -n 1000 usersha1-artmbid-artname-plays.tsv > small.tsv
26 | cd ..
27 | ```
28 | 
29 | The instructions above also create `small.tsv` samples of only 1k lines for quick experiments.
30 | 


--------------------------------------------------------------------------------
/examples/lastfm/project.clj:
--------------------------------------------------------------------------------
1 | (defproject lastfm "0.1.0-SNAPSHOT"
2 |   :description "Lastfm dataset processing showcase."
3 |   :url "https://github.com/reborg/parallel"
4 |   :license {:name "Eclipse Public License"
5 |             :url "http://www.eclipse.org/legal/epl-v10.html"}
6 |   :jvm-opts ["-Xmx4g" "-server"]
7 |   :dependencies [[org.clojure/clojure "1.9.0"]
8 |                  [parallel "0.8"]])
9 | 


--------------------------------------------------------------------------------
/examples/lastfm/src/lastfm/version00.clj:
--------------------------------------------------------------------------------
  1 | (ns lastfm.version00
  2 |   (:require [clojure.string :as s])
  3 |   (:import [java.io BufferedReader FileReader Reader StringReader File]))
  4 | 
  5 | ;; #######################
  6 | ;; ### Files and utils ###
  7 | ;; #######################
  8 | 
  9 | ;; Reader factories for the original large TSVs: each call opens a new FileReader on a path relative to the working directory.
 10 | (defn plays [] (FileReader. (File. "data/lastfm-dataset-360K/usersha1-artmbid-artname-plays.tsv")))
 11 | (defn details [] (FileReader. (File. "data/lastfm-dataset-360K/usersha1-profile.tsv")))
 12 | (defn listeners [] (FileReader. (File. "data/lastfm-dataset-1K/userid-timestamp-artid-artname-traid-traname.tsv")))
 13 | 
 14 | (def clean-xform ; raw line -> trimmed, blank lines dropped, split on tabs into a vector of fields
 15 |   (let [fields (fn [line] (s/split line #"\t"))]
 16 |     (comp (map s/trim)
 17 |           (filter (complement s/blank?)) (map fields))))
 18 | 
 19 | (defn process
 20 |   "Eagerly reduces the cleaned lines of reader `r` through `xcomp`, using the
 21 |   step fn `store!` seeded with `init` (default: conj! into []). An editable
 22 |   `init` is made transient for the reduction and persistent again on return.
 23 |   The reader is closed when the reduction ends, including when a step throws."
 24 |   ([r xcomp] (process r xcomp conj! []))
 25 |   ([r xcomp store! init]
 26 |    (let [editable? (instance? clojure.lang.IEditableCollection init)]
 27 |      (with-open [br (BufferedReader. r)] ; transduce is eager, so every line is consumed before the close
 28 |        (transduce
 29 |          (comp clean-xform xcomp)
 30 |          (completing store! #(if editable? (persistent! %) %))
 31 |          (if editable? (transient init) init) (line-seq br))))))
 32 | 
 33 | (defn load-user-info [fname] ; `fname` is passed straight to `process`, which wraps it in a BufferedReader
 34 |   (let [index-by-id (fn [m [userid :as attrs]] (assoc! m userid (subvec attrs 1)))]
 35 |     ;; result: map of the first column -> vector of the remaining columns of that line
 36 |     (process fname identity index-by-id {})))
 37 | 
 38 | ; (require '[lastfm.version00 :as v0] :reload)
 39 | ; (def details (time (v0/load-user-info (v0/details))))
 40 | ; "Elapsed time: 1467.065929 msecs"
 41 | 
 42 | ;; #######################
 43 | ;; ######## API ##########
 44 | ;; #######################
 45 | 
 46 | ;; What are the most played artists?
 47 | (defn top-artists [f] ; the five most frequent values of the third column, as [value count] entries
 48 |   (let [artist #(nth % 2)
 49 |         counts (frequencies (process f (map artist)))]
 50 |     ;; descending by count, first five only
 51 |     (take 5 (sort-by val > counts))))
 52 | 
 53 | ; (time (v0/top-artists (v0/plays)))
 54 | ; "Elapsed time: 45463.570766 msecs"
 55 | ; (["radiohead" 77348] ["the beatles" 76339] ["coldplay" 66738]
 56 | ;  ["red hot chili peppers" 48989] ["muse" 47015])
 57 | 
 58 | (defn top-artists-in [f in-country] ; like top-artists, restricted to users whose lower-cased country matches regex string `in-country`
 59 |   (let [user-info (load-user-info (details))
 60 |         regxp (re-pattern in-country) ; compiled once rather than once per row
 61 |         for-country (fn [[user-id]]
 62 |                       ;; nth with a default: an unknown user or a short profile row counts as "no country"
 63 |                       (let [country (some-> (nth (user-info user-id) 2 nil) s/lower-case)]
 64 |                         (re-find regxp (or country ""))))]
 65 |     (->> (process f
 66 |            (comp
 67 |              (filter for-country)
 68 |              (map #(nth % 2))))
 69 |          frequencies (sort-by last >)
 70 |          (take 5))))
 71 | 
 72 | ; (time (v0/top-artists-in (v0/plays) "poland"))
 73 | ; "Elapsed time: 37677.731108 msecs"
 74 | ; (["metallica" 3869]
 75 | ;  ["myslovitz" 3778]
 76 | ;  ["red hot chili peppers" 3610]
 77 | ;  ["o.s.t.r." 3440]
 78 | ;  ["system of a down" 3306])
 79 | 
 80 | (defn how-many-songs-played-for
 81 |   "Sums the last column of every row whose third column matches the regex band."
 82 |   [f band]
 83 |   (let [band-re (re-pattern band)] ; compiled once, not once per row
 84 |     (process f
 85 |       (comp (filter (fn [[_ _ played]] (re-find band-re (or played ""))))
 86 |             (map peek)
 87 |             (map #(Integer/valueOf %)))
 88 |       + 0)))
 89 | 
 90 | ; (time (v0/how-many-songs-played-for (v0/plays) "coltrane"))
 91 | ; "Elapsed time: 23843.245219 msecs"
 92 | ; 1,157,511
 93 | 
 94 | (defn most-played-band-by-day
 95 |   [fname]
 96 |   (let [day+band (fn [[_ ts _ band]]
 97 |                    [(nth (s/split ts #"T") 0) band])  ; [text before the first "T", band]
 98 |         tally (fn [counts row]
 99 |                 (let [k (day+band row)
100 |                       seen (get counts k 0)]
101 |                   (assoc! counts k (inc seen))))]
102 |     (->> (process fname identity tally {})
103 |          (sort-by (fn [entry] (nth entry 1)) >)
104 |          (take 5))))
105 | 
106 | ;; (time (v0/most-played-band-by-day (v0/listeners)))
107 | ; "Elapsed time: 65574.980722 msecs"
108 | ; ([["2009-03-21" "Kanye West"] 2331] [["2009-02-28" "T.I."] 2062]
109 | 


--------------------------------------------------------------------------------
/examples/lastfm/src/lastfm/version01.clj:
--------------------------------------------------------------------------------
  1 | (ns lastfm.version01
  2 |   (:require [parallel.core :as p]
  3 |             [clojure.string :as s])
  4 |   (:import [java.io BufferedReader FileReader Reader StringReader File]))
  5 | 
  6 | ;; #######################
  7 | ;; ### Files and utils ###
  8 | ;; #######################
  9 | 
 10 | ;; Split original files into segments
 11 | (defn plays [] "data/lastfm-dataset-360K/splits")      ; path string; callers below treat it as a folder
 12 | (defn details [] "data/lastfm-dataset-360K/details")   ; path string passed to load-user-info
 13 | (defn listeners [] "data/lastfm-dataset-1K/splits")    ; path string; callers below treat it as a folder
 14 | 
 15 | (def clean-xform
 16 |   (comp (map s/trim)                              ; strip surrounding whitespace
 17 |         (filter (complement s/blank?))            ; drop blank lines
 18 |         (map (fn [line] (s/split line #"\t")))))  ; one vector of fields per line
 19 | 
 20 | (defn process
 21 |   [fname xcomp] ; clean-xform always runs before the caller's xcomp
 22 |   (->> (comp clean-xform xcomp) (p/process-folder fname)))
 23 | 
 24 | (defn load-user-info [fname]
 25 |   (p/process-folder
 26 |     fname
 27 |     (completing
 28 |       (fn reducef [m [userid :as attrs]]                    ; userid -> remaining columns
 29 |         (assoc! m userid (subvec attrs 1 (count attrs))))
 30 |       persistent!)                                          ; completion step: transient -> persistent map
 31 |     (fn ([] (transient {})) ([m1 m2] (into m1 m2)))         ; seed is a fresh transient; combine merges maps
 32 |     clean-xform))
 33 | 
 34 | ; (require '[lastfm.version01 :as v1] :reload)
 35 | ; (def details (time (v1/load-user-info (v1/details))))
 36 | ; "Elapsed time: 683.946281 msecs"
 37 | 
 38 | ;; #######################
 39 | ;; ######## API ##########
 40 | ;; #######################
 41 | 
 42 | ;; What are the most played artists?
 43 | (defn top-artists [folder]
 44 |   (->> (process folder (map #(nth % 2))) ; keep only the column at index 2
 45 |        frequencies                       ; clojure.core/frequencies (sequential)
 46 |        ; Shall we add a “p/”?
 47 |        ; p/frequencies
 48 |        (sort-by last >)                  ; highest count first
 49 |        (take 5)))
 50 | 
 51 | ; (require '[lastfm.version01 :as v1] :reload)
 52 | ; (time (v1/top-artists (v1/plays)))
 53 | ; "Elapsed time: 17494.570766 msecs"
 54 | ; "Elapsed time: 9865.58715 msecs"
 55 | ; (["radiohead" 77348] ["the beatles" 76339] ["coldplay" 66738]
 56 | 
 57 | (defn top-artists-in [fname in-country]
 58 |   (let [user-info (load-user-info (details))
 59 |         regxp (re-pattern in-country) ; compiled once, not once per row
 60 |         for-country (fn [[user-id]]   ; truthy when the user's country (index 2 of its info) matches
 61 |                       (let [country (some-> (nth (user-info user-id) 2) s/lower-case)]
 62 |                         (re-find regxp (or country ""))))
 63 |         xform (comp clean-xform (filter for-country) (map #(nth % 2)))]
 64 |     (->> (p/frequencies (File. fname) xform)
 65 |          (sort-by #(nth % 1) >)
 66 |          (take 5))))
 67 | 
 68 | ; (time (v1/top-artists-in (v1/plays) "poland"))
 69 | ; "Elapsed time: 5017.731108 msecs"
 70 | ; (["metallica" 3869]
 71 | ;  ["myslovitz" 3778]
 72 | ;  ["red hot chili peppers" 3610]
 73 | ;  ["o.s.t.r." 3440]
 74 | ;  ["system of a down" 3306])
 75 | 
 76 | (defn how-many-songs-played-for [f band]
 77 |   (let [band-re (re-pattern band)] ; compiled once, not once per row
 78 |     (p/process-folder f + +        ; + is both the reducing and the combining function
 79 |       (comp
 80 |         clean-xform
 81 |         (filter (fn [[_ _ played]] (re-find band-re (or played ""))))
 82 |         (map peek) (map #(Integer/valueOf %))))))
 83 | 
 84 | ; (time (v1/how-many-songs-played-for (v1/plays) "coltrane"))
 85 | ; "Elapsed time: 4307.20904 msecs"
 86 | ; 1,157,511
 87 | 
 88 | (defn most-played-band-by-day
 89 |   [fname]
 90 |   (let [xform (comp
 91 |                 clean-xform
 92 |                 (map (fn [[_ ts _ band]]
 93 |                        [(nth (s/split ts #"T") 0) band])))] ; [text before the first "T", band]
 94 |     (->>
 95 |       (p/frequencies (File. fname) xform)
 96 |       (sort-by #(nth % 1) >)                                ; highest count first
 97 |       ; Shall we add a “p/”? (descending order needs the arguments swapped)
 98 |       ; (p/sort #(compare (nth %2 1) (nth %1 1)))
 99 |       (take 5))))
100 | 
101 | ; (time (v1/most-played-band-by-day (v1/listeners)))
102 | ; "Elapsed time: 16614.461194 msecs"
103 | ; ([["2009-03-21" "Kanye West"] 2331] [["2009-02-28" "T.I."] 2062]
104 | 


--------------------------------------------------------------------------------
/examples/lastfm/test/lastfm/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns lastfm.core-test
2 |   (:require [clojure.test :refer :all]
3 |             [lastfm.version00 :as v0]))
4 | ;; Checks the line-cleaning transducer shared by the examples.
5 | (deftest clean-xform-test
6 |   (testing "lines are trimmed, blank lines dropped and fields split on tabs"
7 |     (is (= [["a" "b"]] (into [] v0/clean-xform [" a\tb " "   "])))))
8 | 


--------------------------------------------------------------------------------
/java/clojure/lang/Get.java:
--------------------------------------------------------------------------------
 1 | package clojure.lang;
 2 | 
 3 | public class Get { // static accessors declared inside the clojure.lang package
 4 |     // Returns the root node field of the given map.
 5 |     public static PersistentHashMap.INode root(PersistentHashMap phm) {
 6 |         return phm.root;
 7 |     }
 8 |     // Returns the map's hasNull flag (boxed).
 9 |     public static Boolean hasNullValue(PersistentHashMap phm) {
10 |         return phm.hasNull;
11 |     }
12 |     // Returns the map's nullValue field.
13 |     public static Object nullValue(PersistentHashMap phm) {
14 |         return phm.nullValue;
15 |     }
16 |     // Returns the child-node array of an ArrayNode.
17 |     public static PersistentHashMap.INode[] array(PersistentHashMap.ArrayNode arrayNode) {
18 |         return arrayNode.array;
19 |     }
20 |     // Returns the backing Object[] of a BitmapIndexedNode.
21 |     public static Object[] array(PersistentHashMap.BitmapIndexedNode bitmapIndexedNode) {
22 |         return bitmapIndexedNode.array;
23 |     }
24 |     // Returns the backing Object[] of a HashCollisionNode.
25 |     public static Object[] array(PersistentHashMap.HashCollisionNode hashCollisionNode) {
26 |         return hashCollisionNode.array;
27 |     }
28 |     // Delegates to PersistentHashMap.NodeSeq.kvreduce with the same arguments.
29 |     public static Object kvreduce(Object[] array, IFn f, Object init) {
30 |         return PersistentHashMap.NodeSeq.kvreduce(array, f, init);
31 |     }
32 | }
33 | 


--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
 1 | (defproject parallel "0.11"
 2 |   :description "A library of parallel-enabled Clojure functions"
 3 |   :url "https://github.com/reborg/parallel"
 4 |   :license {:name "Eclipse Public License"
 5 |             :url "http://www.eclipse.org/legal/epl-v10.html"}
 6 |   :dependencies [[org.clojure/clojure "1.9.0"]] ; the only runtime dependency
 7 |   :java-source-paths ["java"]                   ; Java sources (clojure.lang.Get) live under java/
 8 |   :uberjar-name "parallel.jar"
 9 |   :deploy-repositories [["releases" :clojars] ["snapshots" :clojars]]
10 |   :profiles {:dev {:dependencies [[criterium  "0.4.4"] ; dev-only dependencies
11 |                                   [com.clojure-goes-fast/clj-java-decompiler "0.1.0"]]
12 |                    :plugins []}}
13 |   :jvm-opts ["-Xmx2g" "-server"]                ; 2 GB max heap
14 |   :test-refresh {:watch-dirs ["src" "test"] :refresh-dirs ["src" "test"]})
15 | 


--------------------------------------------------------------------------------
/src/parallel/core.clj:
--------------------------------------------------------------------------------
  1 | (ns parallel.core
  2 |   (:refer-clojure :exclude [eduction sequence transduce pmap
  3 |                             frequencies let slurp do doto and or
  4 |                             count group-by sort min max amap distinct])
  5 |   (:require [parallel.foldmap :as fmap]
  6 |             [parallel.merge-sort :as msort]
  7 |             [parallel.map-combine :as mcombine]
  8 |             [parallel.fork-middle :as forkm]
  9 |             [clojure.core.reducers :as r]
 10 |             [clojure.core.protocols :as p]
 11 |             [clojure.java.io :as io]
 12 |             [clojure.core :as c])
 13 |   (:import
 14 |     [parallel.merge_sort MergeSort]
 15 |     [parallel.map_combine MapCombine]
 16 |     [java.io FileInputStream BufferedReader FileReader Reader StringReader File]
 17 |     [java.nio.file Files]
 18 |     [java.util.concurrent.atomic AtomicInteger AtomicLong]
 19 |     [java.util.concurrent ConcurrentHashMap ConcurrentLinkedQueue]
 20 |     [java.util HashMap Collections Queue Map]))
 21 | 
 22 | (def ^:const ncpu (.availableProcessors (Runtime/getRuntime))) ; number of processors reported by the JVM
 23 | 
 24 | (def ^:dynamic *mutable* false) ; bind to true to receive the raw mutable result from functions that check it
 25 | 
 26 | (defn- foldable? [coll] ; true for maps, vectors and reducers Cat instances
 27 |   (cond (map? coll) true
 28 |         (vector? coll) true
 29 |         :else (instance? clojure.core.reducers.Cat coll)))
 30 | 
 31 | (defn- compose
 32 |   "Applies the last item of a vector xrf (the xform) to its first item (the rf);
 33 |   non-vectors pass through, hence reducef cannot be a vector. TODO: could use meta?"
 34 |   [xrf]
 35 |   (if-not (vector? xrf)
 36 |     xrf
 37 |     ((peek xrf) (nth xrf 0))))
 38 | 
 39 | (defn xrf
 40 |   "Pairs the reducing function rf with the given transducers
 41 |   (composed with comp) as a [rf xform] vector, the shape p/fold
 42 |   expects to run a transducer chain in parallel. Without
 43 |   transducers rf is returned untouched."
 44 |   [rf & xforms]
 45 |   (if (seq xforms)
 46 |     [rf (apply comp xforms)]
 47 |     rf))
 48 | 
 49 | (defn- splitting
 50 |   "Calculates split sizes as they would be generated by
 51 |   a parallel fold with n=1."
 52 |   [coll]
 53 |   (iterate
 54 |     #(mapcat
 55 |        (fn [n] [(quot n 2) (- n (quot n 2))]) %) ; every size becomes two halves (second gets the remainder)
 56 |     [(c/count coll)]))                           ; first level is the whole collection; the seq is infinite
 57 | 
 58 | (defn show-chunks
 59 |   "Shows chunk sizes for the desired chunk number
 60 |   on a given collection coll."
 61 |   [coll nchunks]
 62 |   {:pre [(== (bit-and nchunks (- nchunks)) nchunks)]} ; holds only when nchunks has at most one bit set
 63 |   (->> (splitting coll)
 64 |        (take-while #(<= (c/count %) nchunks))           ; levels that have no more than nchunks pieces
 65 |        last))                                           ; deepest such level
 66 | 
 67 | (defn chunk-size
 68 |   "Returns the largest chunk produced when coll is split into
 69 |   nchunks pieces, i.e. the chunk size a parallel fold needs to
 70 |   end up with that many splits. nchunks must be a power of two."
 71 |   [coll nchunks]
 72 |   (reduce c/max (show-chunks coll nchunks)))
 73 | 
 74 | (defn foldvec
 75 |   "A general purpose reducers/foldvec taking a generic f
 76 |   to apply at the leaf instead of reduce."
 77 |   [v n combinef f]
 78 |   (c/let [cnt (c/count v)]
 79 |     (cond
 80 |       (empty? v) (combinef)                       ; nothing to do: return the combine seed
 81 |       (<= cnt n) (f v)                            ; small enough: run the leaf function on the whole chunk
 82 |       :else (c/let [half (quot cnt 2)
 83 |                   r1 (subvec v 0 half)
 84 |                   r2 (subvec v half cnt)
 85 |                   fc (fn [v] #(foldvec v n combinef f))] ; thunk that folds one half
 86 |               (#'r/fjinvoke                       ; private reducers fork/join helpers, reached through their vars
 87 |                 #(c/let [f1 (fc r1)
 88 |                        t2 (#'r/fjtask (fc r2))]
 89 |                    (#'r/fjfork t2)                ; right half is forked as a task
 90 |                    (combinef (f1) (#'r/fjjoin t2))))))))  ; left half runs in the calling task
 91 | 
 92 | (defn- reduce-leaf
 93 |   "reduce-leaf executes at the chunk level, once
 94 |   the splitting is done. It calls xform single arity
 95 |   to flush any possible remaining state."
 96 |   [reducef combinef]
 97 |   #(c/let [f (compose reducef)                 ; built on every call, so each chunk gets its own (xform rf)
 98 |            ret (r/reduce f (combinef) %)]      ; every chunk starts from (combinef)
 99 |      (if (vector? reducef) (f ret) ret)))      ; completion arity is called only for xrf vectors
100 | 
101 | (defprotocol Folder ; wraps a collection into something implementing r/CollFold (see the extensions below)
102 |   (folder [coll]
103 |           [coll nchunks]))
104 | 
105 | (extend-protocol Folder
106 |   Object ; fallback: one sequential reduce; reduce-leaf makes it understand p/xrf vectors too
107 |   (folder
108 |     ([coll]
109 |      (reify r/CollFold
110 |        (coll-fold [this n combinef reducef]
111 |          ((reduce-leaf reducef combinef) coll))))
112 |     ([coll nchunks]
113 |      (reify r/CollFold
114 |        (coll-fold [this _ combinef reducef]
115 |          ((reduce-leaf reducef combinef) coll)))))
116 |   clojure.lang.IPersistentVector ; vectors are split with foldvec and reduced per chunk
117 |   (folder
118 |     ([coll]
119 |      (reify r/CollFold
120 |        (coll-fold [this n combinef reducef]
121 |          (foldvec coll n combinef (reduce-leaf reducef combinef)))))
122 |     ([coll nchunks]
123 |      (reify r/CollFold
124 |        (coll-fold [this _ combinef reducef] ; chunk size derived from the requested number of chunks
125 |          (foldvec coll (chunk-size coll nchunks) combinef (reduce-leaf reducef combinef))))))
126 |   clojure.lang.PersistentHashMap ; hash maps delegate to fmap/fold, always with 512 (nchunks is not used)
127 |   (folder
128 |     ([coll]
129 |      (reify r/CollFold
130 |        (coll-fold [m n combinef reducef]
131 |          (fmap/fold coll 512 combinef reducef))))
132 |     ([coll nchunks]
133 |      (reify r/CollFold
134 |        (coll-fold [m n combinef reducef]
135 |          (fmap/fold coll 512 combinef reducef))))))
136 | 
137 | (defn fold
138 |   "Like reducers fold, but with stateful transducers support.
139 |   Expect reducef to be built using p/xrf to defer initialization.
140 |   n is the number-of-chunks instead of chunk size.
141 |   n must be a power of 2 and defaults to 32."
142 |   ([reducef coll] ; combinef defaults to the bare rf; p/xrf yields a plain fn when given no xforms
143 |    (fold (if (vector? reducef) (first reducef) reducef) reducef coll))
144 |   ([combinef reducef coll]
145 |    (fold 32 combinef reducef coll))
146 |   ([n combinef reducef coll] ; r/fold's own chunk-size argument is a placeholder: the folder decides
147 |    (r/fold ::ignored combinef reducef (folder coll n))))
148 | 
149 | (defn transduce
150 |   "Similar to core/transduce, but executes transducers in parallel.
151 |   Instead of `init`, it accepts a combinef to combine results back
152 |   from parallel execution. When combinef is present, it takes
153 |   precedence over f to establish the initial value for the reduction."
154 |   ([xform f coll]
155 |    (transduce xform f f coll))            ; f doubles as combinef
156 |   ([xform f combinef coll]
157 |    (transduce 32 xform f combinef coll))  ; 32 chunks by default
158 |   ([n xform f combinef coll]
159 |    (fold n combinef (xrf f xform) coll)))
160 | 
161 | (defn count ; counts, in parallel, the items that come out of xform applied to coll; n chunks (default 32)
162 |   ([xform coll]
163 |    (count 32 xform coll))
164 |   ([n xform coll]
165 |    (c/let [coll (if (foldable? coll) coll (into [] coll))
166 |          cnt (AtomicLong. 0)                                                  ; single counter shared by all chunks
167 |          reducef (xrf (completing (fn [_ _] (.incrementAndGet cnt))) xform)   ; accumulator is ignored, only the side effect matters
168 |          combinef (constantly cnt)]
169 |      (fold n combinef reducef coll)
170 |      (.get cnt))))
171 | 
172 | (extend-protocol clojure.core.protocols/IKVReduce
173 |   java.util.Map ; lets reduce-kv walk any java.util.Map through its entry-set iterator
174 |   (kv-reduce
175 |     [amap f init]
176 |     (c/let [^java.util.Iterator iter (.. amap entrySet iterator)]
177 |       (loop [ret init]
178 |         (if (.hasNext iter)
179 |           (c/let [^java.util.Map$Entry kv (.next iter)
180 |                 ret (f ret (.getKey kv) (.getValue kv))]
181 |             (if (reduced? ret) ; honour early termination
182 |               @ret
183 |               (recur ret)))
184 |           ret)))))
185 | 
186 | (defn group-by
187 |   "Similar to core/group-by, but executes in parallel.
188 |   It takes an optional list of transducers to apply to the
189 |   items in coll before generating the groups. Differently
190 |   from core/group-by, the order of the items in each
191 |   value vector can change between runs. It's generally 2x-5x faster
192 |   than core/group-by (without xducers). If dealing with a Java mutable
193 |   map with Queue type values is not a problem, a further 2x
194 |   speedup can be achieved by:
195 |         (binding [p/*mutable* true] (p/group-by f coll))
196 |   Restrictions: it does not support nil values."
197 |   [f coll & xforms]
198 |   (c/let [coll (if (foldable? coll) coll (into [] coll))
199 |         m (ConcurrentHashMap. (quot (c/count coll) 2) 0.75 ncpu) ; one map shared by every chunk
200 |         combinef (fn ([] m) ([m1 m2]))                            ; seed is always m; combining is a no-op
201 |         rf (fn [^Map m x]
202 |              (c/let [k (f x)                                      ; a is nil when this call installed a new queue already holding x
203 |                    ^Queue a (c/or (.get m k) (.putIfAbsent m k (ConcurrentLinkedQueue. [x])))]
204 |                (when a (.add a x))
205 |                m))]
206 |     (fold combinef (apply xrf (completing rf) xforms) coll)       ; run for its side effects on m
207 |     (if *mutable* m (persistent! (reduce-kv (fn [m k v] (assoc! m k (vec v))) (transient {}) m)))))
208 | 
209 | (defn update-vals
210 |   "Use f to update the values of a map in parallel. It performs well
211 |   with non-trivial f, otherwise is outperformed by reduce-kv.
212 |   For larger maps (> 100k keys), the final transformation
213 |   from mutable to persistent dominates over trivial f transforms.
214 |   You can access the raw mutable java.util.Map by setting the dynamic
215 |   binding *mutable* to true. Restrictions: does not support nil values."
216 |   [^Map input f]
217 |   (c/let [ks (into [] (keys input))                      ; keys as a vector so r/fold can split them
218 |         output (ConcurrentHashMap. (c/count ks) 1. ncpu)] ; single output map shared by every chunk
219 |     (r/fold
220 |       (fn ([] output) ([_ _]))                           ; seed is always output; combining is a no-op
221 |       (fn [^Map m k]
222 |         (.put m k (f (.get input k)))
223 |         m)
224 |       ks)
225 |     (if *mutable* output (into {} output))))
226 | 
227 | (defn sort
228 |   "Splits input coll into chunk of 'threshold' (default 8192)
229 |   size then sorts chunks in parallel. Input needs conversion into a native
230 |   array before splitting. More effective for large colls
231 |   (> 1M elements) or non trivial comparators. Set *mutable* to 'true'
232 |   to access the raw results as a mutable array."
233 |   ([coll]
234 |    (sort 8192 < coll))                                              ; default comparator is <
235 |   ([cmp coll]
236 |    (sort 8192 cmp coll))
237 |   ([threshold cmp ^Object coll]
238 |    (c/let [a (if (.. coll getClass isArray) coll (to-array coll))] ; an array input is used as is (not copied)
239 |      (msort/sort threshold cmp a)
240 |      (if *mutable* a (into [] a)))))
241 | 
242 | (defn slurp
243 |   "Loads a java.io.File in parallel. By default,
244 |   the loaded byte array is converted into an UTF-8 string.
245 |   It takes an optional parsef function of the byte array for
246 |   additional (or different) processing. When *mutable* var
247 |   is true it returns the byte array as is."
248 |   ([file]
249 |    (slurp file (fn parsef [^bytes a] (String. a "UTF-8"))))
250 |   ([^File file parsef]
251 |    (c/let [size (.length file)
252 |            threshold (quot size (* 4 ncpu))
253 |            a (byte-array size)]
254 |      (mcombine/map
255 |        (fn read-chunk [low high] ; fills a[low, high) from the same offsets of the file
256 |          (with-open [fis (FileInputStream. file)]
257 |            (.position (.getChannel fis) (long low)) ; exact seek; a single .skip may skip fewer bytes
258 |            (loop [off low]                          ; a single .read may return fewer bytes than requested
259 |              (c/let [n (if (< off high) (.read fis a off (- high off)) -1)]
260 |                (when (pos? n) (recur (+ off n)))))))
261 |        (fn [_ _])
262 |        threshold size)
263 |      (if *mutable* a (parsef a)))))
264 | 
265 | (defn unchunk-map [f coll] ; lazy map built from first/rest, so f runs on one item per realized element
266 |   (lazy-seq
267 |     (when-let [s (seq coll)]
268 |       (cons
269 |         (f (first s))
270 |         (unchunk-map f (rest s))))))
271 | 
272 | (defn external-sort
273 |   "Allows large datasets (that would otherwise not fit into memory)
274 |   to be sorted in parallel. It performs the following on a vector of 'ids'
275 |   and 'fetchf', a function from chunk->data:
276 |   * split ids into chunks of approximate size 'n'
277 |   * call 'fetchf' on a chunk and expects actual data in return
278 |   * sort actual data using 'cmp' ('compare' by default)
279 |   * save result to temporary files (deleted when the JVM exits)
280 |   * lazily concat files in order as they are requested"
281 |   ([fetchf ids]
282 |    (external-sort compare fetchf ids))
283 |   ([cmp fetchf ids] ; forward the caller's comparator (it used to be replaced by compare)
284 |    (external-sort 512 cmp fetchf ids))
285 |   ([n cmp fetchf ids]
286 |    (letfn [(save-chunk! [data] ; prints the sorted chunk to a temp file; returns [smallest-item file]
287 |              (c/let [file (File/createTempFile "mergesort-" ".tmp")]
288 |                (with-open [fw (io/writer file)]
289 |                  (binding [*out* fw] (pr data)))
290 |                [(first data) file]))]
291 |      (->>
292 |        (r/fold
293 |          n concat
294 |          (fn [chunk] (->> chunk fetchf (c/sort cmp) save-chunk! vector))
295 |          (reify r/CollFold
296 |            (coll-fold [this n combinef f]
297 |              (foldvec (into [] ids) n combinef f))))
298 |        (sort-by first cmp)                            ; order the files by their first item
299 |        (unchunk-map #(read-string (slurp (last %))))  ; read files back one at a time, on demand
300 |        (mapcat identity)))))
301 | 
302 | (defn- nearest-pow2 [x] ; 2 raised to the bit length of x, i.e. the first power of two strictly above x
303 |   (int (Math/pow 2 (- 32 (Integer/numberOfLeadingZeros x)))))
304 | 
305 | (defn- fold-adapt
306 |   "Select r/fold or p/fold based on presence of xforms.
307 |   Adapt p/fold chunk number to the requested chunk-size."
308 |   [rf init coll chunk-size xforms]
309 |   (c/let [v (if (vector? coll) coll (into [] coll))]        ; both folds below work on a vector
310 |     (if (seq xforms)
311 |       (fold (nearest-pow2 (/ (c/count v) chunk-size))       ; p/fold takes a number of chunks, not a size
312 |             (fn ([] init) ([a b] (rf a b)))                 ; combine with rf, seeded with init
313 |             (apply xrf rf xforms)
314 |             v)
315 |       (r/fold chunk-size (fn ([] init) ([a b] (rf a b))) rf v))))
316 | 
317 | (defn min
318 |   "Find the min in coll in parallel. Accepts optional
319 |   transducers to apply to coll before searching the min.
320 |   Effective for coll size >10k items. 4000 is an approximate
321 |   minimal chunk size."
322 |   [coll & xforms]
323 |   (fold-adapt c/min ##Inf coll 4000 xforms)) ; ##Inf is the seed, so an empty coll yields ##Inf
324 | 
325 | (defn max
326 |   "Find the max in coll in parallel. Accepts optional
327 |   transducers to apply to coll before searching the max.
328 |   Effective for coll size >10k items. 4000 is an approximate
329 |   minimal chunk size."
330 |   [coll & xforms]
331 |   (fold-adapt c/max ##-Inf coll 4000 xforms)) ; ##-Inf is the seed, so an empty coll yields ##-Inf
332 | 
333 | (defn amap
334 |   "Applies f in parallel to the elements in the array.
335 |   The threshold decides how big a chunk of computation should be before
336 |   going sequential and it's given a default based on the number of
337 |   available cores."
338 |   ([f ^objects a]
339 |    (amap (quot (alength a) (* 2 ncpu)) f a)) ; default threshold: array length / (2 * ncpu)
340 |   ([threshold f ^objects a]
341 |    (mcombine/map
342 |      (fn [low high]                          ; overwrites a[low, high) with f of each item
343 |        (loop [idx low]
344 |          (when (< idx high)
345 |            (aset a idx (f (aget a idx)))
346 |            (recur (unchecked-inc idx)))))
347 |      (fn [_ _])                              ; nothing to combine: the work is the mutation
348 |      threshold (alength a))
349 |    a))                                       ; returns the same, mutated, array
350 | 
351 | (defn distinct
352 |   "Returns a non-lazy and unordered sequence of the distinct elements in coll.
353 |   It does not support null values that need to be removed before calling.
354 |   Also accepts an optional list of transducers that is applied before removing
355 |   duplicates. When bound with *mutable* dynamic var, returns a java.util.Set."
356 |   [coll & xforms]
357 |   (c/let [coll (if (foldable? coll) coll (into [] coll))
358 |         m (ConcurrentHashMap. (quot (c/count coll) 2) 0.75 ncpu) ; one map shared by every chunk
359 |         combinef (fn ([] m) ([_ _]))                              ; seed is always m; combining is a no-op
360 |         rf (fn put [^Map m k] (.put m k 1) m)]                    ; items become keys; the value 1 is a dummy
361 |     (fold combinef (apply xrf (completing rf) xforms) coll)
362 |     (if *mutable* (.keySet m) (enumeration-seq (.keys m)))))
363 | 
364 | (defn arswap
365 |   "Arrays reverse-swap of the regions identified by:
366 |   [low, low + radius]....[high - radius, high]
367 |   Takes transformation f to apply to each item.
368 |   Preconditions: (pos? (alength a)), (< low high), (pos? radius)"
369 |   [f low high radius ^objects a]
370 |   (loop [left low right high]
371 |     (when (c/and (<= left right) (< left (+ low radius))) ; stop when the indexes cross or radius items are done
372 |       (c/let [tmp (f (aget a left))]
373 |         (aset a left (f (aget a right)))
374 |         (aset a right tmp)
375 |         (recur (inc left) (dec right))))) a)               ; returns the same, mutated, array
376 | 
377 | (defn- sequential-armap
378 |   "Reverses the array a in place on the calling thread, applying f once to every item."
379 |   [f ^objects a]
380 |   (loop [i 0]
381 |     (when (< i (quot (inc (alength a)) 2)) ; ceil(n/2) swaps; `<= n/2` re-swapped the middle pair of even-length arrays
382 |       (c/let [tmp (f (aget a i))
383 |             j (- (alength a) i 1)]         ; mirror index of i
384 |         (aset a i (f (aget a j)))
385 |         (aset a j tmp))                    ; when i = j (odd length) the item ends up as (f item)
386 |       (recur (unchecked-inc i)))))
387 | 
388 | (defn armap
389 |   "Applies f in parallel over the reverse of the array.
390 |   The threshold decides how big is the chunk of sequential
391 |   computation, with a default of alength / twice the CPUs.
392 |   Performs better than sequential for non-trivial transforms."
393 |   ([f ^objects a]
394 |    (when a                                     ; nil array -> nil
395 |      (armap (quot (alength a) (* 2 ncpu)) f a)))
396 |   ([threshold f ^objects a]
397 |    (when (c/and a (pos? (alength a)))          ; nothing to do for nil or empty arrays
398 |      (if (pos? threshold)
399 |        (forkm/submit f arswap threshold a)
400 |        (sequential-armap f a))) a))            ; non-positive threshold: single-threaded path; a is returned
401 | 
402 | (defn- should-be [p msg form] ; throws IllegalArgumentException naming the form, ns and line unless p is truthy
403 |   (when-not p
404 |     (c/let [line (:line (meta form))
405 |           msg (format "%s requires %s in %s:%s" (first form) msg *ns* line)]
406 |       (throw (IllegalArgumentException. msg)))))
407 | 
408 | (defmacro let
409 |   "Evaluates bindings in parallel and returns the result of
410 |   evaluating body in the context of those bindings. Bindings
411 |   have to be independent from each other."
412 |   [bindings & body]
413 |   (should-be (vector? bindings) "a vector for its bindings" &form)
414 |   (should-be (even? (c/count bindings)) "an even number of forms in bindings" &form)
415 |   (c/let [ks (take-nth 2 bindings)                                  ; binding targets
416 |           vs (take-nth 2 (rest bindings))                           ; binding expressions
417 |           ts (take (c/count ks) (repeatedly gensym))]               ; one temp symbol per future
418 |     `(c/let ~(vec (interleave ts (map #(list 'future %) vs)))       ; start every expression in a future
419 |        (c/let ~(vec (interleave ks (map #(list 'deref %) ts)))      ; then bind the targets to the results
420 |          ~@body))))
421 | 
422 | (defmacro args
423 |   "Call f with each argument evaluated in parallel.
424 |   This is roughly equivalent to the expansion:
425 |   (p/args + 1 2 3) =>
426 |   (let [a (future 1) b (future 2) c (future 3)] (+ @a @b @c))"
427 |   [f & args]
428 |   (c/let [ts (take (c/count args) (repeatedly gensym))]           ; one temp symbol per argument
429 |     `(c/let ~(vec (interleave ts (map #(list 'future %) args)))
430 |        (~f ~@(map #(list 'deref %) ts)))))
431 | 
432 | (defmacro or
433 |   "Like `core/or` but each expression is evaluated in parallel.
434 |   It does not short-circuit."
435 |   [& args]
436 |   (c/let [ts (take (c/count args) (repeatedly gensym))]
437 |     `(let ~(vec (interleave ts (map #(list 'future %) args)))              ; let is this namespace's parallel let
438 |        (reduce #(c/or %1 %2) nil  ~(vec (map #(list 'deref %) ts))))))     ; first truthy result in argument order
439 | 
440 | (defmacro and
441 |   "Like `core/and` but each expression is evaluated in parallel.
442 |   It does not short-circuit."
443 |   [& args]
444 |   (c/let [ts (take (c/count args) (repeatedly gensym))]
445 |     `(let ~(vec (interleave ts (map #(list 'future %) args)))              ; let is this namespace's parallel let
446 |        (reduce #(c/and %1 %2) true  ~(vec (map #(list 'deref %) ts))))))   ; seeded with true, like (and)
447 | 
448 | (defmacro do
449 |   "Like core/do but forms evaluate in parallel."
450 |   [& body]
451 |   (when-not (empty? body)                         ; an empty body expands to nil
452 |     (c/let [ts (repeatedly gensym)
453 |             bindings (vec (interleave ts body))]  ; one throwaway symbol per form
454 |       `(let ~bindings ~(peek (pop bindings))))))  ; parallel let; the result is the last form's symbol
455 | 
456 | (defmacro doto
457 |   "Like core/doto but forms evaluate in parallel."
458 |   [x & forms]
459 |   (c/let [target (gensym)]
460 |     `(c/let [~target ~x]                                  ; x is evaluated once
461 |        (parallel.core/do
462 |          ~@(map (fn [f]
463 |                   (if (seq? f)
464 |                     `(~(first f) ~target ~@(next f))      ; (f args) -> (f target args)
465 |                     `(~f ~target)))                       ; bare symbol f -> (f target)
466 |                 forms))
467 |        ~target)))                                         ; the target itself is the result
468 | 
469 | (defn process-folder
470 |   "Applies xforms to all lines of all files inside folder. It supports
471 |   stateful transducers (for example, to skip the header, group stuff, etc.)
472 |   By default it produces a vector of results, but you can pass a different
473 |   reducef+combinef to use different data structures."
474 |   ([^String folder xforms]
475 |    (process-folder
476 |      folder
477 |      (completing conj! persistent!)   ; each chunk builds a transient and finishes as a persistent vector
478 |      (r/monoid into conj!)            ; combine step is into; the seed comes from (conj!)
479 |      xforms))
480 |   ([^String folder reducef combinef xforms]
481 |    (transduce
482 |      (comp
483 |        (mapcat #(Files/readAllLines (.toPath %)))  ; every file is expanded into its lines
484 |        xforms)
485 |      reducef
486 |      combinef
487 |      (into [] (rest (file-seq (java.io.File. folder))))))) ; rest drops the first file-seq entry, the folder itself
488 | 
489 | (defn- transducing
490 |   "Prepare the input for transducing, making some assumptions
491 |   about the type. A folder is considered a group of files
492 |   containing lines."
493 |   [input]
494 |   (cond
495 |     (foldable? input) input                                                                 ; already foldable: used as is
496 |     (c/and (instance? File input) (.isDirectory input)) (into [] (rest (file-seq input)))  ; directory: vector of its entries
497 |     (instance? File input) (Files/readAllLines (.toPath input))                             ; single file: its lines
498 |     :else (into [] input)))                                                                 ; anything else: poured into a vector
499 | 
500 | (defn frequencies
501 |   "Like clojure.core/frequencies, but executes in parallel.
502 |   It takes an optional comp of transducers to apply to coll before
503 |   the frequency is calculated."
504 |   ([input]
505 |    (frequencies input identity))
506 |   ([input custom-xforms]
507 |    (c/let [folder? (c/and (instance? File input) (.isDirectory input))
508 |            xforms (if folder?                                        ; directories: expand each file into its lines first
509 |                     (comp (mapcat #(Files/readAllLines (.toPath %))) custom-xforms)
510 |                     custom-xforms)
511 |            reducef (completing
512 |                      (fn [^Map m k]                                  ; v is nil when this call installed a fresh counter at 1
513 |                        (c/let [^AtomicInteger v (c/or (.get m k) (.putIfAbsent m k (AtomicInteger. 1)))]
514 |                          (when v (.incrementAndGet v))
515 |                          m))
516 |                      identity)
517 |            m (ConcurrentHashMap.)                                    ; one map shared by every chunk
518 |            combinef (fn ([] m) ([_ _] m))]
519 |      (transduce xforms reducef combinef (transducing input))
520 |      (if *mutable* m (into {} m)))))                                 ; values stay AtomicInteger instances either way
521 | 
522 | (defn pmap
523 |   "Like pmap but eager and unordered. It runs n parallel threads
524 |   (default 100) independently from the chunk size or the number
525 |   of cores."
526 |   [f input & [n]]
527 |   (c/let [q (ConcurrentLinkedQueue. input)                                         ; work queue holding every input item
528 |           n (c/or n 100)
529 |           workers (repeatedly #(future (when-let [item (.poll q)] (f item))))]     ; lazy, endless supply of one-item workers
530 |     (loop [workers workers res []]
531 |       (c/let [res (into res (keep deref (doall (take n workers))))]                ; start n workers, wait for all; nil results are dropped
532 |         (if (.isEmpty q)
533 |           res
534 |           (recur (drop n workers) res))))))
535 | 


--------------------------------------------------------------------------------
/src/parallel/foldmap.clj:
--------------------------------------------------------------------------------
  1 | (ns parallel.foldmap
  2 |   (:require [clojure.core.reducers :as r])
  3 |   (:import [clojure.lang RT Get
  4 |             PersistentHashMap
  5 |             PersistentHashMap$INode
  6 |             PersistentHashMap$ArrayNode
  7 |             PersistentHashMap$BitmapIndexedNode
  8 |             PersistentHashMap$HashCollisionNode]
  9 |            [java.util.concurrent Callable]
 10 |            [java.util ArrayList List]))
 11 | 
 12 | (set! *warn-on-reflection* false)
 13 | 
 14 | (defn- agetter
 15 |   "Trickiness. This needs to be an indirected call.
 16 |   It prevents Clojure from inlining Get/array implementation
 17 |   into the generated function class. The generated parallel.foldmap
 18 |   package class doesn't have protected access to clojure.lang.
 19 |   Will always throw a reflection warning."
 20 |   [node] (Get/array node)) ; node is left unhinted on purpose; Get/array has one overload per node type
 21 | 
 22 | (set! *warn-on-reflection* true)
 23 | 
 24 | (defn- fold-tasks [^List tasks combinef] ; runs a list of Callables by recursive halving, combining results pairwise
 25 |   (cond
 26 |     (.isEmpty tasks) (combinef)                              ; no tasks: the combine seed
 27 |     (== 1 (.size tasks)) (.call ^Callable (.get tasks 0))    ; single task: run it in place
 28 |     :else (let [t1 (.subList tasks 0 (quot (.size tasks) 2))
 29 |                 t2 (.subList tasks (quot (.size tasks) 2) (.size tasks))
 30 |                 forked (#'r/fjfork (#'r/fjtask #(fold-tasks t2 combinef)))] ; second half is forked
 31 |             (combinef (fold-tasks t1 combinef)               ; first half runs in the calling task
 32 |                       (#'r/fjjoin forked)))))
 33 | 
 34 | (defn- compose
 35 |   "Resolves xrf to a plain reducing function. A vector is read as a
 36 |   reducing function followed by a transducer: its last element is
 37 |   applied to its first. Anything else is returned untouched, so a
 38 |   vector can never be used as the reducing function itself."
 39 |   [xrf]
 40 |   (if-not (vector? xrf)
 41 |     xrf
 42 |     (let [xform (last xrf)
 43 |           rf (first xrf)]
 44 |       (xform rf))))
 40 | 
 41 | (defprotocol Foldmap
 42 |   "Folding operations over a PersistentHashMap and its internal nodes."
 43 |   (fold [m n combinef reducef]
 44 |     "Folds m (a map or one of its nodes) with reducef and combinef.")
 45 |   (kvreduce [node f init]
 46 |     "Reduces node with f starting from init; f receives entries as [k v]."))
 44 | 
 45 | (extend-protocol Foldmap
 46 | 
 47 |   ;; Flat Object[] as exposed by bitmap-indexed and hash-collision nodes.
 48 |   (Class/forName "[Ljava.lang.Object;")
 49 |   (fold [m n combinef reducef]
 50 |     (throw (RuntimeException. "Not implemented")))
 51 |   (kvreduce [node f init]
 52 |     ;; workaround type hints are lost [CLJ-1381]
 53 |     (let [^"[Ljava.lang.Object;" node node ^Object init init]
 54 |       ;; Slots are read in pairs: [key value] for an entry, [nil child] for
 55 |       ;; a sub-node and [nil nil] for a pair that holds nothing.
 56 |       (loop [idx 0 res init]
 57 |         (if (or (RT/isReduced res) (>= idx (alength ^"[Ljava.lang.Object;" node)))
 58 |           res
 59 |           (let [idx+1 (unchecked-inc idx)
 60 |                 idx+2 (unchecked-add idx 2)]
 61 |             (if (nil? (aget node idx))
 62 |               (let [node (aget node idx+1)]
 63 |                 (if (nil? node)
 64 |                   (recur idx+2 res)
 65 |                   (recur idx+2 (kvreduce node f res))))
 66 |               (recur idx+2 (f res [(aget node idx) (aget node idx+1)]))))))))
 67 | 
 68 |   PersistentHashMap
 69 |   (fold [m n combinef reducef]
 70 |     (#'r/fjinvoke
 71 |       #(let [ret (combinef)
 72 |              ret (if (Get/root m) (combinef ret (fold (Get/root m) n combinef reducef)) ret)]
 73 |          (if (Get/hasNullValue m)
 74 |            ;; The nil-key value is read from the map itself, not from a node:
 75 |            ;; reduce it as a [k v] entry, the shape kvreduce hands to f.
 76 |            (combinef ret ((compose reducef) (combinef) [nil (Get/nullValue m)]))
 77 |            ret))))
 78 |   (kvreduce [node f init]
 79 |     (throw (RuntimeException. "Not implemented")))
 80 | 
 81 |   ;; Node exposing an array of child nodes in which unused slots are nil.
 82 |   PersistentHashMap$ArrayNode
 83 |   (fold [m n combinef reducef]
 84 |     ;; One task per child; fold-tasks runs them and combines the results.
 85 |     (let [tasks (ArrayList.)
 86 |           ^"[Lclojure.lang.PersistentHashMap$INode;" array (agetter m)]
 87 |       (dotimes [idx (alength array)]
 88 |         (let [node (aget array idx)]
 89 |           (when (some? node)
 90 |             (.add tasks #(fold node n combinef reducef)))))
 91 |       (fold-tasks tasks combinef)))
 92 |   (kvreduce [node f init]
 93 |     (let [^"[Lclojure.lang.PersistentHashMap$INode;" array (agetter node)]
 94 |       (loop [idx 0 res init]
 95 |         (if (or (RT/isReduced res) (>= idx (alength array)))
 96 |           res
 97 |           (let [child (aget array idx)]
 98 |             (if (nil? child)
 99 |               (recur (unchecked-inc idx) res)
100 |               ;; Descend into the child, never into this node again.
101 |               (recur (unchecked-inc idx) (kvreduce child f res))))))))
102 | 
103 |   ;; Bitmap-indexed and hash-collision nodes both expose a flat Object[]
104 |   ;; that is reduced sequentially by the Object[] implementation above.
105 |   PersistentHashMap$BitmapIndexedNode
106 |   (fold [m n combinef reducef]
107 |     (let [^objects array (agetter m)]
108 |       (kvreduce array (compose reducef) (combinef))))
109 |   (kvreduce [node f init]
110 |     (let [^objects array (agetter node)]
111 |       (kvreduce array f init)))
112 | 
113 |   PersistentHashMap$HashCollisionNode
114 |   (fold [m n combinef reducef]
115 |     (let [^objects array (agetter m)]
116 |       (kvreduce array (compose reducef) (combinef))))
117 |   (kvreduce [node f init]
118 |     (let [^objects array (agetter node)]
119 |       (kvreduce array f init))))
110 | 


--------------------------------------------------------------------------------
/src/parallel/fork_middle.clj:
--------------------------------------------------------------------------------
 1 | (ns parallel.fork-middle
 2 |   (:require [clojure.core.reducers :as r])
 3 |   (:import [java.util.concurrent Callable ForkJoinPool ForkJoinTask]
 4 |            [java.util ArrayList List]))
 5 | 
 6 | (set! *warn-on-reflection* true)
 7 | 
 8 | ;; Callable working on a[low..high] from the outside in. While
 9 | ;; (high - low) - 2 * radius is greater than radius it forks a new
10 | ;; ForkMiddle on [low + radius, high - radius], calls
11 | ;; (f mapf low high radius a) itself and returns the forked task's result.
12 | ;; Otherwise it calls f once with (inc (quot (- high low) 2)) as the radius.
13 | (deftype ForkMiddle [^objects a
14 |                      ^int low ^int high ^int radius
15 |                      ^Callable mapf ^Callable f]
16 |   Callable
17 |   (call [this]
18 |     (let [span (- high low)
19 |           middle-size (- span (* 2 radius))]
20 |       (if (> middle-size radius)
21 |         (#'r/fjinvoke
22 |           (fn []
23 |             (let [middle (ForkMiddle. a (+ low radius) (- high radius) radius mapf f)
24 |                   forked (.fork (ForkJoinTask/adapt middle))]
25 |               (f mapf low high radius a)
26 |               (.join ^ForkJoinTask forked))))
27 |         (f mapf low high (inc (quot span 2)) a)))))
21 | 
22 | (defn submit
23 |   "Builds a ForkMiddle over the whole of array a (indexes 0 to
24 |   length - 1) with the given mapf, f and radius, submits it to the
25 |   reducers fork/join pool and blocks for its result."
26 |   [mapf f radius ^objects a]
27 |   (let [last-idx (dec (alength a))
28 |         ^ForkJoinPool pool @r/pool
29 |         whole (ForkMiddle. a 0 last-idx radius mapf f)]
30 |     (.join (.submit pool whole))))
26 | 
27 | ;; Different strategy, similar results.
28 | ; (defn fork-tasks
29 | ;   "Fork a collection of tasks by recursively
30 | ;   splitting into halves."
31 | ;   [^List tasks]
32 | ;   (let [cnt (.size tasks)]
33 | ;     (cond
34 | ;       (= 1 cnt) (.call ^Callable (.get tasks 0))
35 | ;       (> cnt 1)
36 | ;       (let [mid (quot cnt 2)]
37 | ;         (#'r/fjinvoke
38 | ;           (fn []
39 | ;             (let [task (#'r/fjtask #(fork-tasks (.subList tasks mid cnt)))]
40 | ;               (#'r/fjfork task)
41 | ;               (fork-tasks (.subList tasks 0 mid))
42 | ;               (#'r/fjjoin task))))))))
43 | 
44 | ; (defn submit
45 | ;   "A forking strategy that chops off chunks
46 | ;   at the edges and fork the rest in the middle."
47 | ;   [mapf f ^long radius ^objects a]
48 | ;   (let [tasks (ArrayList.)]
49 | ;     (loop [low 0 high (dec (alength a))]
50 | ;       (if (> (- (- high low) (* 2 radius)) radius)
51 | ;         (do
52 | ;           (.add tasks #(f mapf low high radius a))
53 | ;           (recur (+ low radius) (- high radius)))
54 | ;         (.add tasks #(f mapf low high (inc (quot (- high low) 2)) a))))
55 | ;     (fork-tasks tasks)))
56 | 


--------------------------------------------------------------------------------
/src/parallel/map_combine.clj:
--------------------------------------------------------------------------------
 1 | (ns parallel.map-combine
 2 |   (:refer-clojure :exclude [map])
 3 |   (:require [clojure.core.reducers :as r])
 4 |   (:import [java.util.concurrent Callable ForkJoinPool]))
 5 | 
 6 | (set! *warn-on-reflection* true)
 7 | 
 8 | ;; Callable over the index range [low, high). A range of at most
 9 | ;; `threshold` items, or of fewer than 2 items whatever the threshold, is
10 | ;; handed to (mapf low high). A larger range is halved: the upper half is
11 | ;; forked as a fork/join task, the lower half runs on the current thread
12 | ;; and both results are merged with (combinef lower-result upper-result).
13 | (deftype MapCombine [^int low ^int high ^int threshold
14 |                      ^Callable mapf ^Callable combinef]
15 |   Callable
16 |   (call [this]
17 |     (let [size (- high low)]
18 |       ;; The (< size 2) guard stops a threshold below 1 from splitting a
19 |       ;; one-item range into an empty range and itself forever.
20 |       (if (or (< size 2) (<= size threshold))
21 |         (mapf low high)
22 |         (let [middle (+ low (bit-shift-right size 1))
23 |               lower (MapCombine. low middle threshold mapf combinef)
24 |               upper (MapCombine. middle high threshold mapf combinef)]
25 |           (#'r/fjinvoke
26 |             (fn []
27 |               (let [forked (#'r/fjtask #(.call upper))]
28 |                 (#'r/fjfork forked)
29 |                 (combinef (.call lower) (#'r/fjjoin forked))))))))))
24 | 
25 | (defn map
26 |   "Submits a MapCombine task covering the index range [0, n) to the
27 |   reducers fork/join pool and blocks for its result. MapCombine calls
28 |   (mapf low high) on ranges no longer than threshold and merges the
29 |   results of neighbouring ranges with combinef."
30 |   [mapf combinef threshold n]
31 |   (let [^ForkJoinPool pool @r/pool
32 |         root (MapCombine. 0 n threshold mapf combinef)]
33 |     (.join (.submit pool root))))
28 | 


--------------------------------------------------------------------------------
/src/parallel/merge_sort.clj:
--------------------------------------------------------------------------------
 1 | (ns parallel.merge-sort
 2 |   (:refer-clojure :exclude [sort])
 3 |   (:require [clojure.core.reducers :as r])
 4 |   (:import
 5 |     [java.util.concurrent Callable ForkJoinPool]
 6 |     [java.util Arrays Comparator]))
 7 | 
 8 | (set! *warn-on-reflection* true)
 9 | 
10 | (definterface IMergeSort
11 |   (merge [mid])
12 |   (sort []))
13 | 
14 | ;; In-place parallel merge sort of the slice [lo, hi) of `a`, ordered by `cmp`.
15 | ;; Slices of at most `threshold` elements are sorted with Arrays/sort; larger
16 | ;; ones are halved, sorted as two fork/join tasks and merged back. The merge
17 | ;; prefers the left half on ties, so equal elements keep their relative order.
18 | (deftype MergeSort [^objects a
19 |                     ^int lo
20 |                     ^int hi
21 |                     ^int threshold
22 |                     ^Comparator cmp]
23 | 
24 |   Callable
25 |   (call [this] (.sort this))
26 | 
27 |   IMergeSort
28 |   (merge [this mid]
29 |     ;; mid arrives boxed through the interface: unbox it once so that the
30 |     ;; array accesses below do not fall back to reflection.
31 |     (let [mid (int mid)]
32 |       ;; Nothing to do when the two sorted halves are already in order.
33 |       (when (pos? (.compare cmp (aget a (dec mid)) (aget a mid)))
34 |         (let [size (- hi lo)
35 |               lsize (- mid lo)
36 |               ^objects aux (object-array size)]
37 |           (System/arraycopy a lo aux 0 size)
38 |           ;; k writes into a, i walks the left half of aux, j the right half.
39 |           (loop [k lo i 0 j lsize]
40 |             (when (< k hi)
41 |               (if (or (>= j size)
42 |                       (and (< i lsize)
43 |                            (not (pos? (.compare cmp (aget aux i) (aget aux j))))))
44 |                 (do (aset a k (aget aux i)) (recur (inc k) (inc i) j))
45 |                 (do (aset a k (aget aux j)) (recur (inc k) i (inc j))))))))))
46 | 
47 |   (sort [this]
48 |     (let [size (- hi lo)]
49 |       ;; Slices shorter than 2 are always a base case: with a threshold
50 |       ;; below 1 a one-element slice would otherwise be split forever.
51 |       (if (or (< size 2) (<= size threshold))
52 |         (Arrays/sort a lo hi cmp)
53 |         (let [mid (+ lo (bit-shift-right size 1))
54 |               l (MergeSort. a lo mid threshold cmp)
55 |               h (MergeSort. a mid hi threshold cmp)]
56 |           (let [fc (fn [^Callable child] #(.call child))]
57 |             (#'r/fjinvoke
58 |               #(let [f1 (fc l)
59 |                      t2 (#'r/fjtask (fc h))]
60 |                  (#'r/fjfork t2)
61 |                  (f1)
62 |                  (#'r/fjjoin t2)
63 |                  (.merge this mid)))))))))
51 | 
52 | (defn sort
53 |   "Submits a MergeSort task covering the whole object array a, with
54 |   the given threshold and comparator cmp, to the reducers fork/join
55 |   pool and blocks until it completes. The array is sorted in place."
56 |   [threshold cmp ^objects a]
57 |   (let [len (alength a)
58 |         ^ForkJoinPool pool @r/pool
59 |         root (MergeSort. a 0 len threshold cmp)]
60 |     (.join (.submit pool root))))
56 | 


--------------------------------------------------------------------------------
/src/parallel/xf.clj:
--------------------------------------------------------------------------------
 1 | (ns parallel.xf
 2 |   (:refer-clojure :exclude [interleave pmap identity]))
 3 | 
 4 | (defn interleave
 5 |   "Transducer version of core/interleave: every input is followed by
 6 |   the next item of coll. The reduction is stopped as soon as an input
 7 |   arrives after coll has run out of items."
 8 |   [coll]
 9 |   (fn [rf]
10 |     (let [pending (volatile! (seq coll))]
11 |       (fn
12 |         ([] (rf))
13 |         ([acc] (rf acc))
14 |         ([acc input]
15 |          (let [remaining @pending]
16 |            (if (nil? remaining)
17 |              (reduced acc)
18 |              (let [filler (first remaining)
19 |                    step (rf acc input)]
20 |                (if-not (reduced? step)
21 |                  (do (vreset! pending (next remaining))
22 |                      (rf step filler))
23 |                  step)))))))))
21 | 
22 | (defn pmap
23 |   "Transducer that applies f to every input like the map transducer,
24 |   but groups the inputs in chunks of up to 32 items and runs
25 |   clojure.core/pmap over each chunk. Only effective with computational
26 |   intensive f. Unlike normal map/pmap, it does not accept multiple inputs."
27 |   [f]
28 |   (let [chunk-size 32
29 |         in-parallel (fn [chunk] (clojure.core/pmap f chunk))]
30 |     (comp
31 |       (partition-all chunk-size)
32 |       (map in-parallel)
33 |       cat)))
35 | 
36 | (def identity
37 |   "Identity transducer: every input reaches the downstream reducing
38 |   function unchanged. When multiple inputs are present, it wraps them
39 |   in a list similarly to what (map list) transducer would produce."
40 |   (fn [rf]
41 |     (fn
42 |       ([] (rf))
43 |       ([acc] (rf acc))
44 |       ([acc x] (rf acc x))
45 |       ([acc x & more] (rf acc (cons x more))))))
46 | 


--------------------------------------------------------------------------------
/test/core_test.clj:
--------------------------------------------------------------------------------
  1 | (ns core-test
  2 |   (:import [clojure.lang RT]
  3 |            [java.io File]
  4 |            [java.util.concurrent ConcurrentLinkedQueue])
  5 |   (:require [parallel.core :as p]
  6 |             [clojure.core.reducers :as r]
  7 |             [clojure.test :refer :all]))
  8 | 
  9 | (deftest frequencies-test
 10 |   (testing "frequencies with xform"
 11 |     (is (= 5000 (count (p/frequencies (range 1e4) (filter odd?)))))
 12 |     (is (= {":a" 2 ":b" 3} (p/frequencies [:a :a :b :b :b] (map str)))))
 13 |   (testing "a dictionary of words with no dupes"
 14 |     ;; Counted with p/frequencies (not clojure.core/frequencies) so that the
 15 |     ;; library is what is under test: the counts must add up to the number
 16 |     ;; of words fed in.
 17 |     (let [words (re-seq #"\S+" (slurp "test/words"))]
 18 |       (is (= (count words)
 19 |              (->> words
 20 |                   (p/frequencies)
 21 |                   (map second)
 22 |                   (reduce +))))))
 23 |   (testing "misc examples"
 24 |     (are [expected test-seq] (= (p/frequencies test-seq) expected)
 25 |          {\p 2 \s 4 \i 4 \m 1} "mississippi"
 26 |          {1 4 2 2 3 1} [1 1 1 1 2 2 3]
 27 |          {1 3 2 2 3 1} [1 1 1 2 2 3]
 28 |          {1 4 2 2 3 1} '(1 1 1 1 2 2 3))))
 27 | 
 28 | (defn large-map
 29 |   "Map associating every integer in [0, i) with itself."
 30 |   [i]
 31 |   (into {} (map (juxt identity identity)) (range i)))
 29 | 
 30 | (deftest update-vals-test
 31 |   ;; Every value of a 1000-entry map is expected to come back incremented.
 32 |   (testing "sanity"
 33 |     (let [updated (p/update-vals (large-map 1000) inc)]
 34 |       (is (= (map inc (range 1000))
 35 |              (->> updated vals sort))))))
 34 | 
 35 | (defmacro repeater
 36 |   "Wraps forms in a lazy sequence of up to 500 evaluations and returns
 37 |   the first of its distinct results, that is the value produced by the
 38 |   first evaluation."
 39 |   [& forms]
 40 |   `(->> (for [i# (range 500)] (do ~@forms))
 41 |         distinct
 42 |         first))
 37 | 
 38 | (defn chunkedf
 39 |   "Splits coll in chunks of size items, applies f to each chunk,
 40 |   concatenates what f returns and reduces the outcome with rf."
 41 |   [f rf size coll]
 42 |   (let [chunks (partition-all size coll)]
 43 |     (reduce rf (mapcat f chunks))))
 40 | 
 41 | (deftest stateful-transducers
 42 |   ;; Expected values for stateful transducers are computed chunk by chunk
 43 |   ;; with chunkedf and compared with what the folds return.
 44 |   (testing "should drop based on chunk size"
 45 |     (let [drop-10 #(drop 10 %)
 46 |           small (vec (range 1600))
 47 |           large (vec (range 204800))]
 48 |       (is (= (chunkedf drop-10 + 200 small)
 49 |              (repeater (r/fold 200 + (p/xrf + (drop 10)) (p/folder small)))))
 50 |       (is (= (chunkedf drop-10 + 100 large)
 51 |              (repeater (r/fold 100 + (p/xrf + (drop 10)) (p/folder large)))))
 52 |       (is (= (chunkedf drop-10 + 400 small)
 53 |              (repeater (r/fold + (p/xrf + (drop 10)) (p/folder small)))))))
 54 |   (testing "folding by number of chunks"
 55 |     (let [concat-chunks (r/monoid concat conj)]
 56 |       (is (= [3  4  5  6  7  8  9  10 11 12
 57 |               16 17 18 19 20 21 22 23 24 25
 58 |               29 30 31 32 33 34 35 36 37 38
 59 |               42 43 44 45 46 47 48 49 50 51]
 60 |              (r/fold "ignored"
 61 |                      concat-chunks
 62 |                      (p/xrf conj (drop 3))
 63 |                      (p/folder (vec (range 52)) 4))))
 64 |       (is (= (- 1802 (* 3 8))
 65 |              (count (r/fold "ignored"
 66 |                             concat-chunks
 67 |                             (p/xrf conj (drop 3))
 68 |                             (p/folder (vec (range 1802)) 8)))))))
 69 |   (testing "p/fold entry point at 32 default chunks"
 70 |     (is (= (chunkedf #(drop 10 %) + (/ 2048 32) (mapv inc (range 2048)))
 71 |            (p/fold (p/xrf + (drop 10) (map inc)) (vec (range 2048))))))
 72 | 
 73 |   (testing "p/fold VS r/fold on stateless xducers should be the same"
 74 |     (let [v (vec (range 10000))
 75 |           inc-odd (comp (map inc) (filter odd?))]
 76 |       (is (= (r/fold + (inc-odd +) v)
 77 |              (p/fold (p/xrf + (map inc) (filter odd?)) v)
 78 |              (p/fold + (inc-odd +) v)))))
 79 | 
 80 |   (testing "p/transduce"
 81 |     (let [v (vec (range 10000))
 82 |           inc-odd (comp (map inc) (filter odd?))
 83 |           odd-incs (filter odd? (map inc v))]
 84 |       (is (= (reduce + 0 odd-incs)
 85 |              (p/transduce inc-odd + v)))
 86 |       (is (= (reduce conj [] odd-incs)
 87 |              (p/transduce inc-odd conj into v)))
 88 |       (is (= [248 249]
 89 |              (nth
 90 |                (p/transduce
 91 |                  4
 92 |                  (comp (drop 240) (partition-all 4))
 93 |                  conj into
 94 |                  (vec (range 1000))) 2)))))
 95 | 
 96 |   (testing "p/folding without reducing, just conj"
 97 |     (let [v (vec (range 10000))
 98 |           into-vector (r/monoid into (constantly []))
 99 |           conj-odd-incs ((comp (map inc) (filter odd?)) conj)]
100 |       (is (= (reduce conj [] (filter odd? (map inc v)))
101 |              (r/fold into-vector conj-odd-incs v)
102 |              (p/fold into-vector conj-odd-incs v)))))
103 | 
104 |   (testing "hashmaps, not just vectors"
105 |     (let [by-initial (fn [m [k v]]
106 |                        (let [c (Character/toLowerCase ^Character (first k))]
107 |                          (assoc m c (conj (get m c []) v))))]
108 |       (is (= {\a [21] \z [23] \h [10 12]}
109 |              (p/fold
110 |                (r/monoid #(merge-with into %1 %2) (constantly {}))
111 |                by-initial
112 |                (hash-map "abba" 21 "zubb" 23 "hello" 10 "hops" 12))))))
113 | 
114 |   (testing "folding hashmaps with transducers"
115 |     (let [inc-val (fn [[k v]] [k (inc v)])]
116 |       (is (= {0 1 1 2 2 3 3 4}
117 |              (p/fold
118 |                (r/monoid merge (constantly {}))
119 |                (p/xrf conj (map inc-val))
120 |                (hash-map 0 0 1 1 2 2 3 3))))))
121 | 
122 |   (testing "exercising all code with larger maps"
123 |     (let [folded (p/fold
124 |                    (r/monoid merge (constantly {}))
125 |                    (p/xrf conj
126 |                           (filter (fn [[k v]] (even? k)))
127 |                           (map (fn [[k v]] [k (inc v)])))
128 |                    (zipmap (range 10000) (range 10000)))]
129 |       (is (= 999 (folded 998))))))
121 | 
122 | (deftest counting
123 |   ;; Each row: expected count, transducer, input collection.
124 |   (testing "count a coll"
125 |     (are [expected xform coll] (= expected (p/count xform coll))
126 |       100000                (map inc)             (range 1e5)
127 |       (reduce + (range 50)) (comp (mapcat range)) (range 50))))
126 | 
127 | (deftest grouping
128 |   (testing "sanity"
129 |     (let [groups (p/group-by odd? (range 10000))]
130 |       (is (= 5000 (count (groups true))))))
131 |   (testing "with xducers"
132 |     (let [groups (p/group-by odd? (range 10000) (map inc) (filter #(zero? (mod % 3))))]
133 |       (is (= 1667 (count (groups true))))))
134 |   (testing "with stateful xducers"
135 |     (let [groups (p/group-by odd? (range 10000) (drop 100) (map inc) (filter #(zero? (mod % 3))))]
136 |       (is (= 1133 (count (groups true))))))
137 |   (testing "anagrams"
138 |     ;; The largest group of words sharing the same sorted letters.
139 |     (let [words (re-seq #"\S+" (slurp "test/words"))
140 |           largest-group (->> words
141 |                              (p/group-by sort)
142 |                              (sort-by (comp count second) >)
143 |                              (map second)
144 |                              first)]
145 |       (is (= #{"caret" "carte" "cater" "crate"
146 |                "creat" "creta" "react" "recta" "trace"}
147 |              (into #{} largest-group))))))
145 | 
146 | (deftest sorting
147 |   (testing "sanity"
148 |     (let [descending (reverse (range 1000))
149 |           letters (->> (range 65 91) (map (comp str char)) shuffle)]
150 |       ;; Each row: expected order, then the p/sort call under test.
151 |       (are [expected actual] (= expected actual)
152 |         (range 1000)           (p/sort 200 < descending)
153 |         descending             (p/sort 200 > descending)
154 |         (sort compare letters) (p/sort compare letters)))))
153 | 
154 | ;; (int (/ 100000 (Math/pow 2 8)))
155 | (deftest external-sorting
156 |   (testing "sanity"
157 |     (let [descending (vec (reverse (range 1000)))
158 |           sorted (p/external-sort 125 compare identity descending)]
159 |       (is (= 0 (first sorted)))))
160 |   (testing "additional processing"
161 |     ;; Items look like "42-42"; the fetch function splits them on the dash
162 |     ;; and the comparator orders the pairs by their last element, descending.
163 |     (let [ids (map #(str % "-" %) (range 100000))
164 |           split-ids (fn [chunk] (map #(clojure.string/split % #"-") chunk))
165 |           descending-by-last (fn [x y] (compare (peek y) (peek x)))
166 |           sorted (p/external-sort 1562 descending-by-last split-ids ids)]
167 |       (is (= ["99999" "99999"] (first sorted))))))
165 | 
166 | (deftest min-max
167 |   ;; Every case works on 0..99999 with -3 added in front.
168 |   (let [numbers (conj (range 100000) -3)]
169 |     (testing "min"
170 |       (is (= -3 (p/min (shuffle numbers)))))
171 |     (testing "max"
172 |       (is (= 99999 (p/max (shuffle numbers)))))
173 |     (testing "xducers"
174 |       (is (= 99998 (p/max (into [] (shuffle numbers)) (map dec)))))
175 |     (testing "min-index"
176 |       (is (= 99999 (p/max numbers))))))
179 | 
180 | (deftest pamap-test
181 |   (testing "sanity"
182 |     ;; Only the first ten mapped items are inspected.
183 |     (let [numbers (to-array (range 100000))
184 |           mapped (p/amap inc numbers)]
185 |       (is (= (range 1 11) (take 10 mapped))))))
184 | 
185 | (deftest distinct-test
186 |   ;; 0..9999, each value present five times, in random order.
187 |   (let [dupes (shuffle (apply concat (repeat 5 (range 10000))))]
188 |     (testing "sanity"
189 |       (is (= (sort (distinct dupes)) (sort (p/distinct dupes)))))
190 |     (testing "with transducers"
191 |       (is (= [1 3 5 7 9]
192 |              (->> (p/distinct dupes (map inc) (filter odd?)) sort (take 5)))))
193 |     (testing "equality semantic"
194 |       (let [pairs (map vector dupes dupes)]
195 |         (is (= (sort (distinct pairs))
196 |                (sort (p/distinct pairs))))))
197 |     (testing "mutability on"
198 |       (is (= #{1 2 3}
199 |              (into #{} (binding [p/*mutable* true] (p/distinct [1 2 3]))))))))
197 | 
198 | (deftest reverse-test
199 |   ;; swapped / reversed copy xs into a fresh object array, run the function
200 |   ;; under test on it and return the array content as a vector.
201 |   (let [swapped (fn [mapf low high radius xs]
202 |                   (let [a (object-array xs)]
203 |                     (p/arswap mapf low high radius a)
204 |                     (into [] a)))
205 |         reversed (fn [mapf xs]
206 |                    (let [a (object-array xs)]
207 |                      (p/armap mapf a)
208 |                      (into [] a)))
209 |         s (range 10)]
210 |     (testing "swap reverse simmetrical regions in arrays"
211 |       (is (= s (swapped identity 0 9 0 s)))
212 |       (is (= (reverse s) (swapped identity 0 9 5 s)))
213 |       (is (= (reverse s) (swapped identity 0 9 10 s)))
214 |       (is (= [9 8 2 3 4 5 6 7 1 0] (swapped identity 0 9 2 s)))
215 |       (is (= [9 8 7 6 5 4 3 2 1] (swapped identity 0 8 4 (rest s))))
216 |       (is (= [9 8 7 4 5 6 3 2 1] (swapped identity 0 8 3 (rest s)))))
217 |     (testing "swap reverse with transform"
218 |       (is (= ["9" "8" 2 3 4 5 6 7 "1" "0"] (swapped str 0 9 2 s)))
219 |       (is (= [:9 :8 :7 4 5 6 :3 :2 :1] (swapped (comp keyword str) 0 8 3 (rest s)))))
220 |     (testing "sanity"
221 |       (is (= nil (p/armap identity nil)))
222 |       (is (= (reverse ()) (reversed identity ())))
223 |       (is (= (reverse (range 1)) (reversed identity (range 1))))
224 |       (is (= (reverse (range 5)) (reversed identity (range 5))))
225 |       (is (= (reverse (range 1e2)) (reversed identity (range 1e2))))
226 |       (let [xs (shuffle (range 11))]
227 |         (is (= (reverse (map str xs)) (reversed str xs)))))))
220 | 
221 | (deftest slurping
222 |   (testing "slurping sanity"
223 |     ;; p/slurp on a File must read the same text as clojure.core/slurp.
224 |     (let [path "test/words"]
225 |       (is (= (slurp path) (p/slurp (File. path)))))))
224 | 
225 | (deftest parallel-let
226 |   (testing "it works like normal let"
227 |     ;; Each row: expected value, then the p/let form producing it.
228 |     (are [expected actual] (= expected actual)
229 |       3   (p/let [a 1 b 2] (+ a b))
230 |       3   (p/let [a (future 1) b (future 2)] (+ @a @b))
231 |       6   (p/let [[a b] [1 2] {c :c} {:c 3}] (+ a b c))
232 |       300 (p/let [a (do (Thread/sleep 20) 100) b (do (Thread/sleep 10) 200)] (+ a b)))))
231 | 
232 | (deftest parallel-args
233 |   (testing "works like a standard function invocation"
234 |     ;; Each row: expected value, then the p/args call producing it.
235 |     (are [expected actual] (= expected actual)
236 |       6   (p/args + 1 2 3)
237 |       1   (p/args first [1 2 3])
238 |       300 (p/args + (do (Thread/sleep 20) 100) (do (Thread/sleep 10) 200)))))
237 | 
238 | (deftest parallel-and
239 |   (testing "works like a standard and"
240 |     ;; Each row: expected value, then the p/and form producing it.
241 |     (are [expected actual] (= expected actual)
242 |       (and) (p/and)
243 |       "a"   (p/and true 1 "a")
244 |       :x    (p/and :y true :x)
245 |       false (p/and true false true))
246 |     (is (p/and (do (Thread/sleep 20) true) (do (Thread/sleep 10) true)))))
245 | 
246 | (deftest parallel-or
247 |   (testing "works like a standard or"
248 |     ;; Each row: expected value, then the p/or form producing it.
249 |     (are [expected actual] (= expected actual)
250 |       (or) (p/or)
251 |       true (p/or true 1 "a")
252 |       :y   (p/or false :y true :x)
253 |       true (p/or true false true))
254 |     (is (p/or (do (Thread/sleep 20) false) (do (Thread/sleep 10) true)))))
253 | 
254 | (deftest parallel-do-doto
255 | 
256 |   (testing "like do, but forms evaluate in parallel."
257 |     (is (= nil (p/do)))
258 |     (is (= 1 (p/do 1)))
259 |     ;; Adds 1 and 2 to a fresh queue through p/do and returns its content;
260 |     ;; over 50 runs either insertion order is accepted.
261 |     (let [run-once (fn []
262 |                      (let [q (ConcurrentLinkedQueue.)]
263 |                        (p/do (.add q 1) (.add q 2))
264 |                        (vec q)))]
265 |       (is (some #{[1 2] [2 1]}
266 |                 (set (repeatedly 50 run-once))))))
267 | 
268 |   (testing "like doto, but forms evaluated in parallel."
269 |     (is (= 1 (p/doto 1)))
270 |     (is (= [1 2] (vec (p/doto (ConcurrentLinkedQueue.) (.add 1) (.add 2)))))))
267 | 
268 | (deftest pmap-test
269 |   (testing "like pmap but results are not ordered"
270 |     ;; Compared as sets because the order of the results is not fixed.
271 |     (let [results (p/pmap inc (range 1000))]
272 |       (is (= (set (range 1 1001)) (set results))))))
271 | 


--------------------------------------------------------------------------------
/test/parallel/merge_sort_test.clj:
--------------------------------------------------------------------------------
 1 | (ns parallel.merge-sort-test
 2 |   (:require [parallel.merge-sort :as msort]
 3 |             [clojure.core.reducers :as r]
 4 |             [clojure.test :refer :all])
 5 |   (:import [parallel.merge_sort MergeSort]
 6 |            [java.util.concurrent ForkJoinPool]
 7 |            [java.util Arrays]))
 8 | 
 9 | (deftest parallel-merge-sort
10 |   ;; Both cases sort two copies of the same shuffled data, one with
11 |   ;; Arrays/parallelSort and one with MergeSort, and compare the outcome.
12 |   (let [n 10000
13 |         sorted-both-ways
14 |         (fn [v]
15 |           (let [a1 (object-array v)
16 |                 a2 (object-array v)]
17 |             (Arrays/parallelSort a2 0 n compare)
18 |             (.join (.submit ^ForkJoinPool @r/pool (MergeSort. a1 0 n 8192 compare)))
19 |             [(into [] a2) (into [] a1)]))]
20 |     (testing "with numbers"
21 |       (let [[expected actual] (sorted-both-ways (into [] (shuffle (range n))))]
22 |         (is (= expected actual))))
23 |     (testing "with strings"
24 |       (let [[expected actual] (sorted-both-ways (into [] (map str (shuffle (range n)))))]
25 |         (is (= expected actual))))))
24 | 


--------------------------------------------------------------------------------
/test/xf_test.clj:
--------------------------------------------------------------------------------
 1 | (ns xf-test
 2 |   (:require [parallel.xf :as xf]
 3 |             [clojure.test :refer :all]))
 4 | 
 5 | (deftest interleave-test
 6 |   (testing "interleave with sequence"
 7 |     (is (= [0 :a 1 :b 2 :c] (sequence (xf/interleave [:a :b :c]) (range 3))))
 8 |     ;; The transducer must agree with clojure.core/interleave for every
 9 |     ;; pair of fillers and inputs below.
10 |     (are [fillers inputs] (= (sequence (xf/interleave fillers) inputs)
11 |                              (interleave inputs fillers))
12 |          [1 2] [3 4]
13 |          [1]   [3 4]
14 |          [1 2] [3]
15 |          []    [3 4]
16 |          [1 2] []
17 |          []    []))
18 |   (testing "interleave with eduction"
19 |     (let [interleaved (eduction (map inc) (xf/interleave (range)) (filter number?) (range 10))]
20 |       (is (= [1 0 2 1 3 2 4 3 5 4 6 5 7 6 8 7 9 8 10 9]
21 |              interleaved)))))
18 | 
19 | (deftest pmap-test
20 |   (testing "pmap as a transducer, similarly to map"
21 |     (let [odd-incs (comp (xf/pmap inc) (filter odd?))]
22 |       (is (= 250000
23 |              (transduce odd-incs + (range 1000)))))
24 |     ;; Same pipeline twice, once with map and once with xf/pmap.
25 |     (let [square #(* % %)
26 |           first-ten-squares (fn [mapping]
27 |                               (sequence
28 |                                 (comp
29 |                                   (filter odd?)
30 |                                   mapping
31 |                                   (take 10))
32 |                                 (range 1000)))]
33 |       (is (= (first-ten-squares (map square))
34 |              (first-ten-squares (xf/pmap square)))))))
39 | 
40 | (deftest identity-test
41 |   (testing "single"
42 |     ;; Each row: expected items, then the transducer applied to (range 10).
43 |     (are [expected xform] (= expected (sequence xform (range 10)))
44 |       (range 10)    xf/identity
45 |       (range 1 11)  (comp (map inc) xf/identity)
46 |       (range 1 11)  (comp xf/identity (map inc))
47 |       [2 4 6 8 10]  (comp (filter odd?) xf/identity (map inc))))
48 | 
49 |   (testing "multi"
50 |     (let [sum-all #(apply + %)
51 |           evens (range 0 20 2)]
52 |       (is (= (map vector (range 10) (range 10))
53 |              (sequence xf/identity (range 10) (range 10))))
54 |       (is (= evens
55 |              (sequence
56 |                (comp (map #(+ %1 %2))
57 |                      xf/identity)
58 |                (range 10) (range 10))))
59 |       (is (= evens
60 |              (sequence
61 |                (comp xf/identity
62 |                      (map sum-all))
63 |                (range 10) (range 10))))
64 |       (is (= [1 1 2 2 3 3 4 4 5 5]
65 |              (sequence
66 |                (comp xf/identity
67 |                      cat
68 |                      (map inc))
69 |                (range 5) (range 5))))
70 |       (is (= evens
71 |              (sequence
72 |                (comp (map vector)
73 |                      xf/identity
74 |                      (map sum-all))
75 |                (range 10) (range 10)))))))
73 | 


--------------------------------------------------------------------------------