├── .gitignore ├── LICENSE ├── README.md ├── RELEASES.md ├── benchmarks ├── bamap.clj ├── barmap.clj ├── bcount.clj ├── bdistinct.clj ├── bfrequencies.clj ├── bidentity.clj ├── binterleave.clj ├── bminmax.clj ├── bpmap.clj ├── bslurp.clj ├── bsort.clj ├── bupdate_vals.clj ├── groupby.clj └── plet.clj ├── examples └── lastfm │ ├── .gitignore │ ├── README.md │ ├── project.clj │ ├── src │ └── lastfm │ │ ├── version00.clj │ │ └── version01.clj │ └── test │ └── lastfm │ └── core_test.clj ├── java └── clojure │ └── lang │ └── Get.java ├── project.clj ├── src └── parallel │ ├── core.clj │ ├── foldmap.clj │ ├── fork_middle.clj │ ├── map_combine.clj │ ├── merge_sort.clj │ └── xf.clj └── test ├── core_test.clj ├── parallel └── merge_sort_test.clj ├── words └── xf_test.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | playground 13 | doc 14 | appcds.cache 15 | appcds.classlist 16 | .java-version 17 | .idea 18 | *.iml 19 | .DS_Store 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. 
A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. 
The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. 
REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. 
While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor to control, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. 
Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. 
GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. 
The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## parallel 2 | 3 | `parallel` is a library of parallel-enabled (not distributed) Clojure functions. Some are designed to emulate existing functions in the standard library, sometimes as drop-in replacement, sometimes with a very different semantic. If you see a function listed below in your project or if you use transducers, chances are you can speed-up your application using parallel. 
As with any library claiming to speed-up your code, there are too many variables influencing performances that cannot be tested in isolation: **please keep a benchmarking tool ready and measure each of the changes**. 4 | 5 | The library also provides additional transducers (not necessarily for parallel use) and supporting utilities. The functions documented below have been tested and benchmarked and are ready to use. Please report any issue or ideas for improvements, I'll be happy to help. 6 | 7 | Functions and macros: 8 | 9 | | Name | Description 10 | |-----------------------------------------| --------------------------------------------------- 11 | | [`p/let`](#plet) | Parallel `let` bindings. 12 | | [`p/args`](#pargs) | Invoke a function with arguments evaluated in parallel. 13 | | [`p/and`](#pand) | Like `core/and` with expressions evaluated in parallel. 14 | | [`p/or`](#por) | Like `core/or` with arguments evaluated in parallel. 15 | | [`p/do`](#pdo) | Parallel `do` forms. 16 | | [`p/doto`](#pdoto) | Parallel `doto` forms. 17 | | [`p/slurp`](#pslurp) | Parallel slurping files. 18 | | [`p/count`](#pcount) | Transducer-aware parallel `core/count`. 19 | | [`p/frequencies`](#pfrequencies) | Parallel `core/frequencies` 20 | | [`p/group-by`](#pgroup-by) | Parallel `core/group-by` 21 | | [`p/update-vals`](#pupdate-vals) | Updates values in a map in parallel. 22 | | [`p/sort`](#psort) | Parallel `core/sort`. 23 | | [`p/external-sort`](#pexternal-sort) | Memory efficient, file-based, parallel merge-sort. 24 | | [`p/fold`](#pfold-pxrf-and-pfolder) | Transducer-aware `r/fold`. 25 | | [`p/transduce`](#ptransduce) | Parallel version of `transduce` based on `p/fold`. 26 | | [`p/process-folder`](#pprocess-folder) | Process the files in a folder in parallel. 27 | | [`p/min` and `p/max`](#pmin-and-pmax) | Parallel `core/min` and `core/max` functions. 
28 | | [`p/distinct`](#pdistinct) | Parallel version of `core/distinct` 29 | | [`p/pmap`](#ppmap) | Like `core/pmap` but running on given n of threads. 30 | | [`p/amap`](#pamap) | Parallel array transformation. 31 | | [`p/armap`](#parmap) | Parallel array reversal with transformation. 32 | 33 | Transducers: 34 | 35 | | Name | Description 36 | |-----------------------------------------| --------------------------------------------------- 37 | | [`xf/interleave`](#xfinterleave) | Like `core/interleave`, transducer version. 38 | | [`xf/pmap`](#xfpmap) | Like `core/pmap`, transducer version. 39 | | [`xf/identity`](#xfidentity) | Alternative identity transducer to `core/identity` 40 | 41 | In the pipeline: 42 | 43 | | Name | Description 44 | |-----------------------------------------| --------------------------------------------------- 45 | | `p/split-by` | Splitting transducer based on contiguous elements. 46 | 47 | ### How to use the library 48 | 49 | All functions are available through the `parallel.core` namespace. Pure transducers are in `parallel.xf`. Add the following to your project dependencies: 50 | 51 | ```clojure 52 | [parallel "0.10"] 53 | ``` 54 | 55 | Require at the REPL with: 56 | 57 | ```clojure 58 | (require '[parallel.core :as p] 59 | '[parallel.xf :as xf]) 60 | ``` 61 | 62 | Or in your namespace as: 63 | 64 | ```clojure 65 | (ns mynamespace 66 | (:require [parallel.core :as p] 67 | [parallel.xf :as xf])) 68 | ``` 69 | 70 | ## API Docs 71 | 72 | ### `p/let` 73 | 74 | `p/let` works like `clojure.core/let` but evaluates its binding expressions in parallel: 75 | 76 | ```clojure 77 | (time 78 | (p/let [a (Thread/sleep 1000) 79 | b (Thread/sleep 1000) 80 | c (Thread/sleep 1000)] 81 | (= a b c))) 82 | ;; "Elapsed time: 1002.519823 msecs" 83 | ``` 84 | 85 | Don't use `p/let` if: 86 | 87 | * The expressions have dependencies. `p/let` cannot resolve cross references between expressions and will throw exception. 88 | * The expressions are trivial. 
In this case the thread orchestration outweighs the benefits of executing in parallel. Good expressions to parallelize are, for example, independent networked API calls, file system calls or other non-trivial computations. 89 | 90 | ### `p/args` 91 | 92 | `p/args` calls a function with arguments that are evaluated in parallel: 93 | 94 | ```clojure 95 | (time 96 | (p/args + 97 | (do (Thread/sleep 1000) 1) 98 | (do (Thread/sleep 1000) 2) 99 | (do (Thread/sleep 1000) 3))) 100 | ;; "Elapsed time: 1000.613791 msecs" 101 | ;; 6 102 | ``` 103 | 104 | `p/args` improves performance when the arguments to a function require some kind of non-trivial evaluation, for example if they have side effects requiring input/output. Restrictions to the use of `p/args` include any dependency between the arguments (which can happen as a side effect of their evaluation). 105 | 106 | ### `p/and` 107 | 108 | `p/and` works similarly to `core/and` but the expressions in the body are evaluated in parallel: 109 | 110 | ```clojure 111 | (let [x 11] 112 | (if (p/and 113 | (odd? x) 114 | (number? x) 115 | (even? (count (str x)))) 116 | "true" 117 | "false")) 118 | ;; true 119 | ``` 120 | 121 | There are a couple of important differences to consider: 122 | 123 | * Unlike `core/and`, `p/and` does not short-circuit. This means that even if the first expression is false, `p/and` is going to evaluate all other expressions. `p/and` could result in worse performance if the first expression is most certainly false and it evaluates faster than the others: 124 | 125 | ```clojure 126 | (time (let [x 11] 127 | (if (and 128 | (do (Thread/sleep 100) (even? x)) 129 | (do (Thread/sleep 1000) (number? x)) 130 | (do (Thread/sleep 1000) (even? (count (str x))))) 131 | "true" 132 | "false"))) 133 | ;; "Elapsed time: 104.481973 msecs" 134 | ;; false 135 | 136 | (time (let [x 11] 137 | (if (p/and 138 | (do (Thread/sleep 100) (even? x)) 139 | (do (Thread/sleep 1000) (number? 
x)) 140 | (do (Thread/sleep 1000) (even? (count (str x))))) 141 | "true" 142 | "false"))) 143 | ;; "Elapsed time: 1001.878881 msecs" 144 | ;; false 145 | ``` 146 | 147 | * You should not rely on evaluation order of the expressions. The following idiomatic use of `core/and`, for instance, might not work with `p/and`: 148 | 149 | ```clojure 150 | (require '[clojure.java.io :as io]) 151 | 152 | (def file 1) 153 | 154 | (p/and 155 | (instance? java.io.File file) 156 | (.exists file) 157 | (.isDirectory file)) 158 | ;; IllegalArgumentException No matching field found: exists for class java.lang.Long 159 | ``` 160 | 161 | ### `p/or` 162 | 163 | `p/or` works similarly to `core/or` but the expressions in the body are evaluated in parallel: 164 | 165 | ```clojure 166 | (let [x 11] 167 | (if (p/or 168 | (odd? x) 169 | (string? x) 170 | (double? x)) 171 | "true" 172 | "false")) 173 | ;; true 174 | ``` 175 | 176 | Like `p/and`, `p/or` does not short-circuit, potentially taking more time than sequential `core/or`. This happens for example when the first expression is true but `p/or` cannot return until all other expressions are evaluated. Also similarly to `p/and`, `p/or` should not be used if there is an implicit order between expressions (for example `(let [string-length (p/or s (.length s))] string-length)` might result in `NullPointerException` if `s` is `nil`). 177 | 178 | ### `p/do` 179 | 180 | `p/do` works like normal `core/do` to encapsulate evaluation of multiple forms (presumably for side effects). It returns the last evaluated form: 181 | 182 | ```clojure 183 | (def counter (atom 0)) 184 | 185 | (p/do 186 | (swap! 
counter inc) 187 | (println "counter incremented" @counter) 188 | (map inc (range 1000)) 189 | (println "more stuff to do")) 190 | ;; counter incremented more stuff to do0 191 | ``` 192 | 193 | As demonstrated by the output, there is no guarantee about the order in which the forms are evaluated, so the use of `p/do` should be restricted to side-effecting forms without an ordering requirement. 194 | 195 | ### `p/doto` 196 | 197 | Similarly to `core/doto`, `p/doto` threads an expression into the following forms (presumably for side effects) and returns the initial expression at the end. Threading through forms happens in parallel so: 198 | 199 | * Side effects (the forms following the threaded expression) can happen in any order. 200 | * If side effects are against a shared collection, the collection has to be thread-safe (or one of the Clojure ref types). 201 | 202 | The following example uses a `ConcurrentLinkedQueue` to add items concurrently: 203 | 204 | ```clojure 205 | (import 'java.util.concurrent.ConcurrentLinkedQueue) 206 | 207 | (p/doto 208 | (ConcurrentLinkedQueue.) 209 | (.add 1) 210 | (.add 2)) 211 | 212 | ;; #object[java.util.concurrent.ConcurrentLinkedQueue 0x5fbc5177 "[1, 2]"] 213 | ``` 214 | 215 | Like other parallel macros, `p/doto` is effective when the performed operations are not trivial. The following expression, for example, executes in 1/4 of the time: 216 | 217 | ```clojure 218 | (require '[clojure.xml :as xml]) 219 | (import 'java.util.concurrent.ConcurrentHashMap) 220 | 221 | (defn heavy-stuff [n] (Thread/sleep 1000) n) 222 | 223 | (time 224 | (doto (ConcurrentHashMap.) 225 | (.put :a (heavy-stuff 1)) 226 | (.put :b (heavy-stuff 2)) 227 | (.put :c (heavy-stuff 3)) 228 | (.put :d (heavy-stuff 4)))) 229 | ;; "Elapsed time: 4009.656834 msecs" 230 | ;; {:d 4, :b 2, :c 3, :a 1} 231 | 232 | (time 233 | (p/doto (ConcurrentHashMap.) 
234 | (.put :a (heavy-stuff 1)) 235 | (.put :b (heavy-stuff 2)) 236 | (.put :c (heavy-stuff 3)) 237 | (.put :d (heavy-stuff 4)))) 238 | ;; "Elapsed time: 1006.563343 msecs" 239 | ;; {:d 4, :b 2, :c 3, :a 1} 240 | ``` 241 | 242 | ### `p/slurp` 243 | 244 | `p/slurp` loads the content of a file in parallel. Compared to `core/slurp`, it only supports local files (no URLs or other input streams): 245 | 246 | ```clojure 247 | (import 'java.io.File) 248 | (take 10 (.split (p/slurp (File. "test/words")) "\n")) 249 | ;; ("A" "a" "aa" "aal" "aalii" "aam" "Aani" "aardvark" "aardwolf" "Aaron") 250 | ``` 251 | 252 | `p/slurp` offers a way to interpret the loaded byte array differently from a string, for example to load an entry from a zipped file: 253 | 254 | ```clojure 255 | (import '[java.io File ByteArrayInputStream] 256 | '[java.util.zip ZipFile ZipInputStream]) 257 | 258 | (defn filenames-in-zip [bytes] 259 | (let [z (ZipInputStream. (ByteArrayInputStream. bytes))] 260 | (.getName (.getNextEntry z)))) 261 | 262 | (p/slurp (File. "target/parallel-0.7.jar") filenames-in-zip) 263 | ;; "META-INF/MANIFEST.MF" 264 | ``` 265 | 266 | When `*mutable*` is set to `true` the transformation step is skipped altogether and the raw byte array is returned: 267 | 268 | ```clojure 269 | (import 'java.io.File) 270 | (binding [p/*mutable* true] (p/slurp (File. "test/words"))) 271 | ;; #object["[B" 0x705709a4 "[B@705709a4"] 272 | ``` 273 | 274 | `p/slurp` performs better than `core/slurp` on large files (> 500K). Here's for example a comparison benchmark to load a 2.4MB file: 275 | 276 | ```clojure 277 | (import 'java.io.File) 278 | (let [fname "test/words" file (File. fname)] (bench (slurp file))) ; 8.84ms 279 | (let [fname "test/words" file (File. fname)] (bench (p/slurp file))) ; 2.87ms 280 | ``` 281 | 282 | ### `p/count` 283 | 284 | `p/count` can speed up counting on collections when non-trivial transformations are involved. It takes a composition of transducers and the collection to count. 
It applies the transducers to coll and produces a count of the resulting elements (in this case 1.2M): 285 | 286 | ```clojure 287 | (def xform 288 | (comp 289 | (filter odd?) 290 | (map inc) 291 | (map #(mod % 50)) 292 | (mapcat range) 293 | (map str))) 294 | 295 | (p/count xform (range 100000)) 296 | ;; 1200000 297 | ``` 298 | 299 | `p/count` is eager, transforming "coll" into a vector if it's not already a foldable collection (vectors, maps or reducers/Cat objects). Use `p/count` only if the transformations alter the number of elements in the input collection; otherwise `core/count` would likely outperform `p/count` in most situations. `p/count` supports stateful transducers. In this example we are dropping 6250 elements from each of the 32 chunks (32 is the default number of chunks `p/count` operates on, so 32x6250=200k elements will be removed): 300 | 301 | ```clojure 302 | (def xform 303 | (comp 304 | (filter odd?) 305 | (map inc) 306 | (map #(mod % 50)) 307 | (mapcat range) 308 | (map str) 309 | (drop 6250))) 310 | 311 | (p/count xform (range 100000)) 312 | ;; 1000000 313 | ``` 314 | 315 | See [bcount.clj](https://github.com/reborg/parallel/blob/master/benchmarks/bcount.clj) for additional benchmarks. 316 | 317 | ### `p/frequencies` 318 | 319 | Like `core/frequencies`, but executes in parallel. It takes an optional composition of transducers (stateless or stateful) to apply to coll before the frequency is calculated. It does not support nil values. 
The following is the typical word frequencies example: 320 | 321 | ```clojure 322 | (require '[clojure.string :as s]) 323 | (def war-and-peace "http://www.gutenberg.org/files/2600/2600-0.txt") 324 | (def book (slurp war-and-peace)) 325 | (let [freqs (p/frequencies (re-seq #"\S+" book) (map s/lower-case))] 326 | (take 5 (sort-by last > freqs))) 327 | ;; (["the" 34258] ["and" 21396] ["to" 16500] ["of" 14904] ["a" 10388]) 328 | 329 | (quick-bench (p/frequencies (re-seq #"\S+" book) (map s/lower-case))) ;; 111ms 330 | (quick-bench (frequencies (map s/lower-case (re-seq #"\S+" book)))) ;; 349ms 331 | ``` 332 | 333 | ### `p/group-by` 334 | 335 | `p/group-by` is similar to `clojure.core/group-by`, but the grouping happens in parallel. Here's an example about searching most frequent anagrams in a large text: 336 | 337 | ```clojure 338 | (require '[clojure.string :as s]) 339 | 340 | (def war-and-peace 341 | (s/split (slurp "http://gutenberg.org/files/2600/2600-0.txt") #"\W+")) 342 | 343 | (def anagrams 344 | (p/group-by sort war-and-peace (map s/lower-case))) 345 | 346 | (->> anagrams 347 | (map (comp distinct second)) 348 | (sort-by count >) 349 | first) 350 | 351 | ;; ("stop" "post" "spot" "pots" "tops") 352 | ``` 353 | 354 | `p/group-by` takes an optional list of transducers to apply to the items in coll before generating the groups. It has been used in the example to lower-case each word. Note that differently from `clojure.core/group-by`: 355 | 356 | * The order of the items in each value vector can change between runs (this can be a problem or not, depending on your use case). 357 | * It does not support nil values in the input collection. 
358 | 359 | `p/group-by` is generally 2x-5x faster than `clojure.core/group-by`: 360 | 361 | ```clojure 362 | (require '[criterium.core :refer [quick-bench]]) 363 | 364 | ;; with transformation (which boosts p/group-by even further) 365 | (quick-bench (group-by sort (map s/lower-case war-and-peace))) ;; 957ms 366 | (quick-bench (p/group-by sort war-and-peace (map s/lower-case))) ;; 259ms 367 | 368 | ;; fair comparison without transformations 369 | (quick-bench (group-by sort war-and-peace)) ;; 936ms 370 | (quick-bench (p/group-by sort war-and-peace)) ;; 239ms 371 | ``` 372 | 373 | A further boost can be achieved by avoiding conversion back to immutable data structures: 374 | 375 | ```clojure 376 | (quick-bench 377 | (binding [p/*mutable* true] 378 | (p/group-by sort war-and-peace (map s/lower-case)))) ;; 168ms 379 | ``` 380 | 381 | When invoked with `p/*mutable*` bound to `true`, `p/group-by` returns a Java ConcurrentHashMap with ConcurrentLinkedQueue as values. They are both easy to deal with from Clojure. 382 | 383 | ```clojure 384 | (def anagrams 385 | (binding [p/*mutable* true] 386 | (p/group-by sort war-and-peace (map s/lower-case)))) 387 | 388 | (distinct (into [] (.get anagrams (sort "stop")))) 389 | ;; ("post" "spot" "stop" "tops" "pots") 390 | ``` 391 | 392 | ### `p/update-vals` 393 | 394 | `p/update-vals` updates the values of a map in parallel. 
With reference to the [`p/group-by`](#pgroup-by) example of the most frequent anagrams, we could apply the step to calculate the distinct words for each key in the map in parallel ("anagrams" is the map resulting from applying `p/group-by` to a large text): 395 | 396 | ```clojure 397 | 398 | (first anagrams) 399 | ;; [(\a \d \e \e \h \t) ["heated" "heated" "heated" "heated" "heated" "heated" "heated" "heated"]] 400 | 401 | (first (p/update-vals anagrams distinct)) 402 | ;; [(\a \d \e \e \h \t) ("heated")] 403 | ``` 404 | 405 | Like other functions in the library, `p/update-vals` speed can be improved by skipping the conversion back into an immutable data structure: 406 | 407 | ```clojure 408 | (time (dorun (p/update-vals anagrams distinct))) 409 | ;; "Elapsed time: 18.462031 msecs" 410 | (time (dorun (binding [p/*mutable* true] (p/update-vals anagrams distinct)))) 411 | ;; "Elapsed time: 9.908815 msecs" 412 | ``` 413 | 414 | In the context of the previous computation of the most frequent anagrams, we could operate using a combination of mutable `p/sort` and `p/update-vals` and compare it with the previous solution: 415 | 416 | ```clojure 417 | (import '[java.util Map$Entry]) 418 | 419 | (defn cmp [^Map$Entry e1 ^Map$Entry e2] 420 | (> (count (.getValue e1)) 421 | (count (.getValue e2)))) 422 | 423 | (time (binding [p/*mutable* true] 424 | (let [a (p/sort cmp (p/update-vals anagrams distinct))] 425 | (.getValue ^Map$Entry (aget ^objects a 0))))) 426 | ;; "Elapsed time: 128.422734 msecs" 427 | ;; ("post" "spot" "stop" "tops" "pots") 428 | 429 | (time (->> anagrams 430 | (map (comp distinct second)) 431 | (sort-by count >) 432 | first)) 433 | ;; "Elapsed time: 251.277616 msecs" 434 | ;; ("post" "spot" "stop" "tops" "pots") 435 | ``` 436 | 437 | The mutable version is roughly 50% faster, but it's verbose and requires type annotations. 
438 | 439 | ### `p/sort` 440 | 441 | `p/sort` is a parallel merge-sort implementation that works by splitting the input into smaller chunks which are ordered sequentially below a certain threshold (8192 is the default). `p/sort` offers similar features to `clojure.core/sort` and it's not lazy. The following uses the default comparator `<` to sort a collection of 2M numbers (and, for comparison, does the same with `core/sort`): 442 | 443 | ```clojure 444 | (let [coll (shuffle (range 2e6))] (time (dorun (p/sort coll)))) 445 | ;; "Elapsed time: 1335.769356 msecs" 446 | 447 | (let [coll (shuffle (range 2e6))] (time (dorun (sort coll)))) 448 | ;; "Elapsed time: 2098.151666 msecs" 449 | ``` 450 | 451 | Or reverse sorting strings: 452 | 453 | ```clojure 454 | (let [coll (shuffle (map str (range 2e6)))] (time (dorun (p/sort #(compare %2 %1) coll)))) 455 | ;; "Elapsed time: 1954.57439 msecs" 456 | 457 | (let [coll (shuffle (map str (range 2e6)))] (time (dorun (sort #(compare %2 %1) coll)))) 458 | ;; "Elapsed time: 2540.829781 msecs" 459 | ``` 460 | 461 | `p/sort` is implemented on top of mutable native arrays, converting both input/output into immutable vectors as a default. There are a few ways to speed up sorting with `p/sort`: 462 | 463 | * Vector inputs are preferable to sequences. 464 | * Shave additional milliseconds by using the raw array output, by enclosing `p/sort` in a binding like `(binding [p/*mutable* true] (p/sort coll))`. `p/sort` returns an object array in this case, instead of a vector. 465 | * If you happen to be working natively with arrays, be sure to feed `p/sort` with the native array to avoid conversion. 
466 | 467 | In order of increasing speed: 468 | 469 | ```clojure 470 | (require '[criterium.core :refer [quick-bench]]) 471 | 472 | (let [c (into [] (shuffle (range 2e6))) 473 | a (to-array c)] 474 | (quick-bench (p/sort c)) 475 | (quick-bench (binding [p/*mutable* true] (p/sort c))) 476 | (quick-bench (binding [p/*mutable* true] (p/sort a)))) 477 | 478 | ;; 1185ms 479 | ;; 1052ms 480 | ;; 46ms 481 | ``` 482 | 483 | As you can see, the conversion into array is responsible for most of the sorting time. If you are lucky to work with arrays, sorting is one order of magnitude faster and more memory efficient. 484 | 485 | ### `p/external-sort` 486 | 487 | `merge-sort` is a well known example of parallelizable sorting algorithm. There was a time when machines were forced to use tapes to process large amount of data, loading smaller chunks into memory one at a time. The `merge-sort` sorting algorithm for example, is suitable for this kind of processing. Today we have bigger memories, but also big-data. File-based merge-sort implementations could still be useful to work with external storage such as S3. 488 | 489 | `p/external-sort` can be used to fetch large amount of data from slow storage, order them by some attribute and consume only the part that is actually needed (e.g. "find the top most" kind of problems). A working but not very useful `p/external-sort` example is the following: 490 | 491 | ```clojure 492 | (let [fetchf (fn [ids] ids) 493 | v (into [] (reverse (range 10000)))] 494 | (take 5 (p/external-sort 1000 compare fetchf v))) 495 | ;; [0 1 2 3 4] 496 | ``` 497 | 498 | `p/external-sort` accepts a vector "v" of IDs as input. The unique identifiers are used to fetch data objects from remote storage. "fetchf" is the way to tell `p/external-sort` how to retrieve the object given a group of ids (in this example, fetching the id has been simulated by returning the ids themselves). Input IDs are split into chunks not bigger than "1000" (with 512 the default). 
499 | 500 | Once all data is retrieved for a chunk, data are sorted using the given comparator ("core/compare" by default) and the result is stored in a temporary file on disk. The above example creates 16 files, as the number of files needs to be a power of two and `(/ 10000 16) = 625` is the first split that generates chunks smaller than 1000 in size. 501 | 502 | Once all chunks are retrieved and sorted on disk, the result is available as a lazy sequence, which is the type returned by `p/external-sort`. If the lazy sequence is not fully consumed, the related files are never loaded in memory. In the example above, some files are never loaded in memory. A call to `last` (instead of `take 5`) would load all files. If the head of the sequence is not retained, the content of the files is garbage collected from memory accordingly. 503 | 504 | The next example verifies these assumptions with a large dataset of around 20M played songs. Each song contains userid, track title, time it was played and other information. We want to print the most recently played songs but we can't load the 2.5 GB file in memory to sort it without blowing the heap (on a normal laptop). 505 | 506 | You can download the dataset from this page: http://www.dtic.upf.edu/~ocelma/MusicRecommendationDataset/lastfm-1K.html. We are then going to split the file on disk into smaller but still unordered files with: 507 | 508 | ```bash 509 | split -a 4 -l 18702 userid-timestamp-artid-artname-traid-traname.tsv 510 | num=0; for i in *; do mv "$i" "$num"; ((num++)); done 511 | ``` 512 | 513 | The big tsv file contains exactly 19150868 played songs. We pick a split size that yields a number of files as close as possible to `(Math/pow 2 10)`: this creates 1024 files of a reasonable size (18702 lines) plus a last one containing the remaining 20. We also rename the files using an incremental number, so we can quickly know which file contains what. You should now have a folder with 1025 files named 0 to 1024 (no extension).
Here's how to use `p/external-sort` to retrieve the top 3 most recently played tracks: 514 | 515 | ```clojure 516 | (require '[clojure.string :as s]) 517 | 518 | (let [lines 19150868 519 | chunk-size 18702 520 | chunk-folder "../resources/lastfm-dataset-1K/splits/" 521 | fetchf (fn [ids] 522 | (->> (quot (last ids) chunk-size) 523 | (str chunk-folder) 524 | slurp 525 | s/split-lines 526 | (mapv #(s/split % #"\t"))))] 527 | (pprint (time (take 3 528 | (p/external-sort 529 | chunk-size 530 | #(compare (nth %2 1) (nth %1 1)) 531 | fetchf 532 | (range lines)))))) 533 | ``` 534 | 535 | The degree of parallelism with which "fetchf" is invoked is equal to the number of cores (physical or virtual) available on the running system. If the collection of IDs is not a vector, it is converted into one. `fetchf` is provided a group of ids and we can calculate which file contains those IDs because we know their name and size. The custom comparator uses the timestamp found at index 1 after each line is split by tabs (the format of the file). After about 1 minute (my machine) we get: 536 | 537 | ``` 538 | (["user_000762" 539 | "2013-09-29T18:32:04Z" 540 | "d8354b38-e942-4c89-ba93-29323432abc3" 541 | "30 Seconds To Mars" 542 | "b5b40605-5a81-46b4-a51e-2b1ec7964c1a" 543 | "A Beautiful Lie"] 544 | ["user_000762" 545 | "2009-05-02T02:01:47Z" 546 | "91f7a868-d82e-4cfb-9cd9-a2ffd7faac25" 547 | "The Cab" 548 | "7ede8578-bf9c-4e68-a060-56924202cdf0" 549 | "This City Is Contagious"] 550 | ["user_000762" 551 | "2009-05-02T01:58:09Z" 552 | "91f7a868-d82e-4cfb-9cd9-a2ffd7faac25" 553 | "The Cab" 554 | "14298942-7452-444f-9fb7-3199464957d6" 555 | "Can You Keep A Secret?"]) 556 | ``` 557 | 558 | By taking more results instead of just the top 3, more files will need to load into memory. If you don't hold on to the head of the sequence, you can access any other part of the ordered sequence, including the last element, without incurring an out of memory error (about 2 minutes later on my machine).
559 | 560 | ### `p/fold`, `p/xrf` and `p/folder` 561 | 562 | `p/fold` is modeled similar to `clojure.core.reducers/fold` function, the entry point into the Clojure reduce-combine (Java fork-join) parallel computation framework. It can be used with transducers like you would with normal `r/fold`: 563 | 564 | ```clojure 565 | (def v (vec (range 1000))) 566 | (p/fold + ((comp (map inc) (filter odd?)) +) v) 567 | ;; 250000 568 | ``` 569 | 570 | And exactly like with normal `r/fold` this would give you inconsistent results when a stateful transducer like `(drop 1)` is introduced: 571 | 572 | ```clojure 573 | (distinct (for [i (range 1000)] 574 | (p/fold + ((comp (map inc) (drop 1) (filter odd?)) +) v))) 575 | ;; (249999 249498 249499) 576 | ``` 577 | 578 | This is what `p/xrf` is designed for. `p/xrf` is a wrapping utility that hides the way the transducers are combined with the reducing function. More importantly, it takes care of the potential presence of stateful transducers in the chain (like `drop`, `take`, `partition` and so on). 579 | 580 | ```clojure 581 | (distinct (for [i (range 1000)] 582 | (p/fold (p/xrf + (map inc) (drop 1) (filter odd?)) v))) 583 | ;; (242240) 584 | ``` 585 | 586 | `p/xrf` makes sure that stateful transducer state is allocated at each chunk instead of each thread (the "chunk" is the portion of the initial collection that is not subject to any further splitting). This is a drastic departure from the semantic of the same transducers when used sequentially on the whole input. The first practical implication is that operations like `take`, `drop`, `partition` etc. are isolated in their own chunk and don't see each other state (for example, `(drop 1)` would remove the first element from each chunk, not just the first element from the whole input). The second consequence is that the result is now dependent (consistently) on the number of chunks. 
587 | 588 | To enable easier design of parallel algorithms, you can pass `p/fold` a number "n" of desired chunks for the parallel computation (n has to be a power of 2 and it defaults to 32). **Note the difference: with `(r/fold)` the computation is chunk-size driven by "n", the desired chunk size (default to 512). With `(p/fold)` the computation is chunk-number driven by "n" the number of desired chunks to compute in parallel**: 589 | 590 | ```clojure 591 | (p/fold 4 + (p/xrf + (map inc) (drop 1) (filter odd?)) v) 592 | ;; 248496 593 | ``` 594 | 595 | Assuming there are 4 cores available, the example above executes on 4 parallel threads. Let's dissect it chunk by chunk: 596 | 597 | * We are asking `(p/fold)` to create 4 chunks of the initial vector "v" of 1000 elements. Each chunk ends up having 250 items. 598 | * The content of each chunk can be expressed by the following ranges (the actual type is a subvec not a range but the content is the same): `(range 0 250)`, `(range 250 500)`, `(range 500 750)`, `(range 750 1000)` 599 | * Transducers transform each chunk (composition reads backward like normal transducers): `(filter odd? (drop 1 (map inc (range 0 250))))`, `(filter odd? (drop 1 (map inc (range 250 500))))`, `(filter odd? (drop 1 (map inc (range 500 750))))`, `(filter odd? (drop 1 (map inc (range 750 1000))))` 600 | * The reducing function "+" is applied on the items on each chunk: 15624, 46624, 77624, 108624 601 | * The combining function is again "+", resulting in the final sums: (+ (+ 15624 46624) (+ 77624 108624)) which is 248496. 602 | 603 | It can be tricky for arbitrary collection sizes to see what is the best strategy in terms of chunk size or number. The utility function `p/show-chunks` can be used to predict the splitting for a parallel calculation. `p/fold` parameters can be adjusted accordingly. Here's what happens if you have a vector of 9629 items and you'd like 8 chunks to be created.
Some of them will be bigger, others will be smaller: 604 | 605 | ```clojure 606 | (p/show-chunks (vec (range 9629)) 8) 607 | ;; (1203 1204 1203 1204 1203 1204 1204 1204) 608 | ``` 609 | 610 | `p/fold` also allows transducers on hash-maps, not just vectors. A hash-map can be folded with transducers (in parallel) like this: 611 | 612 | ```clojure 613 | (require '[clojure.core.reducers :refer [monoid]]) 614 | (def input (zipmap (range 10000) (range 10000))) 615 | 616 | (def output 617 | (p/fold 618 | (monoid merge (constantly {})) 619 | (p/xrf conj 620 | (filter (fn [[k v]] (even? k))) 621 | (map (fn [[k v]] [k (inc v)])) 622 | (map (fn [[k v]] [(str k) v]))) 623 | input)) 624 | (output "664") 625 | ;; 665 626 | ``` 627 | 628 | The single argument for transducers is a vector pair containing a key and a value. In this case each transducer returns another pair to build another map (but that's not required). 629 | 630 | Caveats and known problems: 631 | 632 | * Stateful transducers like `dedupe` and `distinct`, that operate correctly at the chunk level, can bring back duplicates once combined back into the final result. Keep that in mind if absolute uniqueness is a requirement, you might need an additional step outside `p/fold` to ensure final elimination of duplicates. I'm thinking what else can be done to avoid the problem in the meantime. 633 | 634 | ### `p/transduce` 635 | 636 | `p/transduce` is a parallel version of the same function present in core: 637 | 638 | ```clojure 639 | (p/transduce (comp (filter odd?) (map inc)) + (vec (range 1000))) 640 | ;; 250500 641 | ``` 642 | 643 | Similarly to `p/fold`, you can use stateful transducers with `p/transduce`. When you do, it's better to design your computation around the number of chunks that are processed in parallel. `p/transduce` accepts the number of desired chunks and an additional "combinef" to know how to merge chunks back together.
644 | 645 | The example below takes 1000 items and operates in 4 parallel chunks of 250 each, dropping 240 items each chunk, and partitioning the remaining 10 into groups of 5. The results from each parallel thread are combined back with `into`: 646 | 647 | ```clojure 648 | (p/transduce 4 (comp (drop 240) (partition-all 5)) conj into (vec (range 1000))) 649 | ;; [[240 241 242 243 244] 650 | ;; [245 246 247 248 249] 651 | ;; [490 491 492 493 494] 652 | ;; [495 496 497 498 499] 653 | ;; [740 741 742 743 744] 654 | ;; [745 746 747 748 749] 655 | ;; [990 991 992 993 994] 656 | ;; [995 996 997 998 999]] 657 | ``` 658 | 659 | The equivalent operation attempted on `reducers/fold` would give inconsistent results (the result is different each run or throws an exception because the state in stateful transducers is shared across concurrent threads): 660 | 661 | ```clojure 662 | (require '[clojure.core.reducers :as r]) 663 | 664 | (r/fold 665 | 250 666 | (r/monoid into conj) 667 | ((comp (drop 240) (partition-all 5)) conj) 668 | (vec (range 1000))) 669 | ;; Sometimes ArrayOutOfBound, sometimes a bunch of random partitions. 670 | ``` 671 | 672 | ### `p/process-folder` 673 | 674 | `p/process-folder` applies a composition of transducers to all files in a folder in parallel. The first transducer in the pipeline should expect a line of text. You can use something like `split -l 10000 -a 4 segment-` to split a large file into many smaller ones of 10k lines each. After you move them in a folder (please be sure it contains only the files that need processing) you're good to go, for example: 675 | 676 | ```clojure 677 | (p/process-folder 678 | "folder-name-as-string" 679 | (comp (map s/trim) 680 | (remove s/blank?) 681 | (map #(s/split % #"\,")) 682 | (map peek))) 683 | ``` 684 | 685 | The snippet above takes the last value for each line of each CSV file in a folder.
`p/process-folder` is eager: if the files are many or lines are big, there is nothing `p/process-folder` can do to avoid out of memory. Try to compose your transducers so they process and aggregate data as needed returning a result that can fit into memory. 686 | 687 | ### `p/min` and `p/max` 688 | 689 | `p/min` and `p/max` find the minimum or maximum in a vector of numbers in parallel (the input collection is converted into a vector if it's not already): 690 | 691 | ```clojure 692 | (let [c (shuffle (conj (range 100000) -9))] 693 | (p/min c)) 694 | ;; -9 695 | ``` 696 | 697 | They also allow any combination of transducers (stateless or stateful) to be passed in as arguments: 698 | 699 | ```clojure 700 | (let [c (into [] (range 100000))] 701 | (p/min c 702 | (map dec) 703 | (drop 20) 704 | (partition-all 30) 705 | (map last) 706 | (filter odd?))) ;; 3173 707 | ``` 708 | 709 | `p/min` and `p/max` outperform sequential `core/min` and `core/max` starting at 10k items and up (depending on hardware configuration). For a 4 cores machine, the speed increase is roughly 50%: 710 | 711 | ```clojure 712 | (require '[criterium.core :refer [bench]]) 713 | (require '[parallel.core :as p]) 714 | 715 | (def 1M (shuffle (range 1000000))) 716 | 717 | (bench (reduce min 1M)) ;; 9.963971 ms 718 | (bench (p/min 1M)) ;; 5.474384 ms 719 | 720 | (bench (transduce (comp (map inc) (filter odd?)) min ##Inf 1M)) ;; 22.701385 ms 721 | (bench (p/min 1M (map inc) (filter odd?))) ;; 12.085497 ms 722 | ``` 723 | 724 | ### `p/distinct` 725 | 726 | `p/distinct` returns a sequence of the distinct items in "coll": 727 | 728 | ```clojure 729 | (let [c (apply concat (repeat 20 (range 100)))] 730 | (take 10 (p/distinct c))) 731 | ;; (0 1 2 3 4 5 6 7 8 9) 732 | ``` 733 | 734 | The sequence is not-lazy and can return in any order. 
We can see this by supplying a transducer list (without using `comp`) to change from integers to keywords: 735 | 736 | ```clojure 737 | (let [c (apply concat (repeat 20 (range 100)))] 738 | (take 10 (p/distinct c (map str) (map keyword)))) 739 | ;; (:59 :16 :39 :47 :28 :58 :36 :15 :25 :18) 740 | ``` 741 | 742 | `p/distinct` does not support `nil`, which needs to be removed (you can pass `(remove nil?)` as a transducer to the argument list). Performance of `p/distinct` is quite good on both small and large collections: 743 | 744 | ```clojure 745 | (require '[criterium.core :refer [quick-bench]]) 746 | 747 | (let [small (apply concat (repeat 20 (range 100))) 748 | large (apply concat (repeat 200 (range 10000)))] 749 | (quick-bench (p/distinct small)) 750 | (quick-bench (p/distinct large))) 751 | ;; Execution time mean : 160.949448 µs 752 | ;; Execution time mean : 77.772233 ms 753 | 754 | (let [small (apply concat (repeat 20 (range 100))) 755 | large (apply concat (repeat 200 (range 10000)))] 756 | (quick-bench (doall (distinct small))) 757 | (quick-bench (doall (distinct large)))) 758 | ;; Execution time mean : 565.503835 µs 759 | ;; Execution time mean : 862.702828 ms 760 | ``` 761 | 762 | You can additionally increase `p/distinct` speed by using a vector input and forcing mutable output (in this case `p/distinct` returns a `java.util.Set` interface): 763 | 764 | 765 | ```clojure 766 | (let [large (into [] (apply concat (repeat 200 (range 10000))))] 767 | (quick-bench (binding [p/*mutable* true] (p/distinct large)))) 768 | ;; Execution time mean : 37.703288 ms 769 | ``` 770 | 771 | ### `p/pmap` 772 | 773 | `p/pmap` has a similar interface as `core/pmap`: 774 | 775 | ```clojure 776 | (p/pmap inc (range 10)) 777 | ;; [1 3 2 6 4 5 7 8 10 9] 778 | ``` 779 | 780 | But as you can see the output is a vector of results in any order.
Additionally `p/pmap` differs from `core/pmap` in the following: 781 | 782 | * It executes on n parallel threads (default 100) independently from the input collection chunk size or the number of available cores. 783 | * It is not lazy. 784 | * It does not support multiple collections as input. 785 | 786 | `p/pmap` is useful when you want to control the amount of parallelism executing the same task over a collection of inputs. If you are making requests to a highly scalable service, for example, you could take advantage of the higher level of parallelism of `p/pmap` compared to `core/pmap` throwing up to 100 (or more) threads at the problem (instead of `core/pmap` which is bound to the chunk size 32, plus the number of cores, plus 2). To change the number of threads, you can use the optional "n" parameter, for example setting it to 200 threads. In the following example, up to 200 threads are working on "heavyf": 787 | 788 | ```clojure 789 | (defn heavyf [x] (Thread/sleep 500) (inc x)) 790 | 791 | (time (count (p/pmap heavyf (range 1000) 200))) 792 | ;; "Elapsed time: 2552.601996 msecs" 793 | 794 | (time (count (pmap heavyf (range 1000)))) 795 | ;; "Elapsed time: 16115.643296 msecs" 796 | ``` 797 | 798 | ### `p/amap` 799 | 800 | `p/amap` is a parallel version of `core/amap`. It takes an array of objects and a transformation "f" and it mutates the input to produce the transformed version of the output: 801 | 802 | ```clojure 803 | 804 | (def c (range 2e6)) 805 | (defn f [x] (if (zero? (rem x 2)) (* 0.3 x) (Math/sqrt x))) 806 | 807 | (let [a (to-array c)] (time (p/amap f a))) 808 | ;; "Elapsed time: 34.955138 msecs" 809 | 810 | (let [^objects a (to-array c)] (time (amap a idx ret (f (aget a idx))))) 811 | ;; "Elapsed time: 53.058256 msecs" 812 | ``` 813 | 814 | `p/amap` uses the fork-join framework to update the array in parallel and it performs better than sequential for non-trivial transformations, otherwise the thread orchestration dominates the computational cost. 
You can optionally pass in a "threshold" which indicates how small the chunk of computation should be before going sequential, otherwise the number is chosen to be `(/ alength (* 2 ncores))`. 815 | 816 | ### `p/armap` 817 | 818 | `p/armap` is similar to `p/amap` but it also inverts the array. It takes an array of objects and a transformation "f" and it mutates the input to produce the transformed-reverse version of the output. 819 | 820 | ```clojure 821 | (let [a (object-array [0 9 8 2 0 9 2 2 90 1 2])] 822 | (p/armap (comp keyword str) a) 823 | (into [] a)) 824 | ;; [:2 :1 :90 :2 :2 :9 :0 :2 :8 :9 :0] 825 | ``` 826 | 827 | `p/armap` performs better than sequential for non-trivial transformations, otherwise the thread orchestration dominates the computational cost. Here's for example a reverse-complement of some random DNA strand: 828 | 829 | ```clojure 830 | (require '[criterium.core :refer [quick-bench]]) 831 | 832 | (defn random-dna [n] (repeatedly n #(rand-nth [\a \c \g \t]))) 833 | (def compl {\a \t \t \a \c \g \g \c}) 834 | 835 | (defn armap 836 | "A fair sequential comparison" 837 | [f ^objects a] 838 | (loop [i 0] 839 | (when (< i (quot (alength a) 2)) 840 | (let [tmp (f (aget a i)) 841 | j (- (alength a) i 1)] 842 | (aset a i (f (aget a j))) 843 | (aset a j tmp)) 844 | (recur (unchecked-inc i))))) 845 | 846 | (let [a (to-array (random-dna 1e6))] 847 | (quick-bench (p/armap compl a))) 848 | ;; "Elapsed time: 39.195143 msecs" 849 | 850 | (let [a (to-array (random-dna 1e6))] 851 | (quick-bench (armap compl a))) 852 | ;; "Elapsed time: 70.286622 msecs" 853 | ``` 854 | 855 | You can optionally pass in a "threshold" which indicates how small the chunk of computation should be before going sequential, otherwise the number is chosen to be `(/ alength (* 2 ncores))`. 856 | 857 | ### `xf/interleave` 858 | 859 | Like `clojure.core/interleave` but in transducer version. When `xf/interleave` is instantiated, it takes a "filler" collection.
The items from the collection are used to interleave the other items coming from the main transducing sequence: 860 | 861 | ```clojure 862 | (sequence 863 | (comp 864 | (map inc) 865 | (xf/interleave [100 101 102 103 104 105]) 866 | (filter odd?) 867 | (map str)) 868 | [3 6 9 12 15 18 21 24 37 30]) 869 | ;; ("7" "101" "13" "103" "19" "105") 870 | ``` 871 | 872 | The main transducing process runs until there are items in the filler sequence (those starting at 100 in the example). You can provide an infinite sequence to be sure all results are interleaved: 873 | 874 | ```clojure 875 | (sequence 876 | (comp 877 | (map inc) 878 | (xf/interleave (range)) 879 | (filter odd?) 880 | (map str)) 881 | [3 6 9 12 15 18 21 24 37 30]) 882 | ;; ("7" "1" "13" "3" "19" "5" "25" "7" "31" "9") 883 | ``` 884 | 885 | ### `xf/pmap` 886 | 887 | `xf/pmap` is a transducer version of `core/pmap`. When added to a transducer chain, it works like the `clojure.core/map` transducer applying the function "f" to all the items passing through the transducer. Different from `clojure.core/map`, `xf/pmap` processes a fixed number of items in parallel (competing for the actual number of physical cores). So if you have 12 cores and you're transducing a Clojure collection (big majority of them have a chunk size of 32), then you can achieve a max of 12+32+2 threads working in parallel: 888 | 889 | ```clojure 890 | (defn heavyf [x] (Thread/sleep 1000) (inc x)) 891 | 892 | (time (transduce (comp (map heavyf) (filter odd?)) + (range 10))) 893 | ;; 10025ms 894 | (time (transduce (comp (xf/pmap heavyf) (filter odd?)) + (range 10))) 895 | ;; 1006ms 896 | ``` 897 | 898 | `xf/pmap` has similar limitations to `clojure.core/pmap`. It works great when "f" is non trivial and the average elapsed of "f" is uniform across the input. If one `(f item)` takes much more than the others, the current N-chunk is kept busy with parallelism=1 before moving to the next chunk, wasting resources.
Use `xf/pmap` if your transducing transformation is reasonably big and complex. Apart from `transduce` you can use it with `sequence`: 899 | 900 | ```clojure 901 | (time (doall (pmap heavyf (range 10)))) 902 | ;; "Elapsed time: 1005.330409 msecs" 903 | 904 | (time (doall (sequence (xf/pmap heavyf) (range 10)))) 905 | ;; "Elapsed time: 1002.868326 msecs" 906 | ``` 907 | 908 | ### `xf/identity` 909 | 910 | `xf/identity` works similarly to `(map identity)` or just `identity` as identity transducer: 911 | 912 | 913 | ```clojure 914 | (sequence (map identity) (range 10)) 915 | (sequence clojure.core/identity (range 10)) 916 | (sequence xf/identity (range 10)) 917 | ;; All printing (0 1 2 3 4 5 6 7 8 9) 918 | ``` 919 | 920 | The identity transducer works as a placeholder for those cases in which a transformation is not requested, for example: 921 | 922 | ```clojure 923 | (def config false) 924 | 925 | (defn build-massive-xform [] 926 | (when config 927 | (comp (map inc) (filter odd?)))) 928 | 929 | (sequence (or (build-massive-xform) identity) (range 5)) 930 | ;; (0 1 2 3 4) 931 | ``` 932 | 933 | `core/identity` works fine as a transducer in most cases, except when it comes to multiple inputs, for which it requires a definition of what "identity" means. 
We could for example agree that if you want to use `core/identity` with multiple inputs you need to use it in pair with another transducer, for example `(map list)`: 934 | 935 | ```clojure 936 | (sequence (or (build-massive-xform) identity) (range 5) (range 5)) 937 | ;; Throws exception 938 | 939 | (sequence (or (build-massive-xform) (comp (map list) identity)) (range 5) (range 5)) 940 | ;; ((0 0) (1 1) (2 2) (3 3) (4 4)) 941 | ``` 942 | 943 | `xf/identity` is a simple transducer that takes care of this case, assuming "identity" means "wrap around" in case of multiple inputs: 944 | 945 | ```clojure 946 | (sequence (or (build-massive-xform) xf/identity) (range 5)) 947 | ;; (0 1 2 3 4) 948 | 949 | (sequence (or (build-massive-xform) xf/identity) (range 5) (range 5)) 950 | ;; ((0 0) (1 1) (2 2) (3 3) (4 4)) 951 | 952 | (sequence (or (build-massive-xform) xf/identity) (range 5) (range 5) (range 5)) 953 | ;; ((0 0 0) (1 1 1) (2 2 2) (3 3 3) (4 4 4)) 954 | ``` 955 | 956 | `xf/identity` custom transducer compared to `(comp (map list) identity)` has also positive effects on performance: 957 | 958 | ```clojure 959 | (let [items (range 10000) 960 | xform (comp (map list) identity)] 961 | (quick-bench 962 | (dorun 963 | (sequence xform items items)))) 964 | ;; 4.09ms 965 | 966 | (let [items (range 10000)] 967 | (quick-bench 968 | (dorun 969 | (sequence xf/identity items items)))) 970 | ;; 2.67ms 971 | ``` 972 | 973 | ## Development 974 | 975 | There are no dependencies other than Java and Clojure. 976 | 977 | * `lein test` to run the test suite. 978 | 979 | #### misc todo 980 | 981 | * [ ] `p/fold` Enable extend to (thread-safe) Java collections 982 | * [ ] `p/fold` Enable extend on Cat objects 983 | * [ ] `p/fold` operates on a group of keys for hash-maps. 984 | * [ ] A foldable reader of some sort for large files.
985 | 986 | ## License 987 | 988 | Copyright © 2018 Renzo Borgatti @reborg http://reborg.net 989 | Distributed under the Eclipse Public License either version 1.0 or (at your option) any later version. 990 | -------------------------------------------------------------------------------- /RELEASES.md: -------------------------------------------------------------------------------- 1 | ## Release History 2 | 3 | ### 0.11 (WIP) 4 | 5 | ### 0.10 6 | 7 | * Removed the benchmark ns from source paths causing trouble in the jar (thanks @benedekfazekas) 8 | 9 | ### 0.9 10 | 11 | * `p/frequencies` does not need a special keyfn. 12 | * Added `p/pmap` 13 | * Added `p/args`, `p/or` and `p/and`. 14 | 15 | ### 0.8 16 | 17 | * Using `p/transduce` to implement frequencies. 18 | * Added ./examples 19 | 20 | ### 0.7 21 | 22 | * Fix bug in `p/armap` for the sequential case. 23 | * Fixed missing laziness in external-sort 24 | * Added `p/do` and `p/doto` 25 | * Added `p/transduce` 26 | * Added `p/process-folder` 27 | 28 | ### 0.6 29 | 30 | * Added `p/slurp` 31 | * Consolidated and documented `p/min` and `p/max` 32 | 33 | ### 0.5 34 | 35 | * Added `xf/identity` transducer. 36 | * Added `p/let` parallel let bindings. 37 | 38 | ### 0.4 39 | 40 | * Added `p/armap`, parallel array reverse. 41 | * Added `xf/pmap`, transducer version of `core/pmap`. 42 | 43 | ### 0.3 44 | 45 | * Added `p/distinct` 46 | 47 | ### 0.2 48 | 49 | * Moved `parallel` namespace to `core` to avoid potential Java interop problems (see #3). 50 | 51 | ### 0.1 52 | 53 | First batch of functions. 
54 | -------------------------------------------------------------------------------- /benchmarks/bamap.clj: -------------------------------------------------------------------------------- 1 | (ns bamap) 2 | 3 | (require '[criterium.core :refer [quick-bench]]) 4 | (require '[parallel.core :as p] :reload) 5 | 6 | (let [c (range 50000)] (quick-bench (doall (map inc c)))) ;; 1.9 7 | (let [c (to-array (range 50000))] (quick-bench (amap c idx ret (inc (aget c idx))))) ;; 0.4 8 | (let [c (to-array (range 50000))] (quick-bench (p/amap inc c))) ;; 0.6 9 | 10 | (let [c (range 500000)] (quick-bench (doall (map inc c)))) ;; 18 11 | (let [c (to-array (range 500000))] (quick-bench (amap c idx ret (inc (aget c idx))))) ;; 4.9 12 | (let [c (to-array (range 500000))] (quick-bench (p/amap inc c))) ;; 5.9 13 | 14 | (let [c (range 2e6)] (quick-bench (doall (map inc c)))) ;; 80 15 | (let [c (to-array (range 2e6))] (quick-bench (amap c idx ret (inc (aget c idx))))) ;; 20 16 | (let [c (to-array (range 2e6))] (quick-bench (p/amap inc c))) ;; 18 17 | 18 | (let [c (range 5e6)] (quick-bench (doall (map inc c)))) ;; 201 19 | (let [c (to-array (range 5e6))] (quick-bench (amap c idx ret (inc (aget c idx))))) ;; 44 20 | (let [c (to-array (range 5e6))] (quick-bench (p/amap inc c))) ;; 58 21 | 22 | ;; demanding f 23 | 24 | (defn pi [n] (->> (range) (filter odd?) (take n) (map / (cycle [1 -1])) (reduce +) (* 4.0))) 25 | (def pis (shuffle (range 400 800))) 26 | 27 | (let [c pis] (time (dorun (map pi c)))) ;; 12178 28 | (let [c (to-array pis)] (time (amap c idx ret (pi (aget c idx))))) ;; 11901 29 | (let [c (to-array pis)] (time (p/amap pi c))) ;; 6991 30 | -------------------------------------------------------------------------------- /benchmarks/barmap.clj: -------------------------------------------------------------------------------- 1 | (ns barmap) 2 | 3 | (require '[criterium.core :refer [quick-benchmark quick-bench]]) 4 | (defmacro b [expr] `(* 1000. 
(first (:mean (quick-benchmark ~expr {}))))) ;; mssecs 5 | (require '[parallel.core :as p] :reload) 6 | 7 | (defn armap 8 | "Fair sequential comparison" 9 | [f ^objects a] 10 | (loop [i 0] 11 | (when (< i (quot (alength a) 2)) 12 | (let [tmp (f (aget a i)) 13 | j (- (alength a) i 1)] 14 | (aset a i (f (aget a j))) 15 | (aset a j tmp)) 16 | (recur (unchecked-inc i))))) 17 | 18 | (def coll (range 1e6)) 19 | 20 | ;; sequential identity 21 | (let [c (object-array coll)] (b (armap identity c))) ; 1.28 22 | (let [c (object-array coll)] (b (p/armap identity c))) ; 11.14 (10x slow) 23 | 24 | ;; reverse-complement example 25 | (defn random-dna [n] (repeatedly n #(rand-nth [\a \c \g \t]))) 26 | (def compl {\a \t \t \a \c \g \g \c}) 27 | (let [c (random-dna 1e6) 28 | a1 (object-array c) 29 | a2 (object-array c)] 30 | [(b (armap compl a1)) 31 | (b (p/armap compl a2))]) 32 | 33 | ;; [70.55341358333335 39.12026016666667] (~1.5x faster) 34 | 35 | ;; even more demanding f 36 | (defn pi [n] (->> (range) (filter odd?) (take n) (map / (cycle [1 -1])) (reduce +) (* 4.0))) 37 | 38 | (let [ps (shuffle (range 400 800)) 39 | a1 (object-array ps) 40 | a2 (object-array ps)] 41 | (quick-bench (armap pi a1)) ; 1.246923 ms 42 | (quick-bench (p/armap pi a2)) ; 0.866139 ms 43 | ) 44 | -------------------------------------------------------------------------------- /benchmarks/bcount.clj: -------------------------------------------------------------------------------- 1 | (ns bcount) 2 | 3 | (require '[criterium.core :refer [bench quick-bench]]) 4 | (require '[parallel.core :as p]) 5 | 6 | ;; core/count 7 | 8 | (let [coll (range 100000)] (quick-bench (clojure.core/count (filter odd? (map inc coll))))) ;; 4.74ms 9 | (let [coll (into [] (range 100000))] (quick-bench (clojure.core/count coll))) ;; 8.58ns 10 | 11 | ;; p/count 12 | 13 | (let [coll (range 100000)] (quick-bench (p/count coll (filter odd?) (map inc)))) ;; 3.58ms 14 | ;; no transforms, falls back on normal count, with some added timing. 
15 | (let [coll (into [] (range 100000))] (quick-bench (p/count coll))) ;; 14.21ns 16 | 17 | (def xform 18 | (comp 19 | (filter odd?) 20 | (map inc) 21 | (map #(mod % 50)) 22 | (mapcat range) 23 | (map str))) 24 | 25 | ;; to see some speedup we need non-trivial transforms and larger colls. 26 | (let [coll (into [] (range 1000000))] (quick-bench (p/count xform coll))) ;; 408ms 27 | 28 | ;; here's the same transform with a sequential transduce 29 | (let [coll (into [] (range 1000000))] (quick-bench (transduce xform (completing (fn [sum _] (inc sum))) 0 coll))) ;; 524ms 30 | -------------------------------------------------------------------------------- /benchmarks/bdistinct.clj: -------------------------------------------------------------------------------- 1 | (ns bdistinct) 2 | 3 | (require '[criterium.core :refer [quick-benchmark]]) 4 | (defmacro b [expr] `(* 1000. (first (:mean (quick-benchmark ~expr {}))))) ;; mssecs 5 | (require '[parallel.core :as p] :reload) 6 | 7 | (defn create-with-uniques [percent n] 8 | (cond 9 | (== 0 percent) (take n (repeat 1)) 10 | (== 100 percent) (shuffle (range n)) 11 | :else (let [k (quot n percent)] (shuffle (apply concat (take (/ 1 (/ percent 100.)) (repeat (range (* n (/ percent 100.)))))))))) 12 | 13 | ;; ballpark at 100k 14 | (def coll 1e5) 15 | (def c100 (create-with-uniques 100 coll)) 16 | (def c75 (create-with-uniques 75 coll)) 17 | (def c50 (create-with-uniques 50 coll)) 18 | (def c25 (create-with-uniques 25 coll)) 19 | (def c0 (create-with-uniques 0 coll)) 20 | 21 | ;; normal core 22 | (let [c (into [] c100)] (quick-bench (doall (distinct c)))) ; 76.321408 ms 23 | (let [c (into [] c75)] (quick-bench (doall (distinct c)))) ; 95.102771 ms 24 | (let [c (into [] c50)] (quick-bench (doall (distinct c)))) ; 59.967416 ms 25 | (let [c (into [] c25)] (quick-bench (doall (distinct c)))) ; 47.372695 ms 26 | (let [c (into [] c0)] (quick-bench (doall (distinct c)))) ; 26.161685 ms 27 | 28 | ;; normal core on sequences 29 | (let [c 
c100] (quick-bench (doall (distinct c)))) ; 74.756156 ms 30 | (let [c c75] (quick-bench (doall (distinct c)))) ; 98.587782 ms 31 | (let [c c50] (quick-bench (doall (distinct c)))) ; 63.899022 ms 32 | (let [c c25] (quick-bench (doall (distinct c)))) ; 56.241547 ms 33 | (let [c c0] (quick-bench (doall (distinct c)))) ; 19.684880 ms 34 | 35 | ;; transducers core 36 | (let [c (into [] c100)] (quick-bench (doall (sequence (distinct) c)))) ; 65.090661 ms 37 | (let [c (into [] c75)] (quick-bench (doall (sequence (distinct) c)))) ; 77.059407 ms 38 | (let [c (into [] c50)] (quick-bench (doall (sequence (distinct) c)))) ; 44.620541 ms 39 | (let [c (into [] c25)] (quick-bench (doall (sequence (distinct) c)))) ; 32.205828 ms 40 | (let [c (into [] c0)] (quick-bench (doall (sequence (distinct) c)))) ; 7.455225 ms 41 | 42 | ;; parallel on sequences 43 | (let [c c100] (quick-bench (doall (p/distinct c)))) ; 7.677920 ms 44 | (let [c c75] (quick-bench (doall (p/distinct c)))) ; 8.686195 ms 45 | (let [c c50] (quick-bench (doall (p/distinct c)))) ; 4.875998 ms 46 | (let [c c25] (quick-bench (doall (p/distinct c)))) ; 4.980696 ms 47 | (let [c c0] (quick-bench (doall (p/distinct c)))) ; 11.416917 ms 48 | 49 | ;; parallel on vectors 50 | (let [c (into [] c100)] (quick-bench (doall (p/distinct c)))) ; 7.391681 ms 51 | (let [c (into [] c75)] (quick-bench (doall (p/distinct c)))) ; 7.802467 ms 52 | (let [c (into [] c50)] (quick-bench (doall (p/distinct c)))) ; 4.966004 ms 53 | (let [c (into [] c25)] (quick-bench (doall (p/distinct c)))) ; 4.208700 ms 54 | (let [c (into [] c0)] (quick-bench (doall (p/distinct c)))) ; 8.037075 ms 55 | 56 | ;; parallel mutable 57 | (binding [p/*mutable* true] 58 | (let [c (into [] c100)] (quick-bench (p/distinct c))) ; 2.739602 ms 59 | (let [c (into [] c75)] (quick-bench (p/distinct c))) ; 6.188239 ms 60 | (let [c (into [] c50)] (quick-bench (p/distinct c))) ; 3.679788 ms 61 | (let [c (into [] c25)] (quick-bench (p/distinct c))) ; 2.713920 ms 62 | (let [c 
(into [] c0)] (quick-bench (p/distinct c)))) ; 7.802422 ms 63 | 64 | -------------------------------------------------------------------------------- /benchmarks/bfrequencies.clj: -------------------------------------------------------------------------------- 1 | (ns bfrequencies) 2 | 3 | (require '[parallel.core :as p]) 4 | (require '[criterium.core :refer [bench quick-bench]]) 5 | (require '[clojure.core.reducers :as r]) 6 | 7 | (import 'java.util.concurrent.atomic.AtomicInteger 8 | 'java.util.concurrent.ConcurrentHashMap 9 | '[java.util HashMap Collections Map]) 10 | 11 | (def small-overlapping 12 | (into [] (map hash-map 13 | (repeat :samplevalue) 14 | (concat 15 | (shuffle (range 0. 1e5)) 16 | (shuffle (range 0. 1e5)) 17 | (shuffle (range 0. 1e5)) 18 | (shuffle (range 0. 1e5)) 19 | (shuffle (range 0. 1e5)))))) 20 | 21 | (def big-overlapping 22 | (into [] (map hash-map 23 | (repeat :samplevalue) 24 | (concat 25 | (shuffle (range 6e4 1e5)) 26 | (shuffle (range 6e4 1e5)) 27 | (shuffle (range 6e4 1e5)) 28 | (shuffle (range 6e4 1e5)) 29 | (shuffle (range 6e4 1e5)))))) 30 | 31 | (def no-overlapping (into [] (range 1000))) 32 | 33 | (def bigger-data 34 | (into [] (map hash-map 35 | (repeat :samplevalue) 36 | (concat 37 | (shuffle (range 0. 7e5)) 38 | (shuffle (range 0. 7e5)) 39 | (shuffle (range 0. 7e5)) 40 | (shuffle (range 0. 7e5)) 41 | (shuffle (range 0. 
7e5)))))) 42 | 43 | 44 | 45 | ;; small overlapping 46 | (quick-bench (frequencies small-overlapping)) 47 | ;; 441 ms 48 | (quick-bench (p/frequencies small-overlapping)) 49 | ;; 190 ms 50 | (binding [p/*mutable* true] (quick-bench (p/frequencies small-overlapping))) 51 | ;; 92ms 52 | 53 | 54 | ;; bigger overlapping 55 | (quick-bench (frequencies big-overlapping)) 56 | ;; 172ms 57 | (quick-bench (p/frequencies big-overlapping)) 58 | ;; 52ms 59 | (binding [p/*mutable* true] (quick-bench (p/frequencies big-overlapping))) 60 | ;; 28ms 61 | 62 | 63 | 64 | ;; with xforms 65 | 66 | (quick-bench (frequencies (eduction (keep :samplevalue) (map int) small-overlapping))) 67 | ;; 238 ms 68 | (quick-bench (p/frequencies small-overlapping (keep :samplevalue) (map int))) 69 | ;; 91 ms 70 | (binding [p/*mutable* true] (quick-bench (p/frequencies small-overlapping (keep :samplevalue) (map int)))) 71 | ;; 50 ms 72 | 73 | (quick-bench (frequencies no-overlapping)) 74 | ;; 335 µs 75 | (quick-bench (p/frequencies no-overlapping)) 76 | ;; 299 µs 77 | 78 | (time (dorun (frequencies bigger-data))) 79 | ;; 4320.984379 ms 80 | (time (dorun (p/frequencies bigger-data))) 81 | ;; 1980.512017 ms 82 | -------------------------------------------------------------------------------- /benchmarks/bidentity.clj: -------------------------------------------------------------------------------- 1 | (ns bidentity) 2 | ;; Compares identity-like transducers: (map identity), xf/identity and core identity. 3 | (require '[parallel.xf :as xf]) 4 | (require '[criterium.core :refer [bench quick-bench]]) 5 | 6 | (let [items (range 10000)] 7 | (quick-bench 8 | (dorun 9 | (sequence (map identity) items)))) 10 | ;; 914.020710 µs 11 | 12 | (let [items (range 10000)] 13 | (quick-bench 14 | (dorun 15 | (sequence xf/identity items)))) 16 | ;; 892.697959 µs 17 | 18 | (let [items (range 10000)] 19 | (quick-bench 20 | (dorun 21 | (sequence identity items)))) 22 | ;; 926.697959 µs 23 | 24 | (let [items (range 10000) 25 | xform (comp (map list) identity)] 26 | (quick-bench 27 | (dorun 28 | (sequence xform items 
items)))) 29 | ;; 4.09ms 30 | 31 | (let [items (range 10000)] 32 | (quick-bench 33 | (dorun 34 | (sequence xf/identity items items)))) 35 | ;; 2.67ms 36 | -------------------------------------------------------------------------------- /benchmarks/binterleave.clj: -------------------------------------------------------------------------------- 1 | (ns binterleave) 2 | 3 | (require '[parallel.xf :as xf]) 4 | (require '[criterium.core :refer [bench quick-bench]]) 5 | 6 | (let [coll (range 1e5)] 7 | (quick-bench (doall (interleave (map inc coll) (range))))) 8 | ;; 14ms 9 | 10 | (let [coll (range 1e5)] 11 | (quick-bench (doall (sequence (comp (map inc) (xf/interleave (range))) coll)))) 12 | ;; 40ms 13 | 14 | (let [coll (range 1e5)] 15 | (quick-bench (doall (map str (filter odd? (interleave (map inc coll) (range))))))) 16 | ;; 37ms 17 | 18 | (let [coll (range 1e5)] 19 | (quick-bench (doall (sequence (comp (map inc) (xf/interleave (range)) (filter odd?) (map str)) coll)))) 20 | ;; 40ms 21 | -------------------------------------------------------------------------------- /benchmarks/bminmax.clj: -------------------------------------------------------------------------------- 1 | (ns bminmax) 2 | ;; The r alias (clojure.core.reducers) is needed by the r/fold experiment at the end of this file. 3 | (require '[criterium.core :refer [bench]] '[clojure.core.reducers :as r]) 4 | (require '[parallel.core :as p] :reload) 5 | 6 | (def v10k (conj (shuffle (range 10000)) -9)) 7 | (def v100k (conj (shuffle (range 100000)) -9)) 8 | (def v1m (conj (shuffle (range 1000000)) -9)) 9 | 10 | ;; core reduce 11 | (let [c v10k] (bench (reduce min c))) ;; 98.237074 µs 12 | (let [c v100k] (bench (reduce min c))) ;; 1.139608 ms 13 | (let [c v1m] (bench (reduce min c))) ;; 9.963971 ms 14 | 15 | ;; core apply (slower than reduce) 16 | (let [c v10k] (bench (apply min c))) ;; 105.267586 µs 17 | (let [c v1m] (bench (apply min c))) ;; 8.764973 ms 18 | 19 | ;; parallel 20 | (let [c v10k] (bench (p/min c))) ;; 83.043014 µs 21 | (let [c v100k] (bench (p/min c))) ;; 665.367802 µs 22 | (let [c v1m] (bench (p/min c))) ;; 5.474384 ms 23 
| 24 | ;; parallel xforms 25 | (let [c v10k] (bench (transduce (comp (map inc) (filter odd?)) min ##Inf c))) ;; 219.782220 µs 26 | (let [c v100k] (bench (transduce (comp (map inc) (filter odd?)) min ##Inf c))) ;; 2.722521 ms 27 | (let [c v1m] (bench (transduce (comp (map inc) (filter odd?)) min ##Inf c))) ;; 22.701385 ms 28 | (let [c v10k] (bench (p/min c (map inc) (filter odd?)))) ;; 168.950187 µs 29 | (let [c v100k] (bench (p/min c (map inc) (filter odd?)))) ;; 1.361213 ms 30 | (let [c v1m] (bench (p/min c (map inc) (filter odd?)))) ;; 12.085497 ms 31 | 32 | ;; experiments... 33 | (let [c v1m] 34 | (bench 35 | (r/fold 36 | 8000 37 | min 38 | (fn [v] (nth (sort v) 0)) 39 | (reify r/CollFold 40 | (coll-fold [this n combinef f] 41 | (p/foldvec c n combinef f)))))) 42 | ;; 647ms 43 | -------------------------------------------------------------------------------- /benchmarks/bpmap.clj: -------------------------------------------------------------------------------- 1 | (ns bpmap) 2 | 3 | (require '[parallel.xf :as xf]) 4 | (require '[criterium.core :refer [bench quick-bench]]) 5 | 6 | (defn pi [n] 7 | (->> (range) 8 | (filter odd?) 
9 | (take n) 10 | (map / (cycle [1 -1])) 11 | (reduce +) 12 | (* 4.0))) 13 | 14 | (let [items (range 1000000)] (time (dorun (sequence (map inc) items)))) ;; 141ms 15 | (let [items (range 1000000)] (time (dorun (sequence (xf/pmap inc) items)))) ;; 2563ms ok 16 | 17 | (let [items (range 400 800)] (time (dorun (sequence (map pi) items)))) ;; 11876ms 18 | (let [items (range 400 800)] (time (dorun (sequence (xf/pmap pi) items)))) ;; 418ms ok ok 19 | 20 | (let [items (range 400 800)] (time (transduce (map pi) + items))) ;; 11876ms 21 | (let [items (range 400 800)] (time (transduce (xf/pmap pi) + items))) ;; 1256ms 22 | -------------------------------------------------------------------------------- /benchmarks/bslurp.clj: -------------------------------------------------------------------------------- 1 | (ns bslurp) 2 | 3 | (require '[criterium.core :refer [bench quick-bench]]) 4 | (require '[parallel.core :as p] :reload) 5 | (import '(java.nio ByteBuffer CharBuffer) 6 | '(java.io File PushbackReader InputStream InputStreamReader FileInputStream)) 7 | 8 | (set! *warn-on-reflection* true) 9 | 10 | (def READ_ONLY ^{:private true} 11 | (java.nio.channels.FileChannel$MapMode/READ_ONLY)) 12 | 13 | (defn mmap [^String f] 14 | (let [channel (.getChannel (FileInputStream. f))] 15 | (.map channel READ_ONLY 0 (.size channel)))) 16 | 17 | (defn mslurp 18 | "Including memory mapping for benchmarks." 19 | [^String f] 20 | (.. java.nio.charset.Charset (forName "UTF-8") 21 | (newDecoder) (decode (mmap f)))) 22 | 23 | ;; lot of lines, 2.4M 24 | (let [fname "test/words" file (File. fname)] (bench (slurp file))) ; 8.84ms 25 | (let [fname "test/words" file (File. fname)] (bench (p/slurp file))) ; 2.87ms 26 | (let [fname "test/words" file (File. fname)] (binding [p/*mutable* true] (bench (p/slurp file)))) ; 1.40ms 27 | (let [fname "test/words" file (File. fname)] (bench (mslurp file))) ; 18.67ms 28 | 29 | ;; less lines, 3.1M 30 | (let [fname (File. 
"/Users/reborg/prj/my/pwc/test/war-and-peace.txt")] (bench (slurp fname))) ; 14.67 ms 31 | (let [fname (File. "/Users/reborg/prj/my/pwc/test/war-and-peace.txt")] (bench (p/slurp fname))) ; 7.67ms 32 | (let [fname (File. "/Users/reborg/prj/my/pwc/test/war-and-peace.txt")] (bench (mslurp fname))) ; 8.67ms 33 | 34 | ;; small file, no no. 35 | (let [fname (File. "project.clj")] (bench (slurp fname))) ; 35.13 µs 36 | (let [fname (File. "project.clj")] (bench (p/slurp fname))) ; 213.517530 µs 37 | -------------------------------------------------------------------------------- /benchmarks/bsort.clj: -------------------------------------------------------------------------------- 1 | (ns bsort) 2 | 3 | (require '[criterium.core :refer [quick-benchmark]]) 4 | (defmacro b [expr] `(* 1000. (first (:mean (quick-benchmark ~expr {}))))) ;; mssecs 5 | (require '[parallel.core :as p] :reload) 6 | (import '[java.util Arrays]) 7 | 8 | (defn sort-some [percent coll] 9 | (cond 10 | (== 100 percent) coll 11 | (== 0 percent) (let [n (count coll) half (quot n 2)] (interleave (take half coll) (reverse (drop half coll)))) 12 | :else (apply concat (map #(if (< (rand) (/ percent 100.)) (sort %) %) (partition-all 20 (shuffle coll)))))) 13 | 14 | ;; ballpark at 1M 15 | (def coll (range 1e6)) 16 | 17 | (let [c (into [] (sort-some 100 coll))] (b (sort c))) ; 25 18 | (let [c (into [] (sort-some 95 coll))] (b (sort c))) ; 537 19 | (let [c (into [] (sort-some 50 coll))] (b (sort c))) ; 781 20 | (let [c (into [] (sort-some 10 coll))] (b (sort c))) ; 801 21 | (let [c (into [] (sort-some 0 coll))] (b (sort c))) ; 132 22 | 23 | (let [c (into [] (sort-some 100 coll))] (b (p/sort c))) ; 44 24 | (let [c (into [] (sort-some 95 coll))] (b (p/sort c))) ; 502 25 | (let [c (into [] (sort-some 50 coll))] (b (p/sort c))) ; 707 26 | (let [c (into [] (sort-some 10 coll))] (b (p/sort c))) ; 675 27 | (let [c (into [] (sort-some 0 coll))] (b (p/sort c))) ; 376 28 | 29 | (let [c (into [] (sort-some 100 coll))] 
(binding [p/*mutable* true] (b (p/sort c)))) ; 19 30 | (let [c (into [] (sort-some 95 coll))] (binding [p/*mutable* true] (b (p/sort c)))) ; 562 31 | (let [c (into [] (sort-some 50 coll))] (binding [p/*mutable* true] (b (p/sort c)))) ; 548 32 | (let [c (into [] (sort-some 10 coll))] (binding [p/*mutable* true] (b (p/sort c)))) ; 571 33 | (let [c (into [] (sort-some 0 coll))] (binding [p/*mutable* true] (b (p/sort c)))) ; 292 34 | 35 | ;; heavier comparator, just vaguely faster than sequential. 36 | 37 | (let [c (into [] (sort-some 100 (map str coll)))] (b (sort compare c))) ; 59 38 | (let [c (into [] (sort-some 95 (map str coll)))] (b (sort compare c))) ; 760 39 | (let [c (into [] (sort-some 50 (map str coll)))] (b (sort compare c))) ; 760 40 | (let [c (into [] (sort-some 10 (map str coll)))] (b (sort compare c))) ; 802 41 | (let [c (into [] (sort-some 0 (map str coll)))] (b (sort compare c))) ; 136 42 | 43 | (let [c (into [] (sort-some 100 (map str coll)))] (b (p/sort compare c))) ; 136 44 | (let [c (into [] (sort-some 95 (map str coll)))] (b (p/sort compare c))) ; 689 45 | (let [c (into [] (sort-some 50 (map str coll)))] (b (p/sort compare c))) ; 740 46 | (let [c (into [] (sort-some 10 (map str coll)))] (b (p/sort compare c))) ; 664 47 | (let [c (into [] (sort-some 0 (map str coll)))] (b (p/sort compare c))) ; 258 48 | 49 | ;; Even heavier comparator 50 | (def cmp #(compare (last %1) (last %2))) 51 | 52 | (let [c (into [] (sort-some 100 (map-indexed vector coll)))] (b (sort cmp c))) ; 325 53 | (let [c (into [] (sort-some 95 (map-indexed vector coll)))] (b (sort cmp c))) ; 6475 54 | (let [c (into [] (sort-some 50 (map-indexed vector coll)))] (b (sort cmp c))) ; 6801 55 | (let [c (into [] (sort-some 10 (map-indexed vector coll)))] (b (sort cmp c))) ; 6566 56 | (let [c (into [] (sort-some 0 (map-indexed vector coll)))] (b (sort cmp c))) ; 1261 57 | 58 | (let [c (into [] (sort-some 100 (map-indexed vector coll)))] (b (p/sort cmp c))) ; 182 59 | (let [c (into [] 
(sort-some 95 (map-indexed vector coll)))] (b (p/sort cmp c))) ; 3589 60 | (let [c (into [] (sort-some 50 (map-indexed vector coll)))] (b (p/sort cmp c))) ; 3371 61 | (let [c (into [] (sort-some 10 (map-indexed vector coll)))] (b (p/sort cmp c))) ; 3422 62 | (let [c (into [] (sort-some 0 (map-indexed vector coll)))] (b (p/sort cmp c))) ; 615 63 | 64 | (set! *warn-on-reflection* true) 65 | (let [c (int-array (sort-some 100 coll))] (b (do (Arrays/parallelSort c) (into [] c)))) 66 | (let [c (int-array (sort-some 95 coll))] (b (do (Arrays/parallelSort c) (into [] c)))) 67 | (let [c (int-array (sort-some 50 coll))] (b (do (Arrays/parallelSort c) (into [] c)))) 68 | (let [c (int-array (sort-some 10 coll))] (b (do (Arrays/parallelSort c) (into [] c)))) 69 | (let [c (int-array (sort-some 0 coll))] (b (do (Arrays/parallelSort c) (into [] c)))) 70 | 71 | 39.43213305555556 72 | 38.128529944444445 73 | 38.26176866666667 74 | 42.11502133333334 75 | 39.757541388888896 76 | 77 | (let [c (into [] (sort-some 50 coll))] (b (p/sort 5000 compare c))) 78 | (let [c (into [] (sort-some 50 coll))] (b (p/sort 10000 compare c))) 79 | (let [c (into [] (sort-some 50 coll))] (b (p/sort 15000 compare c))) 80 | -------------------------------------------------------------------------------- /benchmarks/bupdate_vals.clj: -------------------------------------------------------------------------------- 1 | (ns bupdate-vals) 2 | 3 | (require '[parallel.core :as p]) 4 | (require '[criterium.core :refer [bench quick-bench]]) 5 | 6 | (defn large-map [i] (into {} (map vector (range i) (range i)))) 7 | 8 | (defn update-vals [m f] 9 | (reduce-kv (fn [m k v] (assoc m k (f v))) {} m)) 10 | 11 | (defn update-vals-transients [m f] 12 | (persistent! (reduce-kv (fn [m k v] (assoc! 
m k (f v))) (transient {}) m))) 13 | 14 | ;; sanity 15 | (def m (large-map 1e5)) 16 | (for [i (range 20)] 17 | (= (sort (vals (update-vals m inc))) 18 | (sort (vals (p/update-vals m inc))))) 19 | 20 | (let [m (large-map 1e5)] (quick-bench (update-vals m inc))) ;; 22ms 21 | (let [m (large-map 1e5)] (quick-bench (update-vals-transients m inc))) ;; 15ms 22 | (let [m (large-map 1e5)] (binding [p/*mutable* true] (quick-bench (p/update-vals m inc)))) ;; 16ms 23 | (let [m (large-map 1e5)] (binding [p/*mutable* false] (quick-bench (p/update-vals m inc)))) ;; 56ms 24 | 25 | (let [m (large-map 1e6)] (quick-bench (update-vals m inc))) ;; 551ms 26 | (let [m (large-map 1e6)] (quick-bench (update-vals-transients m inc))) ;; 241ms 27 | (let [m (large-map 1e6)] (binding [p/*mutable* true] (quick-bench (p/update-vals m inc)))) ;; 215ms 28 | (let [m (large-map 1e6)] (binding [p/*mutable* false] (quick-bench (p/update-vals m inc)))) ;; 1.09secs 29 | 30 | ;; heavy f calculating pi approx. never going beyond 50 iterations here. 31 | (defn f [n] (->> (range) (filter odd?) 
(take (rem n 50)) (map / (cycle [1 -1])) (reduce +) (* 4.0))) 32 | (quick-bench (f 50)) ;; 175ns 33 | 34 | (let [m (large-map 1e5)] (quick-bench (update-vals m f))) ;; 3.5secs 35 | (let [m (large-map 1e5)] (quick-bench (update-vals-transients m f))) ;; 3.3secs 36 | (let [m (large-map 1e5)] (binding [p/*mutable* false] (quick-bench (p/update-vals m f)))) ;; 1.8secs 37 | (let [m (large-map 1e5)] (binding [p/*mutable* true] (quick-bench (p/update-vals m f)))) ;; 1.6secs 38 | -------------------------------------------------------------------------------- /benchmarks/groupby.clj: -------------------------------------------------------------------------------- 1 | (ns groupby) 2 | 3 | (require '[parallel.core :as p]) 4 | (require '[criterium.core :refer [bench quick-bench]]) 5 | 6 | (def ^:const magnitude 1e5) 7 | (def ^:const repetition 5) 8 | (def ^:const sqrt (Math/sqrt (* repetition magnitude))) 9 | 10 | (def v1 ;;all-keys-no-repeat 11 | (into [] (range (* repetition magnitude)))) 12 | 13 | (def v2 ;;many-keys-small-repeat 14 | (into [] (apply concat (for [i (range repetition)] (shuffle (range magnitude)))))) 15 | 16 | (def v3 ;;medium-keys-medium-repeat 17 | (into [] (apply concat (for [i (range sqrt)] (range sqrt))))) 18 | 19 | (def v4 ;;small-keys-many-repeat 20 | (into [] (apply concat (for [i (range magnitude)] (range repetition))))) 21 | 22 | ;; ************* Normal Group-By ************** 23 | 24 | (quick-bench (clojure.core/group-by identity v1)) ;; 229ms 25 | (quick-bench (clojure.core/group-by identity v2)) ;; 268ms 26 | (quick-bench (clojure.core/group-by identity v3)) ;; 127ms 27 | (quick-bench (clojure.core/group-by identity v4)) ;; 95ms 28 | 29 | ;; ************* Parallel Group-By ************** 30 | 31 | (quick-bench (p/group-by identity v1)) ;; 441ms 32 | (quick-bench (p/group-by identity v2)) ;; 168ms 33 | (quick-bench (p/group-by identity v3)) ;; 29ms 34 | (quick-bench (p/group-by identity v4)) ;; 32ms 35 | 36 | ;; ************* Parallel Group-By 
Mutable Result ************** 37 | 38 | (binding [p/*mutable* true] (quick-bench (p/group-by identity v1))) ;; 21ms 39 | (binding [p/*mutable* true] (quick-bench (p/group-by identity v2))) ;; 48ms 40 | (binding [p/*mutable* true] (quick-bench (p/group-by identity v3))) ;; 13ms 41 | (binding [p/*mutable* true] (quick-bench (p/group-by identity v4))) ;; 18ms 42 | -------------------------------------------------------------------------------- /benchmarks/plet.clj: -------------------------------------------------------------------------------- 1 | (ns plet) 2 | 3 | (require '[criterium.core :refer [quick-bench]]) 4 | (require '[parallel.core :as p] :reload) 5 | 6 | (quick-bench (let [a (+ 1 2) b (* 4 3)] (+ a b))) ;; 1.43ns 7 | (quick-bench (p/let [a (+ 1 2) b (* 4 3)] (+ a b))) ;; 15us 8 | -------------------------------------------------------------------------------- /examples/lastfm/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | data 13 | -------------------------------------------------------------------------------- /examples/lastfm/README.md: -------------------------------------------------------------------------------- 1 | # Lastfm dataset processing example 2 | 3 | This example project is designed to showcase Parallel mainly in the context of intensive data IO. 4 | 5 | ## Introduction 6 | 7 | To run the example, you need to download the Last.fm dataset. This is an (old but good) version of the Last.fm data kindly hosted by [Oscar Celma](http://ocelma.net). If you are interested in music recommendation in general, have a look around the web site or [buy his book](http://ocelma.net/MusicRecommendationBook/index.html). 8 | 9 | The project answers a list of interesting questions about music using the Last.fm dataset. 
It first shows how to retrieve the answers with plain Clojure (`src/lastfm/version00.clj`) and then how we could speed up processing using the Parallel library (`src/lastfm/version01.clj`). 10 | 11 | ### Download the data 12 | 13 | Sizes: lastfm-dataset-1K.tar.gz (~641Mb), lastfm-dataset-360K.tar.gz (~543Mb), but both files expand into much larger ones (2.4G and 1.6G respectively). 14 | 15 | ```bash 16 | mkdir data; cd data 17 | curl -O http://mtg.upf.edu/static/datasets/last.fm/lastfm-dataset-1K.tar.gz 18 | curl -O http://mtg.upf.edu/static/datasets/last.fm/lastfm-dataset-360K.tar.gz 19 | tar xvfz lastfm-dataset-1K.tar.gz 20 | tar xvfz lastfm-dataset-360K.tar.gz 21 | cd lastfm-dataset-1K 22 | head -n 1000 userid-timestamp-artid-artname-traid-traname.tsv > small.tsv 23 | cd .. 24 | cd lastfm-dataset-360K 25 | head -n 1000 usersha1-artmbid-artname-plays.tsv > small.tsv 26 | cd .. 27 | ``` 28 | 29 | The instructions above also create `small.tsv` samples of only 1k lines for quick experiments. 30 | -------------------------------------------------------------------------------- /examples/lastfm/project.clj: -------------------------------------------------------------------------------- 1 | (defproject lastfm "0.1.0-SNAPSHOT" 2 | :description "Lastfm dataset processing showcase." 
3 | :url "https://github.com/reborg/parallel" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :jvm-opts ["-Xmx4g" "-server"] 7 | :dependencies [[org.clojure/clojure "1.9.0"] 8 | [parallel "0.8"]]) 9 | -------------------------------------------------------------------------------- /examples/lastfm/src/lastfm/version00.clj: -------------------------------------------------------------------------------- 1 | (ns lastfm.version00 2 | (:require [clojure.string :as s]) 3 | (:import [java.io BufferedReader FileReader Reader StringReader File])) 4 | 5 | ;; ####################### 6 | ;; ### Files and utils ### 7 | ;; ####################### 8 | 9 | ;; Pointing at the original large TSV 10 | (defn plays [] (FileReader. (File. "data/lastfm-dataset-360K/usersha1-artmbid-artname-plays.tsv"))) 11 | (defn details [] (FileReader. (File. "data/lastfm-dataset-360K/usersha1-profile.tsv"))) 12 | (defn listeners [] (FileReader. (File. "data/lastfm-dataset-1K/userid-timestamp-artid-artname-traid-traname.tsv"))) 13 | 14 | (def clean-xform 15 | (comp (map s/trim) 16 | (remove s/blank?) 17 | (map #(s/split % #"\t")))) 18 | 19 | (defn process 20 | ([r xcomp] (process r xcomp conj! [])) 21 | ([r xcomp store! init] 22 | (let [br (BufferedReader. r) 23 | lines (line-seq br) 24 | editable? #(instance? clojure.lang.IEditableCollection %)] 25 | (transduce 26 | (comp clean-xform xcomp) 27 | (completing 28 | store! 29 | #(do (.close br) (if (editable? init) (persistent! %) %))) 30 | (if (editable? init) (transient init) init) 31 | lines)))) 32 | 33 | (defn load-user-info [fname] 34 | (process fname identity 35 | (fn [m [userid :as attrs]] 36 | (assoc! 
m userid (subvec attrs 1 (count attrs)))) {})) 37 | 38 | ; (require '[lastfm.version00 :as v0] :reload) 39 | ; (def details (time (v0/load-user-info (v0/details)))) 40 | ; "Elapsed time: 1467.065929 msecs" 41 | 42 | ;; ####################### 43 | ;; ######## API ########## 44 | ;; ####################### 45 | 46 | ;; What are the most played artists? 47 | (defn top-artists [f] 48 | (->> (process f (map #(nth % 2))) 49 | frequencies 50 | (sort-by last >) 51 | (take 5))) 52 | 53 | ; (time (v0/top-artists (v0/plays))) 54 | ; "Elapsed time: 45463.570766 msecs" 55 | ; (["radiohead" 77348] ["the beatles" 76339] ["coldplay" 66738] 56 | ; ["red hot chili peppers" 48989] ["muse" 47015]) 57 | 58 | (defn top-artists-in [f in-country] 59 | (let [user-info (load-user-info (details)) 60 | for-country (fn [[user-id]] 61 | (let [country (some-> (nth (user-info user-id) 2) s/lower-case) 62 | regxp (re-pattern in-country)] 63 | (re-find regxp (or country ""))))] 64 | (->> (process f 65 | (comp 66 | (filter for-country) 67 | (map #(nth % 2)))) 68 | frequencies 69 | (sort-by last >) 70 | (take 5)))) 71 | 72 | ; (time (v0/top-artists-in (v0/plays) "poland")) 73 | ; "Elapsed time: 37677.731108 msecs" 74 | ; (["metallica" 3869] 75 | ; ["myslovitz" 3778] 76 | ; ["red hot chili peppers" 3610] 77 | ; ["o.s.t.r." 3440] 78 | ; ["system of a down" 3306]) 79 | 80 | (defn how-many-songs-played-for [f band] 81 | (process f 82 | (comp 83 | (filter 84 | (fn [[_ _ played]] 85 | (re-find (re-pattern band) (or played "")))) 86 | (map peek) 87 | (map #(Integer/valueOf %))) 88 | + 0)) 89 | 90 | ; (time (v0/how-many-songs-played-for (v0/plays) "coltrane")) 91 | ; "Elapsed time: 23843.245219 msecs" 92 | ; 1,157,511 93 | 94 | (defn most-played-band-by-day 95 | [fname] 96 | (let [keyfn (fn [item] 97 | (let [[_ ts _ band] item] 98 | [(nth (s/split ts #"T") 0) band])) 99 | reducefn (fn [m item] 100 | (let [k (keyfn item)] 101 | (assoc! 
m k (inc (get m k 0)))))] 102 | (->> (process fname identity reducefn {}) 103 | (sort-by #(nth % 1) >) 104 | (take 5)))) 105 | 106 | ;; (time (v0/most-played-band-by-day (v0/listeners))) 107 | ; "Elapsed time: 65574.980722 msecs" 108 | ; ([["2009-03-21" "Kanye West"] 2331] [["2009-02-28" "T.I."] 2062] 109 | -------------------------------------------------------------------------------- /examples/lastfm/src/lastfm/version01.clj: -------------------------------------------------------------------------------- 1 | (ns lastfm.version01 2 | (:require [parallel.core :as p] 3 | [clojure.string :as s]) 4 | (:import [java.io BufferedReader FileReader Reader StringReader File])) 5 | 6 | ;; ####################### 7 | ;; ### Files and utils ### 8 | ;; ####################### 9 | 10 | ;; Split original files into segments 11 | (defn plays [] "data/lastfm-dataset-360K/splits") 12 | (defn details [] "data/lastfm-dataset-360K/details") 13 | (defn listeners [] "data/lastfm-dataset-1K/splits") 14 | 15 | (def clean-xform 16 | (comp (map s/trim) 17 | (remove s/blank?) 18 | (map #(s/split % #"\t")))) 19 | 20 | (defn process 21 | [fname xcomp] 22 | (p/process-folder fname (comp clean-xform xcomp))) 23 | 24 | (defn load-user-info [fname] 25 | (p/process-folder 26 | fname 27 | (completing 28 | (fn reducef [m [userid :as attrs]] 29 | (assoc! m userid (subvec attrs 1 (count attrs)))) 30 | persistent!) 31 | (fn ([] (transient {})) ([m1 m2] (into m1 m2))) 32 | clean-xform)) 33 | 34 | ; (require '[lastfm.version01 :as v1] :reload) 35 | ; (def details (time (v1/load-user-info (v1/details)))) 36 | ; "Elapsed time: 683.946281 msecs" 37 | 38 | ;; ####################### 39 | ;; ######## API ########## 40 | ;; ####################### 41 | 42 | ;; What are the most played artists? 43 | (defn top-artists [folder] 44 | (->> (process folder (map #(nth % 2))) 45 | frequencies 46 | ; Shall we add a “p/”? 
47 | ; p/frequencies 48 | (sort-by last >) 49 | (take 5))) 50 | 51 | ; (require '[lastfm.version01 :as v1] :reload) 52 | ; (time (v1/top-artists (v1/plays))) 53 | ; "Elapsed time: 17494.570766 msecs" 54 | ; "Elapsed time: 9865.58715 msecs" 55 | ; (["radiohead" 77348] ["the beatles" 76339] ["coldplay" 66738] 56 | 57 | (defn top-artists-in [fname in-country] 58 | (let [user-info (load-user-info (details)) 59 | for-country (fn [[user-id]] 60 | (let [country (some-> (nth (user-info user-id) 2) s/lower-case) 61 | regxp (re-pattern in-country)] 62 | (re-find regxp (or country "")))) 63 | xform (comp clean-xform (filter for-country) (map #(nth % 2)))] 64 | (->> (p/frequencies (File. fname) xform) 65 | (sort-by #(nth % 1) >) 66 | (take 5)))) 67 | 68 | ; (time (v1/top-artists-in (v1/plays) "poland")) 69 | ; "Elapsed time: 5017.731108 msecs" 70 | ; (["metallica" 3869] 71 | ; ["myslovitz" 3778] 72 | ; ["red hot chili peppers" 3610] 73 | ; ["o.s.t.r." 3440] 74 | ; ["system of a down" 3306]) 75 | 76 | (defn how-many-songs-played-for [f band] 77 | (p/process-folder f + + 78 | (comp 79 | clean-xform 80 | (filter (fn [[_ _ played]] (re-find (re-pattern band) (or played "")))) 81 | (map peek) 82 | (map #(Integer/valueOf %))))) 83 | 84 | ; (time (v1/how-many-songs-played-for (v1/plays) "coltrane")) 85 | ; "Elapsed time: 4307.20904 msecs" 86 | ; 1,157,511 87 | 88 | (defn most-played-band-by-day 89 | [fname] 90 | (let [xform (comp 91 | clean-xform 92 | (map (fn [[_ ts _ band]] 93 | [(nth (s/split ts #"T") 0) band])))] 94 | (->> 95 | (p/frequencies (File. fname) xform) 96 | (sort-by #(nth % 1) >) 97 | ; Shall we add a “p/”? 
98 | ; (p/sort #(compare (nth %2 1) (nth %1 1))) 99 | (take 5)))) 100 | 101 | ; (time (v1/most-played-band-by-day (v1/listeners))) 102 | ; "Elapsed time: 16614.461194 msecs" 103 | ; ([["2009-03-21" "Kanye West"] 2331] [["2009-02-28" "T.I."] 2062] 104 | -------------------------------------------------------------------------------- /examples/lastfm/test/lastfm/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns lastfm.core-test 2 | (:require [clojure.test :refer :all] 3 | [lastfm.core :refer :all])) 4 | 5 | (deftest a-test 6 | (testing "FIXME, I fail." 7 | (is (= 0 1)))) 8 | -------------------------------------------------------------------------------- /java/clojure/lang/Get.java: -------------------------------------------------------------------------------- 1 | package clojure.lang; 2 | 3 | public class Get { 4 | 5 | public static PersistentHashMap.INode root(PersistentHashMap phm) { 6 | return phm.root; 7 | } 8 | 9 | public static Boolean hasNullValue(PersistentHashMap phm) { 10 | return phm.hasNull; 11 | } 12 | 13 | public static Object nullValue(PersistentHashMap phm) { 14 | return phm.nullValue; 15 | } 16 | 17 | public static PersistentHashMap.INode[] array(PersistentHashMap.ArrayNode arrayNode) { 18 | return arrayNode.array; 19 | } 20 | 21 | public static Object[] array(PersistentHashMap.BitmapIndexedNode bitmapIndexedNode) { 22 | return bitmapIndexedNode.array; 23 | } 24 | 25 | public static Object[] array(PersistentHashMap.HashCollisionNode hashCollisionNode) { 26 | return hashCollisionNode.array; 27 | } 28 | 29 | public static Object kvreduce(Object[] array, IFn f, Object init) { 30 | return PersistentHashMap.NodeSeq.kvreduce(array, f, init); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject parallel "0.11" 2 | :description "A library of 
parallel-enabled Clojure functions" 3 | :url "https://github.com/reborg/parallel" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.9.0"]] 7 | :java-source-paths ["java"] 8 | :uberjar-name "parallel.jar" 9 | :deploy-repositories [["releases" :clojars] ["snapshots" :clojars]] 10 | :profiles {:dev {:dependencies [[criterium "0.4.4"] 11 | [com.clojure-goes-fast/clj-java-decompiler "0.1.0"]] 12 | :plugins []}} 13 | :jvm-opts ["-Xmx2g" "-server"] 14 | :test-refresh {:watch-dirs ["src" "test"] :refresh-dirs ["src" "test"]}) 15 | -------------------------------------------------------------------------------- /src/parallel/core.clj: -------------------------------------------------------------------------------- 1 | (ns parallel.core 2 | (:refer-clojure :exclude [eduction sequence transduce pmap 3 | frequencies let slurp do doto and or 4 | count group-by sort min max amap distinct]) 5 | (:require [parallel.foldmap :as fmap] 6 | [parallel.merge-sort :as msort] 7 | [parallel.map-combine :as mcombine] 8 | [parallel.fork-middle :as forkm] 9 | [clojure.core.reducers :as r] 10 | [clojure.core.protocols :as p] 11 | [clojure.java.io :as io] 12 | [clojure.core :as c]) 13 | (:import 14 | [parallel.merge_sort MergeSort] 15 | [parallel.map_combine MapCombine] 16 | [java.io FileInputStream BufferedReader FileReader Reader StringReader File] 17 | [java.nio.file Files] 18 | [java.util.concurrent.atomic AtomicInteger AtomicLong] 19 | [java.util.concurrent ConcurrentHashMap ConcurrentLinkedQueue] 20 | [java.util HashMap Collections Queue Map])) 21 | 22 | (def ^:const ncpu (.availableProcessors (Runtime/getRuntime))) 23 | 24 | (def ^:dynamic *mutable* false) 25 | 26 | (defn- foldable? [coll] 27 | (c/or (map? coll) 28 | (vector? coll) 29 | (instance? clojure.core.reducers.Cat coll))) 30 | 31 | (defn- compose 32 | "As a consequence, reducef cannot be a vector. 33 | TODO: could use meta?" 
34 | [xrf] 35 | (if (vector? xrf) 36 | ((peek xrf) (nth xrf 0)) 37 | xrf)) 38 | 39 | (defn xrf 40 | "Expects a reducing function rf and a list 41 | of transducers (or comp thereof). Use with 42 | p/fold to compose any chain of transducers applied to 43 | a reducing function to run in parallel." 44 | [rf & xforms] 45 | (if (empty? xforms) 46 | rf 47 | [rf (apply comp xforms)])) 48 | 49 | (defn- splitting 50 | "Calculates split sizes as they would be generated by 51 | a parallel fold with n=1." 52 | [coll] 53 | (iterate 54 | #(mapcat 55 | (fn [n] [(quot n 2) (- n (quot n 2))]) %) 56 | [(c/count coll)])) 57 | 58 | (defn show-chunks 59 | "Shows chunk sizes for the desired chunk number 60 | on a given collection coll." 61 | [coll nchunks] 62 | {:pre [(== (bit-and nchunks (- nchunks)) nchunks)]} 63 | (->> (splitting coll) 64 | (take-while #(<= (c/count %) nchunks)) 65 | last)) 66 | 67 | (defn chunk-size 68 | "Calculates the necessary chunk-size to obtain 69 | the given number of splits during a parallel fold. 70 | nchunks needs to be a power of two." 71 | [coll nchunks] 72 | (apply c/max (show-chunks coll nchunks))) 73 | 74 | (defn foldvec 75 | "A general purpose reducers/foldvec taking a generic f 76 | to apply at the leaf instead of reduce." 77 | [v n combinef f] 78 | (c/let [cnt (c/count v)] 79 | (cond 80 | (empty? v) (combinef) 81 | (<= cnt n) (f v) 82 | :else (c/let [half (quot cnt 2) 83 | r1 (subvec v 0 half) 84 | r2 (subvec v half cnt) 85 | fc (fn [v] #(foldvec v n combinef f))] 86 | (#'r/fjinvoke 87 | #(c/let [f1 (fc r1) 88 | t2 (#'r/fjtask (fc r2))] 89 | (#'r/fjfork t2) 90 | (combinef (f1) (#'r/fjjoin t2)))))))) 91 | 92 | (defn- reduce-leaf 93 | "reduce-leaf executes at the chunk level, once 94 | the splitting is done. It calls xform single arity 95 | to flush any possible remaining state." 96 | [reducef combinef] 97 | #(c/let [f (compose reducef) 98 | ret (r/reduce f (combinef) %)] 99 | (if (vector? 
reducef) (f ret) ret))) 100 | 101 | (defprotocol Folder 102 | (folder [coll] 103 | [coll nchunks])) 104 | 105 | (extend-protocol Folder 106 | Object ; fallback: sequential reduce that still understands p/xrf vectors 107 | (folder 108 | ([coll] 109 | (reify r/CollFold 110 | (coll-fold [this n combinef reducef] 111 | ((reduce-leaf reducef combinef) coll)))) 112 | ([coll nchunks] 113 | (reify r/CollFold 114 | (coll-fold [this _ combinef reducef] 115 | ((reduce-leaf reducef combinef) coll))))) 116 | clojure.lang.IPersistentVector 117 | (folder 118 | ([coll] 119 | (reify r/CollFold 120 | (coll-fold [this n combinef reducef] 121 | (foldvec coll n combinef (reduce-leaf reducef combinef))))) 122 | ([coll nchunks] 123 | (reify r/CollFold 124 | (coll-fold [this _ combinef reducef] 125 | (foldvec coll (chunk-size coll nchunks) combinef (reduce-leaf reducef combinef)))))) 126 | clojure.lang.PersistentHashMap 127 | (folder 128 | ([coll] 129 | (reify r/CollFold 130 | (coll-fold [m n combinef reducef] 131 | (fmap/fold coll 512 combinef reducef)))) 132 | ([coll nchunks] 133 | (reify r/CollFold 134 | (coll-fold [m n combinef reducef] 135 | (fmap/fold coll 512 combinef reducef)))))) 136 | 137 | (defn fold 138 | "Like reducers fold, but with stateful transducers support. 139 | Expect reducef to be built using p/xrf to defer initialization 140 | (a plain reducing function is accepted too). n is the number-of-chunks 141 | instead of chunk size. n must be a power of 2 and defaults to 32." 142 | ([reducef coll] 143 | (fold (if (vector? reducef) (first reducef) reducef) reducef coll)) ; a plain rf doubles as combinef 144 | ([combinef reducef coll] 145 | (fold 32 combinef reducef coll)) 146 | ([n combinef reducef coll] 147 | (r/fold ::ignored combinef reducef (folder coll n)))) 148 | 149 | (defn transduce 150 | "Similar to core/transduce, but executes transducers in parallel. 151 | Instead of `init`, it accepts a combinef to combine results back 152 | from parallel execution. When combinef is present, it takes 153 | precedence over f to establish the initial value for the reduction."
154 | ([xform f coll] 155 | (transduce xform f f coll)) 156 | ([xform f combinef coll] 157 | (transduce 32 xform f combinef coll)) 158 | ([n xform f combinef coll] 159 | (fold n combinef (xrf f xform) coll))) 160 | 161 | (defn count 162 | ([xform coll] 163 | (count 32 xform coll)) 164 | ([n xform coll] 165 | (c/let [coll (if (foldable? coll) coll (into [] coll)) 166 | cnt (AtomicLong. 0) 167 | reducef (xrf (completing (fn [_ _] (.incrementAndGet cnt))) xform) 168 | combinef (constantly cnt)] 169 | (fold n combinef reducef coll) 170 | (.get cnt)))) 171 | 172 | (extend-protocol clojure.core.protocols/IKVReduce 173 | java.util.Map 174 | (kv-reduce 175 | [amap f init] 176 | (c/let [^java.util.Iterator iter (.. amap entrySet iterator)] 177 | (loop [ret init] 178 | (if (.hasNext iter) 179 | (c/let [^java.util.Map$Entry kv (.next iter) 180 | ret (f ret (.getKey kv) (.getValue kv))] 181 | (if (reduced? ret) 182 | @ret 183 | (recur ret))) 184 | ret))))) 185 | 186 | (defn group-by 187 | "Similar to core/group-by, but executes in parallel. 188 | It takes an optional list of transducers to apply to the 189 | items in coll before generating the groups. Differently 190 | from core/group-by, the order of the items in each 191 | value vector can change between runs. It's generally 2x-5x faster 192 | than core/group-by (without xducers). If dealing with a Java mutable 193 | map with Queue type values is not a problem, a further 2x 194 | speedup can be achieved by: 195 | (binding [p/*mutable* true] (p/group-by f coll)) 196 | Restrictions: it does not support nil values." 197 | [f coll & xforms] 198 | (c/let [coll (if (foldable? coll) coll (into [] coll)) 199 | m (ConcurrentHashMap. (quot (c/count coll) 2) 0.75 ncpu) 200 | combinef (fn ([] m) ([m1 m2])) 201 | rf (fn [^Map m x] 202 | (c/let [k (f x) 203 | ^Queue a (c/or (.get m k) (.putIfAbsent m k (ConcurrentLinkedQueue. 
[x])))] 204 | (when a (.add a x)) 205 | m))] 206 | (fold combinef (apply xrf (completing rf) xforms) coll) 207 | (if *mutable* m (persistent! (reduce-kv (fn [m k v] (assoc! m k (vec v))) (transient {}) m))))) 208 | 209 | (defn update-vals 210 | "Use f to update the values of a map in parallel. It performs well 211 | with non-trivial f, otherwise is outperformed by reduce-kv. 212 | For larger maps (> 100k keys), the final transformation 213 | from mutable to persistent dominates over trivial f trasforms. 214 | You can access the raw mutable java.util.Map by setting the dynamic 215 | binding *mutable* to true. Restrictions: does not support nil values." 216 | [^Map input f] 217 | (c/let [ks (into [] (keys input)) 218 | output (ConcurrentHashMap. (c/count ks) 1. ncpu)] 219 | (r/fold 220 | (fn ([] output) ([_ _])) 221 | (fn [^Map m k] 222 | (.put m k (f (.get input k))) 223 | m) 224 | ks) 225 | (if *mutable* output (into {} output)))) 226 | 227 | (defn sort 228 | "Splits input coll into chunk of 'threshold' (default 8192) 229 | size then sorts chunks in parallel. Input needs conversion into a native 230 | array before splitting. More effective for large colls 231 | (> 1M elements) or non trivial comparators. Set *mutable* to 'true' 232 | to access the raw results as a mutable array." 233 | ([coll] 234 | (sort 8192 < coll)) 235 | ([cmp coll] 236 | (sort 8192 cmp coll)) 237 | ([threshold cmp ^Object coll] 238 | (c/let [a (if (.. coll getClass isArray) coll (to-array coll))] 239 | (msort/sort threshold cmp a) 240 | (if *mutable* a (into [] a))))) 241 | 242 | (defn slurp 243 | "Loads a java.io.File in parallel. By default, 244 | the loaded byte array is converted into an UTF-8 string. 245 | It takes an optional parsef function of the byte array for 246 | additional (or different) processing. When *mutable* var 247 | is true it returns the byte array as is." 248 | ([file] 249 | (slurp file (fn parsef [^bytes a] (String. 
a "UTF-8")))) 250 | ([^File file parsef] 251 | (c/let [size (.length file) 252 | threshold (c/max 1 (quot size (* 4 ncpu))) ; never 0: a zero threshold cannot terminate the split 253 | a (byte-array size)] 254 | (mcombine/map 255 | (fn read-chunk [low high] 256 | (c/let [fis (FileInputStream. file)] 257 | (try 258 | (.skip fis low) 259 | (.read fis a low (- high low)) 260 | (finally (.close fis))))) 261 | (fn [_ _]) 262 | threshold size) 263 | (if *mutable* a (parsef a))))) 264 | 265 | (defn unchunk-map [f coll] 266 | (lazy-seq 267 | (when-let [s (seq coll)] 268 | (cons 269 | (f (first s)) 270 | (unchunk-map f (rest s)))))) 271 | 272 | (defn external-sort 273 | "Allows large datasets (that would otherwise not fit into memory) 274 | to be sorted in parallel. It performs the following on a vector of 'ids' 275 | and 'fetchf', a function from chunk->data: 276 | * split ids into chunks of approximate size 'n' 277 | * call 'fetchf' on a chunk and expects actual data in return 278 | * sort actual data using 'cmp' ('compare' by default) 279 | * save result to temporary files (deleted when the JVM exits) 280 | * lazily concat files in order as they are requested" 281 | ([fetchf ids] 282 | (external-sort compare fetchf ids)) 283 | ([cmp fetchf ids] 284 | (external-sort 512 cmp fetchf ids)) 285 | ([n cmp fetchf ids] 286 | (letfn [(save-chunk! [data] 287 | (c/let [file (c/doto (File/createTempFile "mergesort-" ".tmp") (.deleteOnExit))] 288 | (with-open [fw (io/writer file)] 289 | (binding [*out* fw] (pr data))) 290 | [(first data) file]))] 291 | (->> 292 | (r/fold 293 | n concat 294 | (fn [chunk] (->> chunk fetchf (c/sort cmp) save-chunk! vector)) 295 | (reify r/CollFold 296 | (coll-fold [this n combinef f] 297 | (foldvec (into [] ids) n combinef f)))) 298 | (sort-by first cmp) 299 | (unchunk-map #(read-string (slurp (last %)))) 300 | (mapcat identity))))) 301 | 302 | (defn- nearest-pow2 [x] 303 | (int (Math/pow 2 (- 32 (Integer/numberOfLeadingZeros x))))) 304 | 305 | (defn- fold-adapt 306 | "Select r/fold or p/fold based on presence of xforms.
307 | Adapt p/fold chunk number to the requested chunk-size." 308 | [rf init coll chunk-size xforms] 309 | (c/let [v (if (vector? coll) coll (into [] coll))] 310 | (if (seq xforms) 311 | (fold (nearest-pow2 (quot (c/count v) chunk-size)) 312 | (fn ([] init) ([a b] (rf a b))) 313 | (apply xrf rf xforms) 314 | v) 315 | (r/fold chunk-size (fn ([] init) ([a b] (rf a b))) rf v)))) 316 | 317 | (defn min 318 | "Find the min in coll in parallel. Accepts optional 319 | transducers to apply to coll before searching the min. 320 | Effective for coll size >10k items. 4000 is an approximate 321 | minimal chunk size." 322 | [coll & xforms] 323 | (fold-adapt c/min ##Inf coll 4000 xforms)) 324 | 325 | (defn max 326 | "Find the max in coll in parallel. Accepts optional 327 | transducers to apply to coll before searching the max. 328 | Effective for coll size >10k items. 4000 is an approximate 329 | minimal chunk size." 330 | [coll & xforms] 331 | (fold-adapt c/max ##-Inf coll 4000 xforms)) 332 | 333 | (defn amap 334 | "Applies f in parallel to the elements in the array. 335 | The threshold decides how big a chunk of computation should be before 336 | going sequential and it's given a default based on the number of 337 | available cores." 338 | ([f ^objects a] 339 | (amap (c/max 1 (quot (alength a) (* 2 ncpu))) f a)) ; small arrays would otherwise get threshold 0 340 | ([threshold f ^objects a] 341 | (mcombine/map 342 | (fn [low high] 343 | (loop [idx low] 344 | (when (< idx high) 345 | (aset a idx (f (aget a idx))) 346 | (recur (unchecked-inc idx))))) 347 | (fn [_ _]) 348 | threshold (alength a)) 349 | a)) 350 | 351 | (defn distinct 352 | "Returns a non-lazy and unordered sequence of the distinct elements in coll. 353 | It does not support null values that need to be removed before calling. 354 | Also accepts an optional list of transducers that is applied before removing 355 | duplicates. When bound with *mutable* dynamic var, returns a java.util.Set." 356 | [coll & xforms] 357 | (c/let [coll (if (foldable?
coll) coll (into [] coll)) 358 | m (ConcurrentHashMap. (quot (c/count coll) 2) 0.75 ncpu) 359 | combinef (fn ([] m) ([_ _])) 360 | rf (fn put [^Map m k] (.put m k 1) m)] 361 | (fold combinef (apply xrf (completing rf) xforms) coll) 362 | (if *mutable* (.keySet m) (enumeration-seq (.keys m))))) 363 | 364 | (defn arswap 365 | "Arrays reverse-swap of the regions identified by: 366 | [low, low + radius]....[high - radius, high] 367 | Takes transformation f to apply to each item. 368 | Preconditions: (pos? (alength a)), (< low high), (pos? radius)" 369 | [f low high radius ^objects a] 370 | (loop [left low right high] 371 | (when (c/and (<= left right) (< left (+ low radius))) 372 | (c/let [tmp (f (aget a left))] 373 | (aset a left (f (aget a right))) 374 | (aset a right tmp) 375 | (recur (inc left) (dec right))))) a) 376 | 377 | (defn- sequential-armap 378 | "Reverses the array in place, applying f once to every element." 379 | [f ^objects a] 380 | (loop [i 0] 381 | (when (< i (quot (inc (alength a)) 2)) ; stop at the midpoint so no pair is swapped back 382 | (c/let [tmp (f (aget a i)) 383 | j (- (alength a) i 1)] 384 | (aset a i (f (aget a j))) 385 | (aset a j tmp)) 386 | (recur (unchecked-inc i))))) 387 | 388 | (defn armap 389 | "Applies f in parallel over the reverse of the array. 390 | The threshold decides how big is the chunk of sequential 391 | computation, with a default of alength / twice the CPUs. 392 | Performs better than sequential for non-trivial transforms." 393 | ([f ^objects a] 394 | (when a 395 | (armap (quot (alength a) (* 2 ncpu)) f a))) 396 | ([threshold f ^objects a] 397 | (when (c/and a (pos? (alength a))) 398 | (if (pos? threshold) 399 | (forkm/submit f arswap threshold a) 400 | (sequential-armap f a))) a)) 401 | 402 | (defn- should-be [p msg form] 403 | (when-not p 404 | (c/let [line (:line (meta form)) 405 | msg (format "%s requires %s in %s:%s" (first form) msg *ns* line)] 406 | (throw (IllegalArgumentException.
msg))))) 407 | 408 | (defmacro let 409 | "Evaluates bindings in parallel and returns the result of 410 | evaluating body in the context of those bindings. Bindings 411 | have to be indpendent from each other." 412 | [bindings & body] 413 | (should-be (vector? bindings) "a vector for its bindings" &form) 414 | (should-be (even? (c/count bindings)) "an even number of forms in bindings" &form) 415 | (c/let [ks (take-nth 2 bindings) 416 | vs (take-nth 2 (rest bindings)) 417 | ts (take (c/count ks) (repeatedly gensym))] 418 | `(c/let ~(vec (interleave ts (map #(list 'future %) vs))) 419 | (c/let ~(vec (interleave ks (map #(list 'deref %) ts))) 420 | ~@body)))) 421 | 422 | (defmacro args 423 | "Call f with each argument evaluated in parralel. 424 | This is roughly equivalent to the expansion: 425 | (p/args + 1 2 3) => 426 | (let [a (future 1) b (future 2) c (future 3)] (+ @a @b @c))" 427 | [f & args] 428 | (c/let [ts (take (c/count args) (repeatedly gensym))] 429 | `(c/let ~(vec (interleave ts (map #(list 'future %) args))) 430 | (~f ~@(map #(list 'deref %) ts))))) 431 | 432 | (defmacro or 433 | "Like `core/or` but each expression is evaluated in parralel. 434 | It does not short-circuit." 435 | [& args] 436 | (c/let [ts (take (c/count args) (repeatedly gensym))] 437 | `(let ~(vec (interleave ts (map #(list 'future %) args))) 438 | (reduce #(c/or %1 %2) nil ~(vec (map #(list 'deref %) ts)))))) 439 | 440 | (defmacro and 441 | "Like `core/and` but each expression is evaluated in parralel. 442 | It does not short-circuit." 443 | [& args] 444 | (c/let [ts (take (c/count args) (repeatedly gensym))] 445 | `(let ~(vec (interleave ts (map #(list 'future %) args))) 446 | (reduce #(c/and %1 %2) true ~(vec (map #(list 'deref %) ts)))))) 447 | 448 | (defmacro do 449 | "Like core/do but forms evaluate in paralell." 450 | [& body] 451 | (when-not (empty? 
body) 452 | (c/let [ts (repeatedly gensym) 453 | bindings (vec (interleave ts body))] 454 | `(let ~bindings ~(peek (pop bindings)))))) 455 | 456 | (defmacro doto 457 | "Like core/doto but forms evaluate in parallel." 458 | [x & forms] 459 | (c/let [target (gensym)] 460 | `(c/let [~target ~x] 461 | (parallel.core/do 462 | ~@(map (fn [f] 463 | (if (seq? f) 464 | `(~(first f) ~target ~@(next f)) 465 | `(~f ~target))) 466 | forms)) 467 | ~target))) 468 | 469 | (defn process-folder 470 | "Applies xforms to all lines of all files inside folder. It supports 471 | statful transducers (for example, to skip the header, group stuff, etc.) 472 | By default it produces a vector of results, but you can pass a different 473 | reducef+combinef to use different data structures." 474 | ([^String folder xforms] 475 | (process-folder 476 | folder 477 | (completing conj! persistent!) 478 | (r/monoid into conj!) 479 | xforms)) 480 | ([^String folder reducef combinef xforms] 481 | (transduce 482 | (comp 483 | (mapcat #(Files/readAllLines (.toPath %))) 484 | xforms) 485 | reducef 486 | combinef 487 | (into [] (rest (file-seq (java.io.File. folder))))))) 488 | 489 | (defn- transducing 490 | "Prepare the input for transducing, making some assumptions 491 | about the type. A folder is considered a group of files 492 | containing lines." 493 | [input] 494 | (cond 495 | (foldable? input) input 496 | (c/and (instance? File input) (.isDirectory input)) (into [] (rest (file-seq input))) 497 | (instance? File input) (Files/readAllLines (.toPath input)) 498 | :else (into [] input))) 499 | 500 | (defn frequencies 501 | "Like clojure.core/frequencies, but executes in parallel. 502 | It takes an optional comp of transducers to apply to coll before 503 | the frequency is calculated." 504 | ([input] 505 | (frequencies input identity)) 506 | ([input custom-xforms] 507 | (c/let [folder? (c/and (instance? File input) (.isDirectory input)) 508 | xforms (if folder? 
509 | (comp (mapcat #(Files/readAllLines (.toPath %))) custom-xforms) 510 | custom-xforms) 511 | reducef (completing 512 | (fn [^Map m k] 513 | (c/let [^AtomicInteger v (c/or (.get m k) (.putIfAbsent m k (AtomicInteger. 1)))] 514 | (when v (.incrementAndGet v)) 515 | m)) 516 | identity) 517 | m (ConcurrentHashMap.) 518 | combinef (fn ([] m) ([_ _] m))] 519 | (transduce xforms reducef combinef (transducing input)) 520 | (if *mutable* m (into {} m))))) 521 | 522 | (defn pmap 523 | "Like pmap but eager and unordered. It runs n parallel threads 524 | (default 100) independently from the chunk size or the number 525 | of cores." 526 | [f input & [n]] 527 | (c/let [q (ConcurrentLinkedQueue. input) 528 | n (c/or n 100) 529 | workers (repeatedly #(future (when-let [item (.poll q)] (f item))))] 530 | (loop [workers workers res []] 531 | (c/let [res (into res (keep deref (doall (take n workers))))] 532 | (if (.isEmpty q) 533 | res 534 | (recur (drop n workers) res)))))) 535 | -------------------------------------------------------------------------------- /src/parallel/foldmap.clj: -------------------------------------------------------------------------------- 1 | (ns parallel.foldmap 2 | (:require [clojure.core.reducers :as r]) 3 | (:import [clojure.lang RT Get 4 | PersistentHashMap 5 | PersistentHashMap$INode 6 | PersistentHashMap$ArrayNode 7 | PersistentHashMap$BitmapIndexedNode 8 | PersistentHashMap$HashCollisionNode] 9 | [java.util.concurrent Callable] 10 | [java.util ArrayList List])) 11 | 12 | (set! *warn-on-reflection* false) 13 | 14 | (defn- agetter 15 | "Trickiness. This needs to be an indirected call. 16 | It prevents Clojure from inlining Get/array implementation 17 | into the generated function class. The generated parallel.foldmap 18 | package class doesn't have protected access to clojure.lang. 19 | Will always throw a reflection warning." 20 | [node] (Get/array node)) 21 | 22 | (set! 
*warn-on-reflection* true) 23 | 24 | (defn- fold-tasks [^List tasks combinef] 25 | (cond 26 | (.isEmpty tasks) (combinef) 27 | (== 1 (.size tasks)) (.call ^Callable (.get tasks 0)) 28 | :else (let [t1 (.subList tasks 0 (quot (.size tasks) 2)) 29 | t2 (.subList tasks (quot (.size tasks) 2) (.size tasks)) 30 | forked (#'r/fjfork (#'r/fjtask #(fold-tasks t2 combinef)))] 31 | (combinef (fold-tasks t1 combinef) 32 | (#'r/fjjoin forked))))) 33 | 34 | (defn- compose 35 | "As a consequence, reducef cannot be a vector." 36 | [xrf] 37 | (if (vector? xrf) 38 | ((last xrf) (first xrf)) 39 | xrf)) 40 | 41 | (defprotocol Foldmap 42 | (fold [m n combinef reducef]) 43 | (kvreduce [node f init])) 44 | 45 | (extend-protocol Foldmap 46 | 47 | (Class/forName "[Ljava.lang.Object;") 48 | (fold [m n combinef reducef] 49 | (throw (RuntimeException. "Not implemented"))) 50 | (kvreduce [node f init] 51 | ;; workaround type hints are lost [CLJ-1381] 52 | (let [^"[Ljava.lang.Object;" node node ^Object init init] 53 | (loop [idx 0 res init] 54 | (if (or (RT/isReduced res) (>= idx (alength ^"[Ljava.lang.Object;" node))) 55 | res 56 | (let [idx+1 (unchecked-inc idx) 57 | idx+2 (unchecked-add idx 2)] 58 | (if (nil? (aget node idx)) 59 | (let [node (aget node idx+1)] 60 | (if (nil? node) 61 | (recur idx+2 res) 62 | (recur idx+2 (kvreduce node f res)))) 63 | (recur idx+2 (f res [(aget node idx) (aget node idx+1)])))))))) 64 | 65 | PersistentHashMap 66 | (fold [m n combinef reducef] 67 | (#'r/fjinvoke 68 | #(let [ret (combinef) 69 | ret (if (Get/root m) (combinef ret (fold (Get/root m) n combinef reducef)) ret)] 70 | (if (Get/hasNullValue m) 71 | (combinef ret ((compose reducef) (combinef) [nil (Get/nullValue m)])) ; same 2-arg [k v] call the node leaves use 72 | ret)))) 73 | (kvreduce [node f init] 74 | (throw (RuntimeException. "Not implemented"))) 75 | 76 | PersistentHashMap$ArrayNode 77 | (fold [m n combinef reducef] 78 | (let [tasks (ArrayList.)
79 | ^"[Lclojure.lang.PersistentHashMap$INode;" array (agetter m)] 80 | (dotimes [idx (alength array)] 81 | (let [node (aget array idx)] 82 | (if (not (nil? node)) 83 | (.add tasks #(fold node n combinef reducef))))) 84 | (fold-tasks tasks combinef))) 85 | (kvreduce [node f init] 86 | ;; reduce over the child nodes of this ArrayNode, skipping empty slots 87 | (let [^"[Lclojure.lang.PersistentHashMap$INode;" array (agetter node)] 88 | (loop [idx 0 res init] 89 | (if (or (RT/isReduced res) (>= idx (alength array))) 90 | res 91 | (if (nil? (aget array idx)) 92 | (recur (unchecked-inc idx) res) 93 | (recur (unchecked-inc idx) (kvreduce (aget array idx) f res))))))) 94 | 95 | PersistentHashMap$BitmapIndexedNode 96 | (fold [m n combinef reducef] 97 | (let [^objects array (agetter m)] 98 | (kvreduce array (compose reducef) (combinef)))) 99 | (kvreduce [node f init] 100 | (let [^objects array (agetter node)] 101 | (kvreduce array f init))) 102 | 103 | PersistentHashMap$HashCollisionNode 104 | (fold [m n combinef reducef] 105 | (let [^objects array (agetter m)] 106 | (kvreduce array (compose reducef) (combinef)))) 107 | (kvreduce [node f init] 108 | (let [^objects array (agetter node)] 109 | (kvreduce array f init)))) 110 | -------------------------------------------------------------------------------- /src/parallel/fork_middle.clj: -------------------------------------------------------------------------------- 1 | (ns parallel.fork-middle 2 | (:require [clojure.core.reducers :as r]) 3 | (:import [java.util.concurrent Callable ForkJoinPool ForkJoinTask] 4 | [java.util ArrayList List])) 5 | 6 | (set! *warn-on-reflection* true) 7 | 8 | (deftype ForkMiddle [^objects a 9 | ^int low ^int high ^int radius 10 | ^Callable mapf ^Callable f] 11 | Callable 12 | (call [this] 13 | (let [size (- (- high low) (* 2 radius))] 14 | (if (<= size radius) 15 | (f mapf low high (inc (quot (- high low) 2)) a) 16 | (#'r/fjinvoke 17 | #(let [middle (ForkMiddle.
a (+ low radius) (- high radius) radius mapf f) 18 | t (.fork (ForkJoinTask/adapt middle))] 19 | (f mapf low high radius a) 20 | (.join ^ForkJoinTask t))))))) 21 | 22 | (defn submit [mapf f radius ^objects a] 23 | (let [n (alength a) 24 | ^ForkJoinPool pool @r/pool] 25 | (.join (.submit pool (ForkMiddle. a 0 (dec n) radius mapf f))))) 26 | 27 | ;; Different strategy, similar results. 28 | ; (defn fork-tasks 29 | ; "Fork a collection of tasks by recusively 30 | ; splitting into halves." 31 | ; [^List tasks] 32 | ; (let [cnt (.size tasks)] 33 | ; (cond 34 | ; (= 1 cnt) (.call ^Callable (.get tasks 0)) 35 | ; (> cnt 1) 36 | ; (let [mid (quot cnt 2)] 37 | ; (#'r/fjinvoke 38 | ; (fn [] 39 | ; (let [task (#'r/fjtask #(fork-tasks (.subList tasks mid cnt)))] 40 | ; (#'r/fjfork task) 41 | ; (fork-tasks (.subList tasks 0 mid)) 42 | ; (#'r/fjjoin task)))))))) 43 | 44 | ; (defn submit 45 | ; "A forking strategy that chops off chunks 46 | ; at the edges and fork the rest in the middle." 47 | ; [mapf f ^long radius ^objects a] 48 | ; (let [tasks (ArrayList.)] 49 | ; (loop [low 0 high (dec (alength a))] 50 | ; (if (> (- (- high low) (* 2 radius)) radius) 51 | ; (do 52 | ; (.add tasks #(f mapf low high radius a)) 53 | ; (recur (+ low radius) (- high radius))) 54 | ; (.add tasks #(f mapf low high (inc (quot (- high low) 2)) a)))) 55 | ; (fork-tasks tasks))) 56 | -------------------------------------------------------------------------------- /src/parallel/map_combine.clj: -------------------------------------------------------------------------------- 1 | (ns parallel.map-combine 2 | (:refer-clojure :exclude [map]) 3 | (:require [clojure.core.reducers :as r]) 4 | (:import [java.util.concurrent Callable ForkJoinPool])) 5 | 6 | (set! 
*warn-on-reflection* true) 7 | 8 | (deftype MapCombine [^int low ^int high ^int threshold 9 | ^Callable mapf ^Callable combinef] 10 | Callable 11 | (call [this] 12 | (let [size (- high low)] 13 | (if (or (<= size threshold) (<= size 1)) ; a range of 0 or 1 cannot be split further 14 | (mapf low high) 15 | (let [middle (+ low (bit-shift-right size 1)) 16 | l (MapCombine. low middle threshold mapf combinef) 17 | h (MapCombine. middle high threshold mapf combinef)] 18 | (let [fc (fn [^Callable child] #(.call child))] 19 | (#'r/fjinvoke 20 | #(let [f1 (fc l) 21 | t2 (#'r/fjtask (fc h))] 22 | (#'r/fjfork t2) 23 | (combinef (f1) (#'r/fjjoin t2)))))))))) 24 | 25 | (defn map [mapf combinef threshold n] 26 | (let [^ForkJoinPool pool @r/pool] 27 | (.join (.submit pool (MapCombine. 0 n threshold mapf combinef))))) 28 | -------------------------------------------------------------------------------- /src/parallel/merge_sort.clj: -------------------------------------------------------------------------------- 1 | (ns parallel.merge-sort 2 | (:refer-clojure :exclude [sort]) 3 | (:require [clojure.core.reducers :as r]) 4 | (:import 5 | [java.util.concurrent Callable ForkJoinPool] 6 | [java.util Arrays Comparator])) 7 | 8 | (set! *warn-on-reflection* true) 9 | 10 | (definterface IMergeSort 11 | (merge [mid]) 12 | (sort [])) 13 | 14 | (deftype MergeSort [^objects a 15 | ^int lo 16 | ^int hi 17 | ^int threshold 18 | ^Comparator cmp] 19 | 20 | Callable 21 | (call [this] (.sort this)) 22 | 23 | IMergeSort 24 | (merge [this mid] 25 | (when (pos? (.compare cmp (aget a (dec mid)) (aget a mid))) 26 | (let [size (- hi lo) 27 | lsize (- mid lo) 28 | ^objects aux (object-array size)] 29 | (System/arraycopy a lo aux 0 size) 30 | (loop [k lo i 0 j lsize] 31 | (when (< k hi) 32 | (if (or (>= j size) (and (< i lsize) (neg?
(.compare cmp (aget aux i) (aget aux j))))) 33 | (do (aset a k (aget aux i)) (recur (inc k) (inc i) j)) 34 | (do (aset a k (aget aux j)) (recur (inc k) i (inc j))))))))) 35 | 36 | (sort [this] 37 | (let [size (- hi lo)] 38 | (if (<= size threshold) 39 | (Arrays/sort a lo hi cmp) 40 | (let [mid (+ lo (bit-shift-right size 1)) 41 | l (MergeSort. a lo mid threshold cmp) 42 | h (MergeSort. a mid hi threshold cmp)] 43 | (let [fc (fn [^Callable child] #(.call child))] 44 | (#'r/fjinvoke 45 | #(let [f1 (fc l) 46 | t2 (#'r/fjtask (fc h))] 47 | (#'r/fjfork t2) 48 | (f1) 49 | (#'r/fjjoin t2) 50 | (.merge this mid))))))))) 51 | 52 | (defn sort [threshold cmp ^objects a] 53 | (let [n (alength a) 54 | ^ForkJoinPool pool @r/pool] 55 | (.join (.submit pool (MergeSort. a 0 n threshold cmp))))) 56 | -------------------------------------------------------------------------------- /src/parallel/xf.clj: -------------------------------------------------------------------------------- 1 | (ns parallel.xf 2 | (:refer-clojure :exclude [interleave pmap identity])) 3 | 4 | (defn interleave 5 | "Transducer version of core/interleave." 6 | [coll] 7 | (fn [rf] 8 | (let [fillers (volatile! (seq coll))] 9 | (fn 10 | ([] (rf)) 11 | ([result] (rf result)) 12 | ([result input] 13 | (if-let [[filler] @fillers] 14 | (let [step (rf result input)] 15 | (if (reduced? step) 16 | step 17 | (do 18 | (vswap! fillers next) 19 | (rf step filler)))) 20 | (reduced result))))))) 21 | 22 | (defn pmap 23 | "Like map transducer, but items are processed in chunk of up to 32 items 24 | in parallel. Only effective with computational intensive f. Unlike normal 25 | map/pmap, it does not accept multiple inputs." 26 | [f] 27 | (comp 28 | (partition-all 32) 29 | (fn [rf] 30 | (fn 31 | ([] (rf)) 32 | ([result] (rf result)) 33 | ([result input] (rf result (clojure.core/pmap f input))))) 34 | cat)) 35 | 36 | (def identity 37 | "Identity transducer. 
When multiple inputs are present, 38 | it wraps them in a list similarly to what (map list) transducer 39 | would produce." 40 | (fn [rf] 41 | (fn 42 | ([] (rf)) 43 | ([res] (rf res)) 44 | ([res in] (rf res in)) 45 | ([res in & ins] (rf res (list* in ins)))))) 46 | -------------------------------------------------------------------------------- /test/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns core-test 2 | (:import [clojure.lang RT] 3 | [java.io File] 4 | [java.util.concurrent ConcurrentLinkedQueue]) 5 | (:require [parallel.core :as p] 6 | [clojure.core.reducers :as r] 7 | [clojure.test :refer :all])) 8 | 9 | (deftest frequencies-test 10 | (testing "frequencies with xform" 11 | (is (= 5000 (count (p/frequencies (range 1e4) (filter odd?))))) 12 | (is (= {":a" 2 ":b" 3} (p/frequencies [:a :a :b :b :b] (map str))))) 13 | (testing "a dictionary of words with no dupes" 14 | (let [dict (slurp "test/words")] 15 | (is (= (count (re-seq #"\S+" dict)) 16 | (->> dict 17 | (re-seq #"\S+") 18 | (frequencies) 19 | (map second) 20 | (reduce +)))))) 21 | (testing "misc examples" 22 | (are [expected test-seq] (= (p/frequencies test-seq) expected) 23 | {\p 2 \s 4 \i 4 \m 1} "mississippi" 24 | {1 4 2 2 3 1} [1 1 1 1 2 2 3] 25 | {1 3 2 2 3 1} [1 1 1 2 2 3] 26 | {1 4 2 2 3 1} '(1 1 1 1 2 2 3)))) 27 | 28 | (defn large-map [i] (into {} (map vector (range i) (range i)))) 29 | 30 | (deftest update-vals-test 31 | (testing "sanity" 32 | (is (= (map inc (range 1000)) 33 | (sort (vals (p/update-vals (large-map 1000) inc))))))) 34 | 35 | (defmacro repeater [& forms] 36 | `(first (distinct (for [i# (range 500)] (do ~@forms))))) 37 | 38 | (defn chunkedf [f rf size coll] 39 | (->> coll (partition-all size) (mapcat f) (reduce rf))) 40 | 41 | (deftest stateful-transducers 42 | (testing "should drop based on chunk size" 43 | (is (= (chunkedf #(drop 10 %) + 200 (vec (range 1600))) 44 | (repeater (r/fold 200 + (p/xrf + (drop 10)) (p/folder 
(vec (range 1600))))))) 45 | (is (= (chunkedf #(drop 10 %) + 100 (vec (range 204800))) 46 | (repeater (r/fold 100 + (p/xrf + (drop 10)) (p/folder (vec (range 204800))))))) 47 | (is (= (chunkedf #(drop 10 %) + 400 (vec (range 1600))) 48 | (repeater (r/fold + (p/xrf + (drop 10)) (p/folder (vec (range 1600)))))))) 49 | (testing "folding by number of chunks" 50 | (is (= [3 4 5 6 7 8 9 10 11 12 51 | 16 17 18 19 20 21 22 23 24 25 52 | 29 30 31 32 33 34 35 36 37 38 53 | 42 43 44 45 46 47 48 49 50 51] 54 | (r/fold "ignored" 55 | (r/monoid concat conj) 56 | (p/xrf conj (drop 3)) 57 | (p/folder (vec (range 52)) 4)))) 58 | (is (= (- 1802 (* 3 8)) 59 | (count (r/fold "ignored" 60 | (r/monoid concat conj) 61 | (p/xrf conj (drop 3)) 62 | (p/folder (vec (range 1802)) 8)))))) 63 | (testing "p/fold entry point at 32 default chunks" 64 | (is (= (chunkedf #(drop 10 %) + (/ 2048 32) (vec (map inc (range 2048)))) 65 | (p/fold (p/xrf + (drop 10) (map inc)) (vec (range 2048)))))) 66 | 67 | (testing "p/fold VS r/fold on stateless xducers should be the same" 68 | (let [v (vec (range 10000))] 69 | (is (= (r/fold + ((comp (map inc) (filter odd?)) +) v) 70 | (p/fold (p/xrf + (map inc) (filter odd?)) v) 71 | (p/fold + ((comp (map inc) (filter odd?)) +) v))))) 72 | 73 | (testing "p/transduce" 74 | (let [v (vec (range 10000))] 75 | (is (= (reduce + 0 (filter odd? (map inc v))) 76 | (p/transduce (comp (map inc) (filter odd?)) + v))) 77 | (is (= (reduce conj [] (filter odd? (map inc v))) 78 | (p/transduce (comp (map inc) (filter odd?)) conj into v))) 79 | (is (= [248 249] 80 | (nth 81 | (p/transduce 82 | 4 83 | (comp (drop 240) (partition-all 4)) 84 | conj into 85 | (vec (range 1000))) 2))))) 86 | 87 | (testing "p/folding without reducing, just conj" 88 | (let [v (vec (range 10000))] 89 | (is (= (reduce conj [] (filter odd? 
(map inc v))) 90 | (r/fold 91 | (r/monoid into (constantly [])) 92 | ((comp (map inc) (filter odd?)) conj) v) 93 | (p/fold 94 | (r/monoid into (constantly [])) 95 | ((comp (map inc) (filter odd?)) conj) v))))) 96 | 97 | (testing "hashmaps, not just vectors" 98 | (is (= {\a [21] \z [23] \h [10 12]} 99 | (p/fold 100 | (r/monoid #(merge-with into %1 %2) (constantly {})) 101 | (fn [m [k v]] 102 | (let [c (Character/toLowerCase ^Character (first k))] 103 | (assoc m c (conj (get m c []) v)))) 104 | (hash-map "abba" 21 "zubb" 23 "hello" 10 "hops" 12))))) 105 | 106 | (testing "folding hashmaps with transducers" 107 | (is (= {0 1 1 2 2 3 3 4} 108 | (p/fold 109 | (r/monoid merge (constantly {})) 110 | (p/xrf conj (map (fn [[k v]] [k (inc v)]))) 111 | (hash-map 0 0 1 1 2 2 3 3))))) 112 | 113 | (testing "exercising all code with larger maps" 114 | (is (= 999 115 | ((p/fold 116 | (r/monoid merge (constantly {})) 117 | (p/xrf conj 118 | (filter (fn [[k v]] (even? k))) 119 | (map (fn [[k v]] [k (inc v)]))) 120 | (zipmap (range 10000) (range 10000))) 998))))) 121 | 122 | (deftest counting 123 | (testing "count a coll" 124 | (is (= 100000 (p/count (map inc) (range 1e5)))) 125 | (is (= (reduce + (range 50)) (p/count (comp (mapcat range)) (range 50)))))) 126 | 127 | (deftest grouping 128 | (testing "sanity" 129 | (is (= 5000 (count ((p/group-by odd? (range 10000)) true))))) 130 | (testing "with xducers" 131 | (is (= 1667 (count ((p/group-by odd? (range 10000) (map inc) (filter #(zero? (mod % 3)))) true))))) 132 | (testing "with stateful xducers" 133 | (is (= 1133 (count ((p/group-by odd? (range 10000) (drop 100) (map inc) (filter #(zero? 
(mod % 3)))) true))))) 134 | (testing "anagrams" 135 | (let [dict (slurp "test/words")] 136 | (is (= #{"caret" "carte" "cater" "crate" 137 | "creat" "creta" "react" "recta" "trace"} 138 | (into #{} 139 | (->> dict 140 | (re-seq #"\S+") 141 | (p/group-by sort) 142 | (sort-by (comp count second) >) 143 | (map second) 144 | first))))))) 145 | 146 | (deftest sorting 147 | (testing "sanity" 148 | (let [coll (reverse (range 1000)) 149 | c2 (shuffle (map (comp str char) (range 65 91)))] 150 | (is (= (range 1000) (p/sort 200 < coll))) 151 | (is (= coll (p/sort 200 > coll))) 152 | (is (= (sort compare c2) (p/sort compare c2)))))) 153 | 154 | ;; (int (/ 100000 (Math/pow 2 8))) 155 | (deftest external-sorting 156 | (testing "sanity" 157 | (let [coll (into [] (reverse (range 1000)))] 158 | (is (= 0 159 | (first (p/external-sort 125 compare identity coll)))))) 160 | (testing "additional processing" 161 | (let [coll (map #(str % "-" %) (range 100000)) 162 | fetchf (fn [c] (map #(clojure.string/split % #"-") c))] 163 | (is (= ["99999" "99999"] 164 | (first (p/external-sort 1562 #(compare (peek %2) (peek %1)) fetchf coll))))))) 165 | 166 | (deftest min-max 167 | (testing "min" 168 | (let [c (shuffle (conj (range 100000) -3))] 169 | (is (= -3 (p/min c))))) 170 | (testing "max" 171 | (let [c (shuffle (conj (range 100000) -3))] 172 | (is (= 99999 (p/max c))))) 173 | (testing "xducers" 174 | (let [c (into [] (shuffle (conj (range 100000) -3)))] 175 | (is (= 99998 (p/max c (map dec)))))) 176 | (testing "min-index" 177 | (let [c (conj (range 100000) -3)] 178 | (is (= 99999 (p/max c)))))) 179 | 180 | (deftest pamap-test 181 | (testing "sanity" 182 | (let [c (to-array (range 100000))] 183 | (is (= (map inc (range 10)) (take 10 (p/amap inc c))))))) 184 | 185 | (deftest distinct-test 186 | (let [c (shuffle (apply concat (take 5 (repeat (range 10000)))))] 187 | (testing "sanity" 188 | (is (= (sort (distinct c)) (sort (p/distinct c))))) 189 | (testing "with transducers" 190 | (is (= [1 3 5 7 
9] (take 5 (sort (p/distinct c (map inc) (filter odd?))))))) 191 | (testing "equality semantic" 192 | (is (= (sort (distinct (map vector c c))) 193 | (sort (p/distinct (map vector c c)))))) 194 | (testing "mutability on" 195 | (is (= #{1 2 3} 196 | (into #{} (binding [p/*mutable* true] (p/distinct [1 2 3])))))))) 197 | 198 | (deftest reverse-test 199 | (testing "swap reverse simmetrical regions in arrays" 200 | (let [s (range 10)] 201 | (is (= s (let [a (object-array s)] (p/arswap identity 0 9 0 a) (into [] a)))) 202 | (is (= (reverse s) (let [a (object-array s)] (p/arswap identity 0 9 5 a) (into [] a)))) 203 | (is (= (reverse s) (let [a (object-array s)] (p/arswap identity 0 9 10 a) (into [] a)))) 204 | (is (= [9 8 2 3 4 5 6 7 1 0] (let [a (object-array s)] (p/arswap identity 0 9 2 a) (into [] a)))) 205 | (is (= [9 8 7 6 5 4 3 2 1] (let [a (object-array (rest s))] (p/arswap identity 0 8 4 a) (into [] a)))) 206 | (is (= [9 8 7 4 5 6 3 2 1] (let [a (object-array (rest s))] (p/arswap identity 0 8 3 a) (into [] a)))))) 207 | (testing "swap reverse with transform" 208 | (let [s (range 10)] 209 | (is (= ["9" "8" 2 3 4 5 6 7 "1" "0"] (let [a (object-array s)] (p/arswap str 0 9 2 a) (into [] a)))) 210 | (is (= [:9 :8 :7 4 5 6 :3 :2 :1] (let [a (object-array (rest s))] (p/arswap (comp keyword str) 0 8 3 a) (into [] a)))))) 211 | (testing "sanity" 212 | (is (= nil (p/armap identity nil))) 213 | (is (= (reverse ()) (let [a (object-array ())] (p/armap identity a) (into [] a)))) 214 | (is (= (reverse (range 1)) (let [a (object-array (range 1))] (p/armap identity a) (into [] a)))) 215 | (is (= (reverse (range 5)) (let [a (object-array (range 5))] (p/armap identity a) (into [] a)))) 216 | (is (= (reverse (range 1e2)) (let [a (object-array (range 1e2))] (p/armap identity a) (into [] a)))) 217 | (let [xs (shuffle (range 11)) 218 | a (object-array xs)] 219 | (is (= (reverse (map str xs)) (do (p/armap str a) (into [] a))))))) 220 | 221 | (deftest slurping 222 | (testing "slurping 
sanity" 223 | (is (= (slurp "test/words") (p/slurp (File. "test/words")))))) 224 | 225 | (deftest parallel-let 226 | (testing "it works like normal let" 227 | (is (= 3 (p/let [a 1 b 2] (+ a b)))) 228 | (is (= 3 (p/let [a (future 1) b (future 2)] (+ @a @b)))) 229 | (is (= 6 (p/let [[a b] [1 2] {c :c} {:c 3}] (+ a b c)))) 230 | (is (= 300 (p/let [a (do (Thread/sleep 20) 100) b (do (Thread/sleep 10) 200)] (+ a b)))))) 231 | 232 | (deftest parallel-args 233 | (testing "works like a standard function invocation" 234 | (is (= 6 (p/args + 1 2 3))) 235 | (is (= 1 (p/args first [1 2 3]))) 236 | (is (= 300 (p/args + (do (Thread/sleep 20) 100) (do (Thread/sleep 10) 200)))))) 237 | 238 | (deftest parallel-and 239 | (testing "works like a standard and" 240 | (is (= (and) (p/and))) 241 | (is (= "a" (p/and true 1 "a"))) 242 | (is (= :x (p/and :y true :x))) 243 | (is (= false (p/and true false true))) 244 | (is (p/and (do (Thread/sleep 20) true) (do (Thread/sleep 10) true))))) 245 | 246 | (deftest parallel-or 247 | (testing "works like a standard or" 248 | (is (= (or) (p/or))) 249 | (is (= true (p/or true 1 "a"))) 250 | (is (= :y (p/or false :y true :x))) 251 | (is (= true (p/or true false true))) 252 | (is (p/or (do (Thread/sleep 20) false) (do (Thread/sleep 10) true))))) 253 | 254 | (deftest parallel-do-doto 255 | 256 | (testing "like do, but forms evaluate in parallel." 257 | (is (= nil (p/do))) 258 | (is (= 1 (p/do 1))) 259 | (is (some #{[1 2] [2 1]} 260 | (set (repeatedly 50 261 | #(let [a (ConcurrentLinkedQueue.)] 262 | (p/do (.add a 1) (.add a 2)) (vec a))))))) 263 | 264 | (testing "like doto, but forms evaluated in parallel." 265 | (is (= 1 (p/doto 1))) 266 | (is (= #{1 2} (set (p/doto (ConcurrentLinkedQueue.)
(.add 1) (.add 2))))))) 267 | 268 | (deftest pmap-test 269 | (testing "like pmap but results are not ordered" 270 | (is (= (set (range 1 1001)) (set (p/pmap inc (range 1000))))))) 271 | -------------------------------------------------------------------------------- /test/parallel/merge_sort_test.clj: -------------------------------------------------------------------------------- 1 | (ns parallel.merge-sort-test 2 | (:require [parallel.merge-sort :as msort] 3 | [clojure.core.reducers :as r] 4 | [clojure.test :refer :all]) 5 | (:import [parallel.merge_sort MergeSort] 6 | [java.util.concurrent ForkJoinPool] 7 | [java.util Arrays])) 8 | 9 | (deftest parallel-merge-sort 10 | (testing "with numbers" 11 | (let [n 10000 12 | v (into [] (shuffle (range n))) 13 | a1 (object-array v) 14 | a2 (object-array v)] 15 | (is (= (do (Arrays/parallelSort a2 0 n compare) (into [] a2)) 16 | (do (.join (.submit ^ForkJoinPool @r/pool (MergeSort. a1 0 n 8192 compare))) (into [] a1)))))) 17 | (testing "with strings" 18 | (let [n 10000 19 | v (into [] (map str (shuffle (range n)))) 20 | a1 (object-array v) 21 | a2 (object-array v)] 22 | (is (= (do (Arrays/parallelSort a2 0 n compare) (into [] a2)) 23 | (do (.join (.submit ^ForkJoinPool @r/pool (MergeSort.
a1 0 n 8192 compare))) (into [] a1))))))) 24 | -------------------------------------------------------------------------------- /test/xf_test.clj: -------------------------------------------------------------------------------- 1 | (ns xf-test 2 | (:require [parallel.xf :as xf] 3 | [clojure.test :refer :all])) 4 | 5 | (deftest interleave-test 6 | (testing "interleave with sequence" 7 | (is (= [0 :a 1 :b 2 :c] (sequence (xf/interleave [:a :b :c]) (range 3)))) 8 | (are [x y] (= x y) 9 | (sequence (xf/interleave [1 2]) [3 4]) (interleave [3 4] [1 2]) 10 | (sequence (xf/interleave [1]) [3 4]) (interleave [3 4] [1]) 11 | (sequence (xf/interleave [1 2]) [3]) (interleave [3] [1 2]) 12 | (sequence (xf/interleave []) [3 4]) (interleave [3 4] []) 13 | (sequence (xf/interleave [1 2]) []) (interleave [] [1 2]) 14 | (sequence (xf/interleave []) []) (interleave [] []))) 15 | (testing "interleave with eduction" 16 | (is (= [1 0 2 1 3 2 4 3 5 4 6 5 7 6 8 7 9 8 10 9] 17 | (eduction (map inc) (xf/interleave (range)) (filter number?) (range 10)))))) 18 | 19 | (deftest pmap-test 20 | (testing "pmap as a transducer, similarly to map" 21 | (is (= 250000 22 | (transduce 23 | (comp 24 | (xf/pmap inc) 25 | (filter odd?)) + 26 | (range 1000)))) 27 | (is (= (sequence 28 | (comp 29 | (filter odd?) 30 | (map #(* % %)) 31 | (take 10)) 32 | (range 1000)) 33 | (sequence 34 | (comp 35 | (filter odd?) 36 | (xf/pmap #(* % %)) 37 | (take 10)) 38 | (range 1000)))))) 39 | 40 | (deftest identity-test 41 | (testing "single" 42 | (is (= (range 10) (sequence xf/identity (range 10)))) 43 | (is (= (range 1 11) (sequence (comp (map inc) xf/identity) (range 10)))) 44 | (is (= (range 1 11) (sequence (comp xf/identity (map inc)) (range 10)))) 45 | (is (= [2 4 6 8 10] (sequence (comp (filter odd?) 
xf/identity (map inc)) (range 10))))) 46 | 47 | (testing "multi" 48 | (is (= (map vector (range 10) (range 10)) 49 | (sequence xf/identity (range 10) (range 10)))) 50 | (is (= (range 0 20 2) 51 | (sequence 52 | (comp (map #(+ %1 %2)) 53 | xf/identity) 54 | (range 10) (range 10)))) 55 | (is (= (range 0 20 2) 56 | (sequence 57 | (comp xf/identity 58 | (map #(apply + %))) 59 | (range 10) (range 10)))) 60 | (is (= [1 1 2 2 3 3 4 4 5 5] 61 | (sequence 62 | (comp xf/identity 63 | cat 64 | (map inc)) 65 | (range 5) (range 5)))) 66 | (is (= (range 0 20 2) 67 | (sequence 68 | (comp (map vector) 69 | xf/identity 70 | (map #(apply + %))) 71 | (range 10) (range 10)))) 72 | )) 73 | --------------------------------------------------------------------------------