├── .clj-kondo
└── config.edn
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── build.clj
├── deps.edn
├── docs
└── shootout.md
├── src
├── clj-kondo
│ └── clj-kondo.exports
│ │ └── net.clojars.john
│ │ └── injest
│ │ ├── config.edn
│ │ └── injest
│ │ └── path.clj
└── injest
│ ├── classical.cljc
│ ├── data.cljc
│ ├── impl.cljc
│ ├── path.cljc
│ ├── report.cljc
│ ├── report
│ └── path.cljc
│ ├── state.cljc
│ ├── test.clj
│ ├── test.cljs
│ └── util.cljc
└── test
└── injest
└── path_test.clj
/.clj-kondo/config.edn:
--------------------------------------------------------------------------------
1 | {:config-paths ["../src/clj-kondo/clj-kondo.exports/net.clojars.john/injest"]}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /classes
3 | /checkouts
4 | *.jar
5 | *.class
6 | /.cljs_node_repl
7 | /.calva/output-window/
8 | /.cpcache
9 | /.lein-*
10 | /.lsp/sqlite*.db
11 | /.nrepl-history
12 | /.nrepl-port
13 | /.rebel_readline_history
14 | /.socket-repl-port
15 | .hgignore
16 | .hg/
17 | linux-install-*
18 | .clj-kondo/.cache
19 | .clj-kondo/net.clojars.john
20 | .lsp
21 | out
22 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 | All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/).
3 |
4 | ## [0.1.0-beta.7] - 2022-02-12
5 | - went to beta
6 | - added monitoring
7 |
8 | ## [0.1.0-alpha.24] - 2021-10-23
9 | - improve linter
10 | - update docs
11 |
12 | ## [0.1.0-alpha.23] - 2021-10-22
13 | - enable `cat`
14 | - fix cljs
15 | - update docs
16 |
17 | ## [0.1.0-alpha.22] - 2021-10-09
18 | - add lint defs for clj-kondo
19 | - remove .devcontainer
20 |
21 | ## [0.1.0-alpha.15] - 2021-10-02
22 | - fix bug in `=>>` with small sequences
23 | - more docs and docs/shootout.md
24 | - discussion on comparing `|>>` and `=>>` happened on clojureverse: [Fight Night](https://clojureverse.org/t/parallel-transducing-context-fight-night-pipeline-vs-fold/8208)
25 |
26 | ## [0.1.0-alpha.13] - 2021-09-25
27 | - reverted names back to `=>>` and `|>>`
28 | - fixed bug with `=>>` fold impl, limiting smallest partition to parallelism count
29 |
30 | ## [0.1.0-alpha.12] - 2021-09-22
31 | - Added tests
32 | - Adopted Sean Corfield's deps-new lib template
33 |
34 | ## [0.1.0-alpha.9] - 2021-09-20
35 | ### Changed
36 | - Major code cleanup
37 |
38 | [0.1.0-alpha.9]: https://github.com/johnmn3/injest/compare/v0.1-alpha.8...v0.1-alpha.9
39 | [0.1.0-alpha.13]: https://github.com/johnmn3/injest/compare/v0.1-alpha.9...0.1.0-alpha.13
40 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
4 |
5 | 1. DEFINITIONS
6 |
7 | "Contribution" means:
8 |
9 | a) in the case of the initial Contributor, the initial code and
10 | documentation distributed under this Agreement, and
11 |
12 | b) in the case of each subsequent Contributor:
13 |
14 | i) changes to the Program, and
15 |
16 | ii) additions to the Program;
17 |
18 | where such changes and/or additions to the Program originate from and are
19 | distributed by that particular Contributor. A Contribution 'originates' from
20 | a Contributor if it was added to the Program by such Contributor itself or
21 | anyone acting on such Contributor's behalf. Contributions do not include
22 | additions to the Program which: (i) are separate modules of software
23 | distributed in conjunction with the Program under their own license
24 | agreement, and (ii) are not derivative works of the Program.
25 |
26 | "Contributor" means any person or entity that distributes the Program.
27 |
28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
29 | necessarily infringed by the use or sale of its Contribution alone or when
30 | combined with the Program.
31 |
32 | "Program" means the Contributions distributed in accordance with this
33 | Agreement.
34 |
35 | "Recipient" means anyone who receives the Program under this Agreement,
36 | including all Contributors.
37 |
38 | 2. GRANT OF RIGHTS
39 |
40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
42 | reproduce, prepare derivative works of, publicly display, publicly perform,
43 | distribute and sublicense the Contribution of such Contributor, if any, and
44 | such derivative works, in source code and object code form.
45 |
46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
49 | transfer the Contribution of such Contributor, if any, in source code and
50 | object code form. This patent license shall apply to the combination of the
51 | Contribution and the Program if, at the time the Contribution is added by the
52 | Contributor, such addition of the Contribution causes such combination to be
53 | covered by the Licensed Patents. The patent license shall not apply to any
54 | other combinations which include the Contribution. No hardware per se is
55 | licensed hereunder.
56 |
57 | c) Recipient understands that although each Contributor grants the licenses
58 | to its Contributions set forth herein, no assurances are provided by any
59 | Contributor that the Program does not infringe the patent or other
60 | intellectual property rights of any other entity. Each Contributor disclaims
61 | any liability to Recipient for claims brought by any other entity based on
62 | infringement of intellectual property rights or otherwise. As a condition to
63 | exercising the rights and licenses granted hereunder, each Recipient hereby
64 | assumes sole responsibility to secure any other intellectual property rights
65 | needed, if any. For example, if a third party patent license is required to
66 | allow Recipient to distribute the Program, it is Recipient's responsibility
67 | to acquire that license before distributing the Program.
68 |
69 | d) Each Contributor represents that to its knowledge it has sufficient
70 | copyright rights in its Contribution, if any, to grant the copyright license
71 | set forth in this Agreement.
72 |
73 | 3. REQUIREMENTS
74 |
75 | A Contributor may choose to distribute the Program in object code form under
76 | its own license agreement, provided that:
77 |
78 | a) it complies with the terms and conditions of this Agreement; and
79 |
80 | b) its license agreement:
81 |
82 | i) effectively disclaims on behalf of all Contributors all warranties and
83 | conditions, express and implied, including warranties or conditions of title
84 | and non-infringement, and implied warranties or conditions of merchantability
85 | and fitness for a particular purpose;
86 |
87 | ii) effectively excludes on behalf of all Contributors all liability for
88 | damages, including direct, indirect, special, incidental and consequential
89 | damages, such as lost profits;
90 |
91 | iii) states that any provisions which differ from this Agreement are offered
92 | by that Contributor alone and not by any other party; and
93 |
94 | iv) states that source code for the Program is available from such
95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
96 | or through a medium customarily used for software exchange.
97 |
98 | When the Program is made available in source code form:
99 |
100 | a) it must be made available under this Agreement; and
101 |
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 |
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 |
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 |
111 | 4. COMMERCIAL DISTRIBUTION
112 |
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering. The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor to control, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 |
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 |
144 | 5. NO WARRANTY
145 |
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement , including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 |
157 | 6. DISCLAIMER OF LIABILITY
158 |
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 |
168 | 7. GENERAL
169 |
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 |
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 |
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 |
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 |
210 | This Agreement is governed by the laws of the State of New York and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # `injest`: `+>` `+>>` `x>>` `=>>`
2 |
3 | [Clojars Project](https://clojars.org/net.clojars.john/injest)
4 | [cljdoc](https://cljdoc.org/d/net.clojars.john/injest)
5 | [Zulip chat](https://clojurians.zulipchat.com/#streams/302003/injest)
6 |
7 | Clojure's [threading macros](https://clojure.org/guides/threading_macros) (the `->` and `->>` [thrushes](http://blog.fogus.me/2010/09/28/thrush-in-clojure-redux/)) are great for navigating into data and transforming sequences. `injest`'s [_path thread_](#path-threads) macros `+>` and `+>>` are just like `->` and `->>` but with expanded path navigating abilities similar to `get-in`.
8 |
9 | [Transducers](https://clojure.org/reference/transducers) are great for performing sequence transformations efficiently. `x>>` combines the efficiency of transducers with the better ergonomics of `+>>`. Thread performance can be further extended by automatically parallelizing work with `=>>`.
10 |
11 | `injest` macros achieve this by scanning forms for transducers and `comp`ing them together into a function that either `sequence`s or parallel `fold`s the values flowing in the thread through the transducers.
12 |
13 | ## Getting Started
14 | ### deps.edn
15 | Place the following in the `:deps` map of your `deps.edn` file:
16 | ```clojure
17 | ...
18 | net.clojars.john/injest {:mvn/version "0.1.0-beta.8"}
19 | ...
20 | ```
21 | ### clj-kondo
22 | Make clj-kondo/Clojure-LSP aware of `injest` by adding `"net.clojars.john/injest"` to the `:config-paths` vector of your `.clj-kondo/config.edn` file:
23 | ```clojure
24 | {:config-paths ["net.clojars.john/injest"]}
25 | ```
26 | This will automatically import `injest`'s lint definitions in Calva and other IDEs that leverage clj-kondo and/or Clojure-LSP.
27 |
28 | ### Quickstart
29 | To try it in a repl right now with `criterium` and `net.cgrand.xforms`, drop this in your shell:
30 | ```clojure
31 | clj -Sdeps \
32 | '{:deps
33 | {net.clojars.john/injest {:mvn/version "0.1.0-beta.8"}
34 | criterium/criterium {:mvn/version "0.4.6"}
35 | net.cgrand/xforms {:mvn/version "0.19.2"}}}'
36 | ```
37 | ### Requiring
38 | Then require the `injest` macros in your project:
39 | ```clojure
40 | (ns ...
41 | (:require [injest.path :refer [+> +>> x>> =>>]]
42 | ...
43 | ```
44 | To just use `x>>` or `=>>` with the classical thread behavior, without the additional [_path thread_](#path-threads) semantics, you can require in the `injest.classical` namespace instead of the `injest.path` namespace:
45 | ```clojure
46 | (ns ...
47 | (:require [injest.classical :refer [x>> =>>]]
48 | ...
49 | ```
50 | Having these two `:require` options allows individuals and organizations to adopt a la carte these orthogonal value propositions of _improved performance_ and _improved navigation_.
51 | # Path Threads
52 | `injest.path` allows for more intuitive path navigation, like you're used to with the `(-> m :a :b :c)` idiom. We refer to these as _path threads_.
53 |
54 | Ergonomically, path threads provide a semantic superset of the behaviors found in `->` and `->>`. In other words, there is generally nothing you can do with `->` that you can't do with `+>`. All the thread macros in `injest.path` have these path thread semantics.
55 | ## As a replacement for `get-in`, `get` and `nth`
56 | In path threads, naked integers and strings become lookups on the value being passed in, making those tokens useful again in threads. You can index into sequences with integers, like you would with `nth`, and replace `get`/`get-in` for most cases involving access in heterogeneous map nestings:
57 | ```clojure
58 | (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c :res}]}])}]
59 | (+> m 1 2 "b" 2 :c name)) ;=> "res"
60 | ```
61 | Here, we're looking up `1` in the map, then getting the third element of the sequence returned, then looking up `"b"` in the returned map, then getting the third element of the returned vector, then looking up `:c` in the returned map, and then finally calling name on the returned keyword value.
62 |
63 | In the above form, you could replace `+>` with either `+>>`, `x>>` or `=>>`, and you will still get the same result. `+>>` is simply the thread-last version of `+>`, while `x>>` and `=>>` are the transducing and parallel versions of `+>>`, respectively.
64 | ## Lambda wrapping
65 | Path threads allow you to thread values through anonymous functions, like `#(- 10 % 1)` or `(fn [x] (- 10 x 1))`, without having to wrap them in an extra enclosing set of parentheses:
66 | ```clojure
67 | (x> 10 range rest 2 #(- 10 % 1)) ;=> 6
68 | ```
69 | Or, extending our prior example:
70 | ```clojure
71 | (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c :bob}]}])}]
72 | (x>> m 1 2 "b" 2 :c name #(println "hi " % "!"))) ;=> "hi bob!"
73 | ```
74 | This has the added benefit of conveying to the reader that the author intends for the anonymous function to only take one parameter. In the classical thread syntax, the reader would have to scan all the way to the end of `(#(... ` in order to know if an extra parameter is being passed in. This also prevents people from creating unmaintainable abstractions involving the threading of values into a literal lambda definition - a [common](https://stackoverflow.com/questions/7838326/function-call-in-threading-macro) [source](https://stackoverflow.com/questions/25317235/thread-first-array-map-literal-to-anonymous-function-in-clojure) [of](https://stackoverflow.com/questions/29897115/clojure-threading-first-macro-with-math-pow-or-any-other-multiple-args-functi) [errors](https://stackoverflow.com/questions/60027298/clojure-custom-function-for-threading-macro).
75 | ## Backwards compatibility
76 | `+>` and `+>>` have the same laziness semantics as `->` and `->>`. So, if you find yourself wanting to migrate a _path thread_ away from a transducer/parallel context, back to the more lazy semantics, but you want to keep the path navigation semantics, you can simply replace the `x>>` or `=>>` macros with the `+>>` macro we required in above. Path navigating will continue to work:
77 | ```clojure
78 | (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c :bob}]}])}]
79 | (+>> m 1 2 "b" 2 :c name #(println "hi " % "!"))) ;=> "hi bob!"
80 | ```
81 | You can also just use `+>` and `+>>` on their own, without the transducifying macros, if you only want the more convenient ergonomics.
82 |
83 | As stated above, you can also require `x>>` and `=>>` in from `injest.classical` and, in the event you want to revert back to `->>`, you will be able to do that knowing that no one has added any _path thread_ semantics to the thread that would also need to be converted to the classical syntax.
84 | # `x>>` Auto Transducification
85 | Why? Well, for one, speed. Observe:
86 | ```clojure
87 | (->> (range 10000000)
88 | (map inc)
89 | (filter odd?)
90 | (mapcat #(do [% (dec %)]))
91 | (partition-by #(= 0 (mod % 5)))
92 | (map (partial apply +))
93 | ;; (mapv dec)
94 | (map (partial + 10))
95 | (map #(do {:temp-value %}))
96 | (map :temp-value)
97 | (filter even?)
98 | (apply +)
99 | time)
100 | ```
101 | Returns:
102 | ```clojure
103 | "Elapsed time: 8275.319295 msecs"
104 | 5000054999994
105 | ```
106 | Whereas:
107 | ```clojure
108 | (x>> (range 10000000)
109 | (map inc)
110 | (filter odd?)
111 | (mapcat #(do [% (dec %)]))
112 | (partition-by #(= 0 (mod % 5)))
113 | (map (partial apply +))
114 | ;; (mapv dec)
115 | (map (partial + 10))
116 | (map #(do {:temp-value %}))
117 | (map :temp-value)
118 | (filter even?)
119 | (apply +)
120 | time)
121 | ```
122 | Returns:
123 | ```clojure
124 | "Elapsed time: 2913.851103 msecs"
125 | 5000054999994
126 | ```
127 |
128 | Two to three times the speed with basically the same code. The more transducers you can get lined up contiguously, the less boxing you’ll have in your thread.
129 |
130 | > Note: These times reflect the execution environment provided by Github's browser-based vscode runtime. My local box performs much better and yours likely will too.
131 |
132 | Let’s uncomment the `(mapv dec)` that is currently commented out in both the threads above. Because `mapv` is not a transducer, items get boxed halfway through our thread. As a result our performance degrades slightly for `x>>`.
133 |
134 | First, let's see it with `->>`:
135 | ```clojure
136 | (->> (range 10000000)
137 | (map inc)
138 | (filter odd?)
139 | (mapcat #(do [% (dec %)]))
140 | (partition-by #(= 0 (mod % 5)))
141 | (map (partial apply +))
142 | (mapv dec)
143 | (map (partial + 10))
144 | (map #(do {:temp-value %}))
145 | (map :temp-value)
146 | (filter even?)
147 | (apply +)
148 | time)
149 | "Elapsed time: 6947.00928 msecs"
150 | 44999977000016
151 | ```
152 | Hmm, `->>` actually goes faster now, perhaps due to `mapv` removing some laziness. The more lazy semantics are less predictable in that way.
153 |
154 | But now, for `x>>`:
155 | ```clojure
156 | (x>> (range 10000000)
157 | (map inc)
158 | (filter odd?)
159 | (mapcat #(do [% (dec %)]))
160 | (partition-by #(= 0 (mod % 5)))
161 | (map (partial apply +))
162 | (mapv dec)
163 | (map (partial + 10))
164 | (map #(do {:temp-value %}))
165 | (map :temp-value)
166 | (filter even?)
167 | (apply +)
168 | time)
169 | "Elapsed time: 3706.701192 msecs"
170 | 44999977000016
171 | ```
172 | So we lost some speed due to the boxing, but we’re still doing a worthy bit better than the default thread macro. So keep in mind, if you want to maximize performance, try to align your transducers contiguously.
173 |
174 | > Note: In addition to improved speed, transducers also provide improved memory efficiency over finite sequences. So `x>>` may lower your memory usage as well.
175 |
176 | ### Available Transducers
177 | | These are the core functions that are available to use as transducers in a `x>>` thread-last: |
178 | | --- |
179 | | `take-nth`, `disj!`, `dissoc!`, `distinct`, `keep-indexed`, `random-sample`, `map-indexed`, `map`, `replace`, `drop`, `remove`, `cat`, `partition-all`, `interpose`, `mapcat`, `dedupe`, `drop-while`, `partition-by`, `take-while`, `take`, `keep`, `filter`, `halt-when` |
180 |
181 | ## `=>>` Auto Parallelization
182 | `injest` provides a parallel version of `x>>` as well. `=>>` leverages Clojure's parallel [`fold`](https://clojuredocs.org/clojure.core.reducers/fold) [reducer](https://clojure.org/reference/reducers#_using_reducers) in order to execute stateless transducers over a [Fork/Join pool](http://gee.cs.oswego.edu/dl/papers/fj.pdf). Remaining stateful transducers are `comp`ed and threaded just like `x>>`.
183 |
184 | It doesn't work well for small workloads though, so for demonstration purposes let's augment our above threads:
185 | ```clojure
186 | (require '[clojure.edn :as edn])
187 |
188 | (defn work-1000 [work-fn]
189 | (range (last (repeatedly 1000 work-fn))))
190 |
191 | (defn ->>work [input]
192 | (work-1000
193 | (fn []
194 | (->> input
195 | (map inc)
196 | (filter odd?)
197 | (mapcat #(do [% (dec %)]))
198 | (partition-by #(= 0 (mod % 5)))
199 | (map (partial apply +))
200 | (map (partial + 10))
201 | (map #(do {:temp-value %}))
202 | (map :temp-value)
203 | (filter even?)
204 | (apply +)
205 | str
206 | (take 3)
207 | (apply str)
208 | edn/read-string))))
209 |
210 | (defn x>>work [input]
211 | (work-1000
212 | (fn []
213 | (x>> input
214 | (map inc)
215 | (filter odd?)
216 | (mapcat #(do [% (dec %)]))
217 | (partition-by #(= 0 (mod % 5)))
218 | (map (partial apply +))
219 | (map (partial + 10))
220 | (map #(do {:temp-value %}))
221 | (map :temp-value)
222 | (filter even?)
223 | (apply +)
224 | str
225 | (take 3)
226 | (apply str)
227 | edn/read-string))))
228 | ```
229 | Same deal as before but we're just doing a little extra work in our thread, repeating it a thousand times and then preparing the results for handoff to the next stage of execution.
230 |
231 | Now let's run the classical `->>` macro:
232 | ```clojure
233 | (->> (range 100)
234 | (repeat 10)
235 | (map ->>work)
236 | (map ->>work)
237 | (map ->>work)
238 | (map ->>work)
239 | (map ->>work)
240 | (map ->>work)
241 | last
242 | count
243 | time)
244 | ; "Elapsed time: 18309.397391 msecs"
245 | ;=> 234
246 | ```
247 | Just over 18 seconds. Now let's try the `x>>` version:
248 | ```clojure
249 | (x>> (range 100)
250 | (repeat 10)
251 | (map x>>work)
252 | (map x>>work)
253 | (map x>>work)
254 | (map x>>work)
255 | (map x>>work)
256 | (map x>>work)
257 | last
258 | count
259 | time)
260 | ; "Elapsed time: 6252.224178 msecs"
261 | ;=> 234
262 | ```
263 | Just over 6 seconds. Much better. Now let's try the parallel `=>>` version:
264 | ```clojure
265 | (=>> (range 100)
266 | (repeat 10)
267 | (map x>>work)
268 | (map x>>work)
269 | (map x>>work)
270 | (map x>>work)
271 | (map x>>work)
272 | (map x>>work)
273 | last
274 | count
275 | time)
276 | ; "Elapsed time: 3142.75057 msecs"
277 | ;=> 234
278 | ```
279 | Just over 3 seconds. Much, much better!
280 |
281 | Again, in local dev your times may look a bit different. On my Macbook Pro here, those times are `11812.604504`, `5096.267348` and `933.940569` msecs. So, in other words, `=>>` can sometimes be 5 times faster than `x>>` and 10 times faster than `->>`, depending on the shape of your workloads and the number of cores you have available.
282 |
283 | > There is also a parallel thread macro (`|>>`) that uses `core.async/pipeline` for parallelization. It's still available for folks interested in improving it, but is not recommended for general use. `=>>` performs better in most cases. A soon-to-be-updated analysis ([shootout.md](https://github.com/johnmn3/injest/blob/main/docs/shootout.md)) compares the differences between `|>>` and `=>>`.
284 |
285 | ### Available Parallel Transducers
286 | | These are the core functions that are available to use as parallel transducers in a `=>>` thread-last: |
287 | | --- |
288 | | `dedupe`, `disj!`, `dissoc!`, `filter`, `keep`, `map`, `random-sample`, `remove`, `replace`, `take-while`, `halt-when`, `mapcat`, `cat` |
289 |
290 | ## Clojurescript
291 | ~In Clojurescript we don't yet have parallel thread macro implementations but for `x>>`~
292 |
293 | > Update: The parallel (`=>>`) thread macro has been implemented in [`cljs-thread`](https://github.com/johnmn3/cljs-thread?tab=readme-ov-file). We'll get into the Clojurescript version of `=>>` below, but first let's look at the single threaded `x>>`.
294 |
295 | The performance gains for `x>>` are even more pronounced than in Clojure. On my macbook pro, with an initial value of `(range 1000000)` in the above thread from our first example, the default threading macro `->>` produces:
296 | ```clojure
297 | (->> (range 1000000)
298 | (map inc)
299 | (filter odd?)
300 | (mapcat #(do [% (dec %)]))
301 | (partition-by #(= 0 (mod % 5)))
302 | (map (partial apply +))
303 | (map (partial + 10))
304 | (map #(do {:temp-value %}))
305 | (map :temp-value)
306 | (filter even?)
307 | (apply +)
308 | time)
309 | "Elapsed time: 3523.186678 msecs"
310 | 50005499994
311 | ```
312 | While the `x>>` version produces:
313 | ```clojure
314 | (x>> (range 1000000)
315 | (map inc)
316 | (filter odd?)
317 | (mapcat #(do [% (dec %)]))
318 | (partition-by #(= 0 (mod % 5)))
319 | (map (partial apply +))
320 | (map (partial + 10))
321 | (map #(do {:temp-value %}))
322 | (map :temp-value)
323 | (filter even?)
324 | (apply +)
325 | time)
326 | "Elapsed time: 574.145888 msecs"
327 | 50005499994
328 | ```
329 | That's a _six times_ speedup!
330 |
331 | Perhaps that speedup would not be so large if we tested both versions in _advanced_ compile mode. Then the difference in speed might come closer to the Clojure version. In any case, this is some very low-hanging performance fruit.
332 | ### `=>>` in Clojurescript
333 | So, suppose you have some non-trivial work:
334 | ```clojure
335 | (defn flip [n]
336 | (apply comp (take n (cycle [inc dec]))))
337 | ```
338 | On a single thread, in Chrome, this takes between 16 and 20 seconds (on this computer):
339 | ```clojure
340 | (->> (range)
341 | (map (flip 100))
342 | (map (flip 100))
343 | (map (flip 100))
344 | (take 1000000)
345 | (apply +)
346 | time)
347 | ```
348 | On Safari and Firefox, that will take between 60 and 70 seconds.
349 |
350 | Let's try it with `=>>`:
351 | ```clojure
352 | (=>> (range)
353 | (map (flip 100))
354 | (map (flip 100))
355 | (map (flip 100))
356 | (take 1000000)
357 | (apply +)
358 | time)
359 | ```
360 | On Chrome, that'll take only about 8 to 10 seconds. On Safari it takes about 30 seconds and in Firefox it takes around 20 seconds.
361 |
362 | So in Chrome and Safari, you can roughly double your speed and in Firefox you can go three or more times faster.
363 |
364 | By changing only one character, we can double or triple our performance, all while leaving the main thread free to render at 60 frames per second. Notice also how it's lazy :)
365 |
366 | See the [`cljs-thread`](https://github.com/johnmn3/cljs-thread) repo to learn more about how to set things up with the web workers.
367 |
368 | > Note: On the main/screen thread, `=>>` returns a promise. `=>>` defaults to a chunk size of 512.
369 | ## Extending `injest`
370 | The `injest.state` namespace provides the `reg-xf!` and `reg-pxf!` macros that can take one or more transducers. Only stateless transducers (or, more precisely, transducers that can be used safely within a parallel `fold` or `pipeline` context) should be registered with `reg-pxf!`. `injest`'s thread macros will then include those functions when deciding which forms should be treated as transducers. You should only need to call `reg-xf!` in one of your initially loaded namespaces.
371 | ```clojure
372 | (require '[injest.state :as i.s])
373 | (require '[net.cgrand.xforms :as x])
374 |
375 | (i.s/reg-xf! x/reduce)
376 |
377 | (x>> (range 10000000)
378 | (map inc)
379 | (filter odd?)
380 | (mapcat #(do [% (dec %)]))
381 | (partition-by #(= 0 (mod % 5)))
382 | (map (partial apply +))
383 | (map (partial + 10))
384 | (map #(do {:temp-value %}))
385 | (map :temp-value)
386 | (filter even?)
387 | (x/reduce +)
388 | first
389 | time)
390 | ```
391 | Even better!
392 | ```clojure
393 | "Elapsed time: 2889.771067 msecs"
394 | 5000054999994
395 | ```
396 | In Clojurescript, you can add another Clojure (`*.clj`) namespace to your project and register there with the `regxf!` function and explicitly namespaced symbols.
397 | ```clojure
398 | (i.s/regxf! 'my.cljs.xforms.library/sliding-window)
399 | ```
400 | Or, if a transducer library like `net.cgrand.xforms` exports the same namespaces and names for both Clojure and Clojurescript, you can just `(i.s/reg-xf! x/reduce)` in a Clojure namespace in your project and then it will be available to the `x>>`/`=>>` threads in both your Clojure and Clojurescript namespaces.
401 |
402 | ## Reporting Instrumentation
403 | You can optionally instrument the `x>>` and `=>>` macros for profiling code in a deployed runtime environment like so:
404 | ```clojure
405 | (ns ...
406 | (:require
407 | [injest.report :as r]
408 | [injest.report.path :as injest :refer [+> +>> x>> =>>]]))
409 | ```
410 | Then in some core namespace, just register a report handler and then turn it on:
411 | ```clojure
412 | (r/add-report-tap! println 60) ;; <- or tap>, log/info, etc
413 | (r/report! true)
414 | ```
415 | If you don't pass `add-report-tap!` a second argument (the reporting interval in seconds), it defaults to 10 seconds. The above expressions will handle reporting events with the `println` function, called once every 60 seconds.
416 |
417 | Then, in any namespace, be sure to require the macros from the `injest.report.path` namespace:
418 | ```clojure
419 | (ns ...
420 | (:require
421 | [injest.report.path :as injest :refer [+> +>> x>> =>>]]))
422 | ```
423 | Then you can use `x>>` and `=>>` like you normally would, but you will see a report on all instances in the repl:
424 | ```clojure
425 | {:namespace "injest.test"
426 | :line 15
427 | :column 5
428 | :x>> "x>> is 1.08 times faster than =>>"
429 | :=>> "=>> is 2.67 times faster than +>>"}
430 |
431 | {:namespace "injest.test"
432 | :line 38
433 | :column 3
434 | :+>> "+>> is 2.5 times faster than x>>"}
435 |
436 | {:namespace "injest.test"
437 | :line 44
438 | :column 5
439 | :=>> "=>> is 1.9 times faster than x>>"
440 | :x>> "x>> is 1.4 times faster than +>>"}
441 |
442 | ```
443 | As you can see, each report result is a map that starts with the `:namespace`, `:line` and `:column` of the instrumented thread, followed by the comparison results. For the `x>>` variant, only `x>>` and `+>>` are compared. When `=>>` is used, all three of `=>>`, `x>>` and `+>>` are compared.
444 |
445 | This allows you to leverage the instrumented versions of the macros in order to assess which one is most appropriate for the runtime load in your actually running application.
446 | # Caveats
447 | It should be noted as well:
448 |
449 | * Because transducers have different laziness semantics, you can't be as liberal with your consumption, so test on live data before using this as a drop-in replacement for the default thread macros.
450 |
451 | If you have any problems, feature requests or ideas, feel free to drop a note in the issues or discuss it in the clojureverse [thread](https://clojureverse.org/t/x-x-auto-transducifying-thread-macros/8122/9).
452 | # References
453 | Some other performance-related investigations you may be interested in:
454 | * [cgrand/xforms](https://github.com/cgrand/xforms) - More transducers and reducing functions for Clojure(script)!
455 | * [clj-fast](https://github.com/bsless/clj-fast) - optimized core functions
456 | * [structural](https://github.com/joinr/structural) - efficient destructuring
457 |
458 | Inspiration for the lambda wrapping came from this ask.clojure request: [should-the-threading-macros-handle-lambdas](https://ask.clojure.org/index.php/9023/should-the-threading-macros-handle-lambdas)
459 |
460 | Inspiration for the `fold` implementation of `=>>` came from [reborg/parallel](https://github.com/reborg/parallel#ptransduce)'s `p/transduce`
461 |
462 | # Get Involved
463 |
464 | Want to implement the `somex>>` macro? Just copy how I did it and feel free to submit a PR. If you see a deficiency, file an issue here or swing by and join the discussion on the [zulip channel](https://clojurians.zulipchat.com/#streams/302003/injest).
465 |
466 | # License
467 |
468 | Distributed under the Eclipse Public License either version 1.0 or (at your option) any later version.
469 |
--------------------------------------------------------------------------------
/build.clj:
--------------------------------------------------------------------------------
1 | (ns build
2 | (:refer-clojure :exclude [test])
3 | (:require [org.corfield.build :as bb]))
4 |
5 | (def lib 'net.clojars.john/injest)
6 | (def version "0.1.0-beta.8")
7 |
8 | ;; clojure -T:build ci
9 | ;; clojure -T:build deploy
10 |
11 | (def url "https://github.com/johnmn3/injest")
12 |
13 | (def scm {:url url
14 | :connection "scm:git:git://github.com/johnmn3/injest.git"
15 | :developerConnection "scm:git:ssh://git@github.com/johnmn3/injest.git"
16 | :tag version})
17 |
18 | (defn test "Run the tests." [opts]
19 | (bb/run-tests opts))
20 |
21 | (defn ci "Run the CI pipeline of tests (and build the JAR)." [opts]
22 | (-> opts
23 | (assoc :lib lib :version version :scm scm)
24 | (bb/run-tests)
25 | (bb/clean)
26 | (bb/jar)))
27 |
28 | (defn deploy "Deploy the JAR to Clojars." [opts]
29 | (-> opts
30 | (assoc :lib lib :version version)
31 | (bb/deploy)))
32 |
--------------------------------------------------------------------------------
/deps.edn:
--------------------------------------------------------------------------------
1 | {:paths ["src"]
2 | :deps {org.clojure/clojure {:mvn/version "1.10.3"}
3 | org.clojure/clojurescript {:mvn/version "1.10.758"}
4 | org.clojure/core.async {:mvn/version "1.3.618"}}
5 | :aliases
6 | {:test
7 | {:extra-paths ["test"]
8 | :extra-deps {org.clojure/test.check {:mvn/version "1.1.0"}
9 | io.github.cognitect-labs/test-runner
10 | {:git/tag "v0.5.0" :git/sha "48c3c67"}}}
11 | :build {:deps {io.github.seancorfield/build-clj
12 | {:git/tag "v0.3.1" :git/sha "996ddfa"}}
13 | :ns-default build}}}
14 |
--------------------------------------------------------------------------------
/docs/shootout.md:
--------------------------------------------------------------------------------
1 | ## Parallel Transducing Context Shootout: `|>>` vs `=>>`
2 |
3 | Welcome to the parallel transducer context shootout!
4 |
5 | Here you'll find comparative benchmarks between `|>>` _('pipeline-thread-last')_ and `=>>` _('fold-thread-last')_.
6 |
7 | You can learn more about these `injest` macros in the [readme](https://github.com/johnmn3/injest/blob/main/README.md).
8 |
9 | In this comparative analysis, we explore a few different scenarios on both a 4 core machine and a 16 core machine.
10 |
11 | First, let's define some testing functions:
12 |
13 | ```clojure
14 | (require '[clojure.edn :as edn])
15 |
16 | (defn work-1000 [work-fn]
17 | (range (last (repeatedly 1000 work-fn))))
18 |
19 | (defn x>>work [input]
20 | (work-1000
21 | (fn []
22 | (x>> input
23 | (map inc)
24 | (filter odd?)
25 | (mapcat #(do [% (dec %)]))
26 | (partition-by #(= 0 (mod % 5)))
27 | (map (partial apply +))
28 | (map (partial + 10))
29 | (map #(do {:temp-value %}))
30 | (map :temp-value)
31 | (filter even?)
32 | (apply +)
33 | str
34 | (take 3)
35 | (apply str)
36 | edn/read-string))))
37 |
38 | ;; and one extra macro for returning a value for the number of seconds passed:
39 |
40 | (defmacro time-val [& body]
41 | `(x>> (time ~@body)
42 | with-out-str
43 | (drop 15)
44 | reverse
45 | (drop 8)
46 | reverse
47 | (apply str)
48 | edn/read-string
49 | (* 0.001)))
50 | ```
51 |
52 | You may recognize those test functions from the [readme](https://github.com/johnmn3/injest/blob/main/README.md). Now let's exercise them:
53 |
54 | ```clojure
55 | (dotimes [i 50]
56 | (println
57 | (=>> (range 100)
58 | (repeat i)
59 | (map x>>work)
60 | (map x>>work)
61 | (map x>>work)
62 | (map x>>work)
63 | (map x>>work)
64 | (map x>>work)
65 | time-val)))
66 | ;; and
67 | (dotimes [i 50]
68 | (println
69 | (|>> (range 100)
70 | (repeat i)
71 | (map x>>work)
72 | (map x>>work)
73 | (map x>>work)
74 | (map x>>work)
75 | (map x>>work)
76 | (map x>>work)
77 | time-val)))
78 | ```
79 |
80 | With 4 cores:
81 |
82 |
83 |
84 | With 16 cores:
85 |
86 |
87 |
88 | In the above example, all we're doing is increasing sequence size while keeping the workload the same, so `|>>` and `=>>` are tracking pretty closely to one another.
89 |
90 | If we want to measure different workloads, we'll need to get a little fancier with our testing functions.
91 |
92 | ```clojure
93 | (defn work [n]
94 | (time-val
95 | (->> (range n)
96 | (mapv (fn [_]
97 | (x>> (range n)
98 | (map inc)
99 | (filter odd?)
100 | (mapcat #(do [% (dec %)]))
101 | (partition-by #(= 0 (mod % 5)))
102 | (map (partial apply +))
103 | (map (partial + 10))
104 | (map #(do {:temp-value %}))
105 | (map :temp-value)
106 | (filter even?)
107 | (apply +)))))))
108 |
109 | (defn run-|>> [l w]
110 | (|>> (range l)
111 | (map (fn [_] (work w)))
112 | (map (fn [_] (work w)))
113 | (map (fn [_] (work w)))
114 | (map (fn [_] (work w)))
115 | (map (fn [_] (work w)))
116 | (map (fn [_] (work w)))
117 | (map (fn [_] (work w)))
118 | (map (fn [_] (work w)))
119 | (map (fn [_] (work w)))
120 | (map (fn [_] (work w)))
121 | (map (fn [_] (work w)))
122 | (map (fn [_] (work w)))
123 | (map (fn [_] (work w)))
124 | (map (fn [_] (work w)))
125 | (map (fn [_] (work w)))
126 | (map (fn [_] (work w)))))
127 |
128 | (defn run-=>> [l w]
129 | (=>> (range l)
130 | (map (fn [_] (work w)))
131 | (map (fn [_] (work w)))
132 | (map (fn [_] (work w)))
133 | (map (fn [_] (work w)))
134 | (map (fn [_] (work w)))
135 | (map (fn [_] (work w)))
136 | (map (fn [_] (work w)))
137 | (map (fn [_] (work w)))
138 | (map (fn [_] (work w)))
139 | (map (fn [_] (work w)))
140 | (map (fn [_] (work w)))
141 | (map (fn [_] (work w)))
142 | (map (fn [_] (work w)))
143 | (map (fn [_] (work w)))
144 | (map (fn [_] (work w)))
145 | (map (fn [_] (work w)))))
146 | ```
147 | We start with a `work` function that becomes increasingly more expensive as `n` rises. We then define run functions `run-|>>` and `run-=>>` that take a sequence length `l` and a work width `w`. Each run function exercises the work function 16 times. This way, we can get a sense of how sequence size vs workload size affects performance characteristics.
148 |
149 | Let's look at a "medium" sized work load:
150 | ```clojure
151 | (dotimes [n 10]
152 | (println (time-val (last (run-|>> 100 (* n 100))))))
153 | ;; and
154 | (dotimes [n 10]
155 | (println (time-val (last (run-=>> 100 (* n 100))))))
156 | ```
157 | Here, we're saying `100 (* n 100)` is a sequence 100 elements long, with a work width that increases by 100 on each step of `n`. Let's see how they compare.
158 |
159 | On 4 cores:
160 |
161 |
162 |
163 | On 16 cores:
164 |
165 |
166 |
167 | In this example, the `|>>` pipeline thread does a little better in the high core count scenario. In the low core count scenario, they're almost identical.
168 |
169 | Now let's try a small, constant size workload with an increasingly larger sequence:
170 | ```clojure
171 | (dotimes [n 10]
172 | (println (time-val (last (run-|>> (* n 1000) 10)))))
173 | ;; and
174 | (dotimes [n 10]
175 | (println (time-val (last (run-=>> (* n 1000) 10)))))
176 | ```
177 |
178 | On 4 cores:
179 |
180 |
181 |
182 | On 16 cores:
183 |
184 |
185 |
186 | Much to my surprise, `|>>` won out with this particular workload on both 4 and 16 cores.
187 |
188 | How far can we take that? Let's try it with a really big sequence and a really small workload:
189 | ```clojure
190 | (dotimes [n 10]
191 | (println (time-val (last (run-|>> (* n 10000) 1)))))
192 | ;; and
193 | (dotimes [n 10]
194 | (println (time-val (last (run-=>> (* n 10000) 1)))))
195 | ```
196 |
197 | On 4 cores:
198 |
199 |
200 |
201 | On 16 cores:
202 |
203 |
204 |
205 | On both core counts, `=>>` wins out slightly. Here, we can see that `|>>` starts to fall behind when threads are not optimized for heavy workloads.
206 |
207 | What about the opposite scenario? Let's try a small, constant size sequence with an increasingly, extremely large workload per item:
208 |
209 | ```clojure
210 | (dotimes [n 4]
211 | (println (time-val (last (run-|>> 10 (* n 1000))))))
212 | ;; and
213 | (dotimes [n 4]
214 | (println (time-val (last (run-=>> 10 (* n 1000))))))
215 | ```
216 | We're only doing 4 runs here because the results take a while.
217 |
218 | On 4 cores:
219 |
220 |
221 |
222 | On 16 cores:
223 |
224 |
225 |
226 | As you can see, this is where `|>>` really shines: With super heavy work and a very high core count, `pipeline` starts to show significant efficiencies.
227 |
228 | Given these characteristics, one might ask, _"Why not always use `|>>` then?"_
229 |
230 | Unfortunately, `|>>` falls over with extremely large sequences with small, heterogeneous workloads. `injest` is designed to allow users to mix and match threads with transformation functions that are fully lazy, transducable and/or parallelizable. Under the hood, this sometimes involves passing some results to a `sequence` operation, then to a `pipeline` operation, then to a lazy `(apply foo)` operation, etc. I believe that in these heterogeneous workload scenarios, the thread communication for `|>>` is causing a traffic jam. Still under investigation though.
231 |
232 | For example, let's look at this test scenario:
233 | ```clojure
234 | (dotimes [n 10]
235 | (|>> (range (* n 100000))
236 | (map inc)
237 | (filter odd?)
238 | (mapcat #(do [% (dec %)]))
239 | (partition-by #(= 0 (mod % 5)))
240 | (map (partial apply +))
241 | (map (partial + 10))
242 | (map #(do {:temp-value %}))
243 | (map :temp-value)
244 | (filter even?)
245 | (apply +)
246 | time-val
247 | println))
248 | ;; and
249 | (dotimes [n 10]
250 | (=>> (range (* n 100000))
251 | (map inc)
252 | (filter odd?)
253 | (mapcat #(do [% (dec %)]))
254 | (partition-by #(= 0 (mod % 5)))
255 | (map (partial apply +))
256 | (map (partial + 10))
257 | (map #(do {:temp-value %}))
258 | (map :temp-value)
259 | (filter even?)
260 | (apply +)
261 | time-val
262 | println))
263 | ```
264 | On 4 cores:
265 |
266 | > todo
267 |
268 | On 16 cores:
269 |
270 |
271 |
272 | And that issue only compounds as the sequence size rises.
273 |
274 | So, let's be honest: at least half of the sequence transformation threads that we usually build with `->>` in Clojure are _not_ homogeneous, heavily loaded threads. So, if a given thread is only _just starting_ to seem like it could benefit from parallelization, then there's a good chance that `|>>` will be a footgun for you, while `=>>` may pay dividends - so in general I recommend reaching for `=>>` first. However, once your threads' workloads start to become _embarrassingly parallel_, then it makes sense to try out `|>>`, to see if it can get you even farther - especially with more available cores.
275 |
276 | I know, you're wondering, what do these tests look like against the single threaded transducing `x>>` and classical, lazy `->>` macros?
277 |
278 | Let's add a test case for that:
279 | ```clojure
280 | (defn lazy-work [n]
281 | (time-val
282 | (->> (range n)
283 | (mapv (fn [_]
284 | (->> (range n)
285 | (map inc)
286 | (filter odd?)
287 | (mapcat #(do [% (dec %)]))
288 | (partition-by #(= 0 (mod % 5)))
289 | (map (partial apply +))
290 | (map (partial + 10))
291 | (map #(do {:temp-value %}))
292 | (map :temp-value)
293 | (filter even?)
294 | (apply +)))))))
295 |
296 | (defn run-x>> [l w]
297 | (x>> (range l)
298 | (map (fn [_] (work w)))
299 | (map (fn [_] (work w)))
300 | (map (fn [_] (work w)))
301 | (map (fn [_] (work w)))
302 | (map (fn [_] (work w)))
303 | (map (fn [_] (work w)))
304 | (map (fn [_] (work w)))
305 | (map (fn [_] (work w)))
306 | (map (fn [_] (work w)))
307 | (map (fn [_] (work w)))
308 | (map (fn [_] (work w)))
309 | (map (fn [_] (work w)))
310 | (map (fn [_] (work w)))
311 | (map (fn [_] (work w)))
312 | (map (fn [_] (work w)))
313 | (map (fn [_] (work w)))))
314 |
315 | (defn run-->> [l w]
316 | (->> (range l)
317 | (map (fn [_] (lazy-work w)))
318 | (map (fn [_] (lazy-work w)))
319 | (map (fn [_] (lazy-work w)))
320 | (map (fn [_] (lazy-work w)))
321 | (map (fn [_] (lazy-work w)))
322 | (map (fn [_] (lazy-work w)))
323 | (map (fn [_] (lazy-work w)))
324 | (map (fn [_] (lazy-work w)))
325 | (map (fn [_] (lazy-work w)))
326 | (map (fn [_] (lazy-work w)))
327 | (map (fn [_] (lazy-work w)))
328 | (map (fn [_] (lazy-work w)))
329 | (map (fn [_] (lazy-work w)))
330 | (map (fn [_] (lazy-work w)))
331 | (map (fn [_] (lazy-work w)))
332 | (map (fn [_] (lazy-work w)))))
333 | ```
334 | Now, looking at our "medium" sized work load above:
335 | ```clojure
336 | (dotimes [n 10]
337 | (println (time-val (last (run-x>> 100 (* n 100))))))
338 | ;; and
339 | (dotimes [n 10]
340 | (println (time-val (last (run-->> 100 (* n 100))))))
341 | ```
342 | And adding those to our times, we get:
343 |
344 | On 4 cores:
345 |
346 |
347 |
348 | On 16 cores:
349 |
350 |
351 |
352 | As you can see, it would have taken a _very_ long time for the lazy version to ever finish all ten iterations.
353 |
354 | Let's see it with the small sequence, large work version:
355 | ```clojure
356 | (dotimes [n 4]
357 | (println (time-val (last (run-x>> 10 (* n 1000))))))
358 | ;; and
359 | (dotimes [n 4]
360 | (println (time-val (last (run-->> 10 (* n 1000))))))
361 | ```
362 | On 4 cores:
363 |
364 |
365 |
366 | On 16 cores:
367 |
368 |
369 |
370 | Aha! We've discovered that `=>>` is breaking for _very_ small sequences (here 10). But only when the sequence is shorter than the number of cores - in this case, the 16 core machine has more cores than there are items in the sequence. We'll see if we can optimize this in our parallelism strategy.
371 |
372 | Let's see how these comparisons fare in the very large sequence cases:
373 |
374 | > todo
375 |
376 | Now let's see the case of an extremely large sequence with heterogeneous data and work:
377 |
378 | On 4 cores:
379 |
380 |
381 |
382 | On 16 cores:
383 |
384 |
385 |
386 | Here we can see that, with this kind of workload, the best we can do is try to keep up with the single threaded transducer version - which the `=>>` version does a pretty good job of.
387 |
388 |
389 |
--------------------------------------------------------------------------------
/src/clj-kondo/clj-kondo.exports/net.clojars.john/injest/config.edn:
--------------------------------------------------------------------------------
1 | {:lint-as {injest.core/x> clojure.core/->
2 | injest.core/x>> clojure.core/->>
3 | injest.core/=> clojure.core/->
4 | injest.core/=>> clojure.core/->>
5 | injest.core/|> clojure.core/->
6 | injest.core/|>> clojure.core/->>
7 |
8 | injest.path/+> clojure.core/->
9 | injest.path/+>> clojure.core/->>
10 | injest.path/x> clojure.core/->
11 | injest.path/x>> clojure.core/->>
12 | injest.path/=> clojure.core/->
13 | injest.path/=>> clojure.core/->>
14 | injest.path/|> clojure.core/->
15 | injest.path/|>> clojure.core/->>}
16 |
17 | :hooks {:macroexpand {injest.path/+> injest.path/+>
18 | injest.path/+>> injest.path/+>>
19 | injest.path/x> injest.path/+>
20 | injest.path/x>> injest.path/+>>
21 | injest.path/=> injest.path/+>
22 | injest.path/=>> injest.path/+>>}}
23 |
24 | :linters {:injest.path/+> {:level :error}
25 | :injest.path/+>> {:level :error}
26 | :unused-binding {:level :off}}
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/src/clj-kondo/clj-kondo.exports/net.clojars.john/injest/injest/path.clj:
--------------------------------------------------------------------------------
1 | (ns injest.path)
2 |
3 | (def protected-fns #{`fn 'fn 'fn* 'partial})
4 |
5 | (defn get-or-nth [m-or-v aval]
6 | (if (associative? m-or-v)
7 | (get m-or-v aval)
8 | (nth m-or-v aval)))
9 |
10 | (defn path-> [form x]
11 | (cond (and (seq? form) (not (protected-fns (first form))))
12 | (with-meta `(~(first form) ~x ~@(next form)) (meta form))
13 | (or (string? form) (nil? form) (boolean? form))
14 | (list x form)
15 | (int? form)
16 | (list 'injest.path/get-or-nth x form)
17 | :else
18 | (list form x)))
19 |
20 | (defn path->> [form x]
21 | (cond (and (seq? form) (not (protected-fns (first form))))
22 | (with-meta `(~(first form) ~@(next form) ~x) (meta form))
23 | (or (string? form) (nil? form) (boolean? form))
24 | (list x form)
25 | (int? form)
26 | (list 'injest.path/get-or-nth x form)
27 | :else
28 | (list form x)))
29 |
30 | (defn +>
31 | [x & forms]
32 | (loop [x x, forms forms]
33 | (if forms
34 | (recur (path-> (first forms) x) (next forms))
35 | x)))
36 |
37 | (defn +>>
38 | [x & forms]
39 | (loop [x x, forms forms]
40 | (if forms
41 | (recur (path->> (first forms) x) (next forms))
42 | x)))
--------------------------------------------------------------------------------
/src/injest/classical.cljc:
--------------------------------------------------------------------------------
1 | (ns injest.classical
2 | (:require [injest.impl :as i])
3 | #?(:cljs (:require-macros [injest.classical])))
4 |
5 | (defmacro x>
6 | "Just like -> but first composes transducers into a function
7 | that sequences the thread values through the transducers."
8 | [x & thread]
9 | `(-> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/xfn i/transducable?))))
10 |
11 | (defmacro x>>
12 | "Just like ->> but first composes transducers into a function
13 | that sequences the thread values through the transducers."
14 | [x & thread]
15 | `(->> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/xfn i/transducable?))))
16 |
17 | #?(:cljs (defmacro |> "Just like x>, for now" [& args] `(x> ~@args))
18 | :clj (defmacro |>
19 | "Just like x> but first composes stateless transducers into a function that
20 | pipelines in parallel the values flowing through the thread. Remaining
21 | stateful transducers are composed just like x>."
22 | [x & thread]
23 | `(x> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/pipeline-xfn i/par-transducable?)))))
24 |
25 | #?(:cljs (defmacro |>> "Just like x>>, for now" [& args] `(x>> ~@args))
26 | :clj (defmacro |>>
27 | "Just like x>> but first composes stateless transducers into a function that
28 | pipelines in parallel the values flowing through the thread. Remaining
29 | stateful transducers are composed just like x>>."
30 | [x & thread]
31 | `(x>> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/pipeline-xfn i/par-transducable?)))))
32 |
33 | #?(:cljs (defmacro => "Just like x>, for now" [& args] `(x> ~@args))
34 | :clj (defmacro =>
35 | "Just like x> but first composes stateless transducers into a function that
36 | `r/fold`s in parallel the values flowing through the thread. Remaining
37 | stateful transducers are composed just like x>."
38 | [x & thread]
39 | `(x> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/fold-xfn i/par-transducable?)))))
40 |
41 | #?(:cljs (defmacro =>> "Just like x>>, for now" [& args] `(x>> ~@args))
42 | :clj (defmacro =>>
43 | "Just like x>> but first composes stateless transducers into a function that
44 | `r/fold`s in parallel the values flowing through the thread. Remaining
45 | stateful transducers are composed just like x>>."
46 | [x & thread]
47 | `(x>> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/fold-xfn i/par-transducable?)))))
48 |
--------------------------------------------------------------------------------
/src/injest/data.cljc:
--------------------------------------------------------------------------------
1 | (ns injest.data)
2 |
3 | (def par-regs
4 | #{'cljs.core/dedupe
5 | 'cljs.core/disj!
6 | 'cljs.core/dissoc!
7 | 'cljs.core/filter
8 | 'cljs.core/keep
9 | 'cljs.core/map
10 | 'cljs.core/random-sample
11 | 'cljs.core/remove
12 | 'cljs.core/replace
13 | 'cljs.core/take-while
14 | 'cljs.core/halt-when
15 | 'cljs.core/mapcat
16 | 'cljs.core/cat
17 |
18 | 'clojure.core/dedupe
19 | 'clojure.core/disj!
20 | 'clojure.core/dissoc!
21 | 'clojure.core/filter
22 | 'clojure.core/keep
23 | 'clojure.core/map
24 | 'clojure.core/random-sample
25 | 'clojure.core/remove
26 | 'clojure.core/replace
27 | 'clojure.core/take-while
28 | 'clojure.core/halt-when
29 | 'clojure.core/mapcat
30 | 'clojure.core/cat})
31 |
32 | (def def-regs
33 | #{'cljs.core/mapcat
34 | 'cljs.core/disj!
35 | 'cljs.core/dissoc!
36 | 'cljs.core/keep
37 | 'cljs.core/filter
38 | 'cljs.core/take-while
39 | 'cljs.core/drop-while
40 | 'cljs.core/keep-indexed
41 | 'cljs.core/take
42 | 'cljs.core/partition-all
43 | 'cljs.core/distinct
44 | 'cljs.core/dedupe
45 | 'cljs.core/take-nth
46 | 'cljs.core/map
47 | 'cljs.core/partition-by
48 | 'cljs.core/remove
49 | 'cljs.core/cat
50 | 'cljs.core/replace
51 | 'cljs.core/random-sample
52 | 'cljs.core/interpose
53 | 'cljs.core/map-indexed
54 | 'cljs.core/drop
55 | 'cljs.core/halt-when
56 |
57 | 'clojure.core/take-nth
58 | 'clojure.core/disj!
59 | 'clojure.core/dissoc!
60 | 'clojure.core/distinct
61 | 'clojure.core/keep-indexed
62 | 'clojure.core/random-sample
63 | 'clojure.core/map-indexed
64 | 'clojure.core/map
65 | 'clojure.core/replace
66 | 'clojure.core/drop
67 | 'clojure.core/remove
68 | 'clojure.core/cat
69 | 'clojure.core/partition-all
70 | 'clojure.core/interpose
71 | 'clojure.core/mapcat
72 | 'clojure.core/dedupe
73 | 'clojure.core/drop-while
74 | 'clojure.core/partition-by
75 | 'clojure.core/take-while
76 | 'clojure.core/take
77 | 'clojure.core/keep
78 | 'clojure.core/filter
79 | 'clojure.core/halt-when})
80 |
--------------------------------------------------------------------------------
/src/injest/impl.cljc:
--------------------------------------------------------------------------------
1 | (ns injest.impl
2 | (:require
3 | #?(:clj [clojure.core.async :as a :refer [chan to-chan! pipeline > form first (contains? @s/transducables)))))
12 |
13 | (defn par-transducable? [form]
14 | (or (= form cat)
15 | (when (sequential? form)
16 | (->> form first (contains? @s/par-transducables)))))
17 |
18 | (defn compose-transducer-group [xfs]
19 | (->> xfs
20 | (map #(if-not (coll? %)
21 | %
22 | (if (= 1 (count %))
23 | (first %)
24 | (apply (first %) (rest %)))))
25 | (apply comp)))
26 |
27 | (defn xfn [xf-group]
28 | (let [ts (compose-transducer-group xf-group)]
29 | (fn [args]
30 | (sequence ts args))))
31 |
32 | #?(:cljs (def fold-xfn xfn)
33 | :clj
34 | (defn fold-xfn [xf-group]
35 | (let [ts (compose-transducer-group xf-group)]
36 | (fn [args]
37 | (r/fold 512 (r/monoid into conj) (ts conj) (vec args))))))
38 |
39 | #?(:cljs (def pipeline-xfn xfn)
40 | :clj
41 | (defn pipeline-xfn [xf-group]
42 | (let [p (+ 2 (.. Runtime getRuntime availableProcessors))
43 | ts (compose-transducer-group xf-group)]
44 | (fn [args]
45 | (let [results (chan)]
46 | (pipeline p results ts (to-chan! args))
47 | (> thread
51 | (u/qualify-thread env)
52 | (partition-by #(t-pred %))
53 | (mapv #(if-not (and (t-pred (first %))
54 | (not (< (count %) minimum-group-size)))
55 | %
56 | (list (list `(~t-fn ~(mapv vec %))))))
57 | (apply concat)))
58 |
59 | (defn get-or-nth [m-or-v aval]
60 | (if (associative? m-or-v)
61 | (get m-or-v aval)
62 | (nth m-or-v aval)))
63 |
64 | (comment
65 | (get-or-nth {0 :a 2 :b} 2) ;=> :b
66 | (get-or-nth [:a :b :c] 2) ;=> :c
67 | (get-or-nth `(x y z) 2) ;=> injest.impl/z
68 | (get-or-nth {0 :a nil 2} nil) ;=> 2
69 | (get-or-nth {0 :a false 2} false) ;=> 2
70 |
71 | :end)
72 |
73 | (def protected-fns #{`fn 'fn 'fn* 'partial})
74 |
75 | (defn path-> [form x]
76 | (cond (and (seq? form) (not (protected-fns (first form))))
77 | (with-meta `(~(first form) ~x ~@(next form)) (meta form))
78 | (or (string? form) (nil? form) (boolean? form))
79 | (list x form)
80 | (int? form)
81 | (list `get-or-nth x form)
82 | :else
83 | (list form x)))
84 |
85 | (defn path->> [form x]
86 | (cond (and (seq? form) (not (protected-fns (first form))))
87 | (with-meta `(~(first form) ~@(next form) ~x) (meta form))
88 | (or (string? form) (nil? form) (boolean? form))
89 | (list x form)
90 | (int? form)
91 | (list `get-or-nth x form)
92 | :else
93 | (list form x)))
94 |
--------------------------------------------------------------------------------
/src/injest/path.cljc:
--------------------------------------------------------------------------------
1 | (ns injest.path
2 | (:require
3 | [injest.impl :as i])
4 | #?(:cljs (:require-macros [injest.path])))
5 |
6 | ;; non-transducer versions, with path navigation, for untransducifying a transducified path thread
7 | (defmacro +>
8 | "Just like -> but for ints will index into vectors and sequences and `get`
9 | into maps, whereas for strings, booleans and nils, will be passed to the
10 | thread-value as a lookup param. Also wraps lambdas.
11 | As in:
12 | (let [m {1 {\"b\" [0 1 {:c :res}]}}]
13 | (+> m 1 \"b\" 2 :c name #(str \"hi \" % \"!\"))) ;=> \"hi res!\""
14 | [x & forms]
15 | (loop [x x, forms forms]
16 | (if forms
17 | (recur (i/path-> (first forms) x) (next forms))
18 | x)))
19 |
20 | (defmacro +>>
21 | "Just like ->> but for ints will index into vectors and sequences and `get`
22 | into maps, whereas for strings, booleans and nils, will be passed to the
23 | thread-value as a lookup param. Also wraps lambdas.
24 | As in:
25 | (let [m {1 {\"b\" [0 1 {:c :res}]}}]
26 | (+>> m 1 \"b\" 2 :c name #(str \"hi \" % \"!\"))) ;=> \"hi res!\""
27 | [x & forms]
28 | (loop [x x, forms forms]
29 | (if forms
30 | (recur (i/path->> (first forms) x) (next forms))
31 | x)))
32 |
33 | ;; transducer version
34 | (defmacro x>
35 | "Just like +> but first composes transducers into a function that sequences
36 | the thread values through the transducers."
37 | [x & thread]
38 | `(+> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/xfn i/transducable?))))
39 |
40 | (defmacro x>>
41 | "Just like +>> but first composes transducers into a function that sequences
42 | the thread values through the transducers."
43 | [x & thread]
44 | `(+>> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/xfn i/transducable?))))
45 |
46 | ;; parallel transducer version
47 | #?(:cljs (defmacro |> "Just like x>, for now" [& args] `(x> ~@args))
48 | :clj (defmacro |>
49 | "Just like x> but first composes stateless transducer functions into a function
50 | that pipelines in parallel the thread values flowing through the thread.
51 | Remaining stateful transducers are composed just like x>."
52 | [x & thread]
53 | `(x> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/pipeline-xfn i/par-transducable?)))))
54 |
#?(:cljs (defmacro |>>
           "Just like x>>, for now"
           [& args]
           `(x>> ~@args))
   :clj  (defmacro |>>
           "Just like x>> but first composes stateless transducers functions into a function
  that pipelines in parallel the thread values flowing through the thread.
  Remaining stateful transducers are composed just like x>>."
           [x & thread]
           ;; First pass groups the parallelizable steps with `i/pipeline-xfn`;
           ;; x>> then handles whatever is left.
           (let [steps (i/pre-transducify-thread &env 1 `i/pipeline-xfn i/par-transducable? thread)]
             `(x>> ~x ~@steps))))
62 |
#?(:cljs (defmacro =>
           "Just like x>, for now"
           [& args]
           ;; Thread-first variant: must delegate to x> (not x>>), otherwise
           ;; the value is threaded into the last position on ClojureScript.
           `(x> ~@args))
   :clj  (defmacro =>
           "Just like x> but first composes stateless transducers into a function that
  `r/fold`s in parallel the values flowing through the thread. Remaining
  stateful transducers are composed just like x>."
           [x & thread]
           `(x> ~x ~@(->> thread (i/pre-transducify-thread &env 1 `i/fold-xfn i/par-transducable?)))))
70 |
#?(:cljs (defmacro =>>
           "Just like x>>, for now"
           [& args]
           `(x>> ~@args))
   :clj  (defmacro =>>
           "Just like x>> but first composes stateless transducers into a function that
  `r/fold`s in parallel the values flowing through the thread. Remaining
  stateful transducers are composed just like x>>."
           [x & thread]
           ;; First pass groups the parallelizable steps with `i/fold-xfn`;
           ;; x>> then handles whatever is left.
           (let [steps (i/pre-transducify-thread &env 1 `i/fold-xfn i/par-transducable? thread)]
             `(x>> ~x ~@steps))))
78 |
--------------------------------------------------------------------------------
/src/injest/report.cljc:
--------------------------------------------------------------------------------
1 | (ns injest.report
2 | (:require [clojure.edn :as edn])
3 | #?(:cljs (:require-macros [injest.report])))
4 |
;; Timing store written by `monitor`: {call-site-key {applicator-name {:times [...] :time n}}}.
(def mon (atom {}))
;; Master switch set by `report!`; read by the interval loop in `set-report-interval`.
(def report-live? (atom false))
;; handler -> value returned by `set-report-interval`, registered by `add-report-tap!`.
(def report-taps (atom {}))
8 |
(defn flc
  "Builds the opening of a report entry: an unterminated map literal holding
  the namespace `f` plus the :line and :column found in `form-meta`."
  [f form-meta]
  (str "{:namespace \"" f "\"\n :line " (:line form-meta) "\n :column " (:column form-meta)))
12 |
(defn now
  "Current wall-clock time in milliseconds since the Unix epoch."
  []
  #?(:clj  (System/currentTimeMillis)
     :cljs (js/Date.now)))
16 |
(defmacro tv
  "Evaluates `body` and returns {:res <value of body> :time <elapsed ms>},
  where the elapsed time is the difference of two `now` readings."
  [& body]
  `(let [t1#  (now)
         ;; Wrap in `do`: splicing the body directly into the binding vector
         ;; only works for exactly one form and breaks the `let` otherwise.
         res# (do ~@body)
         t2#  (now)]
     {:res res# :time (- t2# t1#)}))
22 |
(defn add-time
  "Returns a vector with `new-time` first, followed by at most the first 99
  entries of `times` (which may be nil)."
  [times new-time]
  (into [new-time] (take 99) (or times [])))
26 |
(defmacro monitor
  "Runs `(applicator ~@body)` under `tv`, records the elapsed time in `mon`
  under [k (str applicator)], and returns the result of the call.

  The stored entry is {:times <samples via add-time> :time <integer mean of
  those samples>}. The mean is computed over the updated sample list, so the
  first measurement is reported as itself rather than 0."
  [k applicator body]
  `(let [timed#   (tv (~applicator ~@body))
         elapsed# (:time timed#)]
     (swap! mon update-in [~k ~(str applicator)]
            (fn [entry#]
              (let [times# (add-time (:times entry#) elapsed#)]
                {:times times#
                 ;; times# always holds at least the new sample, so the
                 ;; division is safe; `int` truncates as before.
                 :time  (int (/ (apply + times#) (count times#)))})))
     (:res timed#)))
35 |
36 | ;; render report
(defn round
  "Truncates `n` to two decimal places and returns it as a float."
  [n]
  (-> n
      (* 100)
      int
      (/ 100)
      float))
39 |
(defn unzero
  "Returns 1 when `n` is nil, 0 or 0.0; otherwise returns `n` unchanged."
  [n]
  (if (and (some? n)
           (not= 0 n)
           (not= 0.0 n))
    n
    1))
44 |
(defn render-v
  "Renders the comparison lines for one monitored call site.

  `v` maps applicator names (\"injest.path/+>>\", \"injest.path/x>>\",
  \"injest.path/=>>\") to entries whose :time is compared. Missing or zero
  times are treated as 1 by `unzero`. When there is no =>> time only the two
  slower entries are compared; otherwise two comparison lines are produced.

  The returned text always ends with `}` so that, appended to the prefix built
  by `flc`, every report entry is a balanced map literal."
  [v]
  (let [t1     (some-> v (get "injest.path/+>>") :time)
        t2     (some-> v (get "injest.path/x>>") :time)
        t3     (some-> v (get "injest.path/=>>") :time)
        ;; Descending by time: slowest first, fastest last.
        s-ts   (->> [{:t (unzero t3) :s "=>>"}
                     {:t (unzero t2) :s "x>>"}
                     {:t (unzero t1) :s "+>>"}]
                    (sort-by :t)
                    reverse)
        max-ts (last s-ts)    ; fastest (smallest time)
        min-ts (first s-ts)   ; slowest (largest time)
        mid-ts (second s-ts)
        diff1  (round (* 1.0 (/ (:t min-ts) (:t mid-ts))))
        diff2  (round (* 1.0 (/ (:t mid-ts) (:t max-ts))))
        same   (fn [a b]
                 (str " :" a " \"" a " and " b " are basically the same speed\""))
        faster (fn [a ratio b]
                 (str " :" a " \"" a " is " ratio " times faster than " b "\""))]
    (str (if-not t3
           (if (= diff1 1.0)
             (same (:s min-ts) (:s mid-ts))
             (faster (:s mid-ts) diff1 (:s min-ts)))
           (str (if (= diff2 1.0)
                  (same (:s max-ts) (:s mid-ts))
                  (faster (:s max-ts) diff2 (:s mid-ts)))
                "\n"
                (if (= diff1 1.0)
                  (same (:s mid-ts) (:s min-ts))
                  (faster (:s mid-ts) diff1 (:s min-ts)))))
         ;; Close the map on every path, not just one of the four branches.
         "}")))
71 |
(defn report
  "Renders every entry in `mon` as text (key, newline, `render-v` output),
  sorted and separated by blank lines. Returns \"\" when nothing has been
  monitored yet."
  []
  (->> @mon
       (map (fn [[k v]] (str k "\n" (render-v v))))
       sort
       ;; interpose + apply str handles the empty case; a `reduce` without an
       ;; init value would call the 2-arg joiner with no args and throw.
       (interpose "\n\n")
       (apply str)))
77 |
(defn set-report-interval
  "On Clojure, starts a future that loops forever: sleep `ms` milliseconds,
  then invoke `callback` if `report-live?` is true. Returns that future.
  On ClojureScript the interval is currently disabled and `identity` is
  returned instead."
  [callback ms]
  #?(:clj  (future
             (loop []
               (Thread/sleep ms)
               (when @report-live?
                 (callback))
               (recur)))
     :cljs identity #_(js/setInterval #(when @report-live? (callback)) ms)))
81 |
(defn report!
  "Switches reporting on or off by storing `(boolean bool)` in `report-live?`.
  When `bool` is exactly false, every registered tap is cancelled first and
  `report-taps` is emptied."
  [bool]
  (when (false? bool)
    (doseq [tap (vals @report-taps)]
      (#?(:clj future-cancel :cljs js/clearInterval) tap))
    (reset! report-taps {}))
  (reset! report-live? (boolean bool)))
87 |
(defn add-report-tap!
  "Registers `handler` to be called with the output of `report` on a fixed
  interval, and remembers the interval handle in `report-taps`.

  `seconds` is optional and defaults to 10."
  [handler & [seconds]]
  ;; Apply the default before multiplying: (* 1000 nil) throws on the JVM
  ;; (and yields 0 on ClojureScript), so the old fallback never took effect.
  (let [interval-ms (* 1000 (or seconds 10))
        f           (set-report-interval #(handler (report)) interval-ms)]
    (swap! report-taps assoc handler f)))
91 |
--------------------------------------------------------------------------------
/src/injest/report/path.cljc:
--------------------------------------------------------------------------------
1 | (ns injest.report.path
2 | (:require
3 | [injest.path :as p]
4 | [injest.report :as r]
5 | #?(:cljs [cljs.analyzer :as ana]))
6 | #?(:cljs (:require-macros [injest.report.path])))
7 |
8 | ;; non-transducer versions, with path navigation, for untransducifying a transducified path thread
(defmacro +>
  "Just like -> but for ints will index into vectors and sequences and `get`
  into maps, whereas for strings, booleans and nils, will be passed to the
  thread-value as a lookup param. Also wraps lambdas.
  Delegates directly to `injest.path/+>`.
  As in:
    (let [m {1 {\"b\" [0 1 {:c :res}]}}]
      (+> m 1 \"b\" 2 :c name #(str \"hi \" % \"!\"))) ;=> \"hi res!\""
  [x & forms]
  `(p/+> ~x ~@forms))
18 |
(defmacro +>>
  "Just like ->> but for ints will index into vectors and sequences and `get`
  into maps, whereas for strings, booleans and nils, will be passed to the
  thread-value as a lookup param. Also wraps lambdas.
  Delegates directly to `injest.path/+>>`.
  As in:
    (let [m {1 {\"b\" [0 1 {:c :res}]}}]
      (+>> m 1 \"b\" 2 :c name #(str \"hi \" % \"!\"))) ;=> \"hi res!\""
  [x & forms]
  (list* `p/+>> x forms))
28 |
(defmacro get-namespace
  "Expands to a string literal: the name of `*ns*` at macro-expansion time."
  []
  (name (ns-name *ns*)))
31 |
32 | ;; transducer version
(defmacro x>>
  "Instrumented x>>. While `r/report-live?` is false this is plain
  `injest.path/x>>`. While it is true, each evaluation picks at random
  between `injest.path/x>>` and `injest.path/+>>`, runs the thread through
  `r/monitor`, and files the timing under a key built by `r/flc` from the
  calling namespace and this form's line/column metadata."
  [x & thread]
  (let [call-site (meta &form)
        body      (cons x thread)]
    `(if-not @r/report-live?
       (injest.path/x>> ~x ~@thread)
       (let [ns-str# (get-namespace)
             k#      (r/flc ns-str# ~call-site)]
         (if (zero? (rand-int 2))
           (r/monitor k# injest.path/x>> ~body)
           (r/monitor k# injest.path/+>> ~body))))))
45 |
46 | ;; parallel transducer version
#?(:cljs (defmacro =>> "Just like x>>, for now" [& args] `(x>> ~@args))
   :clj  (defmacro =>>
           "Instrumented =>>. Just like x>> but first composes stateless transducers
  into a function that `r/fold`s in parallel the values flowing through the
  thread. Remaining stateful transducers are composed just like x>>.

  While `r/report-live?` is false this is plain `injest.path/=>>`. While it
  is true, each evaluation picks at random among `injest.path/=>>`,
  `injest.path/x>>` and `injest.path/+>>`, runs the thread through
  `r/monitor`, and files the timing under a key built by `r/flc`."
           [x & thread]
           `(if-not @r/report-live?
              ;; With reporting off, behave as the parallel macro this
              ;; wraps, not as the sequential x>>.
              (injest.path/=>> ~x ~@thread)
              (let [n#   (rand-int 3)
                    ans# (get-namespace)
                    k#   (r/flc ans# ~(meta &form))]
                (case n#
                  0 (r/monitor k# injest.path/=>> ~(concat [x] thread))
                  1 (r/monitor k# injest.path/x>> ~(concat [x] thread))
                  2 (r/monitor k# injest.path/+>> ~(concat [x] thread)))))))
62 |
--------------------------------------------------------------------------------
/src/injest/state.cljc:
--------------------------------------------------------------------------------
1 | (ns injest.state
2 | (:require
3 | [injest.util :as u]
4 | [injest.data :as d])
5 | #?(:cljs (:require-macros [injest.state])))
6 |
;; Registry (a set in an atom) extended by `reg-xf!` / `regxf!`.
(def transducables (atom #{}))

;; Registry (a set in an atom) extended by `reg-pxf!` / `regpxf!`.
(def par-transducables (atom #{}))
10 |
(defmacro reg-xf!
  "Macro form of `regxf!`: each of `xfs` is passed through `u/qualify-sym`
  (with the macro's &env) at expansion time, and the resulting forms are
  added to `transducables` at run time."
  [& xfs]
  (let [qualified (mapv (fn [xf] (u/qualify-sym xf &env)) xfs)]
    `(swap! transducables into ~qualified)))
13 |
(defn regxf!
  "Adds every element of `xfs` to the `transducables` set and returns the
  updated set."
  [& xfs]
  (swap! transducables #(into % xfs)))
16 |
(defmacro reg-pxf!
  "Macro form of `regpxf!`: each of `xfs` is passed through `u/qualify-sym`
  (with the macro's &env) at expansion time, and the resulting forms are
  added to `par-transducables` at run time."
  [& xfs]
  (let [qualified (mapv (fn [xf] (u/qualify-sym xf &env)) xfs)]
    `(swap! par-transducables into ~qualified)))
19 |
(defn regpxf!
  "Adds every element of `xfs` to the `par-transducables` set and returns the
  updated set."
  [& xfs]
  (swap! par-transducables #(into % xfs)))
22 |
;; Seed `transducables` at namespace load with the entries of `d/def-regs`.
(apply regxf! d/def-regs)

;; Seed `par-transducables` at namespace load with the entries of `d/par-regs`.
(apply regpxf! d/par-regs)
26 |
27 | ; (regxf! 'clojure.core/map)
28 | ; or (reg-xf! map) ; Must be called from Clojure
29 |
--------------------------------------------------------------------------------
/src/injest/test.clj:
--------------------------------------------------------------------------------
1 | (ns injest.test
2 | (:require
3 | [injest.state :as i.s]
4 | [injest.report :as r]
5 | [injest.report.path :as injest :refer [+> +>> x>> =>>]]))
6 |
7 | (comment
8 |
9 | (r/add-report-tap! println 20)
10 | (r/report! true)
11 | (r/report! false)
12 |
13 |
14 | (dotimes [_ 10]
15 | (=>> (range 1000000)
16 | (map inc)
17 | (filter odd?)
18 | (mapcat #(do [% (dec %)]))
19 | (partition-by #(= 0 (mod % 5)))
20 | (map (partial apply +))
21 | ;; (mapv dec)
22 | (map (partial + 10))
23 | (map #(do {:temp-value %}))
24 | (map :temp-value)
25 | (filter even?)
26 | ;; (x/reduce +)
27 | ;; first
28 | (apply +)
29 | time))
30 |
31 | (macroexpand
32 | '(=>> (range 1000000)
33 | (map inc)
34 | (apply +)
35 | time))
36 | (x>> (range 1000000)
37 | (map inc)
38 | (apply +)
39 | time)
40 |
41 | (dotimes [_ 10]
42 | (=>> (range 1000000)
43 | (map inc)
44 | (filter odd?)
45 | (map (partial + 10))
46 | (filter even?)
47 | (apply +)
48 | time))
49 |
50 | (->> (range 1000000)
51 | (map inc)
52 | (filter odd?)
53 | (mapcat #(do [% (dec %)]))
54 | (partition-by #(= 0 (mod % 5)))
55 | (map (partial apply +))
56 | ;; (mapv dec)
57 | (map (partial + 10))
58 | (map #(do {:temp-value %}))
59 | (map :temp-value)
60 | (filter even?)
61 | ;; (x/reduce +)
62 | ;; first
63 | (apply +)
64 | time)
65 |
66 | :end
67 | )
--------------------------------------------------------------------------------
/src/injest/test.cljs:
--------------------------------------------------------------------------------
1 | (ns injest.test
2 | (:require
3 | [injest.state :as i.s]
4 | [injest.report :as r]
5 | [injest.report.path :as injest :refer [+> +>> x>> =>>]]))
6 |
7 | (comment
8 |
9 | ;; reporting not yet working in cljs
10 | (r/add-report-tap! println)
11 | (r/report! true)
12 | (r/report! false)
13 |
  ;; these aren't working in cljs
15 | (i.s/regxf! 'cljs.core/map)
16 | (i.s/reg-xf! map)
17 |
18 | (require '[clojure.edn :as edn])
19 | ;; (require '[net.cgrand.xforms :as x])
20 |
21 | ;; (reg-xf `x/reduce)
22 |
23 | ;; copied from test.clj, recorded times need to be updated for cljs
24 | (->> (range 1000000)
25 | (map inc)
26 | (filter odd?)
27 | (mapcat #(do [% (dec %)]))
28 | (partition-by #(= 0 (mod % 5)))
29 | (map (partial apply +))
30 | ;; (mapv dec)
31 | (map (partial + 10))
32 | (map #(do {:temp-value %}))
33 | (map :temp-value)
34 | (filter even?)
35 | (apply +)
36 | time)
37 |
38 | (x>> (range 1000000)
39 | (map inc)
40 | (filter odd?)
41 | (mapcat #(do [% (dec %)]))
42 | (partition-by #(= 0 (mod % 5)))
43 | (map (partial apply +))
44 | ;; (mapv dec)
45 | (map (partial + 10))
46 | (map #(do {:temp-value %}))
47 | (map :temp-value)
48 | (filter even?)
49 | ;; (x/reduce +)
50 | ;; first
51 | (apply +)
52 | time)
53 |
54 | ;; work utilities
55 | (defn work-1000 [work-fn]
56 | (range (last (repeatedly 1000 work-fn))))
57 |
58 | (defn ->>work [input]
59 | (work-1000
60 | (fn []
61 | (->> input
62 | (map inc)
63 | (filter odd?)
64 | (mapcat #(do [% (dec %)]))
65 | (partition-by #(= 0 (mod % 5)))
66 | (map (partial apply +))
67 | (map (partial + 10))
68 | (map #(do {:temp-value %}))
69 | (map :temp-value)
70 | (filter even?)
71 | (apply +)
72 | str
73 | (take 3)
74 | (apply str)
75 | edn/read-string))))
76 |
77 | (defn x>>work [input]
78 | (work-1000
79 | (fn []
80 | (x>> input
81 | (map inc)
82 | (filter odd?)
83 | (mapcat #(do [% (dec %)]))
84 | (partition-by #(= 0 (mod % 5)))
85 | (map (partial apply +))
86 | (map (partial + 10))
87 | (map #(do {:temp-value %}))
88 | (map :temp-value)
89 | (filter even?)
90 | (apply +)
91 | str
92 | (take 3)
93 | (apply str)
94 | edn/read-string))))
95 |
96 | (->> (range 100)
97 | (repeat 10)
98 | (map ->>work)
99 | (map ->>work)
100 | (map ->>work)
101 | (map ->>work)
102 | (map ->>work)
103 | (map ->>work)
104 | last
105 | count
106 | time)
107 | ; "Elapsed time: 18309.397391 msecs"
108 | ; 234
109 |
110 | (x>> (range 100)
111 | (repeat 10)
112 | (map x>>work)
113 | (map x>>work)
114 | (map x>>work)
115 | (map x>>work)
116 | (map x>>work)
117 | (map x>>work)
118 | last
119 | count
120 | time)
121 | ; "Elapsed time: 6252.224178 msecs"
122 | ; 234
123 |
124 | (=>> (range 100)
125 | (repeat 10)
126 | (map ->>work)
127 | (map ->>work)
128 | (map ->>work)
129 | (map ->>work)
130 | (map ->>work)
131 | (map ->>work)
132 | last
133 | count
134 | time)
135 | ; "Elapsed time: 8976.963402 msecs"
136 | ; 234
137 |
138 | (=>> (range 100)
139 | (repeat 10)
140 | (map x>>work)
141 | (map x>>work)
142 | (map x>>work)
143 | (map x>>work)
144 | (map x>>work)
145 | (map x>>work)
146 | last
147 | count
148 | time)
149 | ; "Elapsed time: 2862.172838 msecs"
150 | ; 234
151 |
152 | :end)
153 |
154 | ;; path thread tests
155 |
156 | (comment
157 |
158 | (x>> [1 2 3]
159 | (map #(do [% %]))
160 | cat)
161 |
162 | (x>> [1 2 3]
163 | (map #(do [% %]))
164 | (cat))
165 |
166 | (let [m {1 {"b" [0 1 {:c :res}]}}]
167 | (x> m 1 "b" 2 :c))
168 |
169 | (x> {0 :a 2 :b} 2) ;=> :b
170 |
171 | (x> [0 2 5] 2 #(- 10 % 1)) ;=> 4
172 |
173 | (x> [0 1 2 3 4] rest 2 #(- 10 % 1)) ;=> 6
174 |
175 | (x> 10 range rest 2 #(- 10 % 1)) ;=> 6
176 |
177 | (x> [:a :b :c] 2) ;=> :c
178 |
179 | (x> `(x y z) 2) ;=> injest.path/z
180 |
181 | (x> {0 :a nil 2} nil) ;=> 2
182 |
183 | (x> {0 :a false 2} false) ;=> 2
184 |
185 | (x>> {0 :a 2 :b} 2) ;=> :b
186 |
187 | (x>> [:a :b :c] 2) ;=> :c
188 |
189 | (x>> `(x y z) 2) ;=> injest.path/z
190 |
191 | (x>> {0 :a nil 2} nil) ;=> 2
192 |
193 | (x>> {0 :a false 2} false) ;=> 2
194 |
195 | ; non-transducer, with path navigation, for untransducifying a transducified path thread
196 | (+> {0 :a 2 :b} 2) ;=> :b
197 |
198 | (+> [:a :b :c] 2) ;=> :c
199 |
200 | (+> `(x y z) 2) ;=> injest.path/z
201 |
202 | (+> {0 :a nil 2} nil) ;=> 2
203 |
204 | (+> {0 :a false 2} false) ;=> 2
205 |
206 | (+>> {0 :a 2 :b} 2) ;=> :b
207 |
208 | (+>> [:a :b :c] 2) ;=> :c
209 |
210 | (+>> `(x y z) 2) ;=> injest.path/z
211 |
212 | (+>> {0 :a nil 2} nil) ;=> 2
213 |
214 | (+>> {0 :a false 2} false) ;=> 2
215 |
216 | (let [m {1 {"b" [0 1 {:c :res}]}}]
217 | (x> m 1 "b" 2 :c name)) ;=> "res"
218 |
219 | (let [m {1 {"b" [0 1 {:c :res}]}}]
220 | (x>> m 1 "b" 2 :c name)) ;=> "res"
221 |
222 | (let [m {1 {"b" [0 1 {:c :res}]}}]
223 | (+> m 1 "b" 2 :c name)) ;=> "res"
224 |
225 | (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c :res}]}])}]
226 | (+>> m 1 2 "b" 2 :c name)) ;=> "res"
227 |
228 | (x>> (range 1000000)
229 | (map inc)
230 | (filter odd?)
231 | (mapcat #(do [% (dec %)]))
232 | (partition-by #(= 0 (mod % 5)))
233 | (map (partial apply +))
234 | (map (partial + 10))
235 | (map #(do {:temp-value %}))
236 | (map :temp-value)
237 | (filter even?)
238 | (apply +)
239 | time)
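  ;; NOTE: the figures below were recorded with (range 10000000);
  ;; with (range 1000000) as written above the sum is 50005499994.
  ;; "Elapsed time: 6735.604664 msecs"
  ;; 5000054999994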
242 |
243 | :end
244 | )
245 |
--------------------------------------------------------------------------------
/src/injest/util.cljc:
--------------------------------------------------------------------------------
1 | (ns injest.util
2 | (:require [cljs.analyzer.api :as api]))
3 |
;; Symbol resolver used by the qualify-* helpers below:
;; `resolve` on Clojure, `identity` (a no-op) on ClojureScript.
(def safe-resolve
  #?(:clj resolve :cljs identity))
6 |
(defn qualify-sym
  "Returns a form that evaluates to the fully qualified symbol for `x`.

  `env` is the calling macro's &env. It is treated as a ClojureScript
  compilation environment only when it has an :ns entry; in that case `x` is
  resolved through `cljs.analyzer.api/resolve`. Otherwise `x` is resolved with
  `safe-resolve`."
  [x env]
  ;; Test (:ns env), as `qualify-form` does. On the JVM &env is a non-nil map
  ;; of local bindings whenever the macro is used inside a let/fn, so a plain
  ;; truthiness check would wrongly take the ClojureScript path there.
  (if-not (:ns env)
    `(quote
      ~(symbol (safe-resolve x)))
    `(symbol
      (quote
       ~(some-> x
                ((partial cljs.analyzer.api/resolve env))
                :name
                symbol)))))
17 |
(defn qualify-form
  "Returns a one-element list holding the qualified symbol for `x`.
  When `env` has an :ns entry the symbol comes from
  `cljs.analyzer.api/resolve`; otherwise from `safe-resolve`."
  [x env]
  (list
   (if (:ns env)
     (some-> x
             ((partial cljs.analyzer.api/resolve env))
             :name
             str
             symbol)
     (symbol (safe-resolve x)))))
27 |
(defn qualify-thread
  "Returns a vector of the forms in `thread` with their heads qualified:
  a bare `cat` symbol becomes the list produced by `qualify-form`; a list
  whose head is a symbol other than `fn`/`fn*` gets that head replaced by its
  qualified version; every other form is passed through untouched."
  [env thread]
  (letfn [(call-form? [x]
            (and (list? x)
                 (symbol? (first x))
                 (not (#{'fn 'fn*} (first x)))))
          (qualify [x]
            (cond
              (= x 'cat)     (qualify-form x env)
              (call-form? x) (concat (qualify-form (first x) env) (rest x))
              :else          x))]
    (mapv qualify thread)))
37 |
--------------------------------------------------------------------------------
/test/injest/path_test.clj:
--------------------------------------------------------------------------------
1 | (ns injest.path-test
2 | (:require [clojure.test :refer :all]
3 | [injest.path :refer :all]))
4 |
;; End-to-end check of x>> on a ten-million-element range, asserting the
;; pipeline's final sum.
(deftest readme-example
  (testing "example from the readme"
    (is (= 5000054999994
           (x>> (range 10000000)
                (map inc)
                (filter odd?)
                (mapcat #(do [% (dec %)]))
                (partition-by #(= 0 (mod % 5)))
                (map (partial apply +))
                (map (partial + 10))
                (map #(do {:temp-value %}))
                (map :temp-value)
                (filter even?)
                (apply +))))))
19 |
;; An integer step applied to a map is a key lookup, for every thread macro.
(deftest lookup-value-by-integer-key-in-map
  (testing "Get value from map by integer key"
    (is (= :b
           (+> {0 :a 2 :b}
               2)))
    (is (= :b
           (+>> {0 :a 2 :b}
                2)))
    (is (= :b
           (x> {0 :a 2 :b}
               2)))
    (is (= :b
           (x>> {0 :a 2 :b}
                2)))
    (is (= :b
           (=> {0 :a 2 :b}
               2)))
    (is (= :b
           (=>> {0 :a 2 :b}
                2)))))
40 |
;; An integer step applied to a vector selects the element at that index.
(deftest index-into-vector
  (testing "Get value of index in vector"
    (is (= 5
           (+> [0 2 5]
               2)))
    (is (= 5
           (+>> [0 2 5]
                2)))
    (is (= 5
           (x> [0 2 5]
               2)))
    (is (= 5
           (x>> [0 2 5]
                2)))
    (is (= 5
           (=> [0 2 5]
               2)))
    (is (= 5
           (=>> [0 2 5]
                2)))))
61 |
;; An integer step applied to a quoted list selects the element at that index.
(deftest index-into-sequence
  (testing "Get value of index in sequence"
    (is (= 5
           (+> '(0 2 5)
               2)))
    (is (= 5
           (+>> '(0 2 5)
                2)))
    (is (= 5
           (x> '(0 2 5)
               2)))
    (is (= 5
           (x>> '(0 2 5)
                2)))
    (is (= 5
           (=> '(0 2 5)
               2)))
    (is (= 5
           (=>> '(0 2 5)
                2)))))
82 |
;; A string step applied to a map is a key lookup, for every thread macro.
(deftest lookup-key-by-string-in-map
  ;; Description fixed: it was copy-pasted from the vector-index test.
  (testing "Get value from map by string key"
    (is (= 5
           (+> {0 :a "s" 5}
               "s")))
    (is (= 5
           (+>> {0 :a "s" 5}
                "s")))
    (is (= 5
           (x> {0 :a "s" 5}
               "s")))
    (is (= 5
           (x>> {0 :a "s" 5}
                "s")))
    (is (= 5
           (=> {0 :a "s" 5}
               "s")))
    (is (= 5
           (=>> {0 :a "s" 5}
                "s")))))
103 |
;; A keyword step applied to a map is a key lookup, for every thread macro.
(deftest lookup-key-by-key-in-map
  ;; Description fixed: it was copy-pasted from the vector-index test.
  (testing "Get value from map by keyword key"
    (is (= 5
           (+> {0 :a :k 5}
               :k)))
    (is (= 5
           (+>> {0 :a :k 5}
                :k)))
    (is (= 5
           (x> {0 :a :k 5}
               :k)))
    (is (= 5
           (x>> {0 :a :k 5}
                :k)))
    (is (= 5
           (=> {0 :a :k 5}
               :k)))
    (is (= 5
           (=>> {0 :a :k 5}
                :k)))))
124 |
;; A nil step applied to a map looks up the nil key, for every thread macro.
(deftest lookup-key-by-nil-in-map
  ;; Description fixed: it was copy-pasted from the vector-index test.
  (testing "Get value from map by nil key"
    (is (= 5
           (+> {0 :a nil 5}
               nil)))
    (is (= 5
           (+>> {0 :a nil 5}
                nil)))
    (is (= 5
           (x> {0 :a nil 5}
               nil)))
    (is (= 5
           (x>> {0 :a nil 5}
                nil)))
    (is (= 5
           (=> {0 :a nil 5}
               nil)))
    (is (= 5
           (=>> {0 :a nil 5}
                nil)))))
145 |
;; A boolean step applied to a map looks up that boolean key, for every thread macro.
(deftest lookup-key-by-boolean-in-map
  ;; Description fixed: it was copy-pasted from the vector-index test.
  (testing "Get value from map by boolean key"
    (is (= 5
           (+> {0 :a true 5}
               true)))
    (is (= 5
           (+>> {0 :a true 5}
                true)))
    (is (= 5
           (x> {0 :a true 5}
               true)))
    (is (= 5
           (x>> {0 :a true 5}
                true)))
    (is (= 5
           (=> {0 :a true 5}
               true)))
    (is (= 5
           (=>> {0 :a true 5}
                true)))))
166 |
;; A bare #(...) literal used as a thread step is applied to the threaded value.
(deftest lamda-wrapping
  (testing "wrap lambdas"
    (is (= 8
           (+> 1
               #(- 10 (+ % 1)))))
    (is (= 8
           (+>> 1
                #(- 10 (+ % 1)))))
    (is (= 8
           (x> 1
               #(- 10 (+ % 1)))))
    (is (= 8
           (x>> 1
                #(- 10 (+ % 1)))))
    (is (= 8
           (=> 1
               #(- 10 (+ % 1)))))
    (is (= 8
           (=>> 1
                #(- 10 (+ % 1)))))))
187 |
;; One thread per macro combining integer, string, keyword, boolean and nil
;; steps with a plain function and a lambda.
(deftest all-thread-features
  (testing "test all the path features at once"
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (+> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (+>> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (x> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (x>> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (=> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))
    (is (= "hi bob!"
           (let [m {1 (rest ['ignore0 0 1 {"b" [0 1 {:c {true {nil :bob}}}]}])}]
             (=>> m 1 2 "b" 2 :c true nil name #(str "hi " % "!")))))))
208 |
;; The same ten-step pipeline must give the same sum through +>>, x>> and =>>.
(deftest thread-last-transducers
  (testing "exercise thread-last macros"
    (is (= 1044
           (+>> (range 100)
                (map inc)
                (filter odd?)
                (mapcat #(do [% (dec %)]))
                (partition-by #(= 0 (mod % 5)))
                (map (partial apply +))
                (map (partial + 10))
                (map #(do {:temp-value %}))
                (map :temp-value)
                (filter even?)
                (apply +))))
    (is (= 1044
           (x>> (range 100)
                (map inc)
                (filter odd?)
                (mapcat #(do [% (dec %)]))
                (partition-by #(= 0 (mod % 5)))
                (map (partial apply +))
                (map (partial + 10))
                (map #(do {:temp-value %}))
                (map :temp-value)
                (filter even?)
                (apply +))))
    (is (= 1044
           (=>> (range 100)
                (map inc)
                (filter odd?)
                (mapcat #(do [% (dec %)]))
                (partition-by #(= 0 (mod % 5)))
                (map (partial apply +))
                (map (partial + 10))
                (map #(do {:temp-value %}))
                (map :temp-value)
                (filter even?)
                (apply +))))))
247 |
--------------------------------------------------------------------------------