├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── bb.edn
├── bin
└── launchpad
├── build.clj
├── deps.edn
├── pom.xml
├── src
└── scicloj
│ └── metamorph
│ ├── core.clj
│ └── protocols.clj
├── template
└── pom.xml
├── test
└── scicloj
│ └── metamorph
│ └── core_test.clj
└── tests.edn
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /classes
3 | /checkouts
4 | profiles.clj
5 | *.jar
6 | *.class
7 | /.lein-*
8 | /.nrepl-port
9 | /.prepl-port
10 | .hgignore
11 | .hg/
12 | /.calva/
13 | /.classpath
14 | /.clj-kondo/
15 | /.cpcache/
16 | /.lsp/
17 | /.project
18 | /.rebel_readline_history
19 | /.settings/
20 | /README.html
21 | /pom.xml.asc
22 | /deps.local.edn
23 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 |
3 | ## unreleased
4 | - checked for missing op
5 | - refactored, thanks to @erickisos
6 | - changed to deps.edn based build
7 |
8 | ## 0.2.3
9 | - fixed failing test
10 | ## 0.2.0
11 | - added three pipeline helper functions
12 | - added fit / transform helpers
13 | ## 0.2.2
14 | - more checks of ctx and parameters
15 |
16 | ## 0.1.0
17 | - use uuids #1
18 | - alpha2: replaced special meaning of :keyword in pipeline definition by map
19 | Initial version moved from [tablecloth](https://github.com/scicloj/tablecloth/blob/pipelines/src/tablecloth/pipeline.clj)
20 |
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Eclipse Public License - v 2.0
2 |
3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE
4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION
5 | OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
6 |
7 | 1. DEFINITIONS
8 |
9 | "Contribution" means:
10 |
11 | a) in the case of the initial Contributor, the initial content
12 | Distributed under this Agreement, and
13 |
14 | b) in the case of each subsequent Contributor:
15 | i) changes to the Program, and
16 | ii) additions to the Program;
17 | where such changes and/or additions to the Program originate from
18 | and are Distributed by that particular Contributor. A Contribution
19 | "originates" from a Contributor if it was added to the Program by
20 | such Contributor itself or anyone acting on such Contributor's behalf.
21 | Contributions do not include changes or additions to the Program that
22 | are not Modified Works.
23 |
24 | "Contributor" means any person or entity that Distributes the Program.
25 |
26 | "Licensed Patents" mean patent claims licensable by a Contributor which
27 | are necessarily infringed by the use or sale of its Contribution alone
28 | or when combined with the Program.
29 |
30 | "Program" means the Contributions Distributed in accordance with this
31 | Agreement.
32 |
33 | "Recipient" means anyone who receives the Program under this Agreement
34 | or any Secondary License (as applicable), including Contributors.
35 |
36 | "Derivative Works" shall mean any work, whether in Source Code or other
37 | form, that is based on (or derived from) the Program and for which the
38 | editorial revisions, annotations, elaborations, or other modifications
39 | represent, as a whole, an original work of authorship.
40 |
41 | "Modified Works" shall mean any work in Source Code or other form that
42 | results from an addition to, deletion from, or modification of the
43 | contents of the Program, including, for purposes of clarity any new file
44 | in Source Code form that contains any contents of the Program. Modified
45 | Works shall not include works that contain only declarations,
46 | interfaces, types, classes, structures, or files of the Program solely
47 | in each case in order to link to, bind by name, or subclass the Program
48 | or Modified Works thereof.
49 |
50 | "Distribute" means the acts of a) distributing or b) making available
51 | in any manner that enables the transfer of a copy.
52 |
53 | "Source Code" means the form of a Program preferred for making
54 | modifications, including but not limited to software source code,
55 | documentation source, and configuration files.
56 |
57 | "Secondary License" means either the GNU General Public License,
58 | Version 2.0, or any later versions of that license, including any
59 | exceptions or additional permissions as identified by the initial
60 | Contributor.
61 |
62 | 2. GRANT OF RIGHTS
63 |
64 | a) Subject to the terms of this Agreement, each Contributor hereby
65 | grants Recipient a non-exclusive, worldwide, royalty-free copyright
66 | license to reproduce, prepare Derivative Works of, publicly display,
67 | publicly perform, Distribute and sublicense the Contribution of such
68 | Contributor, if any, and such Derivative Works.
69 |
70 | b) Subject to the terms of this Agreement, each Contributor hereby
71 | grants Recipient a non-exclusive, worldwide, royalty-free patent
72 | license under Licensed Patents to make, use, sell, offer to sell,
73 | import and otherwise transfer the Contribution of such Contributor,
74 | if any, in Source Code or other form. This patent license shall
75 | apply to the combination of the Contribution and the Program if, at
76 | the time the Contribution is added by the Contributor, such addition
77 | of the Contribution causes such combination to be covered by the
78 | Licensed Patents. The patent license shall not apply to any other
79 | combinations which include the Contribution. No hardware per se is
80 | licensed hereunder.
81 |
82 | c) Recipient understands that although each Contributor grants the
83 | licenses to its Contributions set forth herein, no assurances are
84 | provided by any Contributor that the Program does not infringe the
85 | patent or other intellectual property rights of any other entity.
86 | Each Contributor disclaims any liability to Recipient for claims
87 | brought by any other entity based on infringement of intellectual
88 | property rights or otherwise. As a condition to exercising the
89 | rights and licenses granted hereunder, each Recipient hereby
90 | assumes sole responsibility to secure any other intellectual
91 | property rights needed, if any. For example, if a third party
92 | patent license is required to allow Recipient to Distribute the
93 | Program, it is Recipient's responsibility to acquire that license
94 | before distributing the Program.
95 |
96 | d) Each Contributor represents that to its knowledge it has
97 | sufficient copyright rights in its Contribution, if any, to grant
98 | the copyright license set forth in this Agreement.
99 |
100 | e) Notwithstanding the terms of any Secondary License, no
101 | Contributor makes additional grants to any Recipient (other than
102 | those set forth in this Agreement) as a result of such Recipient's
103 | receipt of the Program under the terms of a Secondary License
104 | (if permitted under the terms of Section 3).
105 |
106 | 3. REQUIREMENTS
107 |
108 | 3.1 If a Contributor Distributes the Program in any form, then:
109 |
110 | a) the Program must also be made available as Source Code, in
111 | accordance with section 3.2, and the Contributor must accompany
112 | the Program with a statement that the Source Code for the Program
113 | is available under this Agreement, and informs Recipients how to
114 | obtain it in a reasonable manner on or through a medium customarily
115 | used for software exchange; and
116 |
117 | b) the Contributor may Distribute the Program under a license
118 | different than this Agreement, provided that such license:
119 | i) effectively disclaims on behalf of all other Contributors all
120 | warranties and conditions, express and implied, including
121 | warranties or conditions of title and non-infringement, and
122 | implied warranties or conditions of merchantability and fitness
123 | for a particular purpose;
124 |
125 | ii) effectively excludes on behalf of all other Contributors all
126 | liability for damages, including direct, indirect, special,
127 | incidental and consequential damages, such as lost profits;
128 |
129 | iii) does not attempt to limit or alter the recipients' rights
130 | in the Source Code under section 3.2; and
131 |
132 | iv) requires any subsequent distribution of the Program by any
133 | party to be under a license that satisfies the requirements
134 | of this section 3.
135 |
136 | 3.2 When the Program is Distributed as Source Code:
137 |
138 | a) it must be made available under this Agreement, or if the
139 | Program (i) is combined with other material in a separate file or
140 | files made available under a Secondary License, and (ii) the initial
141 | Contributor attached to the Source Code the notice described in
142 | Exhibit A of this Agreement, then the Program may be made available
143 | under the terms of such Secondary Licenses, and
144 |
145 | b) a copy of this Agreement must be included with each copy of
146 | the Program.
147 |
148 | 3.3 Contributors may not remove or alter any copyright, patent,
149 | trademark, attribution notices, disclaimers of warranty, or limitations
150 | of liability ("notices") contained within the Program from any copy of
151 | the Program which they Distribute, provided that Contributors may add
152 | their own appropriate notices.
153 |
154 | 4. COMMERCIAL DISTRIBUTION
155 |
156 | Commercial distributors of software may accept certain responsibilities
157 | with respect to end users, business partners and the like. While this
158 | license is intended to facilitate the commercial use of the Program,
159 | the Contributor who includes the Program in a commercial product
160 | offering should do so in a manner which does not create potential
161 | liability for other Contributors. Therefore, if a Contributor includes
162 | the Program in a commercial product offering, such Contributor
163 | ("Commercial Contributor") hereby agrees to defend and indemnify every
164 | other Contributor ("Indemnified Contributor") against any losses,
165 | damages and costs (collectively "Losses") arising from claims, lawsuits
166 | and other legal actions brought by a third party against the Indemnified
167 | Contributor to the extent caused by the acts or omissions of such
168 | Commercial Contributor in connection with its distribution of the Program
169 | in a commercial product offering. The obligations in this section do not
170 | apply to any claims or Losses relating to any actual or alleged
171 | intellectual property infringement. In order to qualify, an Indemnified
172 | Contributor must: a) promptly notify the Commercial Contributor in
173 | writing of such claim, and b) allow the Commercial Contributor to control,
174 | and cooperate with the Commercial Contributor in, the defense and any
175 | related settlement negotiations. The Indemnified Contributor may
176 | participate in any such claim at its own expense.
177 |
178 | For example, a Contributor might include the Program in a commercial
179 | product offering, Product X. That Contributor is then a Commercial
180 | Contributor. If that Commercial Contributor then makes performance
181 | claims, or offers warranties related to Product X, those performance
182 | claims and warranties are such Commercial Contributor's responsibility
183 | alone. Under this section, the Commercial Contributor would have to
184 | defend claims against the other Contributors related to those performance
185 | claims and warranties, and if a court requires any other Contributor to
186 | pay any damages as a result, the Commercial Contributor must pay
187 | those damages.
188 |
189 | 5. NO WARRANTY
190 |
191 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT
192 | PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS"
193 | BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR
194 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF
195 | TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR
196 | PURPOSE. Each Recipient is solely responsible for determining the
197 | appropriateness of using and distributing the Program and assumes all
198 | risks associated with its exercise of rights under this Agreement,
199 | including but not limited to the risks and costs of program errors,
200 | compliance with applicable laws, damage to or loss of data, programs
201 | or equipment, and unavailability or interruption of operations.
202 |
203 | 6. DISCLAIMER OF LIABILITY
204 |
205 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT
206 | PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS
207 | SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
208 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST
209 | PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
210 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
211 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
212 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE
213 | POSSIBILITY OF SUCH DAMAGES.
214 |
215 | 7. GENERAL
216 |
217 | If any provision of this Agreement is invalid or unenforceable under
218 | applicable law, it shall not affect the validity or enforceability of
219 | the remainder of the terms of this Agreement, and without further
220 | action by the parties hereto, such provision shall be reformed to the
221 | minimum extent necessary to make such provision valid and enforceable.
222 |
223 | If Recipient institutes patent litigation against any entity
224 | (including a cross-claim or counterclaim in a lawsuit) alleging that the
225 | Program itself (excluding combinations of the Program with other software
226 | or hardware) infringes such Recipient's patent(s), then such Recipient's
227 | rights granted under Section 2(b) shall terminate as of the date such
228 | litigation is filed.
229 |
230 | All Recipient's rights under this Agreement shall terminate if it
231 | fails to comply with any of the material terms or conditions of this
232 | Agreement and does not cure such failure in a reasonable period of
233 | time after becoming aware of such noncompliance. If all Recipient's
234 | rights under this Agreement terminate, Recipient agrees to cease use
235 | and distribution of the Program as soon as reasonably practicable.
236 | However, Recipient's obligations under this Agreement and any licenses
237 | granted by Recipient relating to the Program shall continue and survive.
238 |
239 | Everyone is permitted to copy and distribute copies of this Agreement,
240 | but in order to avoid inconsistency the Agreement is copyrighted and
241 | may only be modified in the following manner. The Agreement Steward
242 | reserves the right to publish new versions (including revisions) of
243 | this Agreement from time to time. No one other than the Agreement
244 | Steward has the right to modify this Agreement. The Eclipse Foundation
245 | is the initial Agreement Steward. The Eclipse Foundation may assign the
246 | responsibility to serve as the Agreement Steward to a suitable separate
247 | entity. Each new version of the Agreement will be given a distinguishing
248 | version number. The Program (including Contributions) may always be
249 | Distributed subject to the version of the Agreement under which it was
250 | received. In addition, after a new version of the Agreement is published,
251 | Contributor may elect to Distribute the Program (including its
252 | Contributions) under the new version.
253 |
254 | Except as expressly stated in Sections 2(a) and 2(b) above, Recipient
255 | receives no rights or licenses to the intellectual property of any
256 | Contributor under this Agreement, whether expressly, by implication,
257 | estoppel or otherwise. All rights in the Program not expressly granted
258 | under this Agreement are reserved. Nothing in this Agreement is intended
259 | to be enforceable by any entity that is not a Contributor or Recipient.
260 | No third-party beneficiary rights are created under this Agreement.
261 |
262 | Exhibit A - Form of Secondary Licenses Notice
263 |
264 | "This Source Code may also be made available under the following
265 | Secondary Licenses when the conditions for such availability set forth
266 | in the Eclipse Public License, v. 2.0 are satisfied: GNU General Public
267 | License as published by the Free Software Foundation, either version 2
268 | of the License, or (at your option) any later version, with the GNU
269 | Classpath Exception which is available at
270 | https://www.gnu.org/software/classpath/license.html."
271 |
272 | Simply including a copy of this Agreement, including this Exhibit A
273 | is not sufficient to license the Source Code under Secondary Licenses.
274 |
275 | If it is not possible or desirable to put the notice in a particular
276 | file, then You may include the notice in a location (such as a LICENSE
277 | file in a relevant directory) where a recipient would be likely to
278 | look for such a notice.
279 |
280 | You may add additional accurate notices of copyright ownership.
281 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Clojars Project](https://img.shields.io/clojars/v/scicloj/metamorph.svg)](https://clojars.org/scicloj/metamorph)
2 |
3 | # metamorph
4 |
5 | A Clojure library designed to provide pipelining operations.
6 |
7 | It allows expressing any data transformation and machine learning pipeline as a simple sequence of pure functions:
8 |
9 | ```clojure
10 | (def pipe
11 | (pipeline
12 | (select-columns [:Text :Score])
13 | (count-vectorize :Text :bow nlp/default-text->bow {})
14 | (bow->sparse-array :bow :bow-sparse #(nlp/->vocabulary-top-n % 1000))
15 | (set-inference-target :Score)
16 | (ds/select-columns [:bow-sparse :Score])
17 | (model {:p 1000
18 | :model-type :maxent-multinomial
19 | :sparse-column :bow-sparse})))
20 | ```
21 |
22 |
23 | Several code examples for metamorph are available in this repo [metamorph-examples](https://github.com/scicloj/metamorph-examples)
24 |
25 | ### Pipeline operation
26 |
27 | Pipeline operation is a function which accepts context as a map and returns possibly modified context map.
28 |
29 | #### Context
30 |
31 | Context is just a map where pipeline information is stored. There are three reserved keys which are supposed to help organize a pipeline:
32 |
33 | * `:metamorph/data` - object which is subject to change and where the main data is stored. It can be anything: dataset, tensor, object, whatever you want
34 | * `:metamorph/id` - unique operation number which is injected into the context just before a pipeline operation is called. This way a pipeline operation has some identity which can be used to store and restore private data in the context.
35 | * `:metamorph/mode` - additional context information which can be used to determine pipeline phase. It can be added explicitly during pipeline creation.
36 | Different pipeline functions can work together, if they agree on a common set of modes and act accordingly depending on the mode.
37 | The main use case for this is pipelines which include a statistical model in some form. Here the model either gets fitted on the data (= learns from data) or it gets applied to data. For this common use case we define two standard modes, namely:
38 | * `:fit` - While the pipeline has this mode, a function in the pipeline that contains a model should fit its model from the data; this is also called "train". It should also write the fitted model to the key in `:metamorph/id`, so that on the next pipeline run in mode `:transform` it can be used
39 | * `:transform` - While the pipeline is in this mode, the fitted model should be read from the key in `:metamorph/id` and apply the fitted model to the data
40 |
41 |
42 | In machine learning terminology, these 2 modes are typically called train and predict. In metamorph we use the fit/transform terms as the generalisation.
43 |
44 | Functions which only manipulate the data, should simply behave the same in any :mode, so ignoring `:metamorph/mode`
45 |
46 | ### Compliant operations
47 | All the steps of a metamorph pipeline are functions which need to follow the following conventions, in order to work well together:
48 |
49 | * Be usual Clojure functions which have at least one parameter, and this first parameter need to be a map, the context map. This map can potentially contain any key.
50 | * Keys of namespace :metamorph/xxx should be avoided and are reserved for usage by metamorph itself.
51 | * The return value of a compliant function needs to be a function which takes the context as input and returns the context. The function is allowed to add any keys with any value to the context map, but should not remove any key.
52 | * The object under `:metamorph/data` is considered to be the main data object, which nearly all functions will interact with. A function which only interacts with this main data object nevertheless needs to return the whole context map with the data at key `:metamorph/data`
53 | * Each function which reads or writes specific keys to the pipeline context, should document this and use namespaced keys to avoid conflicts
54 | * Any pipeline function should **only** interact with the context map. It should neither read nor write anything outside the context. This is important, as it makes the whole pipeline completely self contained, and it can be re-executed anywhere, for example on new data.
55 | * Pipeline functions should be pure functions
56 |
57 | A typical skeleton of a compliant function looks like this:
58 |
59 | ```clojure
60 | (defn my-data-transform-function [any number of options]
61 | (fn [{:metamorph/keys [id data mode] :as ctx}]
62 | ;; do something with data and eventual with id and mode
63 | ;; and write it back somewhere in the ctx often to key `:metamorph/data`, but could be any key
64 | ;; the assoc makes as well sure, that other data in ctx is left unchanged
65 | (assoc ctx :metamorph/data ......)
66 | ))
67 | ```
68 |
69 | ### Metamorph compliant libraries
70 | The following libraries provide metamorph compliant functions in a recent version:
71 |
72 | |library | purpose | link |
73 | |-----------------|------------------------|-------------------------------------------------- |
74 | |tablecloth | dataset manipulation | https://github.com/scicloj/tablecloth |
75 | |tech.ml.dataset | dataset manipulation | https://github.com/techascent/tech.ml.dataset |
76 | |metamorph.ml | machine learning | https://github.com/scicloj/metamorph.ml |
77 | |sklearn-clj | sklearn estimators as metamorph functions | https://github.com/scicloj/sklearn-clj |
78 |
79 |
80 | Other libraries which do "data transformations" can decide to make their functions metamorph compliant.
81 | This does not require any dependency on metamorph, just the usage of the standard keys.
82 |
83 | Functions can easily be lifted to become metamorph compliant. For this we have the function `scicloj.metamorph.core/lift`.
84 |
85 | ### metamorph.ml
86 | A sister project [metamorph.ml](https://github.com/scicloj/metamorph.ml) allows to evaluate machine learning pipelines based on metamorph.
87 |
88 | ### scicloj.ml
89 | A machine learning solution based on metamorph pipelines including various classification and regression models.
90 | [scicloj.ml](https://github.com/scicloj/scicloj.ml)
91 |
92 |
93 | ### Similar concept in sklearn pipelines
94 | The `metamorph` concept is similar to the `pipeline` concept of sklearn, which also allows running a given pipeline in `fit` and `transform`.
95 | But metamorph allows to combine models with arbitrary transform functions, which don't need to be models.
96 |
97 |
98 | ### Two types of functions in pipeline
99 |
100 | We foresee that mainly 2 types of functions get added to a pipeline.
101 |
102 | 1. `Mode independent functions`: They only manipulate the main data object, and will ignore all other information in the context.
103 | Neither will they use `:metamorph/mode` nor the `:metamorph/id` in the context map.
104 | 2. `Mode dependent functions`: These functions will behave differently depending on the :mode and will likely store data in the context map, which can be used by the same function in another mode or by other functions later in the pipeline.
105 |
106 | ### Pipelines can be constructed from functions or as pure data
107 | Metamorph pipelines can be constructed either from a sequence of function calls via the function `scicloj.metamorph.core/pipeline` or declaratively as a sequence of maps.
108 |
109 | Both rely on the same functions.
110 |
111 | See here for examples:
112 | https://github.com/scicloj/tablecloth/blob/pipelines/src/tablecloth/pipeline.clj
113 |
114 | This should allow advanced use cases, like the **generation** of pipelines,
115 | which gives large flexibility for hyper parameter tuning in machine learning.
116 |
117 | ### Advantages of the metamorph concept
118 |
119 | * A complete (machine learning) pipeline becomes self contained. All information (data and "state" of models) is inside the pipeline context
120 | * All steps of the pipeline are pure functions, which outcome depends only on its context map parameter (containing the data) and eventual options
121 | * It unifies the data processing pipeline idea of `tablecloth` with the concept of fitted models and machine learning
122 | * It uses only pure Clojure data structures
123 | * It has no dependency on any concrete data manipulation library, but all can be integrated easily based on a small number of agreed map keys
124 |
125 | #### Creating a pipeline
126 |
127 | To create a pipeline function you can use two functions:
128 |
129 | * `metamorph.core/pipeline` to make a pipeline function out of pipeline operators (= compliant functions as described above)
130 | * `metamorph.core/->pipeline` works as above, but using declarative maps (describing as well compliant functions) to describe the pipeline
131 |
132 | ## Usage
133 |
134 | Compliant pipeline operations can either be created by "lifting" functions which work on the data object itself,
135 | or by using them from compliant libraries.
136 |
137 | Most functions in [tablecloth](https://github.com/scicloj/tablecloth) take a dataset as input in first position, and return a dataset.
138 | This means they can be used with the function `scicloj.metamorph.core/lift` to be converted (lifted) into a metamorph compliant function.
139 | (Tablecloth has lifted versions of its functions in namespace `tablecloth.pipeline`)
140 |
141 | In this short example, the main data object in the context is a simple string.
142 |
143 |
144 | ```clojure
145 | (require '[scicloj.metamorph.core :as morph])
146 |
147 | ;; a regular function which takes and returns a main object
148 | (defn regular-function-to-be-lifted
149 | [main-object par1 par2]
150 | (str "Hey, " (clojure.string/upper-case main-object) " , I'm regular function! (pars: " par1 ", " par2 ")"))
151 |
152 | ;; we make a pipeline-fn using `lift` and the regular function
153 |
154 | (def lifted-pipeline
155 | (morph/pipeline
156 | :anymode
157 | (morph/lift regular-function-to-be-lifted 1 2)))
158 |
159 | ;; lifted-pipeline is a regular Clojure function, taking the context in first place
160 | (lifted-pipeline {:metamorph/data "main data project"} )
161 | ;;->
162 | #:metamorph{:data "Hey, MAIN DATA PROJECT , I'm regular function! (pars: 1, 2)"}
163 | ````
164 |
165 | ## License
166 |
167 | Copyright © 2021 Scicloj
168 |
169 | This program and the accompanying materials are made available under the
170 | terms of the Eclipse Public License 2.0 which is available at
171 | http://www.eclipse.org/legal/epl-2.0.
172 |
173 | This Source Code may also be made available under the following Secondary
174 | Licenses when the conditions for such availability set forth in the Eclipse
175 | Public License, v. 2.0 are satisfied: GNU General Public License as published by
176 | the Free Software Foundation, either version 2 of the License, or (at your
177 | option) any later version, with the GNU Classpath Exception which is available
178 | at https://www.gnu.org/software/classpath/license.html.
179 |
--------------------------------------------------------------------------------
/bb.edn:
--------------------------------------------------------------------------------
1 | {:deps {com.lambdaisland/launchpad {:mvn/version "0.15.79-alpha"}}}
2 |
3 |
--------------------------------------------------------------------------------
/bin/launchpad:
--------------------------------------------------------------------------------
#!/usr/bin/env bb

;; Babashka script (see the shebang above) that starts lambdaisland launchpad.
(require '[lambdaisland.launchpad :as launchpad])

;; Run launchpad with an empty options map.
(launchpad/main {})

;; Disabled alternative: prepend a Java 17 version check to the default steps.
;; (launchpad/main {:steps (into [(partial launchpad/ensure-java-version 17)]
;;                               launchpad/default-steps)})
9 |
--------------------------------------------------------------------------------
/build.clj:
--------------------------------------------------------------------------------
1 | (ns build
2 | (:refer-clojure :exclude [test])
3 | (:require [clojure.tools.build.api :as b] ; for b/git-count-revs
4 | [org.corfield.build :as bb]))
5 |
;; Qualified symbol naming the library being built.
(def lib 'scicloj/metamorph)
; alternatively, use MAJOR.MINOR.COMMITS:
;; (def version (format "7.0.%s" (b/git-count-revs nil)))
;; Version string; a plain literal, since there is nothing to interpolate.
(def version "0.2.3")
;; Directory the pom and sources are staged in before packaging.
(def class-dir "target/classes")
;; Basis created by tools.build from deps.edn.
(def basis (b/create-basis {:project "deps.edn"}))
;; Path of the JAR to produce: target/<artifact name>-<version>.jar
(def jar-file (format "target/%s-%s.jar" (name lib) version))
13 |
14 |
15 |
16 |
(defn test
  "Run the tests.

  Adds `:lib`, `:version` and the `:run-tests` alias to `opts` and hands
  the resulting map to `bb/run-tests`, returning whatever that returns."
  [opts]
  (let [test-opts (assoc opts
                         :lib lib
                         :version version
                         :aliases [:run-tests])]
    (bb/run-tests test-opts)))
22 |
23 |
24 |
25 |
(defn jar
  "Build the library JAR.

  Performs three steps in order: writes a pom into `class-dir` (using
  template/pom.xml as `:src-pom`), copies the \"src\" and \"resources\"
  directories into `class-dir`, and packages `class-dir` as `jar-file`.
  The single argument is ignored; the result of `b/jar` is returned."
  [_]
  (let [scm-info {:connection "scm:git:https://github.com/scicloj/metamorph.git"
                  :url        "https://github.com/scicloj/metamorph"}
        pom-opts {:class-dir class-dir
                  :lib       lib
                  :version   version
                  :basis     basis
                  :src-pom   "template/pom.xml"
                  :scm       scm-info
                  :src-dirs  ["src"]}]
    (b/write-pom pom-opts))
  (b/copy-dir {:src-dirs   ["src" "resources"]
               :target-dir class-dir})
  (b/jar {:class-dir class-dir
          :jar-file  jar-file}))
39 |
(defn ci
  "Run the CI pipeline of tests (and build the JAR).

  The options map (with `:lib`, `:version` and the `:run-tests` alias
  added) is passed to `bb/run-tests`; its result goes to `bb/clean`, and
  the result of that is passed to `jar`."
  [opts]
  (let [ci-opts (assoc opts
                       :lib lib
                       :version version
                       :aliases [:run-tests])
        tested  (bb/run-tests ci-opts)
        cleaned (bb/clean tested)]
    (jar cleaned)))
48 |
49 |
(defn install
  "Install the JAR locally.

  Delegates to `bb/install` with `:lib` and `:version` set on `opts`."
  [opts]
  (bb/install (assoc opts :lib lib :version version)))
54 |
(defn deploy
  "Deploy the JAR to Clojars.

  Delegates to `bb/deploy` with `:lib` and `:version` set on `opts`."
  [opts]
  (bb/deploy (assoc opts :lib lib :version version)))
59 |
--------------------------------------------------------------------------------
/deps.edn:
--------------------------------------------------------------------------------
1 | {:paths ["src" "target/classes"]
2 | :deps {
3 | org.clojure/clojure {:mvn/version "1.11.1"}}
4 | :aliases {
5 |
6 |
7 | :build {:deps {io.github.seancorfield/build-clj
8 | {:git/tag "v0.4.0" :git/sha "54e39ae"}}
9 | :ns-default build}
10 |
11 | :run-tests {:main-opts ["-m" "kaocha.runner"]}
12 | :test {:extra-paths ["test"]
13 |
14 | :extra-deps {lambdaisland/kaocha {:mvn/version "1.71.1119"}
15 | org.clojure/test.check {:mvn/version "1.1.0"}}}}}
16 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 4.0.0
4 | scicloj
5 | metamorph
6 | jar
7 | 0.2.3
8 | metamorph
9 | Context based pipeline.
10 | https://github.com/scicloj/metamorph
11 |
12 |
13 | EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0
14 | https://www.eclipse.org/legal/epl-2.0/
15 |
16 |
17 |
18 | https://github.com/scicloj/metamorph
19 | scm:git:git://github.com/scicloj/metamorph.git
20 | scm:git:ssh://git@github.com/scicloj/metamorph.git
21 | e4560bbb3484a7729120b5e990f4dc3744a47971
22 |
23 |
24 | src
25 | test
26 |
27 |
28 | resources
29 |
30 |
31 |
32 |
33 | resources
34 |
35 |
36 | target
37 | target/classes
38 |
39 |
40 | org.codehaus.mojo
41 | build-helper-maven-plugin
42 | 1.7
43 |
44 |
45 | add-source
46 | generate-sources
47 |
48 | add-source
49 |
50 |
51 |
52 | src
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 | central
63 | https://repo1.maven.org/maven2/
64 |
65 | false
66 |
67 |
68 | true
69 |
70 |
71 |
72 | clojars
73 | https://repo.clojars.org/
74 |
75 | true
76 |
77 |
78 | true
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 | org.clojure
88 | clojure
89 | 1.10.2
90 |
91 |
92 | org.clojure
93 | spec.alpha
94 | 0.2.194
95 |
96 |
97 | org.clojure
98 | core.specs.alpha
99 | 0.2.56
100 |
101 |
102 |
103 |
104 |
108 |
--------------------------------------------------------------------------------
/src/scicloj/metamorph/core.clj:
--------------------------------------------------------------------------------
1 | (ns scicloj.metamorph.core
2 | (:require [scicloj.metamorph.protocols :as prot]))
3 |
4 | (defn ^:deprecated uuid
5 |   "DEPRECATED: prefer clojure.core/random-uuid. Returns a fresh random java.util.UUID."
6 |   []
7 |   (java.util.UUID/randomUUID))
8 |
9 | (defn check-metamorph-compliant
10 |   "Validate `ctx`, the value returned by pipeline step `op`, and return it. Throws IllegalArgumentException when a non-keyword op returned something that is not a map; prints a warning when :metamorph/data is missing." [ctx op]
11 |   (cond
12 |     (keyword? op) ctx ;; results of keyword ops are never validated
13 |     (not (map? ctx)) (throw (IllegalArgumentException. (str "Metamorph pipe functions need to return a map, but returned: " (pr-str ctx) " of class: " (type ctx)))) ;; pr-str so that nil shows up as "nil"
14 |     (not (contains? ctx :metamorph/data)) (do (println "Context after operation" op "with meta" (meta op) "does not contain :metamorph/data. This is likely a mistake.") ctx) ;; println already separates its arguments with spaces
15 |     :else ctx))
16 |
17 | (defn- process-operation
18 |   "Reducing step of a pipeline: apply one `[id op]` pair to `ctx`. A map op is merged into ctx; any other callable is invoked on ctx with :metamorph/id set (an id already present in ctx wins over `id`), and the id is removed again afterwards."
19 |   [ctx [id op]]
20 |   (assert (some? op) "op cannot be nil")
21 |   (cond
22 |     (map? op) (merge ctx op)
23 |     (ifn? op) (let [step-id (get ctx :metamorph/id id)
24 |                     in      (assoc ctx :metamorph/id step-id)
25 |                     out     (check-metamorph-compliant (op in) op)]
26 |                 ;; the id is only meant to be visible to the op itself
27 |                 (dissoc out :metamorph/id))
28 |     :else (throw (IllegalArgumentException. (str "Cannot call a non function: " op)))))
29 |
30 | (defn- local-pipeline
31 |   "Run every `[id op]` pair of `ops-with-id`, in order, over `ctx` (default `{}`); a non-map `ctx` is first wrapped as {:metamorph/data ctx}."
32 |   ([ops-with-id] (local-pipeline ops-with-id {}))
33 |   ([ops-with-id ctx]
34 |    (reduce process-operation
35 |            (if (map? ctx) ctx {:metamorph/data ctx}) ops-with-id)))
36 |
37 | (defn pipeline
38 |   "Create a metamorph pipeline function out of operators.
39 |
40 |   `ops` are metamorph compliant functions (basically fns which take a ctx as first argument).
41 |
42 |   Returns a function which can be executed with a ctx as parameter.
43 |   "
44 |   [& ops]
45 |   (let [ops-with-id (mapv (fn [op] [(uuid) op]) ops)] ;; eagerly pair every operation with its own id
46 |     (fn [& args] ;; same call shapes as local-pipeline with the ops already supplied
47 |       (apply local-pipeline ops-with-id args))))
48 |
49 | (declare process-param)
50 |
51 | (defn- process-map
52 |   "Run `process-param` over every value of the map `params`; keys are left as they are." [config params]
53 |   (update-vals params (partial process-param config)))
54 |
55 | (defn- process-seq
56 |   "Run `process-param` over every element of `params` and return the results as a vector." [config params]
57 |   (into [] (map (partial process-param config)) params))
58 |
59 | (defn- resolve-keyword
60 |   "Turn keyword `k` into a symbol and `resolve` it in the current `*ns*`.
61 |   A namespace part that is an alias in `*ns*` is replaced by the aliased namespace first.
62 |   Returns whatever `resolve` returns."
63 |   [k]
64 |   (let [kw-ns (namespace k)
65 |         sym   (if kw-ns
66 |                 (let [ns-sym (symbol kw-ns)
67 |                       ;; aliased namespace when there is one, the name as written otherwise
68 |                       target (get (ns-aliases *ns*) ns-sym ns-sym)]
69 |                   (symbol (str target) (name k)))
70 |                 (symbol (name k)))]
71 |     (resolve sym)))
72 |
73 | (defn- maybe-var-get
74 |   "Return the value of the var that keyword `k` resolves to (false/nil values included); return `k` itself when it resolves to nothing or to something that is not a var, e.g. a class."
75 |   [k]
76 |   (let [resolved (some-> k resolve-keyword)] ;; nil `k` is passed through untouched
77 |     (if (var? resolved) (var-get resolved) k)))
78 |
79 | (defn- process-param
80 |   "Recursively process parameters and try to resolve symbols for namespaced keywords.
81 |
82 |   Special case for namespaced keyword is `ctx` namespace. It means that we should look up in `config` map."
83 |   [config p]
84 |   (let [kw-ns (when (keyword? p) (namespace p))]
85 |     (cond
86 |       ;; :ctx/foo -> value stored under :foo in `config`
87 |       (= kw-ns "ctx") (config (keyword (name p)))
88 |       ;; keyword whose namespace is loaded, or is an alias in the current ns -> var value.
89 |       ;; `find-ns` accepts only symbols, so aliases are recognised by key and the
90 |       ;; Namespace object they map to is never handed to it.
91 |       (and kw-ns (let [ns-sym (symbol kw-ns)]
92 |                    (or (contains? (ns-aliases *ns*) ns-sym) (find-ns ns-sym)))) (maybe-var-get p)
93 |       (map? p) (process-map config p)
94 |       (sequential? p) (process-seq config p)
95 |       :else p)))
96 |
97 | (defn log-and-apply "Apply `f` to the argument seq `args`; throws IllegalArgumentException when `f` is not `fn?`." [f args]
98 |   (when-not (fn? f)
99 |     (throw (IllegalArgumentException. (str "Cannot apply a non-function: " f " - args: " args))))
100 |   (apply f args))
101 |
102 | (defn ->pipeline
103 |   "Create pipeline from declarative description. Each entry of `ops` is either a sequential `[op & params]` (op is resolved when given as keyword or symbol, params are processed against `config`, then op is called with them to build the step), a keyword (replaced by a var value when it resolves), or anything else, which is handed to `pipeline` untouched."
104 |   ([ops] (->pipeline {} ops))
105 |   ([config ops]
106 |    (let [resolve-op (fn [op]
107 |                       (cond
108 |                         (keyword? op) (maybe-var-get op)
109 |                         ;; an unresolvable symbol is kept as-is so `log-and-apply` rejects it by name instead of a bare NPE
110 |                         (symbol? op) (let [v (resolve op)] (if (var? v) (var-get v) op))
111 |                         :else op))]
112 |      (apply pipeline
113 |             (for [line ops]
114 |               (cond
115 |                 (sequential? line) (let [[op & params] line nparams (process-param config params)] (log-and-apply (resolve-op op) nparams))
116 |                 (keyword? line) (maybe-var-get line)
117 |                 :else line)))))) ;; leave untouched otherwise
118 |
119 | ;; lifting
120 |
121 | (defn lift
122 |   "Wrap the plain function `op` as a context-aware pipeline step: it is called with the value under
123 |   `:metamorph/data` as first argument, followed by `params`, and its result replaces `:metamorph/data`.
124 |   When `op` satisfies `MetamorphProto`, lifting is delegated to `prot/lift` with `op` and `params`."
125 |   [op & params]
126 |   (if-not (satisfies? prot/MetamorphProto op)
127 |     (fn [ctx]
128 |       (assert (contains? ctx :metamorph/data))
129 |       (update ctx :metamorph/data #(apply op % params)))
130 |     (prot/lift op params)))
131 |
132 | (defn do-ctx
133 |   "Build a pipeline step that calls `f` on the ctx purely for its
134 |   side effects (printing, visualization, binding to vars for
135 |   debugging, ...). Whatever `f` returns is discarded and the
136 |   ctx is passed on unchanged."
137 |   [f]
138 |   (fn [ctx] (doto ctx f)))
139 |
140 | (defmacro def-ctx
141 | "Convenience macro for defining pipelined operations that bind the current
142 | value of the context to a var, for simple debugging purposes. Expands to a
143 | `do-ctx` step whose function `def`s `varname` to the ctx it is called with."
144 | [varname]
145 | `(do-ctx (fn [ctx#] (def ~varname ctx#))))
146 |
147 | (defn pipe-it
148 |   "Run the pipeline op(s) over `data` placed under :metamorph/data, in mode :fit,
149 |   and return what ends up under :metamorph/data.
150 |   Useful for executing a pipeline of pure data->data functions on some data."
151 |   [data & ops]
152 |   (-> {:metamorph/data data
153 |        :metamorph/mode :fit}
154 |       ((apply pipeline ops))
155 |       :metamorph/data))
156 |
157 | (defn fit
158 |   "Build a pipeline from `ops`, run it in mode :fit with `data` under :metamorph/data, and return the resulting (fitted) ctx.
159 |
160 |   Main use is for cases in which the pipeline gets executed once and no model is part of the pipeline."
161 |   [data & ops]
162 |   (let [fit-ctx {:metamorph/data data
163 |                  :metamorph/mode :fit}]
164 |     ((apply pipeline ops) fit-ctx)))
165 |
166 | (defn fit-pipe
167 |   "Run the already built `pipe-fn` in mode :fit with `data` under :metamorph/data and return the fitted ctx.
168 |
169 |   Main use is for cases in which the pipeline gets executed once and no model is part of the pipeline."
170 |   [data pipe-fn]
171 |   (let [fit-ctx {:metamorph/mode :fit, :metamorph/data data}]
172 |     (pipe-fn fit-ctx)))
173 |
174 | (defn transform-pipe
175 |   "Run `pipe-fn` in mode :transform on `data`. The ctx handed to it is the
176 |   provided `ctx` merged with the new :metamorph/data and :metamorph/mode."
177 |   [data pipe-fn ctx]
178 |   (->> {:metamorph/data data
179 |         :metamorph/mode :transform}
180 |        ;; the two keys above win over whatever `ctx` holds for them
181 |        (merge ctx)
182 |        (pipe-fn)))
183 |
--------------------------------------------------------------------------------
/src/scicloj/metamorph/protocols.clj:
--------------------------------------------------------------------------------
1 | (ns scicloj.metamorph.protocols)
2 |
3 | (defprotocol MetamorphProto ;; extension point: scicloj.metamorph.core/lift delegates to this `lift` for any `op` satisfying the protocol
4 | (lift [obj args] "Create pipeline operator."))
5 |
--------------------------------------------------------------------------------
/template/pom.xml:
--------------------------------------------------------------------------------
1 |
4 | Context based pipeline.
5 |
6 |
7 | EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0
8 | https://www.eclipse.org/legal/epl-2.0/
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/test/scicloj/metamorph/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns scicloj.metamorph.core-test
2 | (:require
3 | [clojure.string :as string]
4 | [clojure.test :as t]
5 | [scicloj.metamorph.core :as sut]
6 | [scicloj.metamorph.protocols :as prot]))
7 |
8 | (defn gen-rand
9 |   "Returns a zero-arg fn that hands out the elements of `s` one per call, in order (nil once `s` is used up)." [s]
10 |   (let [remaining (atom s)]
11 |     (fn []
12 |       (let [[before _after] (swap-vals! remaining rest)]
13 |         (first before)))))
14 |
15 | (defn context-operator
16 |   "Test step: conj the ctx's current :metamorph/mode onto :modes of the entry stored under this step's :metamorph/id." [ctx]
17 |   (let [{:metamorph/keys [id mode]} ctx]
18 |     ;; a missing entry starts out as nil, so the first conj yields a one-element list
19 |     (update-in ctx
20 |                [id :modes]
21 |                conj mode)))
22 |
23 | (defn operator-creator
24 |   "Returns a test step that stores (+ par1 par2) under the step's :metamorph/id and conj-es [par1 par2] onto :metamorph/data." [par1 par2]
25 |   (fn [ctx]
26 |     (let [with-sum (assoc ctx (:metamorph/id ctx) (+ par1 par2))
27 |           data     (conj (:metamorph/data with-sum) [par1 par2])]
28 |       (assoc with-sum :metamorph/data data))))
29 |
30 | (def local-value1 123.1) ;; referenced from pipeline-declaration as ::local-value1
31 | (def local-value2 432.1) ;; referenced from pipeline-declaration as ::local-value2
32 |
33 | (def pipeline-declaration ;; declarative form of the same steps that make-pipeline builds programmatically
34 | [:context-operator ;; existing symbol, will be resolved
35 | [:operator-creator 3 4] ;; explicit data
36 | [:operator-creator ::local-value1 ::local-value2] ;; access local variables
37 | {:metamorph/mode :new-mode} ;; this is changed mode
38 | :context-operator
39 | [:operator-creator :ctx/a :ctx/b]]) ;; optional parameters
40 |
41 | (with-redefs ;; NOTE(review): this form has no body, so the temporary redefinition wraps nothing
42 | [scicloj.metamorph.core/uuid
43 | (gen-rand (range 10))])
44 |
45 |
46 |
47 |
48 | (def dpipeline-1 ;; declarative pipeline with :ctx/a and :ctx/b looked up as -1 and -2
49 | (with-redefs ;; uuid is swapped for a generator yielding 0, 1, 2, ... so step ids are predictable
50 | [scicloj.metamorph.core/uuid
51 | (gen-rand (range 10))]
52 | (sut/->pipeline {:a -1 :b -2} pipeline-declaration)))
53 |
54 | (def dpipeline-2 ;; same declaration, with :ctx/a and :ctx/b looked up as 100 and 1000
55 | (with-redefs
56 | [scicloj.metamorph.core/uuid
57 | (gen-rand (range 10))]
58 | (sut/->pipeline {:a 100 :b 1000} pipeline-declaration)))
59 |
60 | (defn make-pipeline ;; programmatic counterpart of pipeline-declaration; `a` and `b` feed the last step
61 | [a b]
62 | (sut/pipeline
63 | context-operator
64 | (operator-creator 3 4)
65 | (operator-creator local-value1 local-value2)
66 | {:metamorph/mode :new-mode} ;; map step: merged into the ctx instead of being called
67 | context-operator
68 | (operator-creator a b)))
69 |
70 | (def pipeline-1 ;; programmatic pipeline with a = -1, b = -2
71 | (with-redefs ;; uuid is swapped for a generator yielding 0, 1, 2, ... so step ids are predictable
72 | [scicloj.metamorph.core/uuid
73 | (gen-rand (range 10))]
74 | (make-pipeline -1 -2)))
75 |
76 | (def pipeline-2 ;; programmatic pipeline with a = 100, b = 1000
77 | (with-redefs
78 | [scicloj.metamorph.core/uuid
79 | (gen-rand (range 10))]
80 | (make-pipeline 100 1000)))
81 |
82 | ;; (pipeline-1 [])
83 | ;; => {:metamorph/data [[3 4] [123.1 432.1] [-1 -2]],
84 | ;; 0 {:modes (nil)},
85 | ;; 1 7,
86 | ;; 2 555.2,
87 | ;; :metamorph/mode :new-mode,
88 | ;; 4 {:modes (:new-mode)},
89 | ;; 5 -3}
90 |
91 | (def res11 {:metamorph/data [[3 4] [123.1 432.1] [-1 -2]] ;; expected: (d)pipeline-1 run once on []
92 | 0 {:modes '(nil)}
93 | 1 7
94 | 2 555.2 :metamorph/mode :new-mode ;; no key 3: the step with id 3 is a map, which is merged rather than called
95 | 4 {:modes '(:new-mode)}
96 | 5 -3})
97 |
98 | (def res12 {:metamorph/data [[3 4] [123.1 432.1] [-1 -2] [3 4] [123.1 432.1] [-1 -2]] ;; expected: (d)pipeline-1 re-run on its own result with mode :some-mode
99 | 0 {:modes '(:some-mode nil)}
100 | 1 7
101 | 2 555.2 :metamorph/mode :new-mode
102 | 4 {:modes '(:new-mode :new-mode)}
103 | 5 -3})
104 |
105 | (def res21 {:metamorph/data [[3 4] [123.1 432.1] [100 1000]] ;; expected: (d)pipeline-2 run once on []
106 | 0 {:modes '(nil)}
107 | 1 7
108 | 2 555.2 :metamorph/mode :new-mode
109 | 4 {:modes '(:new-mode)}
110 | 5 1100})
111 |
112 | (def res22 {:metamorph/data [[3 4] [123.1 432.1] [100 1000] [3 4] [123.1 432.1] [100 1000]] ;; expected: (d)pipeline-2 re-run on its own result with mode :some-mode
113 | 0 {:modes '(:some-mode nil)}
114 | 1 7
115 | 2 555.2 :metamorph/mode :new-mode
116 | 4 {:modes '(:new-mode :new-mode)}
117 | 5 1100})
118 |
119 |
120 | (def pipeline-3 ;; the leading map step puts :metamorph/id :test-id into the ctx, which the next step uses instead of its generated id
121 | (with-redefs
122 | [scicloj.metamorph.core/uuid
123 | (gen-rand (range 10))]
124 | (sut/pipeline
125 | {:metamorph/id :test-id}
126 | (operator-creator 1 2))))
127 |
128 |
129 |
130 | (t/deftest ovewrite-id
131 |   (let [result (pipeline-3 [])] ;; the step's sum must land under the id supplied via the ctx
132 |     (t/is (= 3 (:test-id result)))))
133 |
134 | (def dpipeline-3 ;; declarative twin of pipeline-3
135 | (with-redefs
136 | [scicloj.metamorph.core/uuid
137 | (gen-rand (range 10))]
138 | (sut/->pipeline
139 | [
140 | {:metamorph/id :test-id}
141 | [:operator-creator 1 2]])))
142 |
143 |
144 | (t/deftest ovewrite-id-d
145 |   (let [result (dpipeline-3 [])] ;; same check as for pipeline-3, on the declarative pipeline
146 |     (t/is (= 3 (:test-id result)))))
147 |
148 |
149 |
150 | (t/deftest whole-process
151 |   ;; every pipeline is checked on a fresh run and on a re-run over its own result in mode :some-mode
152 |   (t/is (= res11 (pipeline-1 [])))
153 |   (t/is (= res12 (-> (pipeline-1 []) (assoc :metamorph/mode :some-mode) pipeline-1)))
154 |   (t/is (= res21 (pipeline-2 [])))
155 |   (t/is (= res22 (-> (pipeline-2 []) (assoc :metamorph/mode :some-mode) pipeline-2)))
156 |   (t/is (= res11 (dpipeline-1 [])))
157 |   (t/is (= res12 (-> (dpipeline-1 []) (assoc :metamorph/mode :some-mode) dpipeline-1)))
158 |   (t/is (= res21 (dpipeline-2 [])))
159 |   (t/is (= res22 (-> (dpipeline-2 []) (assoc :metamorph/mode :some-mode) dpipeline-2))))
160 |
161 | ;; lifting
162 |
163 | (defn regular-function-to-be-lifted
164 |   [_main-object par1 par2] ;; the first argument is ignored; only the two parameters end up in the text
165 |   (str "Hey, I'm regular function! (pars: " (string/join ", " [par1 par2]) ")"))
166 |
167 | (def object-that-can-be-lifted ;; MetamorphProto implementation whose lift delegates to sut/lift of the regular function
168 | (reify prot/MetamorphProto
169 | (lift [_ args]
170 | (apply sut/lift regular-function-to-be-lifted args))))
171 |
172 | (def lifted-pipeline ;; plain function lifted programmatically
173 | (sut/pipeline
174 | {:metamorph/mode :anymode}
175 | (sut/lift regular-function-to-be-lifted 1 2)))
176 |
177 | (def declarative-lifted-pipeline ;; same pipeline, written declaratively
178 | (sut/->pipeline
179 | [{:metamorph/mode :anymode}
180 | [:sut/lift ::regular-function-to-be-lifted 1 2]]))
181 |
182 | (def object-pipeline ;; protocol object lifted programmatically; no mode step here
183 | (sut/pipeline
184 | (sut/lift object-that-can-be-lifted 1 2)))
185 |
186 | (def declarative-object-pipeline ;; protocol object lifted declaratively
187 | (sut/->pipeline
188 | [{:metamorph/mode :anymode}
189 | [:sut/lift ::object-that-can-be-lifted 1 2]]))
190 |
191 |
192 | (def expected-result ;; expected ctx for the lift runs that have no {:metamorph/mode ...} step
193 | {:metamorph/data "Hey, I'm regular function! (pars: 1, 2)"})
194 |
195 | (def expected-result-with-mode ;; expected ctx for the pipelines that start with the {:metamorph/mode :anymode} step
196 | {:metamorph/data "Hey, I'm regular function! (pars: 1, 2)" :metamorph/mode :anymode})
197 |
198 | (t/deftest lift-function
199 |   (t/is (= expected-result ((sut/lift regular-function-to-be-lifted 1 2) {:metamorph/data nil})))
200 |   (t/is (= expected-result-with-mode (lifted-pipeline {:metamorph/data :something})))
201 |   (t/is (= expected-result-with-mode (declarative-lifted-pipeline {:metamorph/data :something})))
202 |   (t/is (= expected-result (object-pipeline {:metamorph/data :something})))
203 |   (t/is (= expected-result-with-mode (declarative-object-pipeline {:metamorph/data :something}))))
204 |
205 | (t/deftest fit-transform
206 |   ;; `sut/fit` is handed an already built pipeline fn as its single op;
207 |   ;; the ctx it returns is then reused as the base ctx of the transform run
208 |   (let [upper-pipe
209 |         (sut/pipeline (sut/lift string/upper-case))
210 |
211 |         fit-ctx
212 |         (sut/fit "hello" upper-pipe)
213 |
214 |         transform-ctx
215 |         (sut/transform-pipe "world" upper-pipe fit-ctx)]
216 |     (t/is (= :fit (:metamorph/mode fit-ctx)))
217 |     (t/is (= "HELLO" (:metamorph/data fit-ctx)))
218 |
219 |     (t/is (= :transform
220 |              (:metamorph/mode transform-ctx)))
221 |     (t/is (= "WORLD"
222 |              (:metamorph/data transform-ctx)))))
223 |
224 |
225 | (t/deftest fail-proper-on-nonfn
226 |   ;; a vector wrapping the step is passed where the step itself is expected
227 |   (let [not-an-op [(sut/lift string/upper-case)]]
228 |     (t/is (thrown? IllegalArgumentException
229 |                    (sut/pipe-it "hello"
230 |                                 not-an-op)))))
231 |
232 | (t/deftest fail-proper-on-non-map-result ;; own name: reusing `fail-proper-on-nonfn` would replace the deftest of that name defined just above
233 |   (t/is (thrown? IllegalArgumentException ;; `first` returns a map entry of the ctx rather than a map
234 |                  (sut/pipe-it "hello" first))))
235 |
236 | (t/deftest non-ctx-result-does-not-fail
237 |   (let [pipe (sut/pipeline (fn [_] {:a :blub}))] ;; the step's result has no :metamorph/data, which only triggers a printed warning
238 |     (t/is (= {:a :blub} (pipe)))))
239 |
240 |
241 | (t/deftest various-lifts
242 |   (let [upper (sut/lift string/upper-case)
243 |         input {:metamorph/data "hello"}
244 |         want  {:metamorph/data "HELLO"}]
245 |     ;; a single lifted step
246 |     (t/is (= want ((sut/pipeline upper) input)))
247 |     ;; the :metamorph/id map step is merged in, and the id is removed again after the lifted step ran
248 |     (t/is (= want
249 |              ((sut/pipeline {:metamorph/id :test} upper)
250 |               input)))))
251 |
--------------------------------------------------------------------------------
/tests.edn:
--------------------------------------------------------------------------------
1 | #kaocha/v1 {}
2 |
--------------------------------------------------------------------------------