├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── bb.edn ├── bin └── launchpad ├── build.clj ├── deps.edn ├── pom.xml ├── src └── scicloj │ └── metamorph │ ├── core.clj │ └── protocols.clj ├── template └── pom.xml ├── test └── scicloj │ └── metamorph │ └── core_test.clj └── tests.edn /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | *.jar 6 | *.class 7 | /.lein-* 8 | /.nrepl-port 9 | /.prepl-port 10 | .hgignore 11 | .hg/ 12 | /.calva/ 13 | /.classpath 14 | /.clj-kondo/ 15 | /.cpcache/ 16 | /.lsp/ 17 | /.project 18 | /.rebel_readline_history 19 | /.settings/ 20 | /README.html 21 | /pom.xml.asc 22 | /deps.local.edn 23 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## unreleased 4 | - checked for misisng op 5 | - refactored, thanks to @erickisos 6 | - changed to deps.edn based build 7 | 8 | ## 0.2.3 9 | - fixed failing test 10 | ## 0.2.0 11 | - added three pipeline helper functions 12 | - addded fit / transform helpers 13 | ## 0.2.2 14 | - more checks of ctx and parameters 15 | 16 | ## 0.1.0 17 | - use uuids #1 18 | - alpha2: replaced special meaning of :keyword in pipeline definition by map 19 | Initial version moved from [tablecloth](https://github.com/scicloj/tablecloth/blob/pipelines/src/tablecloth/pipeline.clj) 20 | 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Eclipse Public License - v 2.0 2 | 3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE 4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION 5 | OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 6 | 7 | 1. DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial content 12 | Distributed under this Agreement, and 13 | 14 | b) in the case of each subsequent Contributor: 15 | i) changes to the Program, and 16 | ii) additions to the Program; 17 | where such changes and/or additions to the Program originate from 18 | and are Distributed by that particular Contributor. A Contribution 19 | "originates" from a Contributor if it was added to the Program by 20 | such Contributor itself or anyone acting on such Contributor's behalf. 21 | Contributions do not include changes or additions to the Program that 22 | are not Modified Works. 23 | 24 | "Contributor" means any person or entity that Distributes the Program. 25 | 26 | "Licensed Patents" mean patent claims licensable by a Contributor which 27 | are necessarily infringed by the use or sale of its Contribution alone 28 | or when combined with the Program. 29 | 30 | "Program" means the Contributions Distributed in accordance with this 31 | Agreement. 32 | 33 | "Recipient" means anyone who receives the Program under this Agreement 34 | or any Secondary License (as applicable), including Contributors. 35 | 36 | "Derivative Works" shall mean any work, whether in Source Code or other 37 | form, that is based on (or derived from) the Program and for which the 38 | editorial revisions, annotations, elaborations, or other modifications 39 | represent, as a whole, an original work of authorship. 40 | 41 | "Modified Works" shall mean any work in Source Code or other form that 42 | results from an addition to, deletion from, or modification of the 43 | contents of the Program, including, for purposes of clarity any new file 44 | in Source Code form that contains any contents of the Program. Modified 45 | Works shall not include works that contain only declarations, 46 | interfaces, types, classes, structures, or files of the Program solely 47 | in each case in order to link to, bind by name, or subclass the Program 48 | or Modified Works thereof. 49 | 50 | "Distribute" means the acts of a) distributing or b) making available 51 | in any manner that enables the transfer of a copy. 52 | 53 | "Source Code" means the form of a Program preferred for making 54 | modifications, including but not limited to software source code, 55 | documentation source, and configuration files. 56 | 57 | "Secondary License" means either the GNU General Public License, 58 | Version 2.0, or any later versions of that license, including any 59 | exceptions or additional permissions as identified by the initial 60 | Contributor. 61 | 62 | 2. GRANT OF RIGHTS 63 | 64 | a) Subject to the terms of this Agreement, each Contributor hereby 65 | grants Recipient a non-exclusive, worldwide, royalty-free copyright 66 | license to reproduce, prepare Derivative Works of, publicly display, 67 | publicly perform, Distribute and sublicense the Contribution of such 68 | Contributor, if any, and such Derivative Works. 69 | 70 | b) Subject to the terms of this Agreement, each Contributor hereby 71 | grants Recipient a non-exclusive, worldwide, royalty-free patent 72 | license under Licensed Patents to make, use, sell, offer to sell, 73 | import and otherwise transfer the Contribution of such Contributor, 74 | if any, in Source Code or other form. This patent license shall 75 | apply to the combination of the Contribution and the Program if, at 76 | the time the Contribution is added by the Contributor, such addition 77 | of the Contribution causes such combination to be covered by the 78 | Licensed Patents. The patent license shall not apply to any other 79 | combinations which include the Contribution. No hardware per se is 80 | licensed hereunder. 81 | 82 | c) Recipient understands that although each Contributor grants the 83 | licenses to its Contributions set forth herein, no assurances are 84 | provided by any Contributor that the Program does not infringe the 85 | patent or other intellectual property rights of any other entity. 86 | Each Contributor disclaims any liability to Recipient for claims 87 | brought by any other entity based on infringement of intellectual 88 | property rights or otherwise. As a condition to exercising the 89 | rights and licenses granted hereunder, each Recipient hereby 90 | assumes sole responsibility to secure any other intellectual 91 | property rights needed, if any. For example, if a third party 92 | patent license is required to allow Recipient to Distribute the 93 | Program, it is Recipient's responsibility to acquire that license 94 | before distributing the Program. 95 | 96 | d) Each Contributor represents that to its knowledge it has 97 | sufficient copyright rights in its Contribution, if any, to grant 98 | the copyright license set forth in this Agreement. 99 | 100 | e) Notwithstanding the terms of any Secondary License, no 101 | Contributor makes additional grants to any Recipient (other than 102 | those set forth in this Agreement) as a result of such Recipient's 103 | receipt of the Program under the terms of a Secondary License 104 | (if permitted under the terms of Section 3). 105 | 106 | 3. REQUIREMENTS 107 | 108 | 3.1 If a Contributor Distributes the Program in any form, then: 109 | 110 | a) the Program must also be made available as Source Code, in 111 | accordance with section 3.2, and the Contributor must accompany 112 | the Program with a statement that the Source Code for the Program 113 | is available under this Agreement, and informs Recipients how to 114 | obtain it in a reasonable manner on or through a medium customarily 115 | used for software exchange; and 116 | 117 | b) the Contributor may Distribute the Program under a license 118 | different than this Agreement, provided that such license: 119 | i) effectively disclaims on behalf of all other Contributors all 120 | warranties and conditions, express and implied, including 121 | warranties or conditions of title and non-infringement, and 122 | implied warranties or conditions of merchantability and fitness 123 | for a particular purpose; 124 | 125 | ii) effectively excludes on behalf of all other Contributors all 126 | liability for damages, including direct, indirect, special, 127 | incidental and consequential damages, such as lost profits; 128 | 129 | iii) does not attempt to limit or alter the recipients' rights 130 | in the Source Code under section 3.2; and 131 | 132 | iv) requires any subsequent distribution of the Program by any 133 | party to be under a license that satisfies the requirements 134 | of this section 3. 135 | 136 | 3.2 When the Program is Distributed as Source Code: 137 | 138 | a) it must be made available under this Agreement, or if the 139 | Program (i) is combined with other material in a separate file or 140 | files made available under a Secondary License, and (ii) the initial 141 | Contributor attached to the Source Code the notice described in 142 | Exhibit A of this Agreement, then the Program may be made available 143 | under the terms of such Secondary Licenses, and 144 | 145 | b) a copy of this Agreement must be included with each copy of 146 | the Program. 147 | 148 | 3.3 Contributors may not remove or alter any copyright, patent, 149 | trademark, attribution notices, disclaimers of warranty, or limitations 150 | of liability ("notices") contained within the Program from any copy of 151 | the Program which they Distribute, provided that Contributors may add 152 | their own appropriate notices. 153 | 154 | 4. COMMERCIAL DISTRIBUTION 155 | 156 | Commercial distributors of software may accept certain responsibilities 157 | with respect to end users, business partners and the like. While this 158 | license is intended to facilitate the commercial use of the Program, 159 | the Contributor who includes the Program in a commercial product 160 | offering should do so in a manner which does not create potential 161 | liability for other Contributors. Therefore, if a Contributor includes 162 | the Program in a commercial product offering, such Contributor 163 | ("Commercial Contributor") hereby agrees to defend and indemnify every 164 | other Contributor ("Indemnified Contributor") against any losses, 165 | damages and costs (collectively "Losses") arising from claims, lawsuits 166 | and other legal actions brought by a third party against the Indemnified 167 | Contributor to the extent caused by the acts or omissions of such 168 | Commercial Contributor in connection with its distribution of the Program 169 | in a commercial product offering. The obligations in this section do not 170 | apply to any claims or Losses relating to any actual or alleged 171 | intellectual property infringement. In order to qualify, an Indemnified 172 | Contributor must: a) promptly notify the Commercial Contributor in 173 | writing of such claim, and b) allow the Commercial Contributor to control, 174 | and cooperate with the Commercial Contributor in, the defense and any 175 | related settlement negotiations. The Indemnified Contributor may 176 | participate in any such claim at its own expense. 177 | 178 | For example, a Contributor might include the Program in a commercial 179 | product offering, Product X. That Contributor is then a Commercial 180 | Contributor. If that Commercial Contributor then makes performance 181 | claims, or offers warranties related to Product X, those performance 182 | claims and warranties are such Commercial Contributor's responsibility 183 | alone. Under this section, the Commercial Contributor would have to 184 | defend claims against the other Contributors related to those performance 185 | claims and warranties, and if a court requires any other Contributor to 186 | pay any damages as a result, the Commercial Contributor must pay 187 | those damages. 188 | 189 | 5. NO WARRANTY 190 | 191 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 192 | PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" 193 | BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR 194 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF 195 | TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR 196 | PURPOSE. Each Recipient is solely responsible for determining the 197 | appropriateness of using and distributing the Program and assumes all 198 | risks associated with its exercise of rights under this Agreement, 199 | including but not limited to the risks and costs of program errors, 200 | compliance with applicable laws, damage to or loss of data, programs 201 | or equipment, and unavailability or interruption of operations. 202 | 203 | 6. DISCLAIMER OF LIABILITY 204 | 205 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 206 | PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS 207 | SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 208 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST 209 | PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 210 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 211 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 212 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE 213 | POSSIBILITY OF SUCH DAMAGES. 214 | 215 | 7. GENERAL 216 | 217 | If any provision of this Agreement is invalid or unenforceable under 218 | applicable law, it shall not affect the validity or enforceability of 219 | the remainder of the terms of this Agreement, and without further 220 | action by the parties hereto, such provision shall be reformed to the 221 | minimum extent necessary to make such provision valid and enforceable. 222 | 223 | If Recipient institutes patent litigation against any entity 224 | (including a cross-claim or counterclaim in a lawsuit) alleging that the 225 | Program itself (excluding combinations of the Program with other software 226 | or hardware) infringes such Recipient's patent(s), then such Recipient's 227 | rights granted under Section 2(b) shall terminate as of the date such 228 | litigation is filed. 229 | 230 | All Recipient's rights under this Agreement shall terminate if it 231 | fails to comply with any of the material terms or conditions of this 232 | Agreement and does not cure such failure in a reasonable period of 233 | time after becoming aware of such noncompliance. If all Recipient's 234 | rights under this Agreement terminate, Recipient agrees to cease use 235 | and distribution of the Program as soon as reasonably practicable. 236 | However, Recipient's obligations under this Agreement and any licenses 237 | granted by Recipient relating to the Program shall continue and survive. 238 | 239 | Everyone is permitted to copy and distribute copies of this Agreement, 240 | but in order to avoid inconsistency the Agreement is copyrighted and 241 | may only be modified in the following manner. The Agreement Steward 242 | reserves the right to publish new versions (including revisions) of 243 | this Agreement from time to time. No one other than the Agreement 244 | Steward has the right to modify this Agreement. The Eclipse Foundation 245 | is the initial Agreement Steward. The Eclipse Foundation may assign the 246 | responsibility to serve as the Agreement Steward to a suitable separate 247 | entity. Each new version of the Agreement will be given a distinguishing 248 | version number. The Program (including Contributions) may always be 249 | Distributed subject to the version of the Agreement under which it was 250 | received. In addition, after a new version of the Agreement is published, 251 | Contributor may elect to Distribute the Program (including its 252 | Contributions) under the new version. 253 | 254 | Except as expressly stated in Sections 2(a) and 2(b) above, Recipient 255 | receives no rights or licenses to the intellectual property of any 256 | Contributor under this Agreement, whether expressly, by implication, 257 | estoppel or otherwise. All rights in the Program not expressly granted 258 | under this Agreement are reserved. Nothing in this Agreement is intended 259 | to be enforceable by any entity that is not a Contributor or Recipient. 260 | No third-party beneficiary rights are created under this Agreement. 261 | 262 | Exhibit A - Form of Secondary Licenses Notice 263 | 264 | "This Source Code may also be made available under the following 265 | Secondary Licenses when the conditions for such availability set forth 266 | in the Eclipse Public License, v. 2.0 are satisfied: GNU General Public 267 | License as published by the Free Software Foundation, either version 2 268 | of the License, or (at your option) any later version, with the GNU 269 | Classpath Exception which is available at 270 | https://www.gnu.org/software/classpath/license.html." 271 | 272 | Simply including a copy of this Agreement, including this Exhibit A 273 | is not sufficient to license the Source Code under Secondary Licenses. 274 | 275 | If it is not possible or desirable to put the notice in a particular 276 | file, then You may include the notice in a location (such as a LICENSE 277 | file in a relevant directory) where a recipient would be likely to 278 | look for such a notice. 279 | 280 | You may add additional accurate notices of copyright ownership. 281 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Clojars Project](https://img.shields.io/clojars/v/scicloj/metamorph.svg)](https://clojars.org/scicloj/metamorph) 2 | 3 | # metamorph 4 | 5 | A Clojure library designed to providing pipelining operations. 6 | 7 | It allows to express any data transformation and machine learning pipeline as a simple sequence of pure functions: 8 | 9 | ```clojure 10 | (def pipe 11 | (pipeline 12 | (select-columns [:Text :Score]) 13 | (count-vectorize :Text :bow nlp/default-text->bow {}) 14 | (bow->sparse-array :bow :bow-sparse #(nlp/->vocabulary-top-n % 1000)) 15 | (set-inference-target :Score) 16 | (ds/select-columns [:bow-sparse :Score]) 17 | (model {:p 1000 18 | :model-type :maxent-multinomial 19 | :sparse-column :bow-sparse}))) 20 | ``` 21 | 22 | 23 | Several code examples for metamorph are available in this repo [metamorph-examples](https://github.com/scicloj/metamorph-examples) 24 | 25 | ### Pipeline operation 26 | 27 | Pipeline operation is a function which accepts context as a map and returns possibly modified context map. 28 | 29 | #### Context 30 | 31 | Context is just a map where pipeline information is stored. There are three reserved keys which are supposed to help organize a pipeline: 32 | 33 | * `:metamorph/data` - object which is subject to change and where the main data is stored. It can be anything: dataset, tensor, object, whatever you want 34 | * `:metamorph/id` - unique operation number which is injected to the context just before pipeline operation is called. This way pipeline operation have some identity which can be used to store and restore private data in the context. 35 | * `:metamorph/mode` - additional context information which can be used to determine pipeline phase. It can be added explicitely during pipeline creation. 36 | Different pipeline functions can work together, if they agree on a common set of modes and act accordingly depending on the mode. 37 | The main use case for this are pipelines which include a statistical model in some form. In here the model either gets fitted on the data (= learns form data) or it gets applied to data. For this common use case we define two standard modes, namely: 38 | * `:fit` - While the pipeline has this mode, a model containing function in the pipeline should fit its model from the data , this is as well called "train". It should write as well the fitted model to the key in `:metamorph/id` so, that on the next pipeline run in mode `transform` it can be used 39 | * `:transform` - While the pipeline is in this mode, the fitted model should be read from the key in `:metamorph/id` and apply the fitted model to the data 40 | 41 | 42 | In machine learning terminology, these 2 modes are typically called train and predict. In metamorph we use the fit/transform terms as the generalisation. 43 | 44 | Functions which only manipulate the data, should simply behave the same in any :mode, so ignoring `:metamorph/mode` 45 | 46 | ### Compliant operations 47 | All the steps of a metamorph pipeline are functions which need to follow the following conventions, in order to work well together: 48 | 49 | * Be usual Clojure functions which have at least one parameter, and this first parameter need to be a map, the context map. This map can potentially contain any key. 50 | * Keys of namespace :metamorph/xxx should be avoided and are reserved for usage by metamorph itself. 51 | * The value of a compliant function, need to be a function which takes as input the context and which value is the context. The function is allowed to add any keys with any value to the context map, but should not remove any key. 52 | * The object under `:metamorph/data` is considered to be the main data object, which nearly all functions will interact with. A functions which only interacts with this main data object, needs nevertheless return the whole context map with the data at key `:metamorph/data` 53 | * Each function which reads or writes specific keys to the pipeline context, should document this and use namespaced keys to avoid conflicts 54 | * Any pipeline function should **only** interact with the context map. It should neither read nor write anything outside the context. This is important, as it makes the whole pipleine completely self contained, and it can be re-executed anywehere, for example on new data. 55 | * Pipeline functions should be pure functions 56 | 57 | A typical skeleton of a compliant function looks like this: 58 | 59 | ```clojure 60 | (defn my-data-transform-function [any number of options] 61 | (fn [{:metamorph/keys [id data mode] :as ctx}] 62 | ;; do something with data and eventual with id and mode 63 | ;; and write it back somewhere in the ctx often to key `:metamorph/data`, but could be any key 64 | ;; the assoc makes as well sure, that other data in ctx is left unchanged 65 | (assoc ctx :metamorph/data ......) 66 | )))) 67 | ``` 68 | 69 | ### Metamorph compliant libraries 70 | The following libraries provied metamorph compliant functions in a recent version: 71 | 72 | |library | purpose | link | 73 | |-----------------|------------------------|-------------------------------------------------- | 74 | |tablecloth | dataset manipulation | https://github.com/scicloj/tablecloth | 75 | |tech.ml.dataset | dataset manipulation | https://github.com/techascent/tech.ml.dataset | 76 | |metamorph.ml | machine learning | https://github.com/scicloj/metamorph.ml | 77 | |sklearn-clj | sklearn estimators as metamorph functions | https://github.com/scicloj/sklearn-clj | 78 | 79 | 80 | Other libraries which do "data transformations" can decide to make their functions metamorph compliant. 81 | This does not require any dependency on metamorhp, just the usage of the standard keys. 82 | 83 | Functions can easely be lifted to become metamorph compliant. For this we have the function `metamorph/lift" 84 | 85 | ### methamorph.ml 86 | A sister project [metamorph.ml](https://github.com/scicloj/metamorph.ml) allows to evaluate machine learning pipelines based on metamorph. 87 | 88 | ### scicloj.ml 89 | A machine learining solution based on metamorph pipelines including various classification and regression models. 90 | [scicloj.ml](https://github.com/scicloj/scicloj.ml) 91 | 92 | 93 | ### Similar concept in sklearn pipelines 94 | The `metamorph` concept is similar to the `pipeline` concept of sklearn, which allows as well to run a give pipeline in `fit` and `transform`. 95 | But metamorph allows to combine models with arbitrary transform functions, which don't need to be models. 96 | 97 | 98 | ### Two types of functions in pipeline 99 | 100 | We foresee that mainly 2 types of functions get added to a pipeline. 101 | 102 | 1. `Mode independend functions:` They only manipulate the main data object, and will ignore all other information in contexts. 103 | Neither will they use `:metamorph/mode` nor the `:metamorph/id` in the context map. 104 | 2. `Mode dependend functions`: These functions will behave different depending on the :mode and will likely store data in the context map, which can be used by the same function in an other mode or by other functions later in the pipeline. 105 | 106 | ### Pipelines can be constructed from functions or as pure data 107 | Metamorph pipelines can be either constructed from a sequence of function calls via th function `metmorhp.core/pipeline` or declarative as a sequence of maps. 108 | 109 | Both rely on the same functions. 110 | 111 | See here for examples: 112 | https://github.com/scicloj/tablecloth/blob/pipelines/src/tablecloth/pipeline.clj 113 | 114 | This should allow advanced use cases, like the **generation** of pipelines, 115 | which gives large flexibility for hyper parameter tuning in machine learning. 116 | 117 | ### Advantages of the metamorph concept 118 | 119 | * A complete (machine learning) pipeline becomes self contained. All information (data and "state" of models) is inside the pipeline context 120 | * All steps of the pipeline are pure functions, which outcome depends only on its context map parameter (containing the data) and eventual options 121 | * It unifies the data processing pipeline idea of `tablecloth` with the concept of fitted models and machine learining 122 | * It uses only pure Clojure data structures 123 | * It has no dependency to any concrete data manipulation library, but all can be integrated easely based on a small number of agreed map keys 124 | 125 | #### Creating a pipeline 126 | 127 | To create a pipeline function you can use two functions: 128 | 129 | * `metamorph.core/pipeline` to make a pipeline function out of pipeline operators (= compliant functions as described above) 130 | * `metamorph.core/->pipeline` works as above, but using declarative maps (describing as well compliant functions) to describe the pipeline 131 | 132 | ## Usage 133 | 134 | Compliant pipeline operations can either be created by "lifting" functions which work on the data object itself, 135 | or by using them from compliant libraries. 136 | 137 | Most functions in [tablecloth](https://github.com/scicloj/tablecloth) take a dataset as input in first position, and return a dataset. 138 | This means they can be used with the function "metamorhp.core/lift" to be converted (lifted) into a metamorph compliant function. 139 | (Tabecloth has lifted versions of its functions in namespace `tablecloth.pipeline`) 140 | 141 | In this short example, the main data object in the context is a simple string. 142 | 143 | 144 | ```clojure 145 | (require '[scicloj.metamorph.core :as morph]) 146 | 147 | ;; a regular function which takes and returns a main object 148 | (defn regular-function-to-be-lifted 149 | [main-object par1 par2] 150 | (str "Hey, " (clojure.string/upper-case main-object) " , I'm regular function! (pars: " par1 ", " par2 ")")) 151 | 152 | ;; we make a pipeline-fn using `lift` and the regular function 153 | 154 | (def lifted-pipeline 155 | (morph/pipeline 156 | :anymode 157 | (morph/lift regular-function-to-be-lifted 1 2))) 158 | 159 | ;; lifted-pipeline is a regular Clojure function, taking the context in first place 160 | (lifted-pipeline {:metamorph/data "main data project"} ) 161 | ;;-> 162 | :metamorph{:data "Hey, MAIN DATA PROJECT , I'm regular function! (pars: 1, 2)"} 163 | ```` 164 | 165 | ## License 166 | 167 | Copyright © 2021 Scicloj 168 | 169 | This program and the accompanying materials are made available under the 170 | terms of the Eclipse Public License 2.0 which is available at 171 | http://www.eclipse.org/legal/epl-2.0. 172 | 173 | This Source Code may also be made available under the following Secondary 174 | Licenses when the conditions for such availability set forth in the Eclipse 175 | Public License, v. 2.0 are satisfied: GNU General Public License as published by 176 | the Free Software Foundation, either version 2 of the License, or (at your 177 | option) any later version, with the GNU Classpath Exception which is available 178 | at https://www.gnu.org/software/classpath/license.html. 179 | -------------------------------------------------------------------------------- /bb.edn: -------------------------------------------------------------------------------- 1 | {:deps {com.lambdaisland/launchpad {:mvn/version "0.15.79-alpha"}}} 2 | 3 | -------------------------------------------------------------------------------- /bin/launchpad: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bb 2 | 3 | (require '[lambdaisland.launchpad :as launchpad]) 4 | 5 | (launchpad/main {}) 6 | 7 | ;; (launchpad/main {:steps (into [(partial launchpad/ensure-java-version 17)] 8 | ;; launchpad/default-steps)}) 9 | -------------------------------------------------------------------------------- /build.clj: -------------------------------------------------------------------------------- 1 | (ns build 2 | (:refer-clojure :exclude [test]) 3 | (:require [clojure.tools.build.api :as b] ; for b/git-count-revs 4 | [org.corfield.build :as bb])) 5 | 6 | (def lib 'scicloj/metamorph) 7 | ; alternatively, use MAJOR.MINOR.COMMITS: 8 | ;; (def version (format "7.0.%s" (b/git-count-revs nil))) 9 | (def version (format "0.2.3")) 10 | (def class-dir "target/classes") 11 | (def basis (b/create-basis {:project "deps.edn"})) 12 | (def jar-file (format "target/%s-%s.jar" (name lib) version)) 13 | 14 | 15 | 16 | 17 | (defn test "Run the tests." [opts] 18 | (-> opts 19 | (assoc :lib lib :version version 20 | :aliases [:run-tests]) 21 | (bb/run-tests))) 22 | 23 | 24 | 25 | 26 | (defn jar [_] 27 | (b/write-pom {:class-dir class-dir 28 | :lib lib 29 | :version version 30 | :basis basis 31 | :src-pom "template/pom.xml" 32 | :scm {:connection "scm:git:https://github.com/scicloj/metamorph.git" 33 | :url "https://github.com/scicloj/metamorph"} 34 | :src-dirs ["src"]}) 35 | (b/copy-dir {:src-dirs ["src" "resources"] 36 | :target-dir class-dir}) 37 | (b/jar {:class-dir class-dir 38 | :jar-file jar-file})) 39 | 40 | (defn ci "Run the CI pipeline of tests (and build the JAR)." [opts] 41 | (-> opts 42 | (assoc :lib lib :version version 43 | :aliases [:run-tests]) 44 | 45 | (bb/run-tests) 46 | (bb/clean) 47 | (jar))) 48 | 49 | 50 | (defn install "Install the JAR locally." [opts] 51 | (-> opts 52 | (assoc :lib lib :version version) 53 | (bb/install))) 54 | 55 | (defn deploy "Deploy the JAR to Clojars." [opts] 56 | (-> opts 57 | (assoc :lib lib :version version) 58 | (bb/deploy))) 59 | -------------------------------------------------------------------------------- /deps.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src" "target/classes"] 2 | :deps { 3 | org.clojure/clojure {:mvn/version "1.11.1"}} 4 | :aliases { 5 | 6 | 7 | :build {:deps {io.github.seancorfield/build-clj 8 | {:git/tag "v0.4.0" :git/sha "54e39ae"}} 9 | :ns-default build} 10 | 11 | :run-tests {:main-opts ["-m" "kaocha.runner"]} 12 | :test {:extra-paths ["test"] 13 | 14 | :extra-deps {lambdaisland/kaocha {:mvn/version "1.71.1119"} 15 | org.clojure/test.check {:mvn/version "1.1.0"}}}}} 16 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | scicloj 5 | metamorph 6 | jar 7 | 0.2.3 8 | metamorph 9 | Context based pipeline. 10 | https://github.com/scicloj/metamorph 11 | 12 | 13 | EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 14 | https://www.eclipse.org/legal/epl-2.0/ 15 | 16 | 17 | 18 | https://github.com/scicloj/metamorph 19 | scm:git:git://github.com/scicloj/metamorph.git 20 | scm:git:ssh://git@github.com/scicloj/metamorph.git 21 | e4560bbb3484a7729120b5e990f4dc3744a47971 22 | 23 | 24 | src 25 | test 26 | 27 | 28 | resources 29 | 30 | 31 | 32 | 33 | resources 34 | 35 | 36 | target 37 | target/classes 38 | 39 | 40 | org.codehaus.mojo 41 | build-helper-maven-plugin 42 | 1.7 43 | 44 | 45 | add-source 46 | generate-sources 47 | 48 | add-source 49 | 50 | 51 | 52 | src 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | central 63 | https://repo1.maven.org/maven2/ 64 | 65 | false 66 | 67 | 68 | true 69 | 70 | 71 | 72 | clojars 73 | https://repo.clojars.org/ 74 | 75 | true 76 | 77 | 78 | true 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | org.clojure 88 | clojure 89 | 1.10.2 90 | 91 | 92 | org.clojure 93 | spec.alpha 94 | 0.2.194 95 | 96 | 97 | org.clojure 98 | core.specs.alpha 99 | 0.2.56 100 | 101 | 102 | 103 | 104 | 108 | -------------------------------------------------------------------------------- /src/scicloj/metamorph/core.clj: -------------------------------------------------------------------------------- 1 | (ns scicloj.metamorph.core 2 | (:require [scicloj.metamorph.protocols :as prot])) 3 | 4 | (defn ^:deprecated uuid 5 | "DEPRECATED: Use clojure.core/random-uuid instead" 6 | [] 7 | (random-uuid)) 8 | 9 | (defn check-metamorph-compliant 10 | [ctx op] 11 | (cond 12 | (keyword? op) ctx 13 | (not (map? ctx)) (throw (IllegalArgumentException. (str "Metamorph pipe functions need to return a map, but returned: " ctx "of class: " (type ctx)))) 14 | (not (contains? ctx :metamorph/data)) (do (println "Context after operation " op " with meta " (meta op) "does not contain :metamorph/data. This is likely as mistake.") ctx) 15 | :else ctx)) 16 | 17 | (defn- process-operation 18 | "Process an operation on the given context." 19 | [ctx [id op]] 20 | (assert (some? op) "op cannot be nil") 21 | (cond 22 | (map? op) (merge ctx op) 23 | (ifn? op) (-> ctx 24 | (assoc :metamorph/id (get ctx :metamorph/id id)) 25 | (op) 26 | (check-metamorph-compliant op) 27 | (dissoc :metamorph/id)) 28 | :else (throw (IllegalArgumentException. (str "Cannot call a non function: " op))))) 29 | 30 | (defn- local-pipeline 31 | "Execute a list of operations on the given context." 32 | ([ops-with-id] (local-pipeline ops-with-id {})) 33 | ([ops-with-id ctx] 34 | (let [ctx (if-not (map? ctx) {:metamorph/data ctx} ctx)] 35 | (reduce process-operation ctx ops-with-id)))) 36 | 37 | (defn pipeline 38 | "Create a metamorph pipeline function out of operators. 39 | 40 | `ops` are metamorph compliant functions (basicaly fn, which takle a ctx as first argument) 41 | 42 | This function returns a function, whcih can ve execute with a ctx as parameter. 43 | " 44 | [& ops] 45 | (->> ops 46 | (mapv #(vector (uuid) %)) ;; adds a uuid to every operation 47 | (partial local-pipeline))) ;; return a partialized version of local-pipeline with all the ops 48 | 49 | (declare process-param) 50 | 51 | (defn- process-map 52 | [config params] 53 | (update-vals params #(process-param config %))) 54 | 55 | (defn- process-seq 56 | [config params] 57 | (mapv #(process-param config %) params)) 58 | 59 | (defn- resolve-keyword 60 | "Interpret keyword as a symbol and try to resolve it." 61 | [k] 62 | ;; (println "resolve k: " k "in ns: " *ns*) 63 | (let [resolved-as 64 | (-> (if-let [n (namespace k)] ;; namespaced? 65 | (let [sn (symbol n) 66 | n (str (get (ns-aliases *ns*) sn sn))] ;; try to find namespace in aliases 67 | (symbol n (name k))) ;; create proper symbol with fixed namespace 68 | (symbol (name k))) ;; no namespace case 69 | (resolve))] 70 | ;; (println "resolved-as: " resolved-as) 71 | resolved-as)) 72 | 73 | (defn- maybe-var-get 74 | "If symbol can be resolved, return var, else return original keyword" 75 | [k] 76 | (or (some-> k resolve-keyword var-get) 77 | k)) 78 | 79 | (defn- process-param 80 | "Recursively process parameters and try to resolve symbols for namespaced keywords. 81 | 82 | Special case for namespaced keyword is `ctx` namespace. It means that we should look up in `config` map." 83 | [config p] 84 | (cond 85 | (and (keyword? p) ;; 86 | (let [n (namespace p)] 87 | (and n (or (= n "ctx") 88 | (let [sn (symbol n)] 89 | (find-ns (get (ns-aliases *ns*) sn sn))))))) (let [n (namespace p)] 90 | (if (= n "ctx") 91 | (config (keyword (name p))) 92 | (maybe-var-get p))) 93 | (map? p) (process-map config p) 94 | (sequential? p) (process-seq config p) 95 | :else p)) 96 | 97 | (defn log-and-apply [f args] 98 | (if (fn? f) 99 | (apply f args) 100 | (throw (IllegalArgumentException. (str "Cannot apply a non-function: " f " - args: " args))))) 101 | 102 | (defn ->pipeline 103 | "Create pipeline from declarative description." 104 | ([ops] (->pipeline {} ops)) 105 | ([config ops] 106 | (apply pipeline (for [line ops] 107 | (cond 108 | ;; if it's a sequence, resolve function, process parameters and call it. 109 | (sequential? line) (let [[op & params] line 110 | nparams (process-param config params) 111 | f (cond 112 | (keyword? op) (maybe-var-get op) 113 | (symbol? op) (var-get (resolve op)) 114 | :else op)] 115 | (log-and-apply f nparams)) 116 | (keyword? line) (maybe-var-get line) 117 | :else line))))) ;; leave untouched otherwise 118 | 119 | ;; lifting 120 | 121 | (defn lift 122 | "Create context aware version of the given `op` function. `:metamorph/data` will be used as a first parameter. 123 | 124 | Result of the `op` function will be stored under `:metamorph/data`" 125 | [op & params] 126 | (if (satisfies? prot/MetamorphProto op) 127 | (prot/lift op params) 128 | (fn [ctx] 129 | (assert (contains? ctx :metamorph/data)) 130 | (assoc ctx :metamorph/data (apply op (:metamorph/data ctx) params))))) 131 | 132 | (defn do-ctx 133 | "Apply f:: ctx -> any, ignore the result, leaving 134 | pipeline unaffected. Akin to using doseq for side-effecting 135 | operations like printing, visualization, or binding to vars 136 | for debugging." 137 | [f] 138 | (fn [ctx] (f ctx) ctx)) 139 | 140 | (defmacro def-ctx 141 | "Convenience macro for defining pipelined operations that 142 | bind the current value of the context to a var, for simple 143 | debugging purposes." 144 | [varname] 145 | `(do-ctx (fn [ctx#] (def ~varname ctx#)))) 146 | 147 | (defn pipe-it 148 | "Takes a data objects, executes the pipeline op(s) with it in :metamorph/data 149 | in mode :fit and returns content of :metamorph/data. 150 | Usefull to use execute a pipeline of pure data->data functions on some data" 151 | [data & ops] 152 | (let [pipe-fn (apply pipeline ops)] 153 | (:metamorph/data 154 | (pipe-fn {:metamorph/data data 155 | :metamorph/mode :fit})))) 156 | 157 | (defn fit 158 | "Helper function which executes pipeline op(s) in mode :fit on the given data and returns the fitted ctx. 159 | 160 | Main use is for cases in which the pipeline gets executed ones and no model is part of the pipeline." 161 | [data & ops] 162 | (let [pipe-fn (apply pipeline ops)] 163 | (pipe-fn {:metamorph/data data 164 | :metamorph/mode :fit}))) 165 | 166 | (defn fit-pipe 167 | "Helper function which executes pipeline op(s) in mode :fit on the given data and returns the fitted ctx. 168 | 169 | Main use is for cases in which the pipeline gets executed ones and no model is part of the pipeline." 170 | [data pipe-fn] 171 | (pipe-fn {:metamorph/data data 172 | :metamorph/mode :fit})) 173 | 174 | (defn transform-pipe 175 | "Helper functions which execute the passed `pipe-fn` on the given `data` in mode :transform. 176 | It merges the data into the provided `ctx` while doing so." 177 | [data pipe-fn ctx] 178 | 179 | (pipe-fn 180 | (merge ctx 181 | {:metamorph/data data 182 | :metamorph/mode :transform}))) 183 | -------------------------------------------------------------------------------- /src/scicloj/metamorph/protocols.clj: -------------------------------------------------------------------------------- 1 | (ns scicloj.metamorph.protocols) 2 | 3 | (defprotocol MetamorphProto 4 | (lift [obj args] "Create pipeline operator.")) 5 | -------------------------------------------------------------------------------- /template/pom.xml: -------------------------------------------------------------------------------- 1 | 4 | Context based pipeline. 5 | 6 | 7 | EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 8 | https://www.eclipse.org/legal/epl-2.0/ 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /test/scicloj/metamorph/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns scicloj.metamorph.core-test 2 | (:require 3 | [clojure.string :as string] 4 | [clojure.test :as t] 5 | [scicloj.metamorph.core :as sut] 6 | [scicloj.metamorph.protocols :as prot])) 7 | 8 | (defn gen-rand 9 | [s] 10 | (let [x (atom s)] 11 | #(let [n (first @x)] 12 | (swap! x rest) 13 | n))) 14 | 15 | (defn context-operator 16 | [ctx] 17 | (let [id (:metamorph/id ctx) 18 | mode (:metamorph/mode ctx) 19 | my-data (-> (ctx id) 20 | (update :modes conj mode))] 21 | (assoc ctx id my-data))) 22 | 23 | (defn operator-creator 24 | [par1 par2] 25 | (fn [ctx] 26 | (let [id (:metamorph/id ctx)] 27 | (-> (assoc ctx id (+ par1 par2)) 28 | (update :metamorph/data conj [par1 par2]))))) 29 | 30 | (def local-value1 123.1) 31 | (def local-value2 432.1) 32 | 33 | (def pipeline-declaration 34 | [:context-operator ;; existing symbol, will be resolved 35 | [:operator-creator 3 4] ;; explicit data 36 | [:operator-creator ::local-value1 ::local-value2] ;; access local variables 37 | {:metamorph/mode :new-mode} ;; this is changed mode 38 | :context-operator 39 | [:operator-creator :ctx/a :ctx/b]]) ;; optional parameters 40 | 41 | (with-redefs 42 | [scicloj.metamorph.core/uuid 43 | (gen-rand (range 10))]) 44 | 45 | 46 | 47 | 48 | (def dpipeline-1 49 | (with-redefs 50 | [scicloj.metamorph.core/uuid 51 | (gen-rand (range 10))] 52 | (sut/->pipeline {:a -1 :b -2} pipeline-declaration))) 53 | 54 | (def dpipeline-2 55 | (with-redefs 56 | [scicloj.metamorph.core/uuid 57 | (gen-rand (range 10))] 58 | (sut/->pipeline {:a 100 :b 1000} pipeline-declaration))) 59 | 60 | (defn make-pipeline 61 | [a b] 62 | (sut/pipeline 63 | context-operator 64 | (operator-creator 3 4) 65 | (operator-creator local-value1 local-value2) 66 | {:metamorph/mode :new-mode} 67 | context-operator 68 | (operator-creator a b))) 69 | 70 | (def pipeline-1 71 | (with-redefs 72 | [scicloj.metamorph.core/uuid 73 | (gen-rand (range 10))] 74 | (make-pipeline -1 -2))) 75 | 76 | (def pipeline-2 77 | (with-redefs 78 | [scicloj.metamorph.core/uuid 79 | (gen-rand (range 10))] 80 | (make-pipeline 100 1000))) 81 | 82 | ;; (pipeline-1 []) 83 | ;; => {:metamorph/data [[3 4] [123.1 432.1] [-1 -2]], 84 | ;; 0 {:modes (nil)}, 85 | ;; 1 7, 86 | ;; 2 555.2, 87 | ;; :metamorph/mode :new-mode, 88 | ;; 4 {:modes (:new-mode)}, 89 | ;; 5 -3} 90 | 91 | (def res11 {:metamorph/data [[3 4] [123.1 432.1] [-1 -2]] 92 | 0 {:modes '(nil)} 93 | 1 7 94 | 2 555.2 :metamorph/mode :new-mode 95 | 4 {:modes '(:new-mode)} 96 | 5 -3}) 97 | 98 | (def res12 {:metamorph/data [[3 4] [123.1 432.1] [-1 -2] [3 4] [123.1 432.1] [-1 -2]] 99 | 0 {:modes '(:some-mode nil)} 100 | 1 7 101 | 2 555.2 :metamorph/mode :new-mode 102 | 4 {:modes '(:new-mode :new-mode)} 103 | 5 -3}) 104 | 105 | (def res21 {:metamorph/data [[3 4] [123.1 432.1] [100 1000]] 106 | 0 {:modes '(nil)} 107 | 1 7 108 | 2 555.2 :metamorph/mode :new-mode 109 | 4 {:modes '(:new-mode)} 110 | 5 1100}) 111 | 112 | (def res22 {:metamorph/data [[3 4] [123.1 432.1] [100 1000] [3 4] [123.1 432.1] [100 1000]] 113 | 0 {:modes '(:some-mode nil)} 114 | 1 7 115 | 2 555.2 :metamorph/mode :new-mode 116 | 4 {:modes '(:new-mode :new-mode)} 117 | 5 1100}) 118 | 119 | 120 | (def pipeline-3 121 | (with-redefs 122 | [scicloj.metamorph.core/uuid 123 | (gen-rand (range 10))] 124 | (sut/pipeline 125 | {:metamorph/id :test-id} 126 | (operator-creator 1 2)))) 127 | 128 | 129 | 130 | (t/deftest ovewrite-id 131 | (t/is (= 3 132 | (:test-id (pipeline-3 []))))) 133 | 134 | (def dpipeline-3 135 | (with-redefs 136 | [scicloj.metamorph.core/uuid 137 | (gen-rand (range 10))] 138 | (sut/->pipeline 139 | [ 140 | {:metamorph/id :test-id} 141 | [:operator-creator 1 2]]))) 142 | 143 | 144 | (t/deftest ovewrite-id-d 145 | (t/is (= 3 146 | (:test-id (dpipeline-3 []))))) 147 | 148 | 149 | 150 | (t/deftest whole-process 151 | 152 | (t/is (= (pipeline-1 []) res11)) 153 | (t/is (= (pipeline-1 (assoc (pipeline-1 []) :metamorph/mode :some-mode)) res12)) 154 | (t/is (= (pipeline-2 []) res21)) 155 | (t/is (= (pipeline-2 (assoc (pipeline-2 []) :metamorph/mode :some-mode)) res22)) 156 | (t/is (= (dpipeline-1 []) res11)) 157 | (t/is (= (dpipeline-1 (assoc (dpipeline-1 []) :metamorph/mode :some-mode)) res12)) 158 | (t/is (= (dpipeline-2 []) res21)) 159 | (t/is (= (dpipeline-2 (assoc (dpipeline-2 []) :metamorph/mode :some-mode)) res22))) 160 | 161 | ;; lifting 162 | 163 | (defn regular-function-to-be-lifted 164 | [_main-object par1 par2] 165 | (str "Hey, I'm regular function! (pars: " par1 ", " par2 ")")) 166 | 167 | (def object-that-can-be-lifted 168 | (reify prot/MetamorphProto 169 | (lift [_ args] 170 | (apply sut/lift regular-function-to-be-lifted args)))) 171 | 172 | (def lifted-pipeline 173 | (sut/pipeline 174 | {:metamorph/mode :anymode} 175 | (sut/lift regular-function-to-be-lifted 1 2))) 176 | 177 | (def declarative-lifted-pipeline 178 | (sut/->pipeline 179 | [{:metamorph/mode :anymode} 180 | [:sut/lift ::regular-function-to-be-lifted 1 2]])) 181 | 182 | (def object-pipeline 183 | (sut/pipeline 184 | (sut/lift object-that-can-be-lifted 1 2))) 185 | 186 | (def declarative-object-pipeline 187 | (sut/->pipeline 188 | [{:metamorph/mode :anymode} 189 | [:sut/lift ::object-that-can-be-lifted 1 2]])) 190 | 191 | 192 | (def expected-result 193 | {:metamorph/data "Hey, I'm regular function! (pars: 1, 2)"}) 194 | 195 | (def expected-result-with-mode 196 | {:metamorph/data "Hey, I'm regular function! (pars: 1, 2)" :metamorph/mode :anymode}) 197 | 198 | (t/deftest lift-function 199 | (t/is (= ((sut/lift regular-function-to-be-lifted 1 2) {:metamorph/data nil}) expected-result)) 200 | (t/is (= (lifted-pipeline {:metamorph/data :something}) expected-result-with-mode)) 201 | (t/is (= (declarative-lifted-pipeline {:metamorph/data :something}) expected-result-with-mode)) 202 | (t/is (= (object-pipeline {:metamorph/data :something}) expected-result)) 203 | (t/is (= (declarative-object-pipeline {:metamorph/data :something}) expected-result-with-mode))) 204 | 205 | (t/deftest fit-transform 206 | 207 | (let [pipe-fn 208 | (sut/pipeline 209 | (sut/lift string/upper-case)) 210 | 211 | fitted 212 | (sut/fit 213 | "hello" 214 | pipe-fn) 215 | 216 | transformed 217 | (sut/transform-pipe "world" pipe-fn fitted)] 218 | (t/is (= "HELLO" (:metamorph/data fitted))) 219 | (t/is (= :fit (:metamorph/mode fitted))) 220 | 221 | (t/is (= "WORLD" (:metamorph/data transformed))) 222 | (t/is (= :transform (:metamorph/mode transformed))))) 223 | 224 | 225 | (t/deftest fail-proper-on-nonfn 226 | (t/is (thrown? IllegalArgumentException 227 | (sut/pipe-it 228 | "hello" 229 | 230 | [(sut/lift string/upper-case)])))) 231 | 232 | (t/deftest fail-proper-on-nonfn 233 | (t/is (thrown? IllegalArgumentException 234 | (sut/pipe-it "hello" first)))) 235 | 236 | (t/deftest non-ctx-result-does-not-fail 237 | (t/is (= {:a :blub} 238 | ((sut/pipeline (fn [_] {:a :blub})))))) 239 | 240 | 241 | (t/deftest various-lifts 242 | (t/is (= {:metamorph/data "HELLO"} 243 | ((sut/pipeline (sut/lift string/upper-case)) 244 | {:metamorph/data "hello"}))) 245 | 246 | 247 | (t/is (= {:metamorph/data "HELLO"} 248 | ((sut/pipeline 249 | {:metamorph/id :test} (sut/lift string/upper-case)) 250 | {:metamorph/data "hello"})))) 251 | -------------------------------------------------------------------------------- /tests.edn: -------------------------------------------------------------------------------- 1 | #kaocha/v1 {} 2 | --------------------------------------------------------------------------------