├── .gitignore ├── .jokerd └── linter.cljc ├── LICENSE ├── README.md ├── deps.edn ├── pom.xml ├── project.clj ├── resources ├── .keep ├── cat.jpg ├── generated_godzilla_plot_summaries.md ├── generated_godzilla_plot_summaries.txt ├── opencv-gray-logo.png ├── opencv │ ├── cat-cartoonize-color.png │ ├── cat-cartoonize-gray.png │ ├── cat-sketch.png │ ├── cat.jpg │ ├── lenna.png │ └── opencv-logo.png ├── pytorch │ ├── data │ │ └── .keep │ └── models │ │ └── .keep └── ufosightings-since-2010.csv └── src └── gigasquid ├── _configure.clj ├── bokeh ├── README.md ├── core.clj ├── line10k.clj └── multi_polygons.clj ├── diffprivlib.clj ├── facebook_prophet.clj ├── gpt2.clj ├── igraph.clj ├── lieden.clj ├── mxnet.clj ├── nltk.clj ├── numpy_plot.clj ├── opencv ├── README.md └── core.clj ├── plot.clj ├── psutil ├── README.md └── core.clj ├── pygal ├── README.md └── core.clj ├── pytorch_mnist.clj ├── sci_spacy.clj ├── seaborn.clj ├── sk_learn ├── datasets_estimators.clj ├── info.txt ├── model_selection.clj ├── pipelining.clj ├── supervised_learning.clj └── unsupervised_learning.clj ├── slicing.clj ├── spacy.clj ├── trimap.clj ├── umap.clj └── utils.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | *.jar 5 | *.class 6 | /.cpcache 7 | /.lein-* 8 | /.nrepl-history 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | -------------------------------------------------------------------------------- /.jokerd/linter.cljc: -------------------------------------------------------------------------------- 1 | (ns libpython-clj.python) 2 | 3 | (defmacro with [& form] 4 | `(let ~@form)) 5 | 6 | (defmacro py. [& form] 7 | (let [[member-symbol instance-member & args] form] 8 | `(str (pr ~member-symbol) 9 | (prn ~@args) 10 | (pr nil)))) 11 | 12 | (defmacro py.- [& form] 13 | (let [[member-symbol instance-field] form] 14 | `(str (pr ~member-symbol) 15 | (pr nil)))) 16 | 17 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 18 | 19 | (ns libpython-clj.require) 20 | 21 | (defmacro require-python [form] 22 | (let [form (second form) ;; first is (quote ...) 23 | [_ & {:keys [as refer]}] form] 24 | (let [references (if refer [:refer refer] '())] 25 | (apply list 26 | (cond-> `[do] 27 | as (conj `(create-ns (quote ~as)) 28 | `(require (quote [~as ~@references])))))))) 29 | (comment 30 | (*require-python '[torch.optim.lr_scheduler :as lr_scheduler])) 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. 
Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. 
REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor tocontrol, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. 
If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 
190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # libpython-clj-examples 2 | 3 | ### Overview 4 | 5 | This repo contains some examples of using [libpython-clj](https://github.com/clj-python/libpython-clj) with various python libraries. 
6 | So far there are source code examples meant to be walked through in the REPL: 7 | 8 | - [GPT2 text generation from hugging-face](https://github.com/huggingface/transformers) 9 | - [MXNet MNIST classification using the Module API](https://mxnet.apache.org/) 10 | - [Pytorch MNIST](https://github.com/pytorch/examples/tree/master/mnist) 11 | - [Matplotlib PyPlot](https://github.com/matplotlib/matplotlib) 12 | - [NLTK](https://www.nltk.org/) 13 | - [SpaCy](https://github.com/explosion/spaCy) 14 | - [Sci SpaCy](https://github.com/allenai/scispacy) 15 | - [Seaborn](https://github.com/mwaskom/seaborn) 16 | - [UMAP](https://github.com/lmcinnes/umap) 17 | - [TRIMAP](https://pypi.org/project/trimap/) 18 | - [Igraph](https://igraph.org/) 19 | - [Leiden](https://github.com/vtraag/leidenalg) 20 | - [Sklearn](https://github.com/scikit-learn/scikit-learn) 21 | - [Facebook Prophet](https://github.com/facebook/prophet) 22 | - [Pygal](http://www.pygal.org/en/latest/index.html#) 23 | - [Bokeh](https://docs.bokeh.org/en/latest/index.html) 24 | - [OpenCV](https://opencv.org/) 25 | - [psutil](https://psutil.readthedocs.io/en/latest/) 26 | - [diffprivlib](https://github.com/IBM/differential-privacy-library) 27 | 28 | In general, you will need a Python 3 environment and to pip install the various packages 29 | before running an example (a minimal REPL sketch of the shared interop pattern appears at the end of this README). 30 | 31 | ### Nextjournal Posts 32 | 33 | - [Pyplot](https://nextjournal.com/kommen/parens-for-polyglot) 34 | - [GPT2 & MXNet](https://nextjournal.com/kommen/gigasquids-libpython-clj-examples) 35 | - [Seaborn Visualizations](https://nextjournal.com/gigasquid/parens-for-python---seaborn-visualizations) 36 | - [UMAP and Trimap](https://nextjournal.com/gigasquid/parens-for-python---umap-trimap) 37 | - [Network Analysis with IGraph and Leiden](https://nextjournal.com/gigasquid/parens-for-python---network-analysis-and-visualization) 38 | - [Sci Spacy tutorial](https://nextjournal.com/gigasquid/parens-for-python---sci-spacy) 39 | - [Facebook Prophet](https://nextjournal.com/gigasquid/parens-for-python---predicting-sportsball-ufos) 40 | 41 | ## License 42 | 43 | Copyright © 2020 Carin Meier 44 | 45 | Distributed under the Eclipse Public License either version 1.0 or (at 46 | your option) any later version. 
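
### Quick REPL sketch

The snippet below is a minimal sketch of the interop pattern the namespaces under `src/gigasquid` share, with numpy standing in for any of the libraries listed above. It assumes the default Python discovery works and that `numpy` is installed (`pip3 install numpy`); see `src/gigasquid/_configure.clj` if you need to point libpython-clj at a specific interpreter.

```clojure
(require '[libpython-clj.require :refer [require-python]]
         '[libpython-clj.python :as py :refer [py. py.-]])

;; import a Python module as if it were a Clojure namespace
(require-python '[numpy :as np])

;; Python functions are called like Clojure functions
(def xs (np/linspace 0 (* 2 np/pi) 10))

;; py. calls a method on a Python object, py.- reads an attribute
(py. xs tolist)
(py.- xs shape)
```

Each example namespace applies this same pattern to the library it demonstrates.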
47 | -------------------------------------------------------------------------------- /deps.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src" "resources"] 2 | :mvn/repos {"central" {:url "https://repo1.maven.org/maven2/"} 3 | "clojars" {:url "https://clojars.org/repo"}} 4 | :deps {org.clojure/clojure {:mvn/version "1.10.1"} 5 | clj-python/libpython-clj {:mvn/version "1.37"} 6 | org.clojure/data.csv {:mvn/version "0.1.4"}} 7 | :aliases 8 | {:test {:extra-paths ["test"] 9 | :extra-deps {org.clojure/test.check {:mvn/version "0.10.0"}}} 10 | :runner 11 | {:extra-deps {com.cognitect/test-runner 12 | {:git/url "https://github.com/cognitect-labs/test-runner" 13 | :sha "f7ef16dc3b8332b0d77bc0274578ad5270fbfedd"}} 14 | :main-opts ["-m" "cognitect.test-runner" 15 | "-d" "test"]} 16 | :uberjar {:extra-deps {seancorfield/depstar {:mvn/version "0.5.1"}} 17 | :main-opts ["-m" "hf.depstar.uberjar" "libpython-clj-examples.jar" 18 | "-C" "-m" "gigasquid.libpython-clj-examples"]}}} 19 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | gigasquid 5 | libpython-clj-examples 6 | 0.1.0-SNAPSHOT 7 | gigasquid/libpython-clj-examples 8 | FIXME: my new application. 9 | https://github.com/gigasquid/libpython-clj-examples 10 | 11 | 12 | Eclipse Public License 13 | http://www.eclipse.org/legal/epl-v10.html 14 | 15 | 16 | 17 | 18 | Cmeier 19 | 20 | 21 | 22 | https://github.com/gigasquid/libpython-clj-examples 23 | scm:git:git://github.com/gigasquid/libpython-clj-examples.git 24 | scm:git:ssh://git@github.com/gigasquid/libpython-clj-examples.git 25 | HEAD 26 | 27 | 28 | 29 | org.clojure 30 | clojure 31 | 1.10.1 32 | 33 | 34 | 35 | src 36 | 37 | 38 | 39 | clojars 40 | https://repo.clojars.org/ 41 | 42 | 43 | sonatype 44 | https://oss.sonatype.org/content/repositories/snapshots/ 45 | 46 | 47 | 48 | 49 | clojars 50 | Clojars repository 51 | https://clojars.org/repo 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject gigasquid/libpython-clj-examples "0.1.0" 2 | :description "Example codes that use libpython-clj with various Python libraries" 3 | :url "https://github.com/gigasquid/libpython-clj-examples" 4 | :license {:name "Eclipse Public License - v 1.0" 5 | :url "http://www.eclipse.org/legal/epl-v10.html" 6 | :distribution :repo} 7 | 8 | :jvm-opts ["-Djdk.attach.allowAttachSelf" 9 | "-XX:+UnlockDiagnosticVMOptions" 10 | "-XX:+DebugNonSafepoints"] 11 | :plugins [[lein-tools-deps "0.4.5"]] 12 | :middleware [lein-tools-deps.plugin/resolve-dependencies-with-deps-edn] 13 | :lein-tools-deps/config {:config-files [:project] 14 | :resolve-aliases []} 15 | 16 | :repl-options {:init-ns gigasquid.-configure}) 17 | -------------------------------------------------------------------------------- /resources/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/.keep -------------------------------------------------------------------------------- /resources/cat.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/cat.jpg -------------------------------------------------------------------------------- /resources/generated_godzilla_plot_summaries.md: -------------------------------------------------------------------------------- 1 | 2 | ## The Following Godzilla Plot Summaries Created By GPT2 3 | 4 | ![godzilla](https://upload.wikimedia.org/wikipedia/en/2/29/Godzilla_%2754_design.jpg) 5 | 6 | Prompts from the first sentences of : [https://www.empireonline.com/movies/features/godzilla-movie-guide/](https://www.empireonline.com/movies/features/godzilla-movie-guide/) 7 | 8 | 9 | ### Movie: GODZILLA (1954) 10 | 11 | The disappearance of some fishing boats revives the legend of an ancient sea monster, and heralds the arrival of Godzilla himself for the first time. 12 | 13 | Why? Because the creatures lifeforms have always been talking to each other. And what happens when we begin to learn about the reptiles as well? 14 | 15 | In INTRODUCING THE FISHING SEA, archaeologist Paul Goldman, an expert on extinct reptiles, explains why their existence is so important. He found that only a third of the species of the very elusive fishes that claimed to be the life forms of ancient species disappeared from the oceans of the Great Barrier Reef 16 | 17 | 18 | ### Movie: GODZILLA RAIDS AGAIN (1955) 19 | 20 | Crash-landing on Iwato Island, two pilots stumble upon Godzilla fighting the Stegosaurus-thing Anguirus. 21 | 22 | The fourth finale, Sinosha 3, ends with Abuma and Gero, the scientists, helping Manchin to survive the destruction that will engulf the world. In the fourth book, Manchin and his crew escape from a factory in the Baku prefecture and they discover the battle was won by a deadly robot called the Hawk. For the past 20 years, Manchin has been rescuing the others as they battle with the Hawk, and he was once a Baku fighter. 23 | 24 | 25 | ### Movie: KING KONG VS. GODZILLA (1962) 26 | 27 | Godzilla, frozen at the end of the last film, thaws out, sinks a submarine and, having been prevented from entering Tokyo by a Japanese military officer who was barking orders from his superior's squadron, begins to land at the base of the Yamashita plant. However, a mob of enraged and murderous Japanese troops arrive and begin to storm the base. In the middle of this, a group of K-1 Bear helicopters arrive and fight off the Japanese, who attempt to put the helicopter down. The helicopters take the Hatomi clan and lead them on a savage rampage. The helicopter explodes, killing three of the Hatomi. 28 | 29 | ### Movie: MOTHRA VS. GODZILLA (1964) 30 | 31 | Scientists discover a big egg in some typhoon wreckage, and learn that it comes from Infant Island, where lives the lepidopterous god Mothra on the island. The Lepidoptera (and the Phyllobionidae) are the only known species of monsters as far as modern humans know, and although there are a few of them, they do exist in all oceans of the world. 32 | 33 | 34 | ### Movie: GHIDORAH, THE THREE-HEADED MONSTER (1964) 35 | 36 | Following his debut in his own 1956 film, Rodan emerges from the crater of Mt. Aso, while Godzilla emerges from the deep once again. As Godzilla is the only monster of his kind left in Japan, his father is under attack by Godzilla's forces. The battle between Godzilla and the forces of nature has become so ferocious that only a few survivors survive, while the remaining (namely the garden santa) are bitten and killed by their own juices. 
37 | 38 | The battle ends with Godzilla being sliced open by a spider-like claw. Godzilla is defeated and sent to prison for his crimes, later leaving Earth. 39 | 40 | ### Movie: INVASION OF ASTRO-MONSTER (1965) 41 | 42 | A joint Japanese / American space mission rocks up behind Jupiter at a newly discovered planet the Earth has dubbed 'Planet X' (pictured, Jupiter's rings that were once linked to Earth early in its journey to the star). The newly discovered planet's rings are two times larger than Earth's 43 | 44 | The discovery of this mysterious planet, dubbed Planet X, has laid the foundation for another space mission into the solar system. 45 | 46 | The solar system's giant moon, Europa, is about 100 million miles in diameter and is one of the brightest stars in the solar system. 47 | It was discovered in 2010 by Japanese scientists. 48 | 49 | ### Movie: EBIRAH, HORROR OF THE DEEP (1966) 50 | 51 | Terrorist ne’er-do-wells The Red Bamboo are manufacturing heavy water on a secret island, employing slave labour shipped in from Infant Island, the home of Mothra. Their abilities to create mass quantities and elasticity are just as the first pioneers in the field. The insect is able to grow up to be large, heavy and impressive, yet often selfish and vile. One of the most infamous are the Red Bamboo, perhaps the most infamous of their kind. Those able to harness this arcane power are the most dangerous of all the Red Bamboo. 52 | 53 | ### Movie: MONSTER ISLAND'S DECISIVE BATTLE: GODZILLA'S SON (1967) 54 | 55 | Scientists working on a weather control system are hampered by the Kamacuras, two giant praying mantises. In the center of the face of the mantis hangs a single statue of a human god, while the other's head, which bears three orange eyes, hangs in a circle. 56 | 57 | This is the first time that a group of scientists have been able to study the face of a giant praying mantise,\" said Carpio Bendaño, an ecologist who studies the effects of earthquakes on creatures like the swarm of giant goo, which is the only known creature. 58 | 59 | ### Movie: DESTROY ALL MONSTERS (1968) 60 | 61 | Toho’s twentieth kaiju film was a celebratory affair, intended to mark Godzilla’s final appearance in the early '90s. Although many people were thrilled with the two-day, homecoming, many were disappointed. 62 | 63 | In the mid-'90s, both Japan and North America became obsessed with Godzilla. The animators wanted to convey the fatherly themes of our culture (the Japan of the period)—he was a giant ape, a monster, a man and his kids. The film was in triumph. 64 | 65 | ... 66 | 67 | ### Movie: GODZILLA: KING OF THE MONSTERS (2019) 68 | The first direct sequel to the 2014 Godzilla once again sees Monarch-connected humans standing by ineffectually as Gojira protects the world from a new era of monster gods. 69 | 70 | Gojira, who is Catholic, took the name Godzilla in part to honor his papal sainthood. Her character was named after Queen Elizabeth II, the monarch who was also Catholic. 71 | 72 | The sequel, which will be released worldwide on September 20th, is slated to be the sequel to the 2013 A Godzilla film, which directed by Tetsuya Nomura. 
73 | 74 | 75 | -------------------------------------------------------------------------------- /resources/generated_godzilla_plot_summaries.txt: -------------------------------------------------------------------------------- 1 | The Following Godzilla Plot Summaries have been Created By GPT2 2 | 3 | Prompts from the first sentences of :https://www.empireonline.com/movies/features/godzilla-movie-guide/ 4 | 5 | Movie: GODZILLA (1954) 6 | 7 | The disappearance of some fishing boats revives the legend of an ancient sea monster, and heralds the arrival of Godzilla himself for the first time. 8 | 9 | Why? Because the creatures lifeforms have always been talking to each other. And what happens when we begin to learn about the reptiles as well? 10 | 11 | In INTRODUCING THE FISHING SEA, archaeologist Paul Goldman, an expert on extinct reptiles, explains why their existence is so important. He found that only a third of the species of the very elusive fishes that claimed to be the life forms of ancient species disappeared from the oceans of the Great Barrier Reef 12 | 13 | 14 | Movie: GODZILLA RAIDS AGAIN (1955) 15 | 16 | Crash-landing on Iwato Island, two pilots stumble upon Godzilla fighting the Stegosaurus-thing Anguirus. 17 | 18 | The fourth finale, Sinosha 3, ends with Abuma and Gero, the scientists, helping Manchin to survive the destruction that will engulf the world. In the fourth book, Manchin and his crew escape from a factory in the Baku prefecture and they discover the battle was won by a deadly robot called the Hawk. For the past 20 years, Manchin has been rescuing the others as they battle with the Hawk, and he was once a Baku fighter. 19 | 20 | 21 | Movie: KING KONG VS. GODZILLA (1962) 22 | 23 | Godzilla, frozen at the end of the last film, thaws out, sinks a submarine and, having been prevented from entering Tokyo by a Japanese military officer who was barking orders from his superior's squadron, begins to land at the base of the Yamashita plant. However, a mob of enraged and murderous Japanese troops arrive and begin to storm the base. In the middle of this, a group of K-1 Bear helicopters arrive and fight off the Japanese, who attempt to put the helicopter down. The helicopters take the Hatomi clan and lead them on a savage rampage. The helicopter explodes, killing three of the Hatomi. 24 | 25 | Movie: MOTHRA VS. GODZILLA (1964) 26 | 27 | Scientists discover a big egg in some typhoon wreckage, and learn that it comes from Infant Island, where lives the lepidopterous god Mothra on the island. The Lepidoptera (and the Phyllobionidae) are the only known species of monsters as far as modern humans know, and although there are a few of them, they do exist in all oceans of the world. 28 | 29 | 30 | Movie: GHIDORAH, THE THREE-HEADED MONSTER (1964) 31 | 32 | Following his debut in his own 1956 film, Rodan emerges from the crater of Mt. Aso, while Godzilla emerges from the deep once again. As Godzilla is the only monster of his kind left in Japan, his father is under attack by Godzilla's forces. The battle between Godzilla and the forces of nature has become so ferocious that only a few survivors survive, while the remaining (namely the garden santa) are bitten and killed by their own juices. 33 | 34 | The battle ends with Godzilla being sliced open by a spider-like claw. Godzilla is defeated and sent to prison for his crimes, later leaving Earth. 
35 | 36 | Movie: INVASION OF ASTRO-MONSTER (1965) 37 | 38 | A joint Japanese / American space mission rocks up behind Jupiter at a newly discovered planet the Earth has dubbed 'Planet X' (pictured, Jupiter's rings that were once linked to Earth early in its journey to the star). The newly discovered planet's rings are two times larger than Earth's 39 | 40 | The discovery of this mysterious planet, dubbed Planet X, has laid the foundation for another space mission into the solar system. 41 | 42 | The solar system's giant moon, Europa, is about 100 million miles in diameter and is one of the brightest stars in the solar system. 43 | It was discovered in 2010 by Japanese scientists. 44 | 45 | Movie: EBIRAH, HORROR OF THE DEEP (1966) 46 | 47 | Terrorist ne’er-do-wells The Red Bamboo are manufacturing heavy water on a secret island, employing slave labour shipped in from Infant Island, the home of Mothra. Their abilities to create mass quantities and elasticity are just as the first pioneers in the field. The insect is able to grow up to be large, heavy and impressive, yet often selfish and vile. One of the most infamous are the Red Bamboo, perhaps the most infamous of their kind. Those able to harness this arcane power are the most dangerous of all the Red Bamboo. 48 | 49 | Movie: MONSTER ISLAND'S DECISIVE BATTLE: GODZILLA'S SON (1967) 50 | 51 | Scientists working on a weather control system are hampered by the Kamacuras, two giant praying mantises. In the center of the face of the mantis hangs a single statue of a human god, while the other's head, which bears three orange eyes, hangs in a circle. 52 | 53 | This is the first time that a group of scientists have been able to study the face of a giant praying mantise,\" said Carpio Bendaño, an ecologist who studies the effects of earthquakes on creatures like the swarm of giant goo, which is the only known creature. 54 | 55 | Movie: DESTROY ALL MONSTERS (1968) 56 | 57 | Toho’s twentieth kaiju film was a celebratory affair, intended to mark Godzilla’s final appearance in the early '90s. Although many people were thrilled with the two-day, homecoming, many were disappointed. 58 | 59 | In the mid-'90s, both Japan and North America became obsessed with Godzilla. The animators wanted to convey the fatherly themes of our culture (the Japan of the period)—he was a giant ape, a monster, a man and his kids. The film was in triumph. 60 | 61 | ... 62 | 63 | Movie: GODZILLA: KING OF THE MONSTERS (2019) 64 | "The first direct sequel to the 2014 Godzilla once again sees Monarch-connected humans standing by ineffectually as Gojira protects the world from a new era of monster gods. 65 | 66 | Gojira, who is Catholic, took the name Godzilla in part to honor his papal sainthood. Her character was named after Queen Elizabeth II, the monarch who was also Catholic. 67 | 68 | The sequel, which will be released worldwide on September 20th, is slated to be the sequel to the 2013 A Godzilla film, which directed by Tetsuya Nomura. 
69 | 70 | 71 | -------------------------------------------------------------------------------- /resources/opencv-gray-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv-gray-logo.png -------------------------------------------------------------------------------- /resources/opencv/cat-cartoonize-color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/cat-cartoonize-color.png -------------------------------------------------------------------------------- /resources/opencv/cat-cartoonize-gray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/cat-cartoonize-gray.png -------------------------------------------------------------------------------- /resources/opencv/cat-sketch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/cat-sketch.png -------------------------------------------------------------------------------- /resources/opencv/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/cat.jpg -------------------------------------------------------------------------------- /resources/opencv/lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/lenna.png -------------------------------------------------------------------------------- /resources/opencv/opencv-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/opencv-logo.png -------------------------------------------------------------------------------- /resources/pytorch/data/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/pytorch/data/.keep -------------------------------------------------------------------------------- /resources/pytorch/models/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/pytorch/models/.keep -------------------------------------------------------------------------------- /resources/ufosightings-since-2010.csv: -------------------------------------------------------------------------------- 1 | ds,y 2 | 2010-01-01,296 3 | 2010-02-01,194 4 | 2010-03-01,265 5 | 2010-04-01,296 6 | 2010-05-01,334 7 | 2010-06-01,392 8 | 2010-07-01,859 9 | 2010-08-01,538 10 | 2010-09-01,457 11 | 2010-10-01,480 12 | 2010-11-01,369 13 | 2010-12-01,310 14 | 2011-01-01,332 15 | 2011-02-01,278 16 | 2011-03-01,336 17 | 
2011-04-01,319 18 | 2011-05-01,323 19 | 2011-06-01,418 20 | 2011-07-01,780 21 | 2011-08-01,647 22 | 2011-09-01,562 23 | 2011-10-01,647 24 | 2011-11-01,451 25 | 2011-12-01,539 26 | 2012-01-01,589 27 | 2012-02-01,399 28 | 2012-03-01,540 29 | 2012-04-01,506 30 | 2012-05-01,526 31 | 2012-06-01,775 32 | 2012-07-01,955 33 | 2012-08-01,911 34 | 2012-09-01,784 35 | 2012-10-01,684 36 | 2012-11-01,783 37 | 2012-12-01,680 38 | 2013-01-01,401 39 | 2013-02-01,285 40 | 2013-03-01,401 41 | 2013-04-01,431 42 | 2013-05-01,542 43 | 2013-06-01,656 44 | 2013-07-01,990 45 | 2013-08-01,928 46 | 2013-09-01,799 47 | 2013-10-01,802 48 | 2013-11-01,814 49 | 2013-12-01,774 50 | 2014-01-01,719 51 | 2014-02-01,560 52 | 2014-03-01,526 53 | 2014-04-01,673 54 | 2014-05-01,661 55 | 2014-06-01,791 56 | 2014-07-01,1116 57 | 2014-08-01,937 58 | 2014-09-01,842 59 | 2014-10-01,802 60 | 2014-11-01,551 61 | 2014-12-01,531 62 | 2015-01-01,542 63 | 2015-02-01,347 64 | 2015-03-01,483 65 | 2015-04-01,463 66 | 2015-05-01,468 67 | 2015-06-01,494 68 | 2015-07-01,745 69 | 2015-08-01,630 70 | 2015-09-01,735 71 | 2015-10-01,664 72 | 2015-11-01,906 73 | 2015-12-01,440 74 | 2016-01-01,423 75 | 2016-02-01,436 76 | 2016-03-01,414 77 | 2016-04-01,390 78 | 2016-05-01,388 79 | 2016-06-01,437 80 | 2016-07-01,697 81 | 2016-08-01,560 82 | 2016-09-01,596 83 | 2016-10-01,493 84 | 2016-11-01,498 85 | 2016-12-01,357 86 | 2017-01-01,339 87 | 2017-02-01,422 88 | 2017-03-01,349 89 | 2017-04-01,417 90 | 2017-05-01,365 91 | 2017-06-01,423 92 | 2017-07-01,517 93 | 2017-08-01,414 94 | 2017-09-01,453 95 | 2017-10-01,489 96 | 2017-11-01,358 97 | 2017-12-01,509 98 | 2018-01-01,315 99 | 2018-02-01,241 100 | 2018-03-01,238 101 | 2018-04-01,234 102 | 2018-05-01,251 103 | 2018-06-01,238 104 | 2018-07-01,427 105 | 2018-08-01,371 106 | 2018-09-01,310 107 | 2018-10-01,278 108 | 2018-11-01,252 109 | 2018-12-01,245 110 | 2019-01-01,342 111 | 2019-02-01,213 112 | 2019-03-01,325 113 | 2019-04-01,385 114 | 2019-05-01,540 115 | 2019-06-01,473 116 | 2019-07-01,597 117 | 2019-08-01,469 118 | 2019-09-01,649 119 | 2019-10-01,719 120 | 2019-11-01,695 121 | 2019-12-01,753 122 | 2020-01-01,572 123 | 2020-02-01,153 -------------------------------------------------------------------------------- /src/gigasquid/_configure.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.-configure 2 | (:require 3 | [libpython-clj.python :as py])) 4 | 5 | ; local install 6 | ; (py/initialize! :python-executable "/usr/bin/python3.8" 7 | ; :library-path "/usr/lib/libpython3.8.so.1.0") 8 | 9 | ; virtualenv @ "env" directory 10 | ; (py/initialize! :python-executable "env/bin/python3.8" 11 | ; :library-path "/usr/lib/libpython3.so") 12 | -------------------------------------------------------------------------------- /src/gigasquid/bokeh/README.md: -------------------------------------------------------------------------------- 1 | # Bokeh 2 | 3 | Bokeh is an interactive visualization library for modern web browsers. 4 | It provides elegant, concise construction of versatile graphics, and affords high-performance interactivity over large or streaming datasets. Bokeh can help anyone who would like to quickly and easily make interactive plots, dashboards, and data applications. 
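
As a taste of what that looks like from Clojure, here is a minimal sketch mirroring Bokeh's quick-start line plot (the same call appears in `core.clj` alongside this README); it assumes `bokeh` is already installed:

```clojure
(require '[libpython-clj.require :refer [require-python]]
         '[libpython-clj.python :as py :refer [py.]])

(require-python '[bokeh.plotting :as bkp])

(let [p (bkp/figure :title "Simple line example"
                    :x_axis_label "x"
                    :y_axis_label "y")]
  ;; draw a line glyph and open the plot in the default browser
  (py. p line [1 2 3 4 5] [6 7 2 4 5] :legend "Temp." :line_width 2)
  (bkp/show p))
```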
5 | 6 | - Github - [bokeh/bokeh](https://github.com/bokeh/bokeh) 7 | - Official [documentation](https://docs.bokeh.org/en/latest/) 8 | 9 | ## Initial Examples 10 | 11 | Are based on the following examples 12 | 13 | - [Quick Start Guide](https://docs.bokeh.org/en/latest/docs/user_guide/quickstart.html#userguide-quickstart) 14 | - [MultiPolygons](https://github.com/bokeh/bokeh/blob/1.4.0/examples/plotting/notebook/MultiPolygons.ipynb) 15 | - [A line with 10k points to show off the WebGL line implementation](https://github.com/bokeh/bokeh/blob/1.4.0/examples/webgl/line10k.py) 16 | 17 | ## Basic installation 18 | 19 | - Python 20 | 21 | ```shell 22 | pip3 install bokeh 23 | pip3 install numpy ## used in the 3rd example above 24 | ``` 25 | 26 | - Clojure 27 | 28 | Just run your favorite `cider-jack-in` if you are on Emacs. 29 | For other editors, you will do the equivalent command for your editor. 30 | -------------------------------------------------------------------------------- /src/gigasquid/bokeh/core.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.bokeh.core 2 | (:require [libpython-clj.require 3 | :refer [require-python]] 4 | [libpython-clj.python 5 | :as py 6 | :refer [py. 7 | py.. 8 | py.- 9 | att-type-map 10 | ->python 11 | ->jvm 12 | ]] 13 | [clojure.java.shell :as sh] 14 | [clojure.pprint :refer [pprint]]) 15 | (:import [java.io File])) 16 | 17 | ;;; Python installation 18 | ;;; pip3 install bokeh 19 | 20 | (comment 21 | (require-python '[sklearn.svm :as svm]) 22 | (require-python '[numpy :as np]) 23 | #_ (require-python '[pandas :as pd]) 24 | ) 25 | 26 | ;; First require the basic package 27 | (require-python '[bokeh.plotting :as bkp]) 28 | 29 | (comment 30 | 31 | ;; Quick getting start guide 32 | ;; https://docs.bokeh.org/en/latest/docs/user_guide/quickstart.html#getting-started 33 | (py/from-import bokeh.plotting figure output_file show curdoc) 34 | 35 | (let [x [1 2 3 4 5] 36 | y [6 7 2 4 5] 37 | p (bkp/figure :title "Simple line example" 38 | :x_axis_label "x" 39 | :y_axis_label "y")] 40 | (py. p line x y :legend "Temp." :line_width 2) 41 | (bkp/show p)) 42 | 43 | ;; More plotting example 44 | (let [x [0.1, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0] 45 | y0 (into [] (map (fn [i] (Math/pow i 2)) x)) 46 | y1 (into [] (map (fn [i] (Math/pow 10 i)) x)) 47 | y2 (into [] (map (fn [i] (Math/pow 10 (Math/pow i 2))) x)) 48 | p (bkp/figure :tools "pan,box_zoom,reset,save" 49 | :y_axis_type "log" 50 | :y_range [0.001 (Math/pow 10 11)] 51 | :title "log axis example" 52 | :x_axis_label "sections" 53 | :y_axis_label "particles")] 54 | (py. p line x x 55 | :legend "y=x") 56 | 57 | (py. p circle x x 58 | :legend "y=x" 59 | :fill_color "white" 60 | :size 8) 61 | 62 | (py. p line x y0 63 | :legend "y=x^2" 64 | :line_width 3) 65 | 66 | (py. p line x y1 67 | :legend "y=10^x" 68 | :line_color "red") 69 | 70 | (py. p circle x y1 71 | :legend "y=10^x" 72 | :fill_color "red" 73 | :line_color "red" 74 | :size 6) 75 | 76 | (py. p line x y2 77 | :legend "y=10^x^2" 78 | :line_color "orange" 79 | :line_dash "4 4") 80 | 81 | (bkp/show p)) 82 | 83 | ) 84 | 85 | (comment 86 | ;; More example 87 | (let [p (bkp/figure 88 | :plot_width 300 89 | :plot_height 300 90 | :tools "pan,reset,save")] 91 | (py. 
p 92 | circle 93 | [1 2.5 3 2] 94 | [2 3 1 1.6] 95 | :radius 0.3 96 | :alpha 0.5) 97 | (bkp/show p)) 98 | 99 | ) 100 | 101 | 102 | ;; Providing Data 103 | ;; https://docs.bokeh.org/en/latest/docs/user_guide/data.html 104 | 105 | (comment 106 | (require-python '[bokeh.plotting :as bkp]) ;;=> :ok 107 | (require-python '[bokeh.models :as bkm]) ;;=> :ok 108 | 109 | (let [data {:x_values [1 2 3 4 5] 110 | :y_values [6 7 2 3 6]} 111 | source (bkm/ColumnDataSource :data data) 112 | p (bkp/figure)] 113 | (py. p circle 114 | :x "x_values" 115 | :y "y_values" 116 | :source source) 117 | (bkp/show p)) 118 | 119 | ) 120 | -------------------------------------------------------------------------------- /src/gigasquid/bokeh/line10k.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.bokeh.line10k 2 | (:require [libpython-clj.require 3 | :refer [require-python]] 4 | [libpython-clj.python 5 | :as py 6 | :refer [py. 7 | py.. 8 | py.- 9 | att-type-map 10 | ->python 11 | ->jvm 12 | ]] 13 | [clojure.java.shell :as sh] 14 | [clojure.pprint :refer [pprint]]) 15 | (:import [java.io File])) 16 | 17 | ;;; Python installation 18 | ;;; pip3 install bokeh 19 | ;;; pip3 install numpy 20 | 21 | ;; Based on: https://github.com/bokeh/bokeh/blob/1.4.0/examples/webgl/line10k.py 22 | 23 | ;; First require the basic package 24 | (py/from-import bokeh.plotting figure output_file show curdoc) 25 | (require-python '[bokeh.plotting :as bkp]) 26 | (require-python '[numpy :as np]) 27 | (require-python '[numpy.random :as np-random]) 28 | (require-python '[builtins :as python]) 29 | 30 | ;; Note: 31 | #_(= np/pi Math/PI) ;;=> true 32 | 33 | (let [N 10000 34 | x (np/linspace 0 (* 10 np/pi) N) 35 | y (np/add 36 | (np/cos x) 37 | (np/sin (np/add (np/multiply 2 x) 1.25)) 38 | (np-random/normal 0 0.001 (python/tuple [N]))) 39 | p (bkp/figure :title "A line consisting of 10k points" 40 | :output_backend "webgl")] 41 | 42 | (py. p line x y :color "#22aa22" :line_width 3) 43 | ;; Tips: 44 | ;; To save the output to a file you can use the next line 45 | #_(output_file "line10.html" :title "line10k example") 46 | 47 | ;; Or simply show it immediately via your browser 48 | (bkp/show p)) 49 | -------------------------------------------------------------------------------- /src/gigasquid/bokeh/multi_polygons.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.bokeh.multi-polygons 2 | (:require [libpython-clj.require 3 | :refer [require-python]] 4 | [libpython-clj.python 5 | :as py 6 | :refer [py. 7 | py.. 8 | py.- 9 | att-type-map 10 | ->python 11 | ->jvm 12 | ]] 13 | [clojure.java.shell :as sh] 14 | [clojure.pprint :refer [pprint]]) 15 | (:import [java.io File])) 16 | 17 | ;;; Python installation 18 | ;;; pip3 install bokeh 19 | 20 | ;; First require the basic package 21 | (require-python '[bokeh.plotting :as bkp]) 22 | 23 | (comment 24 | ;; https://github.com/bokeh/bokeh/blob/1.4.0/examples/plotting/notebook/MultiPolygons.ipynb 25 | 26 | (py/from-import bokeh.plotting figure output_file show curdoc) 27 | 28 | (let [p (bkp/figure :title "Polygons with no holes" 29 | :plot_width 300 30 | :plot_height 300 31 | :tools "hover,tap,wheel_zoom,pan,reset,help")] 32 | (py. 
p multi_polygons 33 | :xs [[[[1, 2, 2, 1, 1]]]] 34 | :ys [[[[3, 3, 4, 4, 3]]]]) 35 | (bkp/show p)) 36 | 37 | ;; Polygons with holes 38 | (let [p (bkp/figure :title "Polygons with holes" 39 | :plot_width 300 40 | :plot_height 300 41 | :tools "hover,tap,wheel_zoom,pan,reset,help")] 42 | (py. p multi_polygons 43 | :xs [[[[1, 2, 2, 1], [1.2, 1.6, 1.6], [1.8, 1.8, 1.6]]]] 44 | :ys [[[[3, 3, 4, 4], [3.2, 3.6, 3.2], [3.4, 3.8, 3.8]]]]) 45 | (bkp/show p)) 46 | 47 | ) 48 | 49 | (comment 50 | ;; https://github.com/bokeh/bokeh/blob/1.4.0/examples/plotting/notebook/MultiPolygons.ipynb 51 | ;; Now we'll examine a MultiPolygon. 52 | ;; A MultiPolygon is composed of different parts each of which is a Polygon and each of which can have or not have holes. 53 | 54 | ;; In python: 55 | ;; p = figure(plot_width=300, plot_height=300, tools='hover,tap,wheel_zoom,pan,reset,help') 56 | ;; p.multi_polygons(xs=[[[ [1, 1, 2, 2], [1.2, 1.6, 1.6], [1.8, 1.8, 1.6] ], [ [3, 4, 3] ]]], 57 | ;; ys=[[[ [4, 3, 3, 4], [3.2, 3.2, 3.6], [3.4, 3.8, 3.8] ], [ [1, 1, 3] ]]]) 58 | ;; show(p) 59 | 60 | (let [p (bkp/figure 61 | :title "Multi-Polygons 1" 62 | :plot_width 300 63 | :plot_height 300 64 | :tools "hover,tap,wheel_zoom,pan,reset,help") 65 | xs [[[[1, 1, 2, 2] [1.2, 1.6, 1.6] [1.8, 1.8, 1.6]] [[3, 4, 3]]]] 66 | ys [[[[4, 3, 3, 4] [3.2, 3.2, 3.6] [3.4, 3.8, 3.8]] [[1, 1, 3]]]]] 67 | (py. p multi_polygons :xs xs :ys ys) 68 | (bkp/show p)) 69 | 70 | ;; More example 71 | (let [p (bkp/figure 72 | :title "Multi-Polygons 2" 73 | :plot_width 300 74 | :plot_height 300 75 | :tools "hover,tap,wheel_zoom,pan,reset,help") 76 | xs [[[[1 1 2 2] [1.2 1.6 1.6] [1.8 1.8 1.6]] [[3 3 4]]] 77 | [[[1 2 2 1] [1.3 1.3 1.7 1.7]]]] 78 | ys [[[[4 3 3 4] [3.2 3.2 3.6] [3.4 3.8 3.8]] [[1 3 1]]], 79 | [[[1 1 2 2] [1.3 1.7 1.7 1.3]]]]] 80 | (py. p multi_polygons :xs xs :ys ys) 81 | (bkp/show p)) 82 | 83 | 84 | ;; ===================================== ;; 85 | ;; Using multi-polygons glyph directly 86 | ;; TODO: revisit this code 87 | #_ 88 | (comment 89 | (py/from-import bokeh.models ColumnDataSource Plot LinearAxis Grid) 90 | (py/from-import bokeh.models.glyphs MultiPolygons) 91 | (py/from-import bokeh.models.tools TapTool WheelZoomTool ResetTool HoverTool) 92 | (py/from-import bokeh.plotting figure output_file show curdoc) 93 | 94 | ;; Note: for this we need to use dict 95 | (require-python '[builtins :as python]) 96 | 97 | (let [source (ColumnDataSource(python/dict 98 | :xs [[[[1, 1, 2, 2] 99 | [1.2, 1.6, 1.6] 100 | [1.8, 1.8, 1.6]] 101 | [[3, 3, 4]]] 102 | [[[1, 2, 2, 1] 103 | [1.3, 1.3, 1.7, 1.7]]]] 104 | :ys [[[[4, 3, 3, 4] 105 | [3.2, 3.2, 3.6] 106 | [3.4, 3.8, 3.8]] 107 | [[1, 3, 1]]] 108 | [[[1, 1, 2, 2] 109 | [1.3, 1.7, 1.7, 1.3]]]] 110 | :color ["blue" "red"], 111 | :label ["A" "B"])) 112 | plot (Plot :plot_width 300 113 | :plot_height 300 114 | ;;:tools [(HoverTool) (TapTool) (WheelZoomTool)] 115 | ) 116 | glyph (py. MultiPolygons :xs "xs" :ys "ys" :fill_color "color")] 117 | plot 118 | #_(py. plot add_glyph source glyph) 119 | ) 120 | ) 121 | ;; ===================================== ;; 122 | 123 | ;; TODO: 124 | ;; Using numpy array with MultiPolygons 125 | -------------------------------------------------------------------------------- /src/gigasquid/diffprivlib.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.diffprivlib 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. 
py.-]] 4 | [gigasquid.plot :as plot])) 5 | 6 | ;;; From https://github.com/IBM/differential-privacy-library 7 | 8 | ;;; Install: pip install diffprivlib 9 | 10 | (require-python '[sklearn.datasets :as datasets]) 11 | (require-python '[sklearn.model_selection :as model-selection]) 12 | (require-python '[matplotlib.pyplot :as pyplot]) 13 | (require-python '[numpy :as np]) 14 | (require-python '[diffprivlib.models :as models]) 15 | (require-python '[sklearn.metrics :as metrics]) 16 | (require-python '[builtins :as python]) 17 | 18 | ;;; Using the iris dataset - load with 80/20 split 19 | 20 | (def dataset (datasets/load_iris)) 21 | (def iris-data (let [[X-train X-test y-train y-test] 22 | (model-selection/train_test_split (py.- dataset data) 23 | (py.- dataset target) 24 | :test_size 0.2)] 25 | {:X-train X-train :X-test X-test 26 | :y-train y-train :y-test y-test})) 27 | 28 | ;; Now, let's train a differentially private naive Bayes classifier. Our classifier runs just like an sklearn classifier, so you can get up and running quickly. 29 | 30 | ;; diffprivlib.models.GaussianNB can be run without any parameters, although this will throw a warning (we need to specify the bounds parameter to avoid this). The privacy level is controlled by the parameter epsilon, which is passed to the classifier at initialisation (e.g. GaussianNB(epsilon=0.1)). The default is epsilon = 1.0. 31 | 32 | (def clf (models/GaussianNB)) 33 | (py. clf fit (:X-train iris-data) (:y-train iris-data)) 34 | 35 | ;; We can now classify unseen examples, knowing that the trained model is differentially private and preserves the privacy of the 'individuals' in the training set (flowers are entitled to their privacy too!). 36 | 37 | (py. clf predict (:X-test iris-data)) 38 | 39 | ;;=> [1 0 1 1 1 2 1 0 2 2 2 2 1 0 0 2 1 0 1 0 0 1 0 1 2 2 0 2 1 1] 40 | 41 | ;;We can easily evaluate the accuracy of the model for various epsilon values and plot it with matplotlib. 42 | 43 | (def epsilons (np/logspace -2 2 50)) 44 | (def bounds (python/list [(python/tuple [4.3 7.9]) (python/tuple [2.0 4.4]) 45 | (python/tuple [1.1 6.9]) (python/tuple [0.1 2.5])])) 46 | 47 | (def accuracy (mapv (fn [epsilon] 48 | (let [clf (models/GaussianNB :bounds bounds :epsilon epsilon) 49 | _ (py. clf fit (:X-train iris-data) (:y-train iris-data)) 50 | predictions (->> (:X-test iris-data) 51 | (py. clf predict))] 52 | (metrics/accuracy_score(:y-test iris-data) predictions))) 53 | epsilons)) 54 | 55 | accuracy 56 | ;;=> [0.3333333333333333 0.36666666666666664 0.36666666666666664 0.36666666666666664 0.36666666666666664 0.2 0.3333333333333333 0.3 0.3333333333333333 0.3333333333333333 0.3 0.3 0.6 0.5666666666666667 0.2 0.7 0.6 0.1 0.6666666666666666 0.9 0.6666666666666666 0.6666666666666666 1.0 0.6 0.8 0.7666666666666667 0.8666666666666667 0.8333333333333334 0.9333333333333333 0.8666666666666667 0.9 1.0 0.9333333333333333 0.9333333333333333 0.9 0.9333333333333333 0.8333333333333334 1.0 0.8 0.8 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0] 57 | 58 | (plot/with-show-one 59 | (pyplot/semilogx epsilons accuracy) 60 | (pyplot/title "Differentially private Naive Bayes accuracy") 61 | (pyplot/xlabel "epsilon") 62 | (pyplot/ylabel "Accuracy")) 63 | -------------------------------------------------------------------------------- /src/gigasquid/facebook_prophet.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.facebook-prophet 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. 
py.. py.-]] 4 | [gigasquid.plot :as plot])) 5 | 6 | ;;; sudo pip3 install fbprophet 7 | ;;; sudo pip3 install holidays==0.9.12 8 | 9 | ;;; tutorial https://facebook.github.io/prophet/docs/quick_start.html#python-api 10 | 11 | (require-python '[pandas :as pd]) 12 | (require-python '[fbprophet :as fbprophet]) 13 | (require-python '[matplotlib.pyplot :as pyplot]) 14 | 15 | ;; The input to Prophet is always a dataframe with two columns: ds and y. The ds (datestamp) column should be of a format expected by Pandas, ideally YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp. The y column must be numeric, and represents the measurement we wish to forecast. 16 | 17 | ;; As an example, let’s look at a time series of the log daily page views for the Wikipedia page for Peyton Manning. We scraped this data using the Wikipediatrend package in R. Peyton Manning provides a nice example because it illustrates some of Prophet’s features, like multiple seasonality, changing growth rates, and the ability to model special days (such as Manning’s playoff and superbowl appearances). The CSV is available here. 18 | 19 | (def csv-file (slurp "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv")) 20 | (spit "manning.csv" csv-file) 21 | (def df (pd/read_csv "manning.csv")) 22 | (py.- df head) 23 | ;; 37 | 38 | ;; We fit the model by instantiating a new Prophet object. Any settings to the forecasting procedure are passed into the constructor. Then you call its fit method and pass in the historical dataframe. Fitting should take 1-5 seconds. 39 | 40 | (def m (fbprophet/Prophet)) 41 | (py. m fit df) 42 | 43 | ;; Predictions are then made on a dataframe with a column ds containing the dates for which a prediction is to be made. You can get a suitable dataframe that extends into the future a specified number of days using the helper method Prophet.make_future_dataframe. By default it will also include the dates from the history, so we will see the model fit as well. 44 | 45 | (def future (py. m make_future_dataframe :periods 365)) 46 | (py.- future tail) 47 | ;; 61 | 62 | ;; The predict method will assign each row in future a predicted value which it names yhat. If you pass in historical dates, it will provide an in-sample fit. The forecast object here is a new dataframe that includes a column yhat with the forecast, as well as columns for components and uncertainty intervals. 63 | 64 | (def forecast (py. m predict future)) 65 | (py.- forecast yhat_lower) 66 | (py/att-type-map forecast) 67 | (def vals (py. forecast __array__ ["ds" "yhat" "yhat_lower" "yhat_upper"])) 68 | (py/python-type vals) ;=> :ndarray 69 | ;; [[Timestamp('2007-12-10 00:00:00') 8.041238819642132 8.219483670063799 70 | ;; ... 0.0 0.0 8.844169826770502] 71 | ;; [Timestamp('2007-12-11 00:00:00') 8.039694770587365 8.037913913183381 72 | ;; ... 0.0 0.0 8.592697395711903] 73 | ;; [Timestamp('2007-12-12 00:00:00') 8.038150721532599 7.768551313613439 74 | ;; ... 0.0 0.0 8.388514099061501] 75 | ;; ... 76 | ;; [Timestamp('2017-01-17 00:00:00') 7.186504354691647 7.597836987450301 77 | ;; ... 0.0 0.0 8.318929898087168] 78 | ;; [Timestamp('2017-01-18 00:00:00') 7.18547676307155 7.496134175329733 ... 79 | ;; 0.0 0.0 8.151543221567003] 80 | ;; [Timestamp('2017-01-19 00:00:00') 7.184449171451455 7.447042010204286 81 | ;; ... 0.0 0.0 8.163477149645047]] 82 | 83 | 84 | (plot/with-show 85 | (py. m plot forecast)) 86 | 87 | (plot/with-show 88 | (py. 
m plot_components forecast)) 89 | 90 | 91 | ;;;; We can also do monthly data as well - Let's take an example of UFOsightings 92 | ;;; From 2010 to the present from http://www.nuforc.org/webreports/ndxevent.html 93 | 94 | (def df (pd/read_csv "resources/ufosightings-since-2010.csv")) 95 | (def m (fbprophet/Prophet :seasonality_mode "multiplicative")) ;;; Let's factor in some holiday effects 96 | (py. m fit df) 97 | (def future (py. m make_future_dataframe :periods 48 :freq "M")) ;;; note Monthly prediction 98 | (def forecast (py. m predict future)) 99 | 100 | (plot/with-show 101 | (py. m plot forecast)) 102 | 103 | (plot/with-show 104 | (py. m plot_components forecast)) 105 | -------------------------------------------------------------------------------- /src/gigasquid/gpt2.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.gpt2 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]])) 4 | 5 | ;;; sudo pip3 install torch 6 | ;;; sudo pip3 install transformers 7 | 8 | ;https://huggingface.co/transformers/quickstart.html - OpenAI GPT-2 9 | 10 | (require-python '[transformers :as transformers]) 11 | (require-python '[torch :as torch]) 12 | 13 | 14 | ;;; Load pre-trained model tokenizer (vocabulary) 15 | 16 | (def tokenizer (py. transformers/GPT2Tokenizer "from_pretrained" "gpt2")) 17 | (def text "Who was Jim Henson ? Jim Henson was a") 18 | ;; encode text input 19 | (def indexed-tokens (py. tokenizer encode text)) 20 | indexed-tokens ;=>[8241, 373, 5395, 367, 19069, 5633, 5395, 367, 19069, 373, 257] 21 | 22 | ;; convert indexed tokens to pytorch tensor 23 | (def tokens-tensor (torch/tensor [indexed-tokens])) 24 | tokens-tensor 25 | ;; ([[ 8241, 373, 5395, 367, 19069, 5633, 5395, 367, 19069, 373, 26 | ;; 257]]) 27 | 28 | ;;; Load pre-trained model (weights) 29 | ;;; Note: this will take a few minutes to download everything 30 | (def model (py. transformers/GPT2LMHeadModel from_pretrained "gpt2")) 31 | 32 | ;;; Set the model in evaluation mode to deactivate the DropOut modules 33 | ;;; This is IMPORTANT to have reproducible results during evaluation! 34 | (py. model eval) 35 | 36 | 37 | ;;; Predict all tokens 38 | (def predictions (py/with [r (torch/no_grad)] 39 | (first (model tokens-tensor)))) 40 | 41 | ;;; get the predicted next sub-word" 42 | (def predicted-index (let [last-word-predictions (-> predictions first last) 43 | arg-max (torch/argmax last-word-predictions)] 44 | (py. arg-max item))) 45 | 46 | predicted-index ;=>582 47 | 48 | (py. tokenizer decode (-> (into [] indexed-tokens) 49 | (conj predicted-index))) 50 | 51 | ;=> "Who was Jim Henson? Jim Henson was a man" 52 | 53 | 54 | ;;;; =========================== 55 | 56 | ;; GPT-2 as well as some other models (GPT, XLNet, Transfo-XL, CTRL) make use of a past or mems attribute which can be used to prevent re-computing the key/value pairs when using sequential decoding. It is useful when generating sequences as a big part of the attention mechanism benefits from previous computations. 57 | 58 | ;; Here is a fully-working example using the past with GPT2LMHeadModel and argmax decoding (which should only be used as an example, as argmax decoding introduces a lot of repetition): 59 | 60 | (def tokenizer (py. transformers/GPT2Tokenizer from_pretrained "gpt2")) 61 | (def model (py. transformers/GPT2LMHeadModel from_pretrained "gpt2")) 62 | 63 | (def generated (into [] (py. 
tokenizer encode "The Manhattan bridge"))) 64 | (def context (torch/tensor [generated])) 65 | 66 | 67 | (defn generate-sequence-step [{:keys [generated-tokens context past]}] 68 | (let [[output past] (model context :past past) 69 | token (torch/argmax (first output)) 70 | new-generated (conj generated-tokens (py. token tolist))] 71 | {:generated-tokens new-generated 72 | :context (py. token unsqueeze 0) 73 | :past past 74 | :token token})) 75 | 76 | (defn decode-sequence [{:keys [generated-tokens]}] 77 | (py. tokenizer decode generated-tokens)) 78 | 79 | (loop [step {:generated-tokens generated 80 | :context context 81 | :past nil} 82 | i 10] 83 | (if (pos? i) 84 | (recur (generate-sequence-step step) (dec i)) 85 | (decode-sequence step))) 86 | 87 | ;=> "The Manhattan bridge\n\nThe Manhattan bridge is a major artery for" 88 | 89 | 90 | ;;; Let's make a nice function to generate text 91 | 92 | (defn generate-text [starting-text num-of-words-to-predict] 93 | (let [tokens (into [] (py. tokenizer encode starting-text)) 94 | context (torch/tensor [tokens]) 95 | result (reduce 96 | (fn [r i] 97 | (println i) 98 | (generate-sequence-step r)) 99 | 100 | {:generated-tokens tokens 101 | :context context 102 | :past nil} 103 | 104 | (range num-of-words-to-predict))] 105 | (decode-sequence result))) 106 | 107 | (generate-text "Natural language processing tasks are typically approached with" 108 | 100) 109 | 110 | ;=> "Clojure is a dynamic, general purpose programming language, combining the approachability and interactive. It is a language that is easy to learn and use, and is easy to use for anyone" 111 | 112 | 113 | 114 | ;;;;;; Better sequence generating 115 | ;;; With temperature to get rid of repititions 116 | 117 | ;;; from https://github.com/huggingface/transformers/issues/1725 118 | 119 | (require-python 'torch.nn.functional) 120 | 121 | (defn sample-sequence-step [{:keys [generated-tokens context past temp] 122 | :or {temp 0.8}}] 123 | (let [[output past] (py/with [r (torch/no_grad)] 124 | (model context :past past)) 125 | next-token-logits (torch/div (-> output first last) 126 | (if (pos? temp) temp 1)) 127 | token (torch/multinomial 128 | (torch.nn.functional/softmax next-token-logits :dim -1) :num_samples 1) 129 | new-generated (conj generated-tokens (first (py/$a token tolist)))] 130 | {:generated-tokens new-generated 131 | :context (py/$a token unsqueeze 0) 132 | :past past 133 | :token token})) 134 | 135 | (defn generate-text2 [starting-text num-of-words-to-predict temp] 136 | (let [tokens (into [] (py/$a tokenizer encode starting-text)) 137 | context (torch/tensor [tokens]) 138 | result (reduce 139 | (fn [r i] 140 | (println i) 141 | (sample-sequence-step (assoc r :temp temp))) 142 | 143 | {:generated-tokens tokens 144 | :context context 145 | :past nil} 146 | 147 | (range num-of-words-to-predict))] 148 | (decode-sequence result))) 149 | 150 | (generate-text2 "Natural language processing tasks are typically approached with" 151 | 100 152 | 0.8) 153 | 154 | ;>"Natural language processing tasks are typically approached with distress signals and pleasurable stimuli.\n\n7.2.3. Structural networks\n\nStructural networks are comprised of various layers of information that are coupled with instructions for performing behavioral tasks. Such networks can be used for e.g., associating individual groups with special differential activities (e.g., listening to music, studying a subject's handwriting), or for performing complex tasks such as reading and writing a chart. 
The presence of structures that are familiar to the participant may also help" 155 | 156 | (generate-text2 "It is thought that cheese was first discovered around 8000 BC around the time when sheep were first domesticated" 157 | 100 158 | 0.8) 159 | ;=>"It is thought that cheese was first discovered around 8000 BC around the time when sheep were first domesticated as sheep. Native American plants and animals associated with such plants are described as being \"mushy, leafy and musky\" from having \"powder-like stalks and narrow niche-like leaves.\" They are believed to have been found in the Cauca Chaco area of South America and northern Mexico. The earliest known cases of cheese in the Americas could be traced back to around 160 BC, when the deposits of the Cauca Chaco were discovered in Colombia, Peru and Argentina" 160 | 161 | 162 | (generate-text2 "Rich Hickey developed Clojure because he wanted a modern Lisp for functional programming, symbiotic with the established Java platform" 163 | 100 164 | 0.8) 165 | "Rich Hickey developed Clojure because he wanted a modern Lisp for functional programming, symbiotic with the established Java platform. He knew that Clojure would make it hard to access any memory through Java, and code a good amount of Lisp. He had much to learn about programming at the time, and Clojure was perfect for him. It was important to understand the dominant language of Lisp, which was Clojure and JVM. Because of this, JVM was named 'Snack: No Slobs in Clojure'. This was a very important order of things, for JVM. Clojure had a major advantage over JVM in" 166 | 167 | (generate-text2 "What is the average rainfall in Florida?" 168 | 100 169 | 0.8) 170 | -------------------------------------------------------------------------------- /src/gigasquid/igraph.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.igraph 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]])) 4 | 5 | ;;; https://igraph.org/python/doc/tutorial/tutorial.html#creating-a-graph-from-scratch 6 | 7 | ;;; igraph is a graph python library 8 | 9 | ;;; sudo pip3 install python-igraph 10 | ;;; sudo pip3 install pycairo 11 | 12 | 13 | (require-python '[igraph :as igraph]) 14 | 15 | (def g (igraph/Graph)) 16 | (py. g add_vertices 3) 17 | (py. g add_edges [[0 1] [1 2]]) 18 | 19 | ;;; it's very stateful from here but 20 | (doto g 21 | (py. add_edges [[2 0]]) 22 | (py. add_vertices 3) 23 | (py. add_edges [[2 3] [3 4] [4 5] [5 3]])) 24 | 25 | (igraph/summary g) 26 | ;;; IGRAPH U--- 6 7 -- 27 | 28 | 29 | (def g2 (py. (igraph/Graph) Famous "petersen")) 30 | ;;; this actually works fine one I installed everything 31 | ;;; the image will show up 32 | (def plot (igraph/plot g2)) 33 | ;;; save the image to disk 34 | (py. plot save "myplot.png") 35 | -------------------------------------------------------------------------------- /src/gigasquid/lieden.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.lieden 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]] 4 | [clojure.java.shell :as sh])) 5 | 6 | ;;;sudo pip3 install leidenalg 7 | 8 | ;;; you also need to make sure igraph is working and installed too (see igraph.clj) 9 | 10 | ;;; What is leidenalg? 
https://github.com/vtraag/leidenalg 11 | ;; Implementation of the Leiden algorithm for various quality functions to be used with igraph in Python. 12 | ;;; sudo pip3 install pycairo 13 | 14 | (require-python '[igraph :as ig]) 15 | (require-python '[leidenalg :as la]) 16 | 17 | ;;https://leidenalg.readthedocs.io/en/latest/intro.html 18 | 19 | ;;Let us then look at one of the most famous examples of network science: the Zachary karate club (it even has a prize named after it): 20 | (def G (py. (ig/Graph) Famous "Zachary")) 21 | 22 | ;;;Now detecting communities with modularity is straightforward 23 | 24 | 25 | (def partition (la/find_partition G la/ModularityVertexPartition)) 26 | 27 | ;;; plotting results 28 | 29 | (def plot (ig/plot partition)) 30 | 31 | ;;; save the plot png 32 | 33 | (py. plot save "zach.png") 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/gigasquid/mxnet.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.mxnet 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]] 4 | [clojure.string :as string])) 5 | 6 | ;;; sudo pip3 install mxnet 7 | ;;; sudo pip3 install opencv-python 8 | 9 | (require-python '[mxnet :as mxnet]) 10 | (require-python '[mxnet.ndarray :as ndarray]) 11 | (require-python '[mxnet.module :as module]) 12 | (require-python '[mxnet.io :as io]) 13 | (require-python '[mxnet.test_utils :as test-utils]) 14 | (require-python '[mxnet.initializer :as initializer]) 15 | (require-python '[mxnet.metric :as metric]) 16 | (require-python '[mxnet.symbol :as sym]) 17 | 18 | 19 | ;;; get the mnist data and format it 20 | 21 | (def mnist (test-utils/get_mnist)) 22 | (def train-x (ndarray/array (py. (py/get-item mnist "train_data") "reshape" -1 784))) 23 | (def train-y (ndarray/array (py/get-item mnist "train_label"))) 24 | (def test-x (ndarray/array (py. (py/get-item mnist "test_data") "reshape" -1 784))) 25 | (def test-y (ndarray/array (py/get-item mnist "test_label"))) 26 | 27 | (def batch-size 100) 28 | 29 | (def train-dataset (io/NDArrayIter :data train-x 30 | :label train-y 31 | :batch_size batch-size 32 | :shuffle true)) 33 | (def test-dataset (io/NDArrayIter :data test-x 34 | :label test-y 35 | :batch_size batch-size)) 36 | 37 | 38 | (def data-shapes (py.- train-dataset "provide_data")) 39 | (def label-shapes (py.- train-dataset "provide_label")) 40 | 41 | data-shapes ;=> [DataDesc[data,(10, 784),,NCHW]] 42 | label-shapes ;=> [DataDesc[softmax_label,(10,),,NCHW]] 43 | 44 | 45 | ;;;; Setting up the model and initializing it 46 | 47 | (def data (sym/Variable "data")) 48 | 49 | (def net (-> (sym/Variable "data") 50 | (sym/FullyConnected :name "fc1" :num_hidden 128) 51 | (sym/Activation :name "relu1" :act_type "relu") 52 | (sym/FullyConnected :name "fc2" :num_hidden 64) 53 | (sym/Activation :name "relu2" :act_type "relu") 54 | (sym/FullyConnected :name "fc3" :num_hidden 10) 55 | (sym/SoftmaxOutput :name "softmax"))) 56 | 57 | 58 | 59 | (def model (py/call-kw mxnet.module/Module [] {:symbol net :context (mxnet/cpu)})) 60 | (py. model bind :data_shapes data-shapes :label_shapes label-shapes) 61 | (py. model init_params) 62 | (py. model init_optimizer :optimizer "adam") 63 | (def acc-metric (mxnet.metric/Accuracy)) 64 | 65 | 66 | (defn end-of-data-error? [e] 67 | (string/includes? (.getMessage e) "StopIteration")) 68 | 69 | (defn reset [iter] 70 | (py. 
iter reset)) 71 | 72 | (defn next-batch [iter] 73 | (try (py. iter next) 74 | (catch Exception e 75 | (when-not (end-of-data-error? e) 76 | (throw e))))) 77 | 78 | (defn get-metric [metric] 79 | (py. metric get)) 80 | 81 | (defn train-epoch [model dataset metric] 82 | (reset dataset) 83 | (loop [batch (next-batch dataset) 84 | i 0] 85 | (if batch 86 | (do 87 | (py. model forward batch :is_train true) 88 | (py. model backward) 89 | (py. model update) 90 | (py. model update_metric metric (py/get-attr batch "label")) 91 | (when (zero? (mod i 100)) (println "i-" i " Training Accuracy " (py/$a metric get))) 92 | (recur (next-batch dataset) (inc i))) 93 | (println "Final Training Accuracy " (get-metric metric))))) 94 | 95 | (defn test-accuracy [model dataset metric] 96 | (reset dataset) 97 | (loop [batch (next-batch dataset) 98 | i 0] 99 | (if batch 100 | (do 101 | (py. model forward batch) 102 | (py. model update_metric metric (py/get-attr batch "label")) 103 | (when (zero? (mod i 100)) (println "i-" i " Test Accuracy " (py/$a metric get))) 104 | (recur (next-batch dataset) (inc i))) 105 | (println "Final Test Accuracy " (get-metric metric))))) 106 | 107 | 108 | (comment 109 | 110 | 111 | ;;;training 112 | (dotimes [i 3] 113 | (println "========= Epoch " i " ============") 114 | (train-epoch model train-dataset acc-metric)) 115 | (get-metric acc-metric) ;=> ('accuracy', 0.9483555555555555) 116 | 117 | ;;;; 118 | (test-accuracy model test-dataset acc-metric) 119 | (get-metric acc-metric) ;=> ('accuracy', 0.9492052631578948) 120 | 121 | ;;visualization 122 | 123 | (py. train-dataset "reset") 124 | (def bd (next-batch train-dataset)) 125 | (def data (first (py.- bd "data"))) 126 | 127 | (def image (ndarray/slice data :begin 0 :end 1)) 128 | (def image2 (py. image "reshape" [28 28])) 129 | (def image3 (-> (ndarray/multiply image2 256) 130 | (ndarray/cast :dtype "uint8"))) 131 | (def npimage (py. image3 "asnumpy")) 132 | 133 | 134 | (require-python '[cv2 :as cv2]) 135 | (cv2/imwrite "number.jpg" npimage) 136 | 137 | 138 | ) 139 | 140 | 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /src/gigasquid/nltk.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.nltk 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]])) 4 | 5 | ;;; What is NLTK ? 6 | ;;; https://www.nltk.org/ 7 | ;; ;; NLTK is a leading platform for building Python programs to work with human language data. 8 | ;; It provides easy-to-use interfaces to over 50 corpora and lexical resources such as WordNet, 9 | ;; along with a suite of text processing libraries for classification, tokenization, stemming, 10 | ;; tagging, parsing, and semantic reasoning, wrappers for industrial-strength NLP libraries 11 | ;; and an active discussion forum. 12 | 13 | (require-python '[nltk :as nltk]) 14 | (comment 15 | 16 | ;; We will follow some examples from here first 17 | ;; https://www.nltk.org/book/ch01.html 18 | 19 | ;;; you can download individual packages using a parameter 20 | ;(nltk/download "wordnet") 21 | ;;; you can install just what you need for the examples 22 | ;(nltk/download "book") 23 | ;;;; install just the corpora, no grammars or trained models using 24 | ;(nltk/download "all-corpora") 25 | ;;;; or a popular subset 26 | ;(nltk/download "popular") 27 | ;;; or you can download everything with "all" 28 | ;(nltk/download "all") ;;; BEWARE it downloads lots! 
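  ;;; A small convenience on top of the download options above: fetch a
  ;;; resource only when it is missing locally. This is a minimal sketch that
  ;;; is not part of the original walkthrough; it assumes the standard NLTK
  ;;; resource paths such as "corpora/wordnet" and relies on nltk.data.find
  ;;; raising a LookupError (surfaced as a JVM exception) when the data has
  ;;; not been installed yet.
  (require-python '[nltk.data :as nltk-data])

  (defn ensure-nltk-resource
    "Download package (e.g. wordnet) unless resource-path
    (e.g. corpora/wordnet) is already available locally."
    [resource-path package]
    (try
      (nltk-data/find resource-path)
      :already-installed
      (catch Exception _
        (nltk/download package)
        :downloaded)))

  ;; (ensure-nltk-resource "corpora/wordnet" "wordnet")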
29 | 30 | ;;;; Book datasets 31 | (nltk/download "book") 32 | (require-python '[nltk.book :as book]) 33 | 34 | (book/texts) 35 | ;;; prints out in repl 36 | ;; text1: Moby Dick by Herman Melville 1851 37 | ;; text2: Sense and Sensibility by Jane Austen 1811 38 | ;; text3: The Book of Genesis 39 | ;; text4: Inaugural Address Corpus 40 | ;; text5: Chat Corpus 41 | ;; text6: Monty Python and the Holy Grail 42 | ;; text7: Wall Street Journal 43 | ;; text8: Personals Corpus 44 | ;; text9: The Man Who Was Thursday by G . K . Chesterton 1908 45 | 46 | book/text1 ;=> 47 | book/text2 ;=> 48 | 49 | ;;; concordance veiw of a givin word gives every occurence 50 | 51 | (py. book/text1 concordance "monstrous") 52 | ;;; displays in repl 53 | ;Displaying 11 of 11 matches: 54 | ;; ong the former , one was of a most monstrous size . ... This came towards us , 55 | ;; ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r 56 | ;; ll over with a heathenish array of monstrous clubs and spears . Some were thick 57 | ;; d as you gazed , and wondered what monstrous cannibal and savage could ever hav 58 | ;; that has survived the flood ; most monstrous and most mountainous ! That Himmal 59 | ;; they might scout at Moby Dick as a monstrous fable , or still worse and more de 60 | ;; th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l 61 | ;; ing Scenes . In connexion with the monstrous pictures of whales , I am strongly 62 | ;; ere to enter upon those still more monstrous stories of them which are to be fo 63 | ;; ght have been rummaged out of this monstrous cabinet there is no telling . But 64 | ;; of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u 65 | 66 | 67 | 68 | ;;; What other words appear in a similar range of contexts 69 | (py. book/text1 similar "monstrous") 70 | ;;; displays in repl 71 | ;; contemptible christian abundant few part mean careful puzzled 72 | ;; mystifying passing curious loving wise doleful gamesome singular 73 | ;; delightfully perilous fearless 74 | 75 | (py. book/text2 similar "monstrous") 76 | ;; displays in repl 77 | ;; delightfully perilous fearless 78 | ;; very so exceedingly heartily a as good great extremely remarkably 79 | ;; sweet vast amazingly 80 | 81 | ;;; see what sort of methods that this "Text" object has 82 | (py/att-type-map book/text3) 83 | ;;; get the length of the book of Genesis 84 | (py/len book/text3) ;=> 44764 85 | ;; or get the tokens and count them in clojure 86 | (count (py.- book/text3 tokens)) ;=> 44764 87 | 88 | ;;; get the sorted set of tokens 89 | (-> (py.- book/text3 tokens) set count) ;=> 2789 90 | 91 | ;;; lexical diversity (measure of the richness of text ) 92 | (defn lexical-diversity [text] 93 | (let [tokens (py.- text tokens)] 94 | (/ (-> tokens set count) 95 | (* 1.0 (count tokens))))) 96 | 97 | (lexical-diversity book/text3) ;=> 0.06230453042623537 98 | (lexical-diversity book/text5) ;=> 0.13477005109975562 99 | 100 | 101 | ;;; Moving onto Chapter 2 https://www.nltk.org/book/ch02.html 102 | 103 | ;;; Accessing Text Corpora 104 | 105 | (require-python '[nltk.corpus :as corpus]) 106 | 107 | ;; NLTK includes a small selection of texts from the Project Gutenberg electronic text archive, which contains some 25,000 free electronic books, hosted at http://www.gutenberg.org/. We begin by getting the Python interpreter to load the NLTK package, then ask to see nltk.corpus.gutenberg.fileids(), the file identifiers in this corpus: 108 | 109 | (py. 
corpus/gutenberg fileids) 110 | ;=> ['austen-emma.txt', 'austen-persuasion.txt', 'austen-sense.txt', 'bible-kjv.txt', 'blake-poems.txt', 'bryant-stories.txt', 'burgess-busterbrown.txt', 'carroll-alice.txt', 'chesterton-ball.txt', 'chesterton-brown.txt', 'chesterton-thursday.txt', 'edgeworth-parents.txt', 'melville-moby_dick.txt', 'milton-paradise.txt', 'shakespeare-caesar.txt', 'shakespeare-hamlet.txt', 'shakespeare-macbeth.txt', 'whitman-leaves.txt'] 111 | 112 | ;;; let's pick out emma 113 | (def emma (py. corpus/gutenberg words "austen-emma.txt")) 114 | (py/len emma) ;=>192427 115 | 116 | ;;;;;; Switching over to another tutorial 117 | ;; https://www.datacamp.com/community/tutorials/text-analytics-beginners-nltk 118 | 119 | 120 | ;;; Sentence tokenization 121 | (require-python '[nltk.tokenize :as tokenize]) 122 | 123 | (def text "Hello Mr. Smith, how are you doing today? The weather is great, and city is awesome. 124 | The sky is pinkish-blue. You shouldn't eat cardboard") 125 | (def tokenized-sent (tokenize/sent_tokenize text)) 126 | tokenized-sent 127 | ;;=> ['Hello Mr. Smith, how are you doing today?', 'The weather is great, and city is awesome.', 'The sky is pinkish-blue.', "You shouldn't eat cardboard"] 128 | 129 | 130 | (def tokenized-word (tokenize/word_tokenize text)) 131 | tokenized-word 132 | ;;=> ['Hello', 'Mr.', 'Smith', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The', 'weather', 'is', 'great', ',', 'and', 'city', 'is', 'awesome', '.', 'The', 'sky', 'is', 'pinkish-blue', '.', 'You', 'should', "n't", 'eat', 'cardboard'] 133 | 134 | ;;; Frequency Distribution 135 | 136 | (require-python '[nltk.probability :as probability]) 137 | 138 | (def fdist (probability/FreqDist tokenized-word)) 139 | fdist ;=> 140 | 141 | (py. fdist most_common) 142 | ;=> [('is', 3), (',', 2), ('The', 2), ('.', 2), ('Hello', 1), ('Mr.', 1), ('Smith', 1), ('how', 1), ('are', 1), ('you', 1), ('doing', 1), ('today', 1), ('?', 1), ('weather', 1), ('great', 1), ('and', 1), ('city', 1), ('awesome', 1), ('sky', 1), ('pinkish-blue', 1), ('You', 1), ('should', 1), ("n't", 1), ('eat', 1), ('cardboard', 1)] 143 | 144 | 145 | ;;; stopwords (considered noise in tett) 146 | 147 | (require-python '[nltk.corpus :as corpus]) 148 | 149 | (def stop-words (into #{} (py. 
corpus/stopwords words "english"))) 150 | stop-words 151 | ;=> #{"d" "itself" "more" "didn't" "ain" "won" "hers" "ours" "further" "shouldn" "his" "him" "hasn't" "s" "doesn" "are" "didn" "don't" "very" "you'd" "under" "who" "which" "isn" "of" "this" "after" "once" "up" "off" "she" "shan't" "nor" "does" "theirs" "ll" "yours" "not" "mustn't" "it" "over" "by" "she's" "it's" "hasn" "is" "few" "shouldn't" "why" "doing" "mightn't" "about" "they" "you" "its" "than" "those" "where" "just" "for" "needn" "should" "my" "again" "themselves" "should've" "ourselves" "whom" "yourselves" "because" "any" "most" "you've" "mustn" "you're" "can" "were" "weren" "ma" "did" "was" "that" "mightn" "if" "same" "both" "doesn't" "don" "had" "what" "an" "or" "have" "couldn't" "am" "couldn" "won't" "their" "a" "so" "them" "weren't" "wouldn" "on" "shan" "own" "above" "but" "when" "until" "be" "haven" "t" "having" "out" "aren't" "that'll" "herself" "and" "do" "myself" "i" "down" "hadn" "here" "too" "y" "between" "such" "needn't" "against" "each" "how" "other" "from" "these" "while" "no" "with" "now" "some" "will" "himself" "all" "you'll" "wouldn't" "re" "then" "isn't" "through" "yourself" "has" "haven't" "being" "our" "during" "wasn" "ve" "before" "only" "your" "to" "into" "m" "aren" "we" "as" "wasn't" "he" "me" "at" "below" "o" "the" "her" "been" "there" "in" "hadn't"} 152 | 153 | ;;; removing stopwords 154 | 155 | (def filtered-sent (->> tokenized-sent 156 | (map tokenize/word_tokenize) 157 | (map #(remove stop-words %)))) 158 | filtered-sent 159 | ;; (("Hello" "Mr." "Smith" "," "today" "?") 160 | ;; ("The" "weather" "great" "," "city" "awesome" ".") 161 | ;; ("The" "sky" "pinkish-blue" ".") 162 | ;; ("You" "n't" "eat" "cardboard")) 163 | 164 | 165 | ;;;; Lexicon Normalization 166 | ;;stemming 167 | 168 | (require-python '[nltk.stem :as stem]) 169 | 170 | (let [ps (stem/PorterStemmer)] 171 | (->> filtered-sent 172 | (map (fn [sent] (map #(py. ps stem %) sent))))) 173 | ;;=> (("hello" "mr." "smith" "," "today" "?") 174 | ;; ("the" "weather" "great" "," "citi" "awesom" ".") 175 | ;; ("the" "sky" "pinkish-blu" ".") ("you" "n't" "eat" "cardboard") 176 | 177 | 178 | ;;; Lemmatization 179 | 180 | (require-python '[nltk.stem.wordnet :as wordnet]) 181 | 182 | (let [lem (wordnet/WordNetLemmatizer) 183 | stem (stem/PorterStemmer) 184 | word "flying"] 185 | {:lemmatized-word (py. lem lemmatize word "v") 186 | :stemmed-word (py. stem stem word)}) 187 | ;=> {:lemmatized-word "fly", :stemmed-word "fli"} 188 | 189 | ;;; POS Tagging 190 | (let [sent "Albert Einstein was born in Ulm, Germany in 1879." 191 | tokens (nltk/word_tokenize sent)] 192 | {:tokens tokens 193 | :pos-tag (nltk/pos_tag tokens)}) 194 | ;; {:tokens 195 | ;; ['Albert', 'Einstein', 'was', 'born', 'in', 'Ulm', ',', 'Germany', 'in', '1879', '.'], 196 | ;; :pos-tag 197 | ;; [('Albert', 'NNP'), ('Einstein', 'NNP'), ('was', 'VBD'), ('born', 'VBN'), ('in', 'IN'), ('Ulm', 'NNP'), (',', ','), ('Germany', 'NNP'), ('in', 'IN'), ('1879', 'CD'), ('.', '.')]} 198 | 199 | 200 | 201 | 202 | ) 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | -------------------------------------------------------------------------------- /src/gigasquid/numpy_plot.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.numpy-plot 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. 
py.-]]
  4 |             [gigasquid.plot :as plot]))
  5 | 
  6 | (require-python '[matplotlib.pyplot :as pyplot])
  7 | (require-python '[numpy :as numpy])
  8 | 
  9 | ;;;; you will need matplotlib, numpy, and pillow installed to run this in python3
 10 | 
 11 | ;;; This uses a macro for printing in the plot namespace that uses the shell "open" command
 12 | ;;; to show a saved image from pyplot. If you don't have a Mac you will need to modify that
 13 | ;;; to whatever shell command you have.
 14 | 
 15 | (comment
 16 |   (def x (numpy/linspace 0 2 50))
 17 | 
 18 |   (plot/with-show (matplotlib.pyplot/plot [[1 2 3 4 5] [1 2 3 4 10]] :label "linear"))
 19 | 
 20 |   (plot/with-show
 21 |     (pyplot/plot [x x] :label "linear")
 22 |     (pyplot/plot [x (py. x "__pow__" 2)] :label "quadratic")
 23 |     (pyplot/plot [x (py. x "__pow__" 3)] :label "cubic")
 24 |     (pyplot/xlabel "x label")
 25 |     (pyplot/ylabel "y label")
 26 |     (pyplot/title "Simple Plot"))
 27 | 
 28 | 
 29 |   ;;; numpy plotting tutorial http://cs231n.github.io/python-numpy-tutorial/#matplotlib-plotting
 30 |   (let [x (numpy/arange 0 (* 3 numpy/pi) 0.1)
 31 |         y (numpy/sin x)]
 32 |     (plot/with-show
 33 |       (pyplot/plot x y)))
 34 | 
 35 |   (let [x (numpy/arange 0 (* 3 numpy/pi) 0.1)
 36 |         y-sin (numpy/sin x)
 37 |         y-cos (numpy/cos x)]
 38 |     (plot/with-show
 39 |       (pyplot/plot x y-sin)
 40 |       (pyplot/plot x y-cos)
 41 |       (pyplot/xlabel "x axis label")
 42 |       (pyplot/ylabel "y axis label")
 43 |       (pyplot/title "Sine and Cosine")
 44 |       (pyplot/legend ["Sine" "Cosine"])))
 45 | 
 46 |   ;;;; Subplots
 47 | 
 48 |   (let [x (numpy/arange 0 (* 3 numpy/pi) 0.1)
 49 |         y-sin (numpy/sin x)
 50 |         y-cos (numpy/cos x)]
 51 |     (plot/with-show
 52 |       ;;; set up a subplot grid that has a height of 2 and width of 1
 53 |       ;; and set the first such subplot as active
 54 |       (pyplot/subplot 2 1 1)
 55 |       (pyplot/plot x y-sin)
 56 |       (pyplot/title "Sine")
 57 | 
 58 |       ;;; set the second subplot as active and make the second plot
 59 |       (pyplot/subplot 2 1 2)
 60 |       (pyplot/plot x y-cos)
 61 |       (pyplot/title "Cosine")))
 62 | 
 63 |   ;;;;; Images
 64 | 
 65 |   (let [img (pyplot/imread "resources/cat.jpg")
 66 |         img-tinted (numpy/multiply img [1 0.95 0.9])]
 67 |     (plot/with-show
 68 |       (pyplot/subplot 1 2 1)
 69 |       (pyplot/imshow img)
 70 |       (pyplot/subplot 1 2 2)
 71 |       (pyplot/imshow (numpy/uint8 img-tinted))))
 72 | 
 73 | 
 74 |   ;;;;; pie chart
 75 |   ;;;; from https://matplotlib.org/3.1.1/gallery/pie_and_polar_charts/pie_features.html
 76 | 
 77 | 
 78 |   (let [labels ["Frogs" "Hogs" "Dogs" "Logs"]
 79 |         sizes [15 30 45 10]
 80 |         explode [0 0.1 0 0] ; only explode the 2nd slice (Hogs)
 81 |         ]
 82 |     (plot/with-show
 83 |       (let [[fig1 ax1] (pyplot/subplots)]
 84 |         (py. ax1 "pie" sizes :explode explode :labels labels :autopct "%1.1f%%"
 85 |              :shadow true :startangle 90)
 86 |         (py. ax1 "axis" "equal")) ;equal aspect ratio ensures that the pie is drawn as a circle
 87 |       ))
 88 |   )
 89 | 
-------------------------------------------------------------------------------- /src/gigasquid/opencv/README.md: --------------------------------------------------------------------------------
  1 | # OpenCV
  2 | 
  3 | - [OpenCV](https://opencv.org/)
  4 | - Official OpenCV [documentation](https://opencv.org/)
  5 | 
  6 | ## Basic installation
  7 | 
  8 | - Python
  9 | 
 10 | ```shell
 11 | pip3 install numpy matplotlib opencv-contrib-python-headless
 12 | ```
 13 | 
 14 | - Clojure
 15 | 
 16 | Just run your favorite `cider-jack-in` if you are on Emacs.
 17 | For other editors, you will do the equivalent command for your editor.
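## Quick check

Before running the examples below, it can help to confirm from the REPL that the Python side is wired up. This is a minimal sketch based on the calls already used in `core.clj`; it assumes the REPL is started from the project root so the relative image path resolves.

```clojure
(require '[libpython-clj.require :refer [require-python]]
         '[libpython-clj.python :as py :refer [py.-]])

(require-python '[cv2 :as cv2])

;; Read the sample image and inspect its shape: (height, width, channels)
(let [img (cv2/imread "resources/opencv/cat.jpg")]
  (py.- img shape))
;;=> a tuple such as (h, w, 3) for a color image
```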
18 | 19 | ## Outputs 20 | 21 | ![Input Image](../../../../master/resources/opencv/cat.jpg) 22 | 23 | - Simple Sketch 24 | 25 | ```clojure 26 | ;; Process image as sketch 27 | (process-image {:input-file "resources/opencv/cat.jpg" 28 | :output-file "resources/opencv/cat-sketch.png" 29 | :tx-fns sketch-image}) 30 | ``` 31 | 32 | ![Simple Sketch](../../../../master/resources/opencv/cat-sketch.png) 33 | 34 | - Cartoonize Image (color) 35 | 36 | ```clojure 37 | (process-image {:input-file "resources/opencv/cat.jpg" 38 | :output-file "resources/opencv/cat-cartoonize-color.png" 39 | :tx-fns cartoonize-image}) 40 | ``` 41 | 42 | ![Cartoonize Image (color)](../../../../master/resources/opencv/cat-cartoonize-color.png) 43 | 44 | - Cartoonize Image (gray-scale) 45 | 46 | ```clojure 47 | (process-image {:input-file "resources/opencv/cat.jpg" 48 | :output-file "resources/opencv/cat-cartoonize-gray.png" 49 | :tx-fns cartoonize-image-gray}) 50 | ``` 51 | 52 | ![Cartoonize Image (gray-scale)](../../../../master/resources/opencv/cat-cartoonize-gray.png) 53 | -------------------------------------------------------------------------------- /src/gigasquid/opencv/core.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.opencv.core 2 | (:require 3 | [clojure.string :as string] 4 | [gigasquid.utils :refer [display-image]] 5 | [libpython-clj.require 6 | :refer [require-python]] 7 | [libpython-clj.python 8 | :as py 9 | :refer [py. 10 | py.. 11 | py.- 12 | att-type-map 13 | ->python 14 | ->jvm 15 | as-map 16 | as-list]] 17 | [clojure.java.shell :as sh] 18 | [clojure.pprint :refer [pprint]]) 19 | (:import [java.io File])) 20 | 21 | ;;; Python installation 22 | ;;; sudo pip3 install numpy matplotlib opencv-contrib-python-headless 23 | 24 | (require-python 25 | '[cv2 26 | :as cv2] 27 | '[matplotlib.pyplot 28 | :as pyplot] 29 | '[builtins 30 | :as python 31 | :refer [slice tuple]] 32 | '[numpy 33 | :as np 34 | :refer [array]] 35 | '[operator 36 | :as operator 37 | :refer [getitem]]) 38 | 39 | ;; ====================================== ;; 40 | ;; Basic exploration to learn the api 41 | ;; ====================================== ;; 42 | (comment 43 | 44 | (def img (cv2/imread "resources/opencv/opencv-logo.png")) 45 | 46 | (-> img 47 | att-type-map) 48 | 49 | ;; Note: how we de-structure Python's tuple to vector in Clojure 50 | (let [img (cv2/imread "resources/opencv/opencv-logo.png") 51 | [h w c] (py.- img shape)] 52 | [h w c]) 53 | ;;=> (99, 82, 3) 54 | 55 | ;; Total number of elements is obtained by img.size 56 | (py.- img size) ;;=> 24354 57 | 58 | (py.- img dtype) ;;=> uint8 59 | 60 | (def img2 (cv2/cvtColor img cv2/COLOR_BGR2GRAY)) 61 | 62 | ;; Save the result to the file 63 | (cv2/imwrite "resources/opencv/opencv-gray-logo.png" img2) ;;=> true 64 | 65 | ) 66 | 67 | ;; ====================================== ;; 68 | ;; Useful transformation function 69 | ;; ====================================== ;; 70 | (defn ^:private read-input 71 | [input-file] 72 | (let [input-image (cv2/imread input-file) 73 | temp-file (File/createTempFile "opencv-temp" ".png")] 74 | [input-image temp-file])) 75 | 76 | ;; sketch image 77 | (defn sketch-image 78 | [img] 79 | (let [img-gray (cv2/cvtColor img cv2/COLOR_BGR2GRAY) 80 | img-gray (cv2/medianBlur img-gray 5) 81 | edges (cv2/Laplacian img-gray cv2/CV_8U :ksize 5) 82 | [_ thresholded] (cv2/threshold edges 70 255 cv2/THRESH_BINARY_INV)] 83 | thresholded)) 84 | 85 | (comment 86 | ;; Sketch the cat image 87 | (let [img (cv2/imread 
"resources/opencv/cat.jpg")] 88 | (sketch-image img)) 89 | 90 | ) 91 | 92 | (defn cartoonize-image 93 | ([image] 94 | (cartoonize-image image false)) 95 | ([image gray-mode] 96 | (let [thresholed (sketch-image image) 97 | filtered (cv2/bilateralFilter image 10 250 250) 98 | cartoonized (cv2/bitwise_and filtered filtered :mask thresholed)] 99 | (if gray-mode 100 | (cv2/cvtColor cartoonized cv2/COLOR_BGR2GRAY) 101 | cartoonized)))) 102 | 103 | (defn cartoonize-image-gray 104 | [image] 105 | (cartoonize-image image true)) 106 | 107 | (comment 108 | ;; gray-mode true 109 | (let [image (cv2/imread "resources/opencv/cat.jpg")] 110 | (cartoonize-image image true)) 111 | 112 | ;; Or use the wrapper function 113 | (let [image (cv2/imread "resources/cat.jpg")] 114 | (cartoonize-image-gray image)) 115 | 116 | ;; color mode 117 | (let [image (cv2/imread "resources/cat.jpg")] 118 | (cartoonize-image image)) 119 | ) 120 | 121 | ;; Re-usable function for exercising the above functions 122 | 123 | (defn process-image 124 | "Apply opencv function to a given image and optionally show it. 125 | 126 | (process-image {:input-file \"resources/opencv/cat.jpg\" 127 | :output-file \"resources/opencv/cat-sketch.png\" 128 | :xform-fns sketch-image 129 | :open? true})" 130 | [& [{:keys [input-file 131 | output-file 132 | tx-fns 133 | open?] 134 | :or {input-file "resources/opencv/cat.jpg" 135 | open? true}}]] 136 | (pyplot/figure :figsize (python/tuple [14 6])) 137 | (pyplot/suptitle "Example Sketch" 138 | :fontsize 14 139 | :fontweight "bold") 140 | (let [image-src (cv2/imread input-file) 141 | ;; TODO: allow arguments to the function to avoid overload 142 | image-dst (tx-fns image-src)] 143 | (cv2/imwrite output-file image-dst) 144 | (if open? 145 | (display-image output-file) 146 | (println (format "Your output file : %s" output-file))))) 147 | 148 | (comment 149 | 150 | ;; Process image as sketch 151 | (process-image {:input-file "resources/opencv/cat.jpg" 152 | :output-file "resources/opencv/cat-sketch.png" 153 | :tx-fns sketch-image}) 154 | 155 | (process-image {:input-file "resources/opencv/cat.jpg" 156 | :output-file "resources/opencv/cat-cartoonize-color.png" 157 | :tx-fns cartoonize-image}) 158 | 159 | (process-image {:input-file "resources/opencv/cat.jpg" 160 | :output-file "resources/opencv/cat-cartoonize-gray.png" 161 | :tx-fns cartoonize-image-gray}) 162 | 163 | ) 164 | 165 | (defn -main 166 | [& args] 167 | (process-image {:input-file "resources/opencv/cat.jpg" 168 | :output-file "resources/opencv/cat-sketch.png" 169 | :tx-fns sketch-image})) 170 | 171 | ;; We can also run it via main function 172 | #_(-main) 173 | -------------------------------------------------------------------------------- /src/gigasquid/plot.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.plot 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]] 4 | [gigasquid.utils :refer [display-image create-tmp-file]] 5 | [clojure.java.shell :as sh])) 6 | 7 | ;;; This uses the headless version of matplotlib to generate a graph then copy it to the JVM 8 | ;;; where we can then print it 9 | 10 | ;;; have to set the headless mode before requiring pyplot 11 | (def mplt (py/import-module "matplotlib")) 12 | (py. 
mplt "use" "Agg") 13 | 14 | (require-python '[matplotlib.pyplot :as pyplot]) 15 | (require-python 'matplotlib.backends.backend_agg) 16 | (require-python 'numpy) 17 | 18 | (defmacro with-show 19 | "Takes forms with mathplotlib.pyplot to then show locally" 20 | [& body] 21 | `(let [_# (pyplot/clf) 22 | fig# (pyplot/figure) 23 | agg-canvas# (matplotlib.backends.backend_agg/FigureCanvasAgg fig#) 24 | temp-file# (create-tmp-file "tmp-image" ".png") 25 | temp-image# (.getAbsolutePath temp-file#)] 26 | ~(cons 'do body) 27 | (py. agg-canvas# "draw") 28 | (pyplot/savefig temp-image#) 29 | (display-image temp-image#) 30 | (.deleteOnExit temp-file#))) 31 | 32 | ;;;; If you run into mem problems with temporary files try this one 33 | 34 | (defmacro with-show-one 35 | "Takes forms with mathplotlib.pyplot to then show locally" 36 | [& body] 37 | `(let [_# (pyplot/clf) 38 | fig# (pyplot/figure) 39 | agg-canvas# (matplotlib.backends.backend_agg/FigureCanvasAgg fig#)] 40 | ~(cons 'do body) 41 | (py. agg-canvas# "draw") 42 | (pyplot/savefig "temp.png") 43 | (sh/sh "open" "temp.png"))) 44 | 45 | (defmacro with-save 46 | "Takes forms with mathplotlib.pyplot to then show locally" 47 | [fname & body] 48 | `(let [_# (pyplot/clf) 49 | fig# (pyplot/figure) 50 | agg-canvas# (matplotlib.backends.backend_agg/FigureCanvasAgg fig#)] 51 | ~(cons 'do body) 52 | (py. agg-canvas# "draw") 53 | (pyplot/savefig ~fname))) 54 | 55 | 56 | 57 | 58 | (comment 59 | 60 | (def x (numpy/linspace 0 2 100)) 61 | 62 | (with-show 63 | (pyplot/plot [x x] :label "linear") 64 | (pyplot/plot [x (py. x "__pow__" 2)] :label "quadratic") 65 | (pyplot/plot [x (py. x "__pow__" 3)] :label "cubic") 66 | (pyplot/xlabel "x label") 67 | (pyplot/ylabel "y label") 68 | (pyplot/title "Simple Plot")) 69 | 70 | (with-show (pyplot/plot [[1 2 3 4 5] [1 2 3 4 10]] :label "linear")) 71 | 72 | ) 73 | -------------------------------------------------------------------------------- /src/gigasquid/psutil/README.md: -------------------------------------------------------------------------------- 1 | # psutil 2 | 3 | Cross-platform lib for process and system monitoring in Python 4 | 5 | Official [Documentation](https://psutil.readthedocs.io/en/latest/) 6 | 7 | ## Basic installation 8 | 9 | - Python 10 | 11 | See [installation](https://psutil.readthedocs.io/en/latest/#id1) 12 | 13 | ```shell 14 | # Or just 15 | pip3 install psutil 16 | ``` 17 | 18 | - Clojure 19 | 20 | Just run your favorite `cider-jack-in` if you are on Emacs. 21 | For other editors, you will do the equivalent command for your editor. 22 | -------------------------------------------------------------------------------- /src/gigasquid/psutil/core.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.psutil.core 2 | (:require [libpython-clj.require 3 | :refer [require-python]] 4 | [libpython-clj.python 5 | :as py 6 | :refer [py. 7 | py.. 8 | py.- 9 | att-type-map 10 | ->python 11 | ->jvm]] 12 | [clojure.java.shell :as sh] 13 | [clojure.string :as str] 14 | [clojure.pprint :refer [pprint]]) 15 | (:import [java.io File])) 16 | 17 | ;; https://psutil.readthedocs.io/en/latest/ 18 | ;; https://psutil.readthedocs.io/en/latest/#filtering-and-sorting-processes 19 | 20 | ;; Python 21 | ;; pip3 install pyutil 22 | 23 | (require-python '[builtins :as python]) 24 | (require-python '[psutil :as psu]) 25 | 26 | (comment 27 | ;; Get information about the process 28 | (def p (psu/Process)) 29 | 30 | (py.. p username) 31 | ;;=> "bchoomnuan" 32 | 33 | (py.. 
p cpu_times) 34 | ;;=> pcputimes(user=52.755533824, system=1.542032128, children_user=0.0, children_system=0.0) 35 | 36 | ;; And the list goes on 37 | (py.. p 38 | ;;cpu_percent 39 | ;;create_time 40 | ;;ppid 41 | ;;status 42 | ;;cmdline ;; intersting one! 43 | ;;as_dict 44 | ;;parents 45 | ;;cwd ;;=> "/Users/bchoomnuan/github/libpython-clj-examples" 46 | ;;uids 47 | ;;gids 48 | ;;terminal 49 | ;;memory_info 50 | ;;memory_full_info 51 | ;;(memory_percent :memtype "rss") 52 | 53 | ;; More fun to try these [I will skip this for now :)] 54 | ;; (send_signal ..) 55 | ;; (suspend) 56 | ;; (resume) 57 | ;; (terminate) 58 | ;; (kill) 59 | ;; (wait :timeout ..) 60 | ) 61 | ) 62 | 63 | (comment 64 | ;; List all running process name 65 | (def process-names 66 | (let [procs (psu/process_iter["name"])] 67 | (map (fn [p] 68 | (py. p name)) procs))) 69 | 70 | (count process-names) ;;=> 475 71 | 72 | ;; Take a peek at the first few items 73 | (take 5 process-names) 74 | ;;=> ("kernel_task" "launchd" "syslogd" "UserEventAgent" "uninstalld") 75 | 76 | ;; How about finding out all process that have the word "sys" in it? 77 | 78 | (filter (fn [x] (str/index-of (str/lower-case x) "sys")) 79 | process-names) 80 | 81 | ;;=> 82 | #_ 83 | ("syslogd" "systemstats" "syspolicyd" "sysmond" "systemstats" "systemsoundserve" "UIKitSystem" "SystemUIServer" "system_installd" "sysextd" "sysdiagnose") 84 | 85 | ;; You can do more of course, see the documentation for idea 86 | ;; https://psutil.readthedocs.io/en/latest/#filtering-and-sorting-processes 87 | 88 | ) 89 | 90 | (comment 91 | ;; There are many functions that we can use like getting information about cpu 92 | (def cpu-times (psu/cpu_times)) 93 | 94 | cpu-times ;;=> scputimes(user=67053.19, nice=0.0, system=52277.51, idle=1399764.29) 95 | 96 | ;; Note: the return is the Pythong object that we can inspect like 97 | (py.- cpu-times user) 98 | (py.- cpu-times system) 99 | (py.- cpu-times idle) 100 | 101 | ;; How about printing out the percentage of cpu usage every given interval 102 | (dotimes [x 3] 103 | (println (psu/cpu_percent :interval 1))) 104 | 105 | ;;=> ;; in your REPL 106 | ;; 3.4 107 | ;; 3.5 108 | ;; 3.4 109 | 110 | (def cpu-info 111 | (for [x (range 3)] 112 | (psu/cpu_percent :interval 1 113 | :percpu true))) 114 | 115 | (type cpu-info) 116 | ;;=> clojure.lang.LazySeq 117 | 118 | (pprint cpu-info) 119 | ;;=> ;; in your REPL 120 | #_ 121 | ([57.4, 1.0, 18.0, 0.0, 10.9, 0.0, 5.1, 0.0, 6.1, 0.0, 4.0, 0.0] 122 | [60.0, 0.0, 17.8, 1.0, 8.0, 1.0, 5.9, 0.0, 5.0, 0.0, 4.0, 0.0] 123 | [57.0, 1.0, 24.8, 3.0, 11.9, 0.0, 7.1, 0.0, 5.0, 0.0, 2.0, 0.0]) 124 | 125 | (-> cpu-info first type) 126 | ;;=> :pyobject 127 | 128 | (-> cpu-info first first type) ;;=> java.lang.Double 129 | 130 | (def cpu-info 131 | (for [x (range 3)] 132 | (psu/cpu_times_percent :interval 1 133 | :percpu false))) 134 | 135 | (-> cpu-info 136 | pprint) 137 | 138 | ;;=> in your REPL 139 | ;; (scputimes(user=1.5, nice=0.0, system=2.0, idle=96.5) 140 | ;; scputimes(user=1.0, nice=0.0, system=2.8, idle=96.2) 141 | ;; scputimes(user=0.7, nice=0.0, system=1.8, idle=97.4)) 142 | 143 | (psu/cpu_count) ;;=> 12 144 | 145 | (psu/cpu_count :logical false) ;;=> 6 146 | 147 | (psu/cpu_stats) 148 | ;;=> scpustats(ctx_switches=148596, interrupts=866048, soft_interrupts=579676465, syscalls=1635282) 149 | 150 | (psu/cpu_freq) 151 | ;;=> scpufreq(current=2200, min=2200, max=2200) 152 | 153 | (psu/getloadavg) 154 | ;;=> (3.3349609375, 2.94970703125, 2.6689453125) 155 | ) 156 | 157 | ;; Memory 158 | (comment 159 | 160 
| (psu/virtual_memory) 161 | ;;=> svmem(total=17179869184, available=7311126528, percent=57.4, used=8922701824, free=184188928, active=5441355776, inactive=6698086400, wired=3481346048) 162 | 163 | (psu/swap_memory) 164 | ;;=> sswap(total=3221225472, used=1549008896, free=1672216576, percent=48.1, sin=206163009536, sout=310902784) 165 | 166 | ;; Disks 167 | (psu/disk_partitions) 168 | 169 | #_ [sdiskpart(device='/dev/disk1s6', mountpoint='/', fstype='apfs', opts='ro,local,rootfs,dovolfs,journaled,multilabel'), sdiskpart(device='/dev/disk1s1', mountpoint='/System/Volumes/Data', fstype='apfs', opts='rw,local,dovolfs,dontbrowse,journaled,multilabel'), sdiskpart(device='/dev/disk1s4', mountpoint='/private/var/vm', fstype='apfs', opts='rw,local,dovolfs,dontbrowse,journaled,multilabel'), sdiskpart(device='/dev/disk1s5', mountpoint='/Volumes/Macintosh HD', fstype='apfs', opts='rw,local,dovolfs,journaled,multilabel'), sdiskpart(device='/dev/disk1s3', mountpoint='/Volumes/Recovery', fstype='apfs', opts='rw,local,dovolfs,dontbrowse,journaled,multilabel')] 170 | 171 | (def du (psu/disk_usage "/")) 172 | 173 | ;;=> sdiskusage(total=250685575168, used=10963034112, free=7645040640, percent=58.9) 174 | 175 | (py.- du total) 176 | (py.- du used) 177 | (py.- du percent) 178 | 179 | (psu/disk_io_counters :perdisk false) 180 | ;;=> sdiskio(read_count=15153434, write_count=5535766, read_bytes=278249762816, write_bytes=100455395328, read_time=6554143, write_time=2819768) 181 | 182 | ;; Network 183 | (psu/net_io_counters :pernic true) 184 | 185 | (psu/net_if_addrs) 186 | 187 | (psu/net_if_stats) 188 | 189 | ) 190 | 191 | (comment 192 | ;; Sensors! 193 | (psu/sensors_battery) 194 | ;;=> sbattery(percent=97, secsleft=23400, power_plugged=False) 195 | 196 | (psu/swap_memory) 197 | ;;=> sswap(total=3221225472, used=1414791168, free=1806434304, percent=43.9, sin=219395137536, sout=312922112) 198 | 199 | ;; Others 200 | (psu/users) 201 | ;;=> [suser(name='bchoomnuan', terminal='console', host=None, started=1582210432.0, pid=199), suser(name='bchoomnuan', terminal='ttys001', host=None, started=1582428288.0, pid=24455)] 202 | 203 | ) 204 | 205 | (comment 206 | ;; Process management 207 | (count (psu/pids)) ;;=> 501 208 | 209 | (last (psu/pids)) 210 | 211 | ;; Take random process object 212 | (def p (psu/Process (last (psu/pids)))) 213 | 214 | (py. p name) ;;=> "microstackshot" 215 | 216 | (py. p exe) ;;=> "/usr/libexec/microstackshot" 217 | (py. p ppid) ;;=> 1 218 | 219 | (py.. p (children :recursive true)) 220 | 221 | ) 222 | 223 | (comment 224 | 225 | ;; Take a peek at 5 processes 226 | (doseq [proc (take 5 (psu/process_iter ["pid" "name"]))] 227 | (println proc) 228 | ) 229 | 230 | ;;=> Your REPL 231 | ;; psutil.Process(pid=0, name='kernel_task', started='2020-02-20 09:53:34') 232 | ;; psutil.Process(pid=1, name='launchd', started='2020-02-20 09:53:34') 233 | ;; psutil.Process(pid=120, name='syslogd', started='2020-02-20 09:53:41') 234 | ;; psutil.Process(pid=121, name='UserEventAgent', started='2020-02-20 09:53:41') 235 | ;; psutil.Process(pid=124, name='uninstalld', started='2020-02-20 09:53:41') 236 | 237 | (psu/pid_exists 99532) ;;=> true 238 | 239 | ) 240 | 241 | ;; There are much more things you can do, just go ahead and looking at the 242 | ;; official documentation to see nice and practical usage of the library. 
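;; One more composite example before the reference link below: rank the running
;; processes by memory usage. This is a minimal sketch rather than something
;; taken verbatim from the upstream docs; it assumes the "pid"/"name"/
;; "memory_percent" attrs accepted by psutil.process_iter and reads each
;; process's .info dict.
(comment

  (->> (psu/process_iter ["pid" "name" "memory_percent"])
       (map (fn [p] (->jvm (py.- p info))))
       (sort-by #(or (get % "memory_percent") 0) >)
       (take 5))
  ;;=> five maps like {"pid" ..., "name" ..., "memory_percent" ...},
  ;;   biggest memory consumers first

  )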
243 | ;; https://psutil.readthedocs.io/en/latest/# 244 | -------------------------------------------------------------------------------- /src/gigasquid/pygal/README.md: -------------------------------------------------------------------------------- 1 | # Pygal - Sexy Python Charting 2 | 3 | - Github - [Kozea/pygal](https://github.com/Kozea/pygal) 4 | - Official [documentation](http://www.pygal.org/en/latest/documentation/index.html) page 5 | - Based on the following examples from [pygal.org](http://pygal.org). 6 | 7 | ## Basic installation 8 | 9 | - Python 10 | 11 | ```shell 12 | pip install pygal 13 | 14 | ## Optionally you should also install the following library 15 | pip install lxml # improve rendering speed (except on pypy) 16 | 17 | ## To render png output 18 | pip install cairosvg 19 | pip install tinycss 20 | pip install cssselect 21 | ``` 22 | 23 | - Clojure 24 | 25 | Just run your favorite `cider-jack-in` if you are on Emacs. 26 | For other editors, you will do the equivalent command for your editor. 27 | 28 | ### SVG Viewer (if you are on Mac OS) 29 | 30 | To render SVG on the Mac you may like to also install [http://gapplin.wolfrosch.com/](Gapplin) via [Mac's AppStore](https://apps.apple.com/us/app/gapplin/id768053424?mt=12) 31 | 32 | This will make the `open some-file.svg` work properly. 33 | -------------------------------------------------------------------------------- /src/gigasquid/pygal/core.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.pygal.core 2 | (:require [libpython-clj.require 3 | :refer [require-python]] 4 | [libpython-clj.python 5 | :as py 6 | :refer [py. 7 | py.. 8 | py.- 9 | att-type-map 10 | ->python 11 | ->jvm]] 12 | [clojure.java.shell :as sh] 13 | [gigasquid.utils :refer [create-tmp-file 14 | display-image]] 15 | [clojure.pprint :refer [pprint]]) 16 | (:import [java.io File])) 17 | 18 | ;;; Python installation 19 | ;;; sudo pip3 install pygal lxml cairosvg tinycss cssselect 20 | 21 | (require-python '[pygal :as pygal]) 22 | ;;=> :ok 23 | 24 | ;; http://www.pygal.org/en/latest/documentation/first_steps.html# 25 | 26 | (comment 27 | 28 | ;; For list of configuration see 29 | ;; http://www.pygal.org/en/latest/documentation/configuration/chart.html 30 | (def config (pygal/Config 31 | :pretty_print true 32 | :title "My Pygal Chart")) 33 | 34 | ;; Some configurable settings 35 | (py.- config title) ;;=> "My Pygal Chart" 36 | (py.- config width) ;;=> 800 37 | (py.- config height) ;;=> 600 38 | (py.- config show_legend) ;;=> true 39 | (py.- config fill) ;;=> false 40 | (py.- config style) ;;=> pygal.style.Style 41 | (py.- config legend_at_bottom) ;;=> false 42 | (py.- config legend_box_size) ;;=> 12 43 | (py.- config margin) ;;=> 20 44 | (py.- config max_scale) ;;=> 16 45 | (py.- config min_scale) ;;=> 4 46 | (py.- config pretty_print) ;;=> true 47 | 48 | ;; For full list of options try 49 | (-> config 50 | att-type-map 51 | pprint) 52 | 53 | ;;=> see the useful list in your REPL 54 | #_ 55 | { 56 | "__call__" :method, 57 | "__class__" :meta-config, 58 | ;; ... 
59 | "_update" :method, 60 | "allow_interruptions" :bool, 61 | "box_mode" :str, 62 | "classes" :list, 63 | "copy" :method, 64 | "css" :list, 65 | "defs" :list, 66 | "disable_xml_declaration" :bool, 67 | "dots_size" :float, 68 | "dynamic_print_values" :bool, 69 | "explicit_size" :bool, 70 | "fill" :bool, 71 | "force_uri_protocol" :str, 72 | "formatter" :none-type, 73 | "half_pie" :bool, 74 | "height" :int, 75 | "include_x_axis" :bool, 76 | "inner_radius" :int, 77 | "interpolate" :none-type, 78 | "interpolation_parameters" :dict, 79 | "interpolation_precision" :int, 80 | "inverse_y_axis" :bool, 81 | "js" :list, 82 | "legend_at_bottom" :bool, 83 | "legend_at_bottom_columns" :none-type, 84 | "legend_box_size" :int, 85 | "logarithmic" :bool, 86 | "margin" :int, 87 | "margin_bottom" :none-type, 88 | "margin_left" :none-type, 89 | "margin_right" :none-type, 90 | "margin_top" :none-type, 91 | "max_scale" :int, 92 | "min_scale" :int, 93 | "missing_value_fill_truncation" :str, 94 | "no_data_text" :str, 95 | "no_prefix" :bool, 96 | "order_min" :none-type, 97 | "pretty_print" :bool, 98 | "print_labels" :bool, 99 | "print_values" :bool, 100 | "print_values_position" :str, 101 | "print_zeroes" :bool, 102 | "range" :none-type, 103 | "rounded_bars" :none-type, 104 | "secondary_range" :none-type, 105 | "show_dots" :bool, 106 | "show_legend" :bool, 107 | "show_minor_x_labels" :bool, 108 | "show_minor_y_labels" :bool, 109 | "show_only_major_dots" :bool, 110 | "show_x_guides" :bool, 111 | "show_x_labels" :bool, 112 | "show_y_guides" :bool, 113 | "show_y_labels" :bool, 114 | "spacing" :int, 115 | "stack_from_top" :bool, 116 | "strict" :bool, 117 | "stroke" :bool, 118 | "stroke_style" :none-type, 119 | "style" :type, 120 | "title" :str, 121 | "to_dict" :method, 122 | "tooltip_border_radius" :int, 123 | "tooltip_fancy_mode" :bool, 124 | "truncate_label" :none-type, 125 | "truncate_legend" :none-type, 126 | "value_formatter" :default, 127 | "width" :int, 128 | "x_label_rotation" :int, 129 | "x_labels" :none-type, 130 | "x_labels_major" :none-type, 131 | "x_labels_major_count" :none-type, 132 | "x_labels_major_every" :none-type, 133 | "x_title" :none-type, 134 | "x_value_formatter" :default, 135 | "xrange" :none-type, 136 | "y_label_rotation" :int, 137 | "y_labels" :none-type, 138 | "y_labels_major" :none-type, 139 | "y_labels_major_count" :none-type, 140 | "y_labels_major_every" :none-type, 141 | "y_title" :none-type, 142 | "zero" :int} 143 | ) 144 | 145 | ;; For bar-chart 146 | 147 | (comment 148 | 149 | ;; http://www.pygal.org/en/latest/documentation/configuration/chart.html 150 | (def barchart (pygal/Bar)) 151 | 152 | (py. barchart add "Fibonacci" [0 1 1 2 3 5 8 13 21 34 55]) 153 | ;;=> 154 | 155 | (py. barchart add "Padovan" [1 1 1 2 2 3 4 5 7 9 12]) 156 | ;;=> 157 | 158 | ;; Render will just return the object 159 | (def result (py. barchart render)) 160 | 161 | (type result) 162 | ;;=> :pyobject 163 | 164 | ;; To render the result in the browser try 165 | (py. barchart render_in_browser) 166 | 167 | ;; To render the result to file (svg) 168 | (py. barchart render_to_file "bar_chart.svg") 169 | ;;=> you should have the file on your system 170 | 171 | ;; To render the result as png 172 | (py. barchart render_to_png "bar_chart.png") 173 | ;;=> You should have the file on your system 174 | 175 | ) 176 | 177 | ;; As we may like to try out different flavor of graph 178 | ;; Let's create simple function to make it easier to explore. 179 | 180 | (defn pg-plot 181 | "Plot a specific type of graph using Pygal. 
182 | 183 | Examples: 184 | (pg-plot (pygal/Bar :show_legend true 185 | :title \"Pygal Bar Chart\" 186 | :x_title \"x title\" 187 | :y_title \"y title\" 188 | :fill true) 189 | \"Fibonacci\" [0 1 1 2 3 5 8 13 21 34 55] 190 | \"Padovan\" [1 1 1 2 2 3 4 5 7 9 12])" 191 | [graph & xs] 192 | (let [tmp-file (create-tmp-file "tmp-output" ".svg") 193 | output (.getAbsolutePath tmp-file)] 194 | (doseq [[x y] 195 | (partition 2 xs)] 196 | (py. graph add x y)) 197 | (py. graph render_to_file output) 198 | (display-image output) 199 | (.deleteOnExit tmp-file))) 200 | 201 | (comment 202 | ;; Simple bar-graph 203 | (pg-plot (pygal/Bar :show_legend true 204 | :title "Bar Chart Example" 205 | :x_title "x title" 206 | :y_title "y title" 207 | :fill true) 208 | "Fibonacci" [0 1 1 2 3 5 8 13 21 34 55] 209 | "Padovan" [1 1 1 2 2 3 4 5 7 9 12]) 210 | 211 | ;; Simple line-graph 212 | (pg-plot (pygal/Line :show_legend true 213 | :title "Line Chart Example") 214 | "Fibonacci" [0 1 1 2 3 5 8 13 21 34 55] 215 | "Padovan" [1 1 1 2 2 3 4 5 7 9 12]) 216 | 217 | ;; http://www.pygal.org/en/latest/documentation/types/histogram.html 218 | (pg-plot (pygal/Histogram :show_legend true 219 | :title "Histogram Example") 220 | "Wide Bars" [[5 0 10] 221 | [4 5 13] 222 | [2 0 15]] 223 | "Narrow Bars" [[10 1 2] 224 | [12 4 4.5] 225 | [8 11 13]]) 226 | 227 | ) 228 | 229 | ;; XY - http://www.pygal.org/en/latest/documentation/types/xy.html 230 | 231 | (comment 232 | ;; Basic 233 | (py/from-import math cos) 234 | 235 | (map (fn [x] [(cos (/ x 10.0)) (/ x 10.0)]) (range -50 50 5)) 236 | (map (fn [x] [(/ x 10.0) (cos (/ x 10.0))]) (range -50 50 5)) 237 | 238 | ;; ## Python Code: 239 | ;; from math import cos 240 | ;; xy_chart = pygal.XY() 241 | ;; xy_chart.title = 'XY Cosinus' 242 | ;; xy_chart.add('x = cos(y)', [(cos(x / 10.), x / 10.) for x in range(-50, 50, 5)]) 243 | ;; xy_chart.add('y = cos(x)', [(x / 10., cos(x / 10.)) for x in range(-50, 50, 5)]) 244 | ;; xy_chart.add('x = 1', [(1, -5), (1, 5)]) 245 | ;; xy_chart.add('x = -1', [(-1, -5), (-1, 5)]) 246 | ;; xy_chart.add('y = 1', [(-5, 1), (5, 1)]) 247 | ;; xy_chart.add('y = -1', [(-5, -1), (5, -1)]) 248 | 249 | ;; ## Clojure Code - beautiful first class function, compare to Python's list comprehension! 
250 | ;; ## I am obviously bias :) 251 | (pg-plot (pygal/XY :title "XY Cosinus Example") 252 | "x = cos(y)" (map (fn [x] [(cos (/ x 10.0)) (/ x 10.0)]) (range -50 50 5)) 253 | "y = cos(x)" (map (fn [x] [(/ x 10.0) (cos (/ x 10.0))]) (range -50 50 5)) 254 | "x = 1" [[-1 -5] [1 5]] 255 | "x = -1" [[-1 -5] [-1 5]] 256 | "y = 1" [[-5 1] [5 1]] 257 | "y = -1" [[-5 -1] [5 -1]]) 258 | 259 | ;; Scatter Plot 260 | (pg-plot (pygal/XY :stroke false 261 | :title "Correlation") 262 | "A" [[0 0] [0.1 0.2] [0.3 0.1] [0.5 1.0] [0.8 0.6] [1.0 1.08] [1.3 1.1] [2, 3.23] [2.43, 2]] 263 | "B" [[0.1 0.15] [0.12 0.23] [0.4 0.3] [0.6 0.4] [0.21 0.21] [0.5 0.3] [0.6 0.8] 264 | [0.7 0.8]] 265 | "C" [[0.05 0.01] [0.13 0.02] [1.5 1.7] [1.52 1.6] [1.8 1.63] [1.5 1.82] [1.7 1.23] [2.1 2.23] [2.3 1.98]] 266 | ) 267 | 268 | ;; Time 269 | (py/from-import datetime) 270 | 271 | ;; DateTime 272 | (pg-plot (pygal/DateTimeLine 273 | :title "DateTime Example" 274 | :x_label_rotation 35 275 | :truncate_label -1) 276 | "Series" [[(datetime 2013 1 2 12 0) 300] 277 | [(datetime 2013 1 12 14 30 45) 412] 278 | [(datetime 2013 2 2 6) 823] 279 | [(datetime 2013 2 22 9 45) 672]]) 280 | 281 | ;; Date 282 | (py/from-import datetime date) 283 | (pg-plot (pygal/DateLine 284 | :title "Date Example" 285 | :x_label_rotation 25 286 | :x_labels [(date 2013 1 1) 287 | (date 2013 7 1) 288 | (date 2014 1 1) 289 | (date 2015 1 1) 290 | (date 2015 7 1)]) 291 | "Series" [[(date 2013 1 2) 213] 292 | [(date 2013 8 2) 281] 293 | [(date 2014 12 7) 198] 294 | [(date 2015 3 21) 120]]) 295 | 296 | ;; Time 297 | (py/from-import datetime time) 298 | (pg-plot (pygal/TimeLine 299 | :title "Time Example" 300 | :x_label_rotation 25) 301 | "Series" [[(time) 0] 302 | [(time 6) 5] 303 | [(time 8 30) 12] 304 | [(time 11 59 59) 4] 305 | [(time 18) 10] 306 | [(time 23 30) -1]]) 307 | 308 | ;; TimeDelta 309 | (py/from-import datetime timedelta) 310 | 311 | (pg-plot (pygal/TimeDeltaLine 312 | :title "Time Delta Example" 313 | :x_label_rotation 25) 314 | "Series" [[(timedelta) 0] 315 | [(timedelta :seconds 6) 5] 316 | [(timedelta :minutes 11 :seconds 59) 4] 317 | [(timedelta :days 3 :microseconds 30) 12] 318 | [(timedelta :weeks 1) 10]]) 319 | 320 | ) 321 | 322 | ;; Pie 323 | ;; http://www.pygal.org/en/latest/documentation/types/pie.html# 324 | (comment 325 | ;; Basic 326 | (pg-plot (pygal/Pie :show_legend true 327 | :title "Browser usage in Feb 2012 (in %)") 328 | "IE" 19.5 329 | "Chrome" 36.3 330 | "Safari" 4.5 331 | "Opera" 2.3) 332 | 333 | ;; Multi-series 334 | (pg-plot (pygal/Pie :show_legend true 335 | :title "Browser usage in Feb 2012 (in %)") 336 | "IE" [5.7 10.2 2.6 1] 337 | "Firefox" [0.6 16.8 7.4 2.2 1.2 1 1 1.1 4.3 1] 338 | "Chrome" [0.3 0.9 17.1 15.3 0.6 0.5 1.6] 339 | "Safari" [4.4 0.1] 340 | "Opera" [0.1 1.6 0.1 0.5]) 341 | 342 | ;; Donut 343 | (pg-plot (pygal/Pie 344 | :inner_radius 0.4 345 | :show_legend true 346 | :title "Browser usage in Feb 2012 (in %)") 347 | "IE" 19.5 348 | "Chrome" 36.3 349 | "Safari" 4.5 350 | "Opera" 2.3) 351 | 352 | ;; Or a ring 353 | (pg-plot (pygal/Pie 354 | :inner_radius 0.75 355 | :show_legend true 356 | :title "Browser usage in Feb 2012 (in %)") 357 | "IE" 19.5 358 | "Chrome" 36.3 359 | "Safari" 4.5 360 | "Opera" 2.3) 361 | 362 | ;; Or Half pie 363 | (pg-plot (pygal/Pie 364 | :half_pie true 365 | :show_legend true 366 | :title "Browser usage in Feb 2012 (in %)") 367 | "IE" 19.5 368 | "Chrome" 36.3 369 | "Safari" 4.5 370 | "Opera" 2.3) 371 | 372 | ) 373 | 374 | ;; Radar 
http://www.pygal.org/en/latest/documentation/types/radar.html 375 | (comment 376 | 377 | (pg-plot (pygal/Radar 378 | :title "V8 Benchmark Results" 379 | :x_labels ["Richards" 380 | "DeltaBlue" 381 | "Crypto" 382 | "RayTrace" 383 | "EarleyBoyer" 384 | "RegExp" 385 | "Splay" 386 | "NavierStokes"]) 387 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607] 388 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450] 389 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669] 390 | "IE", [43, 41, 59, 79, 144, 136, 34, 102]) 391 | 392 | ) 393 | 394 | ;; Box - http://www.pygal.org/en/latest/documentation/types/box.html#extremes-default 395 | (comment 396 | 397 | ;; Extreme (defaul) 398 | (pg-plot (pygal/Box 399 | :title "V8 Benchmark Results" 400 | :x_labels ["Richards" 401 | "DeltaBlue" 402 | "Crypto" 403 | "RayTrace" 404 | "EarleyBoyer" 405 | "RegExp" 406 | "Splay" 407 | "NavierStokes"]) 408 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607] 409 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450] 410 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669] 411 | "IE", [43, 41, 59, 79, 144, 136, 34, 102]) 412 | 413 | ;; Extreme (Interquartile range) 414 | (pg-plot (pygal/Box 415 | :box_mode "1.5IQR" 416 | :title "V8 Benchmark Results" 417 | :x_labels ["Richards" 418 | "DeltaBlue" 419 | "Crypto" 420 | "RayTrace" 421 | "EarleyBoyer" 422 | "RegExp" 423 | "Splay" 424 | "NavierStokes"]) 425 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607] 426 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450] 427 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669] 428 | "IE", [43, 41, 59, 79, 144, 136, 34, 102]) 429 | 430 | ;; Tukey 431 | (pg-plot (pygal/Box 432 | :box_mode "tukey" 433 | :title "V8 Benchmark Results" 434 | :x_labels ["Richards" 435 | "DeltaBlue" 436 | "Crypto" 437 | "RayTrace" 438 | "EarleyBoyer" 439 | "RegExp" 440 | "Splay" 441 | "NavierStokes"]) 442 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607] 443 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450] 444 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669] 445 | "IE", [43, 41, 59, 79, 144, 136, 34, 102]) 446 | 447 | ;; Standard deviation 448 | (pg-plot (pygal/Box 449 | :box_mode "stdev" 450 | :title "V8 Benchmark Results" 451 | :x_labels ["Richards" 452 | "DeltaBlue" 453 | "Crypto" 454 | "RayTrace" 455 | "EarleyBoyer" 456 | "RegExp" 457 | "Splay" 458 | "NavierStokes"]) 459 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607] 460 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450] 461 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669] 462 | "IE", [43, 41, 59, 79, 144, 136, 34, 102]) 463 | 464 | ;; Population Standard Deviation 465 | (pg-plot (pygal/Box 466 | :box_mode "pstdev" 467 | :title "V8 Benchmark Results" 468 | :x_labels ["Richards" 469 | "DeltaBlue" 470 | "Crypto" 471 | "RayTrace" 472 | "EarleyBoyer" 473 | "RegExp" 474 | "Splay" 475 | "NavierStokes"]) 476 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607] 477 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450] 478 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669] 479 | "IE", [43, 41, 59, 79, 144, 136, 34, 102]) 480 | 481 | ) 482 | 483 | ;; Dot - http://www.pygal.org/en/latest/documentation/types/dot.html# 484 | 485 | (comment 486 | ;; Basic 487 | (pg-plot (pygal/Dot 488 | :x_label_rotation 30 489 | :title "V8 Benchmark Results" 490 | :x_labels ["Richards" 491 | "DeltaBlue" 492 | "Crypto" 493 | "RayTrace" 494 | "EarleyBoyer" 
495 | "RegExp" 496 | "Splay" 497 | "NavierStokes"]) 498 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607] 499 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450] 500 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669] 501 | "IE", [43, 41, 59, 79, 144, 136, 34, 102]) 502 | 503 | ;; Negative 504 | (pg-plot (pygal/Dot 505 | :x_label_rotation 30) 506 | "Normal" [10 50 76 80 25] 507 | "With negatives" [0 -34 -29 39 -75] 508 | ) 509 | 510 | ) 511 | 512 | ;; Funnel - http://www.pygal.org/en/latest/documentation/types/funnel.html 513 | (comment 514 | 515 | ;; Basic 516 | (pg-plot (pygal/Funnel 517 | :title "V8 Benchmark Results" 518 | :x_labels ["Richards" 519 | "DeltaBlue" 520 | "Crypto" 521 | "RayTrace" 522 | "EarleyBoyer" 523 | "RegExp" 524 | "Splay" 525 | "NavierStokes"]) 526 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607] 527 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450] 528 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669] 529 | "IE", [43, 41, 59, 79, 144, 136, 34, 102]) 530 | ) 531 | 532 | ;;SolidGuage - http://www.pygal.org/en/latest/documentation/types/solidgauge.html 533 | 534 | (comment 535 | 536 | ;; Normal 537 | (pg-plot (pygal/SolidGauge 538 | :inner_radius 0.70 539 | :title "Solid Guage Normal Example" 540 | :value_formatter (fn [x] (format "%s %%" x))) 541 | "Series 1" [{:value 225000 :max_value 1275000}] 542 | "Series 2" [{:value 110 :max_value 100}] 543 | "Series 3" [{:value 3}] 544 | "Series 4" [{:value 51 :max_value 100} 545 | {:value 12 :max_value 100}] 546 | "Series 5" [{:value 79 :max_value 100}] 547 | "Series 6" [{:value 99}] 548 | "Series 7" [{:value 100 :max_value 100}]) 549 | 550 | 551 | ;; Half 552 | (let [style (py.- (pygal/Config :value_font_size 10) style)] 553 | (pg-plot (pygal/SolidGauge :half_pie true 554 | :inner_radius 0.70 555 | :title "Solid Guage Half Example" 556 | :style style 557 | :value_formatter (fn [x] (format "%s %%" x))) 558 | "Series 1" [{:value 225000 :max_value 1275000}] 559 | "Series 2" [{:value 110 :max_value 100}] 560 | "Series 3" [{:value 3}] 561 | "Series 4" [{:value 51 :max_value 100} 562 | {:value 12 :max_value 100}] 563 | "Series 5" [{:value 79 :max_value 100}] 564 | "Series 6" [{:value 99}] 565 | "Series 7" [{:value 100 :max_value 100}])) 566 | 567 | ) 568 | 569 | ;; TODO: add example for Pyramid 570 | ;; http://www.pygal.org/en/latest/documentation/types/pyramid.html 571 | 572 | ;; Treemap 573 | ;; http://www.pygal.org/en/latest/documentation/types/treemap.html 574 | (comment 575 | (pg-plot (pygal/Treemap 576 | :title "Binary Treemap Example") 577 | "A" [2, 1, 12, 4, 2, 1, 1, 3, 12, 3, 4, nil, 9] 578 | "B" [4, 2, 5, 10, 3, 4, 2, 7, 4, -10, nil, 8, 3, 1] 579 | "C" [3, 8, 3, 3, 5, 3, 3, 5, 4, 12] 580 | "D" [23, 18] 581 | "E" [1, 2, 1, 2, 3, 3, 1, 2, 3,4, 3, 1, 2, 1, 1, 1, 1, 1] 582 | "F" [31] 583 | "G" [5, 9.3, 8.1, 12, 4, 3, 2] 584 | "H" [12, 3, 3]) 585 | ) 586 | 587 | ;; Value configuration 588 | ;; http://www.pygal.org/en/stable/documentation/configuration/value.html 589 | (comment 590 | 591 | ;; Labels 592 | (pg-plot (pygal/Bar :title "Labels Example") 593 | "First" [{:value 2 :label "This is the first"}] 594 | "Second" [{:value 4 :label "This is the second"}] 595 | "Third" 7 596 | "Fourth" [{:value 5}] 597 | "Fifth" [{:value 3 :label "This is the fifth"}]) 598 | 599 | ;; Style 600 | ;; You can force the color of a value by specifying a color key 601 | (pg-plot (pygal/Bar :title "Style with Color Example") 602 | "Series" [{:value 2} 3 4 603 | {:value 10, :color 
:blue} 604 | {:value 11, :color "rgb(255, 45, 20, 0.6)"} 4 2]) 605 | 606 | ;; The color key sets the fill and the stroke style. You can also set the CSS style manually: 607 | (pg-plot (pygal/Bar :title "Style with custom stroke Example") 608 | "Series" [{:value 2} 3 4 609 | {:value 10, 610 | :style "fill: red; stroke: black; stroke-width: 4"} 611 | {:value 11, 612 | :style "fill: rgb(255, 45, 20, 0.6); stroke: black; stroke-dasharray: 15, 10, 5, 10, 15"} 4 2] 613 | ) 614 | 615 | ;; Value formatting 616 | 617 | ;; You can add a `formatter` metadata entry for a specific value 618 | ;; Note: we can't use the `pg-plot` helper for this because `add` needs additional arguments 619 | ;; Python Code: 620 | ;; chart = pygal.Bar(print_values=True, value_formatter=lambda x: '{}$'.format(x)) 621 | ;; chart.add('bar', [.0002, .0005, .00035], formatter=lambda x: '<%s>' % x) 622 | ;; chart.add('bar', [.0004, {'value': .0009, 'formatter': lambda x: '«%s»' % x}, .001]) 623 | ;; chart.render() 624 | 625 | ;; Clojure Code: trying to keep it the same as the Python above. 626 | (let [tmp-file (File/createTempFile "tmp-output" ".svg") 627 | output (.getAbsolutePath tmp-file) 628 | graph (pygal/Bar :print_values true 629 | :title "Value Formatting" 630 | :value_formatter (fn [x] (format "%.4f $" x)))] 631 | (py. graph add "bar" [0.0002 0.0005 0.00035] 632 | :formatter (fn [x] (format "<%.4f>" x))) 633 | (py. graph add "baz" [0.0004 {:value 0.0009 634 | :formatter (fn [x] (format "<<%.4f>>" x))}]) 635 | (py. graph render_to_file output) 636 | (sh/sh "open" output) 637 | (.deleteOnExit tmp-file)) 638 | 639 | ;; Node attributes: 640 | ;; It is possible to pass SVG attributes to the node representing a value. 641 | (pg-plot (pygal/Line 642 | :title "Node Attributes Example") 643 | "Series" [{:value 1 :node {:r 2}} 644 | {:value 2 :node {:r 4}} 645 | {:value 3 :node {:r 6}} 646 | {:value 4 :node {:r 8}}]) 647 | 648 | ) 649 | 650 | ;; Links 651 | ;; http://www.pygal.org/en/stable/documentation/configuration/value.html#links 652 | 653 | (comment 654 | ;; Basic 655 | ;; Add hyperlinks 656 | (pg-plot (pygal/Bar 657 | :title "Link - Basic Example") 658 | "First" [{:value 2 659 | :label "This is the first" 660 | :xlink "http://en.wikipedia.org/wiki/First"}] 661 | "Second" [{:value 4 662 | :label "This is the second" 663 | :xlink "http://en.wikipedia.org/wiki/Second"}] 664 | "Third" 7 665 | "Fourth" [{:value 5 666 | :xlink "http://en.wikipedia.org/wiki/Fourth"}] 667 | "Fifth" [{:value 3 668 | :label "This is the fifth" 669 | :xlink "http://en.wikipedia.org/wiki/Fifth"}]) 670 | 671 | ;; Advanced 672 | ;; Specify a map for xlink with all the link attributes 673 | (pg-plot (pygal/Bar 674 | :title "Link - Advanced Example") 675 | "First" [{:value 2 676 | :label "This is the first" 677 | :xlink "http://en.wikipedia.org/wiki/First"}] 678 | "Second" [{:value 4 679 | :label "This is the second" 680 | :xlink {:href "http://en.wikipedia.org/wiki/Second" 681 | :target "_top"}}] 682 | "Third" 7 683 | "Fourth" [{:value 5 684 | :xlink {:href "http://en.wikipedia.org/wiki/Fourth" 685 | :target "_blank"}}] 686 | "Fifth" [{:value 3 687 | :label "This is the fifth" 688 | :xlink {:href "http://en.wikipedia.org/wiki/Fifth" 689 | :target "_self"}}]) 690 | 691 | ;; TODO: http://www.pygal.org/en/stable/documentation/configuration/value.html#legend 692 | ;; TODO: http://www.pygal.org/en/stable/documentation/configuration/value.html#confidence-intervals 693 | ) 694 | 695 | ;; Sparklines - http://www.pygal.org/en/stable/documentation/sparks.html# 696 | ;; TODO: 697 | 698 | ;;
Maps 699 | (comment 700 | ;; Require 701 | ;; pip install pygal_maps_world 702 | (require-python '[pygal_maps_world :as pygal-mw]) 703 | ;; TODO: continue world map example 704 | ) 705 | 706 | ;; http://www.pygal.org/en/latest/documentation/types/maps/index.html 707 | ;; - World Map 708 | ;; - French Map 709 | ;; - Department 710 | ;; - Regions 711 | ;; - Department list 712 | ;; - Region list 713 | ;; - Swiss Map 714 | ;; - Canton 715 | ;; - Canton list 716 | -------------------------------------------------------------------------------- /src/gigasquid/pytorch_mnist.clj: -------------------------------------------------------------------------------- 1 | ;; This example was ported from pytorch/examples MNIST from: 2 | ;; https://github.com/pytorch/examples.git 3 | (ns gigasquid.pytorch-mnist 4 | (:require 5 | [libpython-clj.python :as py 6 | :refer [py* py** py. py.. py.- $a $. 7 | as-jvm with-gil-stack-rc-context 8 | stack-resource-context 9 | import-module 10 | get-attr get-item att-type-map call call-attr]] 11 | [libpython-clj.require :refer [require-python]])) 12 | 13 | ;;; sudo pip3 install torch 14 | ;;; sudo pip3 install torchvision 15 | 16 | (require-python 17 | '[torch :as torch] 18 | '[torch.cuda :as cuda] 19 | '[torch.onnx :as onnx] 20 | '[torch.nn :as nn :refer [Conv2d Dropout2d Linear]] 21 | '[torch.optim :as optim] 22 | '[torch.utils.data :as tud] 23 | '[torch.nn.functional :as F] 24 | '[torchvision.datasets :as datasets] 25 | '[torchvision.transforms :as transforms] 26 | '[torch.optim.lr_scheduler :as lr_scheduler]) 27 | 28 | (def enumerate (-> (py/import-module "builtins") 29 | (get-attr "enumerate"))) 30 | 31 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 32 | 33 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 34 | ;; If you have CUDA but do not want to use it, set this to false 35 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 36 | (def ^:dynamic *use-cuda* (cuda/is_available)) 37 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 38 | 39 | (def log-interval 100) 40 | 41 | ;; Yann LeCun: 42 | ;; Training with large minibatches is bad for your health. 43 | ;; More importantly, it's bad for your test error. 44 | ;; Friends dont let friends use minibatches larger than 32. 45 | ;; https://twitter.com/ylecun/status/989610208497360896 46 | ;; 47 | ;; input batch size for training (default: 64) 48 | (def batch-size 32) 49 | ;; input batch size for testing (default: 1000) 50 | (def test-batch-size 1000) 51 | ;; number of epochs to train (default: 14) 52 | (def epochs 14) 53 | ;; learning rate (default: 1.0) 54 | (def learning-rate 1.0) 55 | ;; Learning rate step gamma (default: 0.7) 56 | (def gamma 0.7) 57 | ;; random seed (default: 1) 58 | (def seed 42) 59 | 60 | (def mnist-mean [0.1307]) 61 | (def mnist-std [0.3081]) 62 | 63 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 64 | 65 | (defonce device (atom nil)) 66 | (defonce train-data (atom nil)) 67 | (defonce train-loader (atom nil)) 68 | (defonce test-data (atom nil)) 69 | (defonce test-loader (atom nil)) 70 | (defonce model (atom nil)) 71 | (defonce optimizer (atom nil)) 72 | 73 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 74 | 75 | ;;; load MNIST data from the internet 76 | (defn load-data! 
[] 77 | (let [gpu-opts (if *use-cuda* 78 | {:num_workers 1 :pin_memory true} 79 | {}) 80 | mnist-transform (transforms/Compose 81 | [(transforms/ToTensor) 82 | (transforms/Normalize mnist-mean mnist-std)])] 83 | ;; training data and loader 84 | (reset! train-data 85 | (datasets/MNIST "./resources/pytorch/data" 86 | :train true :download true :transform mnist-transform)) 87 | (let [kwargs (merge {:batch_size batch-size :shuffle true} 88 | gpu-opts) 89 | args (into [@train-data] (mapcat identity kwargs))] 90 | (reset! train-loader (apply tud/DataLoader args))) 91 | 92 | ;; test data and loader 93 | (reset! test-data 94 | (datasets/MNIST "./resources/pytorch/data" 95 | :train false :download true :transform mnist-transform)) 96 | (let [kwargs (merge {:batch_size test-batch-size :shuffle true} 97 | gpu-opts) 98 | args (into [@test-data] (mapcat identity kwargs))] 99 | (reset! test-loader (apply tud/DataLoader args)))) 100 | 101 | nil) 102 | 103 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 104 | 105 | 106 | ;;; neural network definition, uses convolutional neural nets (CNNs) 107 | (def MyNet 108 | (py/create-class 109 | "MyNet" [nn/Module] 110 | {"__init__" 111 | (py/make-tuple-instance-fn 112 | (fn [self] 113 | (py. nn/Module __init__ self) 114 | (py/set-attrs! 115 | self 116 | {"conv1" (Conv2d 1 32 3 1) 117 | "conv2" (Conv2d 32 64 3 1) 118 | "dropout1" (Dropout2d 0.25) 119 | "dropout2" (Dropout2d 0.5) 120 | "fc1" (Linear 9216 128) 121 | "fc2" (Linear 128 10)}) 122 | 123 | ;; __init__ must return nil 124 | nil)) 125 | "forward" 126 | (py/make-tuple-instance-fn 127 | (fn [self x] 128 | (let [x (py. self conv1 x) 129 | x (F/relu x) 130 | x (py. self conv2 x) 131 | x (F/max_pool2d x 2) 132 | x (py. self dropout1 x) 133 | x (torch/flatten x 1) 134 | x (py. self fc1 x) 135 | x (F/relu x) 136 | x (py. self dropout2 x) 137 | x (py. self fc2 x) 138 | output (F/log_softmax x :dim 1)] 139 | output)) 140 | :arg-converter as-jvm 141 | :method-name "forward")})) 142 | 143 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 144 | 145 | (defn setup! [] 146 | (py/gc!) 147 | (torch/manual_seed seed) 148 | (reset! device (if *use-cuda* 149 | (torch/device "cuda") 150 | (torch/device "cpu"))) 151 | (load-data!) 152 | (reset! model 153 | (let [inst (MyNet)] 154 | (py. inst "to" @device))) 155 | (reset! optimizer 156 | (optim/Adadelta (py. @model "parameters") 157 | :lr learning-rate)) 158 | nil) 159 | 160 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 161 | 162 | (defn train [args model device train-loader optimizer epoch] 163 | (py. model train) 164 | (dorun 165 | (for [[batch-idx [data target]] (enumerate train-loader)] 166 | (with-gil-stack-rc-context 167 | (let [data (py. data to device) 168 | target (py. target to device)] 169 | (py. optimizer zero_grad) 170 | (let [output (py. model __call__ data) 171 | loss (F/nll_loss output target)] 172 | (py. loss backward) 173 | (py. optimizer step) 174 | (when (= 0 (mod batch-idx log-interval)) 175 | (println 176 | (format "Train Epoch: %d [%d/%d (%.1f%%)]\tLoss: %.6f" 177 | epoch 178 | (* batch-idx (int (py. data "__len__"))) 179 | (py. (py.- train-loader dataset) "__len__") 180 | (/ (* 100.0 batch-idx) (int (py. train-loader "__len__"))) 181 | (py. loss item)))))))))) 182 | 183 | (defn test-model [args model device test-loader] 184 | (py. 
model eval) 185 | (let [test-lost (atom 0) 186 | correct (atom 0)] 187 | (letfn [(test-batch [data target] 188 | (let [data (py. data to device) 189 | target (py. target to device) 190 | output (py. model __call__ data)] 191 | (swap! test-lost + 192 | (py. (F/nll_loss output target :reduction "sum") item)) 193 | (let [pred (py. output argmax :dim 1 :keepdim true)] 194 | (swap! correct + 195 | (-> (py. pred eq (py. target view_as pred)) 196 | (py. sum) 197 | (py. item))))))] 198 | 199 | ; pytorch crash with "python error in flight" 200 | ; (py/with [ng torch/no_grad] 201 | ; (dorun 202 | ; (for [[data target] test-loader] 203 | ; (with-gil-stack-rc-context 204 | ; (test-batch data target))))) 205 | 206 | ; pytorch crash with "python error in flight" 207 | ; (py/with [ng torch/no_grad] 208 | ; (dorun 209 | ; (for [[data target] test-loader] 210 | ; (stack-resource-context 211 | ; (test-batch data target))))) 212 | 213 | (let [no-grad (torch/no_grad)] 214 | (try 215 | (py. no-grad __enter__) 216 | (dorun 217 | (for [[data target] test-loader] 218 | (with-gil-stack-rc-context 219 | (test-batch data target)))) 220 | (finally 221 | (py. no-grad __exit__))))) 222 | 223 | (let [data-set (py.- test-loader dataset) 224 | n (py. data-set __len__)] 225 | (swap! test-lost / (py. data-set __len__)) 226 | (println 227 | (format "\nTest set: Average loss: %.4f, Accuracy %d/%d (%.1f%%)\n" 228 | @test-lost @correct 229 | n 230 | (/ (* 100. @correct) (int n))))))) 231 | 232 | (defn train-test-loop! 233 | "RUN THIS IN A CONSOLE REPL IF YOUR EDITOR REPL DOESN'T HAVE STREAMING" 234 | [] 235 | (let [scheduler (lr_scheduler/StepLR @optimizer :step_size 1 :gamma gamma) 236 | args {}] 237 | (dorun 238 | (for [epoch (range epochs)] 239 | (do 240 | (train args @model @device @train-loader @optimizer epoch) 241 | (test-model args @model @device @test-loader) 242 | (py. scheduler step)))))) 243 | 244 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 245 | 246 | ;;; save the model to the universal ONNX format 247 | ;;; you can use NETRON at https://github.com/lutzroeder/netron to visualize 248 | ;;; this model. 249 | (defn save-model! [] 250 | (let [tensor (first (first (seq @train-loader))) 251 | size (vec (py. tensor size)) 252 | args (into size [:device "cuda"]) 253 | dummy-input (apply torch/randn args)] 254 | (onnx/export @model dummy-input "resources/pytorch/models/mnist_cnn.onnx" 255 | :verbose true 256 | :output_names ["digit_from_0_to_9"]) 257 | nil)) 258 | 259 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 260 | 261 | (defn --profile-cuda [] 262 | (binding [*use-cuda* true] 263 | (setup!) 264 | (train-test-loop!))) 265 | 266 | (defn --profile-no-cuda [] 267 | (binding [*use-cuda* false] 268 | (setup!) 269 | (train-test-loop!))) 270 | 271 | (comment 272 | (setup!) 273 | (train-test-loop!) 274 | (save-model!)) 275 | -------------------------------------------------------------------------------- /src/gigasquid/sci_spacy.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.sci-spacy 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. 
py.-]] 4 | [clojure.java.shell :as sh])) 5 | 6 | ;;;; You need to pip install the model 7 | ;; sudo pip3 install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz 8 | 9 | ;;; pip install scispacy 10 | 11 | ;;; tutorial from https://allenai.github.io/scispacy/ 12 | 13 | (require-python '[spacy :as spacy]) 14 | (require-python '[scispacy :as scispacy]) 15 | 16 | (def nlp (spacy/load "en_core_sci_sm")) 17 | (def text "Myeloid derived suppressor cells (MDSC) are immature 18 | myeloid cells with immunosuppressive activity. 19 | They accumulate in tumor-bearing mice and humans 20 | with different types of cancer, including hepatocellular 21 | carcinoma (HCC).") 22 | 23 | (def doc (nlp text)) 24 | 25 | (py/python-type (py.- doc sents)) ;=> :generator 26 | (py/python-type (py.- doc ents)) ;=> :tuple 27 | 28 | ;;; basically you should map over these things 29 | 30 | (map (fn [ent] (py.- ent text)) (py.- doc ents)) 31 | ;=> ("Myeloid" "suppressor cells" "MDSC" "immature" "myeloid cells" "immunosuppressive activity" "accumulate" "tumor-bearing mice" "humans" "cancer" "hepatocellular \n carcinoma" "HCC") 32 | 33 | ;;; what sort of things can you look at on the ent? 34 | (-> (py.- doc ents) first py/att-type-map) ;;; lots! 35 | 36 | ;; {"_" :underscore, 37 | ;; "__class__" :type, 38 | ;; "__delattr__" :method-wrapper, 39 | ;; "__dir__" :builtin-function-or-method, 40 | ;; "__doc__" :str, 41 | ;; "__eq__" :method-wrapper, 42 | ;; "__format__" :builtin-function-or-method, 43 | ;; "__ge__" :method-wrapper, 44 | ;; "__getattribute__" :method-wrapper, 45 | ;; "__getitem__" :method-wrapper, 46 | ;; "__gt__" :method-wrapper, 47 | ;; "__hash__" :method-wrapper, 48 | ;; "__init__" :method-wrapper, 49 | ;; "__init_subclass__" :builtin-function-or-method, 50 | ;; "__iter__" :method-wrapper, 51 | ;; "__le__" :method-wrapper, 52 | ;; "__len__" :method-wrapper, 53 | ;; "__lt__" :method-wrapper, 54 | ;; "__ne__" :method-wrapper, 55 | ;; "__new__" :builtin-function-or-method, 56 | ;; "__pyx_vtable__" :py-capsule, 57 | ;; "__reduce__" :builtin-function-or-method, 58 | ;; "__reduce_ex__" :builtin-function-or-method, 59 | ;; "__repr__" :method-wrapper, 60 | ;; "__setattr__" :method-wrapper, 61 | ;; "__sizeof__" :builtin-function-or-method, 62 | ;; "__str__" :method-wrapper, 63 | ;; "__subclasshook__" :builtin-function-or-method, 64 | ;; "_fix_dep_copy" :builtin-function-or-method, 65 | ;; "_recalculate_indices" :builtin-function-or-method, 66 | ;; "_vector" :none-type, 67 | ;; "_vector_norm" :none-type, 68 | ;; "as_doc" :builtin-function-or-method, 69 | ;; "conjuncts" :tuple, 70 | ;; "doc" :doc, 71 | ;; "end" :int, 72 | ;; "end_char" :int, 73 | ;; "ent_id" :int, 74 | ;; "ent_id_" :str, 75 | ;; "ents" :list, 76 | ;; "get_extension" :builtin-function-or-method, 77 | ;; "get_lca_matrix" :builtin-function-or-method, 78 | ;; "has_extension" :builtin-function-or-method, 79 | ;; "has_vector" :bool, 80 | ;; "kb_id" :int, 81 | ;; "kb_id_" :str, 82 | ;; "label" :int, 83 | ;; "label_" :str, 84 | ;; "lefts" :generator, 85 | ;; "lemma_" :str, 86 | ;; "lower_" :str, 87 | ;; "merge" :builtin-function-or-method, 88 | ;; "n_lefts" :int, 89 | ;; "n_rights" :int, 90 | ;; "noun_chunks" :generator, 91 | ;; "orth_" :str, 92 | ;; "remove_extension" :builtin-function-or-method, 93 | ;; "rights" :generator, 94 | ;; "root" :token, 95 | ;; "sent" :span, 96 | ;; "sentiment" :float, 97 | ;; "set_extension" :builtin-function-or-method, 98 | ;; "similarity" :builtin-function-or-method, 99 | ;; "start" :int, 
100 | ;; "start_char" :int, 101 | ;; "string" :str, 102 | ;; "subtree" :generator, 103 | ;; "tensor" :ndarray, 104 | ;; "text" :str, 105 | ;; "text_with_ws" :str, 106 | ;; "to_array" :builtin-function-or-method, 107 | ;; "upper_" :str, 108 | ;; "vector" :ndarray, 109 | ;; "vector_norm" :float-32, 110 | ;; "vocab" :vocab} 111 | 112 | ;;; same with sentences 113 | (map (fn [sent] (py.- sent text)) (py.- doc sents)) 114 | ;; ("Myeloid derived suppressor cells (MDSC) are immature \n myeloid cells with immunosuppressive activity. \n " 115 | ;; "They accumulate in tumor-bearing mice and humans \n with different types of cancer, including hepatocellular \n carcinoma (HCC).") 116 | 117 | 118 | (require-python '[spacy.displacy :as displacy]) 119 | (spit "my-pic.svg" (displacy/render (first (py.- doc sents)) :style "dep")) 120 | (sh/sh "open" "-a" "Google Chrome" "my-pic.svg") 121 | 122 | -------------------------------------------------------------------------------- /src/gigasquid/seaborn.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.seaborn 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]] 4 | [gigasquid.plot :as plot])) 5 | 6 | (require-python '[seaborn :as sns]) 7 | (require-python '[matplotlib.pyplot :as pyplot]) 8 | 9 | ;;; What is seaborn? Really cool statistical plotting 10 | 11 | ;;; sudo pip3 install seaborn 12 | 13 | (sns/set) ;;; set default style 14 | 15 | ;;; code tutorial from https://seaborn.pydata.org/introduction.html 16 | 17 | (def dots (sns/load_dataset "dots")) 18 | (py. dots head) 19 | ;; align ... firing_rate 20 | ;; 0 dots ... 33.189967 21 | ;; 1 dots ... 31.691726 22 | ;; 2 dots ... 34.279840 23 | ;; 3 dots ... 32.631874 24 | ;; 4 dots ... 35.060487 25 | 26 | ;; [5 rows x 5 columns] 27 | 28 | (take 5 dots) ;=> ("align" "choice" "time" "coherence" "firing_rate") 29 | ;; seaborn will be most powerful when your datasets have a particular organization. This format is alternately called “long-form” or “tidy” data and is described in detail by Hadley Wickham in this academic paper. 
The rules can be simply stated: 30 | 31 | ;; Each variable is a column 32 | 33 | ;; Each observation is a row 34 | 35 | ;;;; statistical relationship plotting 36 | 37 | (plot/with-show 38 | (sns/relplot :x "time" :y "firing_rate" :col "align" 39 | :hue "choice" :size "coherence" :style "choice" 40 | :facet_kws {:sharex false} :kind "line" 41 | :legend "full" :data dots)) 42 | 43 | ;;;; statistical estimateion and error bars 44 | 45 | (def fmri (sns/load_dataset "fmri")) 46 | 47 | (plot/with-show 48 | (sns/relplot :x "timepoint" :y "signal" :col "region" 49 | :hue "event" :style "event" :kind "line" 50 | :data fmri)) 51 | 52 | ;;; enhance a scatter plot to include a linear regression model 53 | 54 | (def tips (sns/load_dataset "tips")) 55 | (plot/with-show 56 | (sns/lmplot :x "total_bill" :y "tip" :col "time" :hue "smoker" :data tips)) 57 | 58 | ;;; data analysis between caterogical values 59 | 60 | (plot/with-show 61 | (sns/catplot :x "day" :y "total_bill" :hue "smoker" :kind "swarm" :data tips)) 62 | 63 | (plot/with-show 64 | (sns/catplot :x "day" :y "total_bill" :hue "smoker" :kind "bar" :data tips)) 65 | 66 | ;;; visualizing dataset structure 67 | 68 | (def iris (sns/load_dataset "iris")) 69 | (plot/with-show 70 | (sns/jointplot :x "sepal_length" :y "petal_length" :data iris)) 71 | 72 | (plot/with-show 73 | (sns/pairplot :data iris :hue "species")) 74 | -------------------------------------------------------------------------------- /src/gigasquid/sk_learn/datasets_estimators.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.sk-learn.datasets-estimators 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]] 4 | [gigasquid.plot :as plot])) 5 | 6 | (require-python '[sklearn.datasets :as datasets]) 7 | (require-python '[matplotlib.pyplot :as pyplot]) 8 | (require-python '[matplotlib.pyplot.cm :as pyplot-cm]) 9 | 10 | ;;;; From https://scikit-learn.org/stable/tutorial/statistical_inference/settings.html 11 | 12 | ;;; Taking a look as the standard iris dataset 13 | 14 | (def iris (datasets/load_iris)) 15 | (def data (py.- iris data)) 16 | (py.- data shape);-> (150, 4) 17 | 18 | ;;; It is made of 150 observations of irises, each described by 4 features: their sepal and petal length and width 19 | 20 | ;;; An example of reshaping is with the digits dataset 21 | ;;; The digits dataset is made of 1797 8x8 images of hand-written digits 22 | 23 | (def digits (datasets/load_digits)) 24 | (def digit-images (py.- digits images)) 25 | (py.- digit-images shape) ;=> (1797, 8, 8) 26 | 27 | (plot/with-show 28 | (pyplot/imshow (last digit-images) :cmap pyplot-cm/gray_r)) 29 | 30 | ;;; To use this dataset we transform each 8x8 image to feature vector of length 64 31 | 32 | (def data (py. digit-images reshape (first (py.- digit-images shape)) -1)) 33 | 34 | (py.- data shape) ;=> (1797, 64) 35 | 36 | 37 | ;;;; Estimator objects 38 | 39 | ;;An estimator is any object that learns from data 40 | ; it may be a classification, regression or clustering algorithm or a transformer that extracts/filters useful features from raw data. 
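;; A quick sketch of the estimator API, using KMeans purely as an illustrative
;; estimator (any classifier, regressor, clusterer or transformer behaves the same
;; way): hyperparameters are passed to the constructor, `fit` learns from the
;; reshaped digits `data` defined above, and learned parameters end up in
;; attributes with a trailing underscore.
(comment
  (require-python '[sklearn.cluster :as cluster])
  (let [estimator (cluster/KMeans :n_clusters 10)] ;; constructor args are the hyperparameters
    (py. estimator fit data)                       ;; every estimator exposes a fit method
    (py.- estimator n_clusters)                    ;=> 10, constructor params stay readable
    (py.- estimator cluster_centers_)))            ;; learned params get a trailing underscore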
41 | 42 | ;;All estimator objects expose a fit method that takes a dataset (usually a 2-d array 43 | 44 | -------------------------------------------------------------------------------- /src/gigasquid/sk_learn/info.txt: -------------------------------------------------------------------------------- 1 | This is based on the statistical learning for scientific data processing 2 | 3 | https://scikit-learn.org/stable/tutorial/statistical_inference/index.html 4 | -------------------------------------------------------------------------------- /src/gigasquid/sk_learn/model_selection.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.sk-learn.model-selection 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]] 4 | [gigasquid.plot :as plot])) 5 | 6 | ;;; from https://scikit-learn.org/stable/tutorial/statistical_inference/model_selection.html 7 | 8 | (require-python '[sklearn.datasets :as datasets]) 9 | (require-python '[sklearn.model_selection :as model-selection]) 10 | (require-python '[sklearn.linear_model :as linear-model]) 11 | (require-python '[sklearn.svm :as svm]) 12 | (require-python '[numpy :as np]) 13 | 14 | (def digits (datasets/load_digits :return_X_y true)) 15 | (def x-digits (first digits)) 16 | (def y-digits (last digits)) 17 | (def svc (svm/SVC :C 1 :kernel "linear")) 18 | (def slice-x-digits (->> x-digits (take 100) (into []) (np/array))) 19 | (def slice-y-digits (->> y-digits (take 100) (into []) (np/array))) 20 | (def slice-x2-digits (->> x-digits (drop 100) (take 100) (into []) (np/array))) 21 | (def slice-y2-digits (->> y-digits (drop 100) (take 100) (into []) (np/array))) 22 | (-> svc 23 | (py. fit slice-x-digits slice-y-digits) 24 | (py. score slice-x2-digits slice-y2-digits)) ;=> 0.93 25 | 26 | 27 | ;;;; We can split into folds we can use for training and testing 28 | ;;; Note here we are doing it a clojure way - but we can use the split method with 29 | ;;; indexes later on 30 | 31 | (def x-folds (np/array_split x-digits 3)) 32 | (def y-folds (np/array_split y-digits 3)) 33 | 34 | (for [k (range 1 4)] 35 | (let [[test-x train-x1 train-x2 ](take 3 (drop (dec k) (cycle x-folds))) 36 | [test-y train-y1 train-y2] (take 3 (drop (dec k) (cycle y-folds))) 37 | train-x (np/concatenate [train-x1 train-x2]) 38 | train-y (np/concatenate [train-y1 train-y2])] 39 | (-> svc 40 | (py. fit train-x train-y) 41 | (py. score test-x test-y)))) 42 | ;=>(0.9348914858096828 0.9565943238731218 0.9398998330550918) 43 | 44 | ;;; Cross Validation generators 45 | ;; Scikit-learn has a collection of classes which can be used to generate lists of train/test indices for popular cross-validation strategies. 46 | 47 | ;; They expose a split method which accepts the input dataset to be split and yields the train/test set indices for each iteration of the chosen cross-validation strategy. 48 | 49 | (def X ["a" "a" "a" "b" "b" "c" "c" "c" "c" "c"]) 50 | (def k-fold (model-selection/KFold :n_splits 5)) 51 | (map (fn [[x y]] {:train x :test y}) 52 | (py. k-fold split X)) 53 | ;; ({:train [2 3 4 5 6 7 8 9], :test [0 1]} 54 | ;; {:train [0 1 4 5 6 7 8 9], :test [2 3]} 55 | ;; {:train [0 1 2 3 6 7 8 9], :test [4 5]} 56 | ;; {:train [0 1 2 3 4 5 8 9], :test [6 7]} 57 | ;; {:train [0 1 2 3 4 5 6 7], :test [8 9]}) 58 | 59 | 60 | ;;; let's understand the generateor for the split and how to use indexes on numpy 61 | (def try-x (first (py. 
k-fold split x-digits))) 62 | (def indexes (first try-x)) 63 | (py.- x-digits shape) ;-> (1797, 64) 64 | (py.- indexes shape) ;=> (1437,) 65 | ;;;; You can use py/get-item to get indexes from numpy 66 | (def test-items (py/get-item x-digits indexes)) 67 | (py.- test-items shape) ;=> (1437, 64) 68 | 69 | 70 | 71 | (map (fn [[train-indexes test-indexes]] 72 | (-> svc 73 | (py. fit (py/get-item x-digits train-indexes) 74 | (py/get-item y-digits train-indexes)) 75 | (py. score (py/get-item x-digits test-indexes) 76 | (py/get-item y-digits test-indexes)))) 77 | (py. k-fold split x-digits)) 78 | ;=>(0.9638888888888889 0.9222222222222223 0.9637883008356546 0.9637883008356546 0.9303621169916435) 79 | 80 | ;; The cross-validation score can be directly calculated using the cross_val_score helper. Given an estimator, the cross-validation object and the input dataset, the cross_val_score splits the data repeatedly into a training and a testing set, trains the estimator using the training set and computes the scores based on the testing set for each iteration of cross-validation 81 | 82 | ;;; n_jobs=-1 means the computation will use all cpus 83 | (model-selection/cross_val_score svc x-digits y-digits :cv k-fold :n_jobs -1) 84 | ;=>[0.96388889 0.92222222 0.9637883 0.9637883 0.93036212] 85 | 86 | ;Alternatively, the scoring argument can be provided to specify an alternative scoring method. 87 | (model-selection/cross_val_score svc x-digits y-digits :cv k-fold 88 | :scoring "precision_macro") 89 | ;=>[0.96578289 0.92708922 0.96681476 0.96362897 0.93192644] 90 | 91 | 92 | ;;;; Grid search 93 | ;;scikit-learn provides an object that, given data, computes the score during the fit of an estimator on a parameter grid and chooses the parameters to maximize the cross-validation score. This object takes an estimator during the construction and exposes an estimator API: 94 | 95 | (def Cs (np/logspace -6 -1 10)) 96 | (def clf (model-selection/GridSearchCV :estimator svc 97 | :param_grid {:C Cs} 98 | :n_jobs -1)) 99 | (def slice-x-digits (->> x-digits (take 1000) (into []) (np/array))) 100 | (def slice-y-digits (->> y-digits (take 1000) (into []) (np/array))) 101 | (def slice-x2-digits (->> x-digits (drop 1000) (take 1000) (into []) (np/array))) 102 | (def slice-y2-digits (->> y-digits (drop 1000) (take 1000) (into []) (np/array))) 103 | (py. clf fit slice-x-digits slice-y-digits) 104 | (py.- clf best_score_) ;=> 0.95 105 | (-> clf (py.- best_estimator_) (py.- C)) ;=> 0.0021544346900318843 106 | (py. clf score slice-x2-digits slice-y2-digits) ;=> 0.946047678795483 107 | 108 | 109 | ;;; Nested cross validation 110 | (model-selection/cross_val_score clf x-digits y-digits) 111 | ;;=>[0.94722222 0.91666667 0.96657382 0.97493036 0.93593315] 112 | 113 | 114 | ;; Cross-validated esitmators 115 | 116 | (def lasso (linear-model/LassoCV)) 117 | (def diabetes (datasets/load_diabetes :return_X_y true)) 118 | (def x-diabetes (first diabetes)) 119 | (def y-diabetes (last diabetes)) 120 | (py. lasso fit x-diabetes y-diabetes) 121 | ;;; The estimator chose automatically its lambda: 122 | (py.- lasso alpha_);=> 0.003753767152692203 123 | 124 | -------------------------------------------------------------------------------- /src/gigasquid/sk_learn/pipelining.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.sk-learn.pipelining 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. 
py.-]] 4 | [gigasquid.plot :as plot])) 5 | 6 | ;We have seen that some estimators can transform data and that some estimators can predict variables. We can also create combined estimators: 7 | 8 | ;;; https://scikit-learn.org/stable/tutorial/statistical_inference/putting_together.html 9 | 10 | (require-python '[sklearn.datasets :as datasets]) 11 | (require-python '[sklearn.decomposition :as decomposition]) 12 | (require-python '[sklearn.linear_model :as linear-model]) 13 | (require-python '[sklearn.pipeline :as pipeline]) 14 | (require-python '[sklearn.model_selection :as model-selection]) 15 | (require-python '[numpy :as np]) 16 | (require-python '[matplotlib.pyplot :as pyplot]) 17 | 18 | ;; Define a pipeline to search for the best combination of PCA truncation 19 | ;; and classifier regularization. 20 | (def pca (decomposition/PCA)) 21 | (def logistic (linear-model/LogisticRegression :max_iter 10000 :tol 0.1)) 22 | (def pipe (pipeline/Pipeline :steps [ ["pca" pca] ["logistic" logistic]])) 23 | 24 | (def digits (datasets/load_digits :return_X_y true)) 25 | (def X-digits (first digits)) 26 | (def y-digits (last digits)) 27 | 28 | ;;; Parameters of pipelines can be set using ‘__’ separated parameter names: 29 | 30 | (def logspace (np/logspace -4 4 4)) 31 | (def param-grid {"pca__n_components" [5 15 30 45 64] 32 | "logistic__C" logspace}) 33 | 34 | 35 | (def search (model-selection/GridSearchCV :estimator pipe 36 | :param_grid param-grid 37 | :n_jobs -1)) 38 | (py. search fit X-digits y-digits) 39 | (py.- search best_score_);=> 0.9198885793871865 40 | (py.- search best_params_) 41 | ;=>{'logistic__C': 0.046415888336127774, 'pca__n_components': 45} 42 | 43 | ;;; Plot the PCA Spectrum 44 | (py. pca fit X-digits) 45 | 46 | (plot/with-show 47 | (let [[fig axes] (pyplot/subplots :nrows 2 :sharex true :figsize [6 6]) 48 | val1 (np/arange 1 (inc (py.- pca n_components_))) 49 | val2 (py.- pca explained_variance_ratio_) 50 | ax0 (first axes) 51 | ax1 (last axes) 52 | val3 (-> (py.- search best_estimator_) 53 | (py.- named_steps) 54 | (py/get-item "pca") 55 | (py.- n_components))] 56 | (py. ax0 plot val1 val2 "+" :linewidth 2) 57 | (py. ax0 set_ylabel "PCA explained variance ratio") 58 | (py. ax0 axvline val3 :linestyle ":" :label "n_components chosen") 59 | (py. ax0 legend :prop {"size" 12})) 60 | ) 61 | 62 | -------------------------------------------------------------------------------- /src/gigasquid/sk_learn/supervised_learning.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.sk-learn.supervised-learning 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]] 4 | [gigasquid.plot :as plot])) 5 | 6 | 7 | ;;; From https://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html 8 | 9 | ;; Clasifying irises 10 | 11 | (require-python '[numpy :as np]) 12 | (require-python '[numpy.random :as np-random]) 13 | (require-python '[sklearn.datasets :as datasets]) 14 | (require-python '[matplotlib.pyplot :as pyplot]) 15 | 16 | (def iris (datasets/load_iris :return_X_y true)) 17 | (def iris-x (first iris)) 18 | (def iris-y (last iris)) 19 | (take 2 iris-x) ;=> ([5.1 3.5 1.4 0.2] [4.9 3. 1.4 0.2]) 20 | (take 2 iris-y) ;=> (0 0) 21 | (np/unique iris-y) ;=> [0 1 2] 22 | 23 | 24 | ;;; K-Nearest neighbors classifier 25 | 26 | ;;; The simplest possible classifier is the nearest neighbor: given a new observation X_test, find in the training set (i.e. 
the data used to train the estimator) the observation with the closest feature vector. 27 | 28 | ;;Split iris data in train and test data 29 | ;; A random permutation, to split the data randomly 30 | 31 | ;;; here instead of following the python example we are going to use 32 | ;; shuffle and take instead 33 | 34 | (py.- iris-x shape) ;=> (150, 4) 35 | (py.- iris-y shape) ;=> (150,) 36 | (def shuffled-data (->> (map (fn [x y] {:x x :y y}) iris-x iris-y) 37 | (shuffle))) 38 | (def train-data (take 140 shuffled-data)) 39 | (def test-data (drop 140 shuffled-data)) 40 | (count train-data) ;-> 140 41 | (count test-data) ;-> 10 42 | (def iris-x-train (mapv :x train-data)) 43 | (def iris-y-train (mapv :y train-data)) 44 | (def iris-x-test (mapv :x test-data)) 45 | (def iris-y-test (mapv :y test-data)) 46 | 47 | 48 | (require-python '[sklearn.neighbors :as neighbors]) 49 | (def knn (neighbors/KNeighborsClassifier)) 50 | (py. knn fit iris-x-train iris-y-train) 51 | ;;; predict 52 | (py. knn predict iris-x-test) ;=> [0 0 1 2 2 0 2 2 0 2] 53 | ;;; actual test 54 | iris-y-test ;=> [0 0 1 2 2 0 2 1 0 2] 55 | 56 | 57 | ;;; Linear model - From regression to sparsity 58 | ;; Diabetes dataset 59 | 60 | ;;The diabetes dataset consists of 10 physiological variables (age, sex, weight, blood pressure) measure on 442 patients, and an indication of disease progression after one year: 61 | 62 | (require-python '[sklearn.linear_model :as linear-model]) 63 | 64 | (def diabetes (datasets/load_diabetes :return_X_y true)) 65 | (def diabetes-x (first diabetes)) 66 | (def diabetes-y (last diabetes)) 67 | (py.- diabetes-x shape);=> (442, 10) 68 | (- 442 20) ;=> 422 69 | (def diabetes-x-train (->> diabetes-x (take 422) (into []) (np/array))) 70 | (def diabetes-y-train (->> diabetes-y (take 422) (into []) (np/array))) 71 | (def diabetes-x-test (->> diabetes-x (drop 422) (into []) (np/array))) 72 | (def diabetes-y-test (->> diabetes-y (drop 422) (into []) (np/array))) 73 | 74 | 75 | ;;LinearRegression, in its simplest form, fits a linear model to the data set by adjusting a set of parameters in order to make the sum of the squared residuals of the model as small as possible. 76 | 77 | (py/python-type diabetes-x-train);=> :ndarray 78 | (py.- diabetes-x shape);=> (442, 10) 79 | (py.- diabetes-x-train shape);=> (422, 10) 80 | 81 | (def regr (linear-model/LinearRegression)) 82 | (py. regr fit diabetes-x-train diabetes-y-train) 83 | (py.- regr coef_) 84 | 85 | ;; [ 3.03499549e-01 -2.37639315e+02 5.10530605e+02 3.27736980e+02 86 | ;; -8.14131709e+02 4.92814588e+02 1.02848452e+02 1.84606489e+02 87 | ;; 7.43519617e+02 7.60951722e+01] 88 | 89 | ;;; The mean square error 90 | (np/mean 91 | (np/square 92 | (np/subtract (py. regr predict diabetes-x-test) diabetes-y-test)));=> 13.41173112391975 93 | (py. regr score diabetes-x diabetes-y);=> 0.5175336599402476 94 | 95 | ;;; shrinkage 96 | ;;If there are few data points per dimension, noise in the observations induces high variance: 97 | (def X [[0.5] [1]]) 98 | (def Y [0.5 1]) 99 | (def test [[0] [2]]) 100 | (def regr (linear-model/LinearRegression)) 101 | 102 | (np-random/seed 0) 103 | (plot/with-show 104 | (dotimes [i 6] 105 | (let [this-x (np/multiply 0.1 106 | (np/add 107 | (np-random/normal :size [2 1]) X)) 108 | _ (py. regr fit this-x Y) 109 | prediction (py. 
regr predict test)] 110 | (pyplot/plot test prediction) 111 | (pyplot/scatter this-x Y :s 3)))) 112 | 113 | ;;A solution in high-dimensional statistical learning is to shrink the regression coefficients to zero: any two randomly chosen sets of observations are likely to be uncorrelated. This is called Ridge regression: 114 | 115 | (def regr (linear-model/Ridge :alpha 1)) 116 | (plot/with-show 117 | (dotimes [i 6] 118 | (let [this-x (np/multiply 0.1 119 | (np/add 120 | (np-random/normal :size [2 1]) X)) 121 | _ (py. regr fit this-x Y) 122 | prediction (py. regr predict test)] 123 | (pyplot/plot test prediction) 124 | (pyplot/scatter this-x Y :s 3)))) 125 | 126 | ;; This is an example of the bias/variance tradeoff: the larger the ridge alpha parameter, the higher the bias and the lower the variance. 127 | 128 | ;; We can choose alpha to minimize the left-out error, this time using the diabetes dataset rather than our synthetic data: 129 | 130 | (def alphas (np/logspace -4 -1 6)) 131 | (mapv #(-> regr 132 | (py. set_params :alpha %) 133 | (py. fit diabetes-x-train diabetes-y-train) 134 | (py. score diabetes-x-test diabetes-y-test)) 135 | alphas) 136 | ;=>[0.5851110683883531 0.5852073015444674 0.585467754069849 0.5855512036503915 0.5830717085554161 0.570589994372801] 137 | 138 | 139 | ;;; Sparsity 140 | (def regr (linear-model/Lasso)) 141 | (def scores (map #(-> regr 142 | (py. set_params :alpha %) 143 | (py. fit diabetes-x-train diabetes-y-train) 144 | (py. score diabetes-x-test diabetes-y-test)) 145 | alphas)) 146 | (def best-alpha (->> (map (fn [a s] {:alpha a :score s}) alphas scores) 147 | (sort-by :score) 148 | last)) 149 | (-> regr 150 | (py. set_params :alpha (:alpha best-alpha)) ;; pass the numeric alpha, not the whole map 151 | (py. fit diabetes-x-train diabetes-y-train) 152 | (py.- coef_)) 153 | 154 | ;; [ 0. -212.43764548 517.19478111 313.77959962 -160.8303982 155 | ;; -0. -187.19554705 69.38229038 508.66011217 71.84239008] 156 | 157 | 158 | ;;;; Classification 159 | ;; For classification, as in the iris labeling task, linear regression is not the right approach as it will give too much weight to data far from the decision frontier. A linear approach is to fit a sigmoid or logistic function: 160 | 161 | (def log (linear-model/LogisticRegression :C 1e5)) 162 | ;;The C parameter controls the amount of regularization in the LogisticRegression object: a large value for C results in less regularization. penalty="l2" gives Shrinkage (i.e. non-sparse coefficients), while penalty="l1" gives Sparsity. 163 | (py. log fit iris-x-train iris-y-train) 164 | (py. log score iris-x-test iris-y-test);=> 1.0 165 | 166 | 167 | ;;;; Support Vector Machines 168 | 169 | (require-python '[sklearn.svm :as svm]) 170 | 171 | (def svc (svm/SVC :kernel "linear")) 172 | (py. svc fit iris-x-train iris-y-train) 173 | ;; => SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0, 174 | ;; decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear', 175 | ;; max_iter=-1, probability=False, random_state=None, shrinking=True, 176 | ;; tol=0.001, verbose=False) 177 | 178 | -------------------------------------------------------------------------------- /src/gigasquid/sk_learn/unsupervised_learning.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.sk-learn.unsupervised-learning 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py..
py.-]] 4 | [gigasquid.plot :as plot])) 5 | 6 | ;; from https://scikit-learn.org/stable/tutorial/statistical_inference/unsupervised_learning.html 7 | 8 | (require-python '[sklearn.datasets :as datasets]) 9 | (require-python '[sklearn.cluster :as cluster]) 10 | (require-python '[sklearn.feature_extraction :as feature-extraction]) 11 | (require-python '[sklearn.decomposition :as decomposition]) 12 | (require-python '[numpy :as np]) 13 | (require-python '[numpy.random :as np-random]) 14 | (require-python '[scipy.signal :as signal]) 15 | 16 | ;;; K-means clustering 17 | (def iris (datasets/load_iris :return_X_y true)) 18 | (def iris-x (first iris)) 19 | (def iris-y (last iris)) 20 | 21 | (def k-means (cluster/KMeans :n_clusters 3)) 22 | (py. k-means fit iris-x) 23 | (take-last 10 (py.- k-means labels_));=> (2 2 0 2 2 2 0 2 2 0) 24 | (take-last 10 iris-y) ;=> (2 2 2 2 2 2 2 2 2 2) 25 | 26 | ;;; Feature agglomeration 27 | ;; We have seen that sparsity could be used to mitigate the curse of dimensionality, i.e. an insufficient amount of observations compared to the number of features. Another approach is to merge together similar features: feature agglomeration. This approach can be implemented by clustering in the feature direction, in other words clustering the transposed data. 28 | (def digits (datasets/load_digits)) 29 | (def images (py.- digits images)) 30 | (def X (np/reshape images [(py/len images) -1])) 31 | (py.- (first images) shape) ;=> (8, 8) 32 | (def connectivity (feature-extraction/grid_to_graph 8 8)) 33 | (def agglo (cluster/FeatureAgglomeration :connectivity connectivity :n_clusters 32)) 34 | (py. agglo fit X) 35 | (def X-reduced (py. agglo transform X)) 36 | (def X-approx (py. agglo inverse_transform X-reduced)) 37 | (def images-shape (py.- images shape)) 38 | images-shape ;=> (1797, 8, 8) 39 | (def images-approx (np/reshape X-approx images-shape)) 40 | 41 | ;;; Principal component analysis: PCA 42 | 43 | ;; Create a signal with only 2 useful dimensions 44 | (def x1 (np-random/normal :size 100)) 45 | (def x2 (np-random/normal :size 100)) 46 | (def x3 (np/add x1 x2)) 47 | (def X (np/column_stack [x1 x2 x3])) 48 | (def pca (decomposition/PCA)) 49 | (py. pca fit X) 50 | (py.- pca explained_variance_) ;=> [2.90691814e+00 9.90171666e-01 2.83277241e-31] 51 | 52 | ;; As we can see, only the first 2 components are useful 53 | (py/att-type-map pca) 54 | (py/set-attr! pca "n_components" 2) 55 | (py.- pca n_components) ;=>2 56 | (def X-reduced (py. pca fit_transform X)) 57 | (py.- X-reduced shape);=> (100, 2) 58 | 59 | ;;;Independent Component Analysis: ICA 60 | ;;Independent component analysis (ICA) selects components so that the distribution of their loadings carries a maximum amount of independent information. It is able to recover non-Gaussian independent signals: 61 | 62 | ;; Generate the sample data 63 | (def time (np/linspace 0 10 2000)) 64 | (def s1 (np/sin (np/multiply 2 time))) 65 | (def s2 (np/sign (np/sin (np/multiply 3 time)))) 66 | (def s3 (signal/sawtooth (np/multiply 2 np/pi time))) 67 | (def S (np/column_stack [s1 s2 s3])) 68 | (def S (np/add S (np/multiply 0.2 (np-random/normal :size (py.- S shape))))) ;; add noise: S + 0.2 * standard normal noise 69 | (def S (np/divide S (py. S std :axis 0))) 70 | ;;; Mix data 71 | (def A (np/array [[1 1 1] [0.5 2 1] [1.5 1 2]])) ;; mixing matrix 72 | (def X (np/dot S (py.- A T))) 73 | 74 | ;; Compute ICA 75 | (def ica (decomposition/FastICA)) 76 | (def S_ (py.
ica fit_transform X)) ;; get the estimated sources 77 | (def A_ (-> ica 78 | (py.- mixing_) 79 | (py.- T))) 80 | (np/allclose X (np/add (np/dot S_ A_) (py.- ica mean_))) ;=> true 81 | -------------------------------------------------------------------------------- /src/gigasquid/slicing.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.slicing 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]])) 4 | 5 | 6 | (require-python '[builtins :as python]) 7 | 8 | ;;https://data-flair.training/blogs/python-slice/ 9 | 10 | (def l (py/->py-list [1 2 3 4])) 11 | 12 | ;;; slice object slice(stop) or slice(start, stop, step) 13 | 14 | ;;; sub elements 2 3 4 15 | 16 | 17 | (python/slice 3) ;=> slice(None, 3, None) 18 | (py/get-item l (python/slice 3)) ;=> [1, 2, 3] 19 | 20 | 21 | ;;; with specifiying interval 22 | (py/get-item l (python/slice 1 3)) ;=> [2, 3] 23 | 24 | ;;; negative indices go from right to left 25 | (py/get-item l (python/slice -3 -1)) ;=> [2, 3] 26 | 27 | 28 | ;;; python slicing tuples 29 | 30 | (def t (py/->py-list [1 2 3 4 5])) 31 | (py/get-item t (python/slice 2 4)) ;=> [3, 4] 32 | 33 | (py/get-item t (python/slice -1 -5 -2)) ;=> [5, 3] 34 | 35 | ;;; is equivalent to t[-1:-5:-2] 36 | 37 | 38 | ;;; t[:3] From 0 to 2 39 | ;;; is the same as 40 | (py/get-item t (python/slice nil 3)) ;=> [1, 2, 3] 41 | 42 | 43 | ;;; t[3:] From 3 to the end 44 | ;; is the same as 45 | (py/get-item t (python/slice 3 nil)) ;=> [4, 5] 46 | 47 | ;;; t[:] From beginning to the end 48 | ;;; is the same as 49 | (py/get-item t (python/slice nil nil)) ;=>[1, 2, 3, 4, 5] 50 | 51 | 52 | ;;;; Extended Python slices with step value 53 | 54 | ;;; t[::-1] reverse 55 | (py/get-item t (python/slice nil nil -1)) ;=> [5, 4, 3, 2, 1] 56 | 57 | 58 | ;;; t[::-2] Reverse with step=2 59 | (py/get-item t (python/slice nil nil -2)) ;=> [5, 3, 1] 60 | 61 | 62 | ;; t[:5:-1] Index 5 to end (already ahead of that), right to left; results in empty tuple 63 | (py/get-item t (python/slice nil 5 -1)) ;=> [] 64 | -------------------------------------------------------------------------------- /src/gigasquid/spacy.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.spacy 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]])) 4 | 5 | ;;;; What is SpaCy? 6 | 7 | ;;; also natural language toolkit https://spacy.io/usage/spacy-101#whats-spacy 8 | ;;; opinionated library and more Object oriented than NLTK. Has word vector support 9 | ;;; better performance for tokenization and pos tagging (source https://medium.com/@akankshamalhotra24/introduction-to-libraries-of-nlp-in-python-nltk-vs-spacy-42d7b2f128f2) 10 | 11 | ;;; Install pip3 install spacy 12 | ;;;; python3 -m spacy download en_core_web_sm 13 | 14 | (require-python '[spacy :as spacy]) 15 | 16 | 17 | ;;; Following this tutorial https://spacy.io/usage/spacy-101#annotat 18 | 19 | ;;; linguistic annotations 20 | 21 | 22 | (def nlp (spacy/load "en_core_web_sm")) 23 | 24 | (let [doc (nlp "Apple is looking at buying U.K. startup for $1 billion")] 25 | (map (fn [token] 26 | [(py.- token text) (py.- token pos_) (py.- token dep_)]) 27 | doc)) 28 | ;; (["Apple" "PROPN" "nsubj"] 29 | ;; ["is" "AUX" "aux"] 30 | ;; ["looking" "VERB" "ROOT"] 31 | ;; ["at" "ADP" "prep"] 32 | ;; ["buying" "VERB" "pcomp"] 33 | ;; ["U.K." 
"PROPN" "compound"] 34 | ;; ["startup" "NOUN" "dobj"] 35 | ;; ["for" "ADP" "prep"] 36 | ;; ["$" "SYM" "quantmod"] 37 | ;; ["1" "NUM" "compound"] 38 | ;; ["billion" "NUM" "pobj"]) 39 | 40 | 41 | (let [doc (nlp "Apple is looking at buying U.K. startup for $1 billion")] 42 | (map (fn [token] 43 | {:text (py.- token text) 44 | :lemma (py.- token lemma_) 45 | :pos (py.- token pos_) 46 | :tag (py.- token tag_) 47 | :dep (py.- token dep_) 48 | :shape (py.- token shape_) 49 | :alpha (py.- token is_alpha) 50 | :is_stop (py.- token is_stop)} ) 51 | doc)) 52 | 53 | ;; ({:text "Apple", 54 | ;; :lemma "Apple", 55 | ;; :pos "PROPN", 56 | ;; :tag "NNP", 57 | ;; :dep "nsubj", 58 | ;; :shape "Xxxxx", 59 | ;; :alpha true, 60 | ;; :is_stop false} 61 | ;; {:text "is", 62 | ;; :lemma "be", 63 | ;; :pos "AUX", 64 | ;; :tag "VBZ", 65 | ;; :dep "aux", 66 | ;; :shape "xx", 67 | ;; :alpha true, 68 | ;; :is_stop true} 69 | ;; ... 70 | 71 | 72 | ;;; Named entities 73 | 74 | (let [doc (nlp "Apple is looking at buying U.K. startup for $1 billion")] 75 | (map (fn [ent] 76 | {:text (py.- ent text) 77 | :start-char (py.- ent start_char) 78 | :end-char (py.- ent end_char) 79 | :label (py.- ent label_)} ) 80 | (py.- doc ents))) 81 | 82 | ;; ({:text "Apple", :start-char 0, :end-char 5, :label "ORG"} 83 | ;; {:text "U.K.", :start-char 27, :end-char 31, :label "GPE"} 84 | ;; {:text "$1 billion", :start-char 44, :end-char 54, :label "MONEY"}) 85 | 86 | 87 | ;;; Word Vectors 88 | 89 | ;; To make them compact and fast, spaCy’s small models (all packages that end in sm) don’t ship with word vectors, and only include context-sensitive tensors. This means you can still use the similarity() methods to compare documents, spans and tokens – but the result won’t be as good, and individual tokens won’t have any vectors assigned. So in order to use real word vectors, you need to download a larger model: 90 | 91 | ;;;python -m spacy download en_core_web_md (medium one) 92 | 93 | ;;; then restart cider to pick up changes 94 | 95 | (require-python '[spacy :as spacy]) 96 | (def nlp (spacy/load "en_core_web_md")) 97 | 98 | (let [tokens (nlp "dog cat banana afskfsd")] 99 | (map (fn [token] 100 | {:text (py.- token text) 101 | :has-vector (py.- token has_vector) 102 | :vector_norm (py.- token vector_norm) 103 | :is_oov (py.- token is_oov)} ) 104 | tokens)) 105 | 106 | ;; ({:text "dog", 107 | ;; :has-vector true, 108 | ;; :vector_norm 7.033673286437988, 109 | ;; :is_oov false} 110 | ;; {:text "cat", 111 | ;; :has-vector true, 112 | ;; :vector_norm 6.680818557739258, 113 | ;; :is_oov false} 114 | ;; {:text "banana", 115 | ;; :has-vector true, 116 | ;; :vector_norm 6.700014114379883, 117 | ;; :is_oov false} 118 | ;; {:text "afskfsd", :has-vector false, :vector_norm 0.0, :is_oov true}) 119 | 120 | 121 | ;;; finding similarity 122 | 123 | (let [tokens (nlp "dog cat banana")] 124 | (for [token1 tokens 125 | token2 tokens] 126 | {:token1 (py.- token1 text) 127 | :token2 (py.- token2 text) 128 | :similarity (py. 
token1 similarity token2)}) 129 | 130 | ;; ({:token1 "dog", :token2 "dog", :similarity 1.0} 131 | ;; {:token1 "dog", :token2 "cat", :similarity 0.8016854524612427} 132 | ;; {:token1 "dog", :token2 "banana", :similarity 0.2432764321565628} 133 | ;; {:token1 "cat", :token2 "dog", :similarity 0.8016854524612427} 134 | ;; {:token1 "cat", :token2 "cat", :similarity 1.0} 135 | ;; {:token1 "cat", :token2 "banana", :similarity 0.28154364228248596} 136 | ;; {:token1 "banana", :token2 "dog", :similarity 0.2432764321565628} 137 | ;; {:token1 "banana", :token2 "cat", :similarity 0.28154364228248596} 138 | ;; {:token1 "banana", :token2 "banana", :similarity 1.0}) 139 | 140 | -------------------------------------------------------------------------------- /src/gigasquid/trimap.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.trimap 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]] 4 | [gigasquid.plot :as plot])) 5 | 6 | ;;;; you will need all the below libraries pip installed 7 | 8 | ;;; What is TriMap? It is a dimensionality reduction library (like umap), but it uses a different algorithm 9 | ;;https://pypi.org/project/trimap/ 10 | 11 | ;;; also see the umap.clj example 12 | 13 | (require-python '[trimap :as trimap]) 14 | (require-python '[sklearn.datasets :as sk-data]) 15 | (require-python '[matplotlib.pyplot :as pyplot]) 16 | (require-python '[numpy :as numpy]) ;; used below for numpy/subtract and numpy/arange 17 | (def digits (sk-data/load_digits)) 18 | (def digits-data (py.- digits data)) 19 | 20 | (def embedding (py. (trimap/TRIMAP) fit_transform digits-data)) 21 | (py.- embedding shape) ;=> (1797, 2) 22 | 23 | 24 | ;; We now have a dataset with 1797 rows (one for each hand-written digit sample), but only 2 columns. We can now plot the resulting embedding, coloring the data points by the class that they belong to (i.e. the digit they represent). 25 | 26 | (plot/with-show 27 | (let [x (mapv first embedding) 28 | y (mapv last embedding) 29 | colors (py.- digits target) 30 | bounds (numpy/subtract (numpy/arange 11) 0.5) 31 | ticks (numpy/arange 10)] 32 | (pyplot/scatter x y :c colors :cmap "Spectral" :s 5) 33 | (py. (pyplot/gca) set_aspect "equal" "datalim") 34 | (py. (pyplot/colorbar :boundaries bounds) set_ticks ticks) 35 | (pyplot/title "TriMap projection of the Digits dataset" :fontsize 24))) 36 | -------------------------------------------------------------------------------- /src/gigasquid/umap.clj: -------------------------------------------------------------------------------- 1 | (ns gigasquid.umap 2 | (:require [libpython-clj.require :refer [require-python]] 3 | [libpython-clj.python :as py :refer [py. py.. py.-]] 4 | [gigasquid.plot :as plot])) 5 | 6 | ;;;; you will need all these things below installed 7 | ;;; with pip or something else 8 | 9 | ;;; What is umap? - dimensionality reduction library 10 | 11 | 12 | (require-python '[seaborn :as sns]) 13 | (require-python '[matplotlib.pyplot :as pyplot]) 14 | (require-python '[sklearn.datasets :as sk-data]) 15 | (require-python '[sklearn.model_selection :as sk-model]) 16 | (require-python '[numpy :as numpy]) 17 | (require-python '[pandas :as pandas]) 18 | (require-python '[umap :as umap]) 19 | 20 | 21 | ;;; Code walkthrough from here https://umap-learn.readthedocs.io/en/latest/basic_usage.html 22 | 23 | 24 | ;;; set the defaults for plotting 25 | (sns/set) 26 | 27 | ;;; IRIS data 28 | 29 | ;; The next step is to get some data to work with. To ease us into things we'll start with the iris dataset.
-------------------------------------------------------------------------------- /src/gigasquid/umap.clj: --------------------------------------------------------------------------------
1 | (ns gigasquid.umap
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 | ;;;; you will need all of the libraries below installed
7 | ;;; with pip or something else
8 |
9 | ;;; What is UMAP? It is a dimensionality reduction library
10 |
11 |
12 | (require-python '[seaborn :as sns])
13 | (require-python '[matplotlib.pyplot :as pyplot])
14 | (require-python '[sklearn.datasets :as sk-data])
15 | (require-python '[sklearn.model_selection :as sk-model])
16 | (require-python '[numpy :as numpy])
17 | (require-python '[pandas :as pandas])
18 | (require-python '[umap :as umap])
19 |
20 |
21 | ;;; Code walkthrough from https://umap-learn.readthedocs.io/en/latest/basic_usage.html
22 |
23 |
24 | ;;; set the defaults for plotting
25 | (sns/set)
26 |
27 | ;;; IRIS data
28 |
29 | ;; The next step is to get some data to work with. To ease us into things we’ll start with the iris dataset. It isn’t very representative of what real data would look like, but it is small both in number of points and number of features, and will let us get an idea of what the dimension reduction is doing. We can load the iris dataset from sklearn.
30 |
31 | (def iris (sk-data/load_iris))
32 | (print (py.- iris DESCR))
33 |
34 | ;; Iris plants dataset
35 | ;; --------------------
36 |
37 | ;; **Data Set Characteristics:**
38 |
39 | ;; :Number of Instances: 150 (50 in each of three classes)
40 | ;; :Number of Attributes: 4 numeric, predictive attributes and the class
41 | ;; :Attribute Information:
42 | ;; - sepal length in cm
43 | ;; - sepal width in cm
44 | ;; - petal length in cm
45 | ;; - petal width in cm
46 | ;; - class:
47 | ;; - Iris-Setosa
48 | ;; - Iris-Versicolour
49 | ;; - Iris-Virginica
50 |
51 | ;; :Summary Statistics:
52 |
53 | ;; ============== ==== ==== ======= ===== ====================
54 | ;; Min Max Mean SD Class Correlation
55 | ;; ============== ==== ==== ======= ===== ====================
56 | ;; sepal length: 4.3 7.9 5.84 0.83 0.7826
57 | ;; sepal width: 2.0 4.4 3.05 0.43 -0.4194
58 | ;; petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
59 | ;; petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
60 | ;; ============== ==== ==== ======= ===== ====================
61 |
62 | (def iris-df (pandas/DataFrame (py.- iris data) :columns (py.- iris feature_names)))
63 | (py/att-type-map iris-df)
64 |
65 | (def iris-name-series (let [iris-name-map (zipmap (range 3) (py.- iris target_names))]
66 | (pandas/Series (map (fn [item]
67 | (get iris-name-map item))
68 | (py.- iris target)))))
69 |
70 | (py. iris-df __setitem__ "species" iris-name-series)
71 | (py/get-item iris-df "species")
72 | ;; 0 setosa
73 | ;; 1 setosa
74 | ;; 2 setosa
75 | ;; 3 setosa
76 | ;; 4 setosa
77 | ;; ...
78 | ;; 145 virginica
79 | ;; 146 virginica
80 | ;; 147 virginica
81 | ;; 148 virginica
82 | ;; 149 virginica
83 | ;; Name: species, Length: 150, dtype: object
84 |
85 |
86 | (plot/with-show
87 | (sns/pairplot iris-df :hue "species"))
88 |
89 |
90 | ;; Time to reduce dimensions
91 | (def reducer (umap/UMAP))
92 |
93 | ;;; we need to train the reducer to learn about the manifold
94 | ;; fit_transform first fits the data and then transforms it into a numpy array
95 |
96 | (def embedding (py. reducer fit_transform (py.- iris data)))
97 | (py.- embedding shape) ;=> (150, 2)
98 |
99 | ;;; 150 samples with 2 columns. Each row of the array is a 2-dimensional representation of the corresponding flower. Thus we can plot the embedding as a standard scatterplot and color by the target array (since it applies to the transformed data, which is in the same order as the original).
100 |
101 | (first embedding) ;=> [12.449954 -6.0549345]
102 |
103 |
104 | (let [colors (mapv #(py/get-item (sns/color_palette) %)
105 | (py.- iris target))
106 | x (mapv first embedding)
107 | y (mapv last embedding)]
108 | (plot/with-show
109 | (pyplot/scatter x y :c colors)
110 | (py. (pyplot/gca) set_aspect "equal" "datalim")
111 | (pyplot/title "UMAP projection of the Iris dataset" :fontsize 24)))
112 |
113 |
114 | ;;;; Digits Data
115 |
116 | (def digits (sk-data/load_digits))
117 | (print (py.- digits DESCR))
118 |
119 | ;; .. _digits_dataset:
120 |
121 | ;; Optical recognition of handwritten digits dataset
122 | ;; --------------------------------------------------
123 |
124 | ;; **Data Set Characteristics:**
125 |
126 | ;; :Number of Instances: 5620
127 | ;; :Number of Attributes: 64
128 | ;; :Attribute Information: 8x8 image of integer pixels in the range 0..16.
129 | ;; :Missing Attribute Values: None
130 | ;; :Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)
131 | ;; :Date: July; 1998
132 |
133 | ;;; Plot the images to get an idea of what we are looking at
134 |
135 | (plot/with-show
136 | (let [[fig ax-array] (pyplot/subplots 20 20)
137 | axes (py. ax-array flatten)]
138 | (doall (map-indexed (fn [i ax]
139 | (py. ax imshow (py/get-item (py.- digits images) i) :cmap "gray_r"))
140 | axes))
141 | (pyplot/setp axes :xticks [] :yticks [] :frame_on false)
142 | (pyplot/tight_layout :h_pad 0.5 :w_pad 0.01)))
143 |
144 | ;;; Try a scatterplot of the first 10 dimensions of the 64-element-long vectors of grayscale values
145 |
146 | (def digits-df (pandas/DataFrame (mapv #(take 10 %) (py.- digits data))))
147 | (def digits-target-series (pandas/DataFrame (mapv #(str "Digit " %) (py.- digits target))))
148 | (py. digits-df __setitem__ "digit" digits-target-series)
149 |
150 | (plot/with-show
151 | (sns/pairplot digits-df :hue "digit" :palette "Spectral"))
152 |
153 | ;;;; use umap with fit instead of fit_transform
154 |
155 | (def reducer (umap/UMAP :random_state 42))
156 | (py. reducer fit (py.- digits data))
157 |
158 | ;;; now we can look at the embedding_ attribute on the reducer or call transform on the original data
159 | (def embedding (py. reducer transform (py.- digits data)))
160 | (py.- embedding shape) ;=> (1797, 2)
161 |
162 |
163 | ;; We now have a dataset with 1797 rows (one for each hand-written digit sample), but only 2 columns. As with the Iris example we can now plot the resulting embedding, coloring the data points by the class that they belong to (i.e. the digit they represent).
164 |
165 | (plot/with-show
166 | (let [x (mapv first embedding)
167 | y (mapv last embedding)
168 | colors (py.- digits target)
169 | bounds (numpy/subtract (numpy/arange 11) 0.5)
170 | ticks (numpy/arange 10)]
171 | (pyplot/scatter x y :c colors :cmap "Spectral" :s 5)
172 | (py. (pyplot/gca) set_aspect "equal" "datalim")
173 | (py. (pyplot/colorbar :boundaries bounds) set_ticks ticks)
174 | (pyplot/title "UMAP projection of the Digits dataset" :fontsize 24)))
175 |
176 | ;;;; Whooo!
177 |
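178 | ;;; The projections above all use UMAP's defaults. The shape of the embedding
179 | ;;; is mostly controlled by the n_neighbors and min_dist keyword arguments
180 | ;;; (https://umap-learn.readthedocs.io/en/latest/parameters.html). A hedged,
181 | ;;; untested sketch of passing them through from Clojure -- embedding-tuned is
182 | ;;; just an illustrative name:
183 |
184 | (comment
185 | (def embedding-tuned
186 | (py. (umap/UMAP :n_neighbors 5 :min_dist 0.3 :random_state 42)
187 | fit_transform
188 | (py.- digits data))))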
-------------------------------------------------------------------------------- /src/gigasquid/utils.clj: --------------------------------------------------------------------------------
1 | (ns gigasquid.utils
2 | (:require
3 | [clojure.string :as string]
4 | [clojure.java.shell :as sh]
5 | [clojure.pprint :refer [pprint]])
6 | (:import [java.io File]))
7 |
8 | (def is-linux?
9 | (= "linux"
10 | (-> "os.name"
11 | System/getProperty
12 | string/lower-case)))
13 |
14 | (def is-mac?
15 | (-> "os.name"
16 | System/getProperty
17 | string/lower-case
18 | (string/starts-with? "mac")))
19 |
20 | (defn display-image
21 | "Display an image on macOS (OSX) or on a Linux-based system"
22 | [image-file]
23 | (cond
24 | is-mac?
25 | (sh/sh "open" image-file)
26 |
27 | is-linux?
28 | (sh/sh "display" image-file)))
29 |
30 | (defn create-tmp-file
31 | "Return a temporary File created with the given prefix and extension.
32 | 33 | Example: 34 | (create-tmp-file \"tmp-image\" \".png\") " 35 | [prefix ext] 36 | (File/createTempFile prefix ext)) 37 | --------------------------------------------------------------------------------