├── .gitignore
├── .jokerd
│   └── linter.cljc
├── LICENSE
├── README.md
├── deps.edn
├── pom.xml
├── project.clj
├── resources
│   ├── .keep
│   ├── cat.jpg
│   ├── generated_godzilla_plot_summaries.md
│   ├── generated_godzilla_plot_summaries.txt
│   ├── opencv-gray-logo.png
│   ├── opencv
│   │   ├── cat-cartoonize-color.png
│   │   ├── cat-cartoonize-gray.png
│   │   ├── cat-sketch.png
│   │   ├── cat.jpg
│   │   ├── lenna.png
│   │   └── opencv-logo.png
│   ├── pytorch
│   │   ├── data
│   │   │   └── .keep
│   │   └── models
│   │       └── .keep
│   └── ufosightings-since-2010.csv
└── src
    └── gigasquid
        ├── _configure.clj
        ├── bokeh
        │   ├── README.md
        │   ├── core.clj
        │   ├── line10k.clj
        │   └── multi_polygons.clj
        ├── diffprivlib.clj
        ├── facebook_prophet.clj
        ├── gpt2.clj
        ├── igraph.clj
        ├── lieden.clj
        ├── mxnet.clj
        ├── nltk.clj
        ├── numpy_plot.clj
        ├── opencv
        │   ├── README.md
        │   └── core.clj
        ├── plot.clj
        ├── psutil
        │   ├── README.md
        │   └── core.clj
        ├── pygal
        │   ├── README.md
        │   └── core.clj
        ├── pytorch_mnist.clj
        ├── sci_spacy.clj
        ├── seaborn.clj
        ├── sk_learn
        │   ├── datasets_estimators.clj
        │   ├── info.txt
        │   ├── model_selection.clj
        │   ├── pipelining.clj
        │   ├── supervised_learning.clj
        │   └── unsupervised_learning.clj
        ├── slicing.clj
        ├── spacy.clj
        ├── trimap.clj
        ├── umap.clj
        └── utils.clj
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /classes
3 | /checkouts
4 | *.jar
5 | *.class
6 | /.cpcache
7 | /.lein-*
8 | /.nrepl-history
9 | /.nrepl-port
10 | .hgignore
11 | .hg/
12 |
--------------------------------------------------------------------------------
/.jokerd/linter.cljc:
--------------------------------------------------------------------------------
1 | (ns libpython-clj.python)
2 |
3 | (defmacro with [& form]
4 | `(let ~@form))
5 |
6 | (defmacro py. [& form]
7 | (let [[member-symbol instance-member & args] form]
8 | `(str (pr ~member-symbol)
9 | (prn ~@args)
10 | (pr nil))))
11 |
12 | (defmacro py.- [& form]
13 | (let [[member-symbol instance-field] form]
14 | `(str (pr ~member-symbol)
15 | (pr nil))))
16 |
17 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18 |
19 | (ns libpython-clj.require)
20 |
21 | (defmacro require-python [form]
22 | (let [form (second form) ;; first is (quote ...)
23 | [_ & {:keys [as refer]}] form]
24 | (let [references (if refer [:refer refer] '())]
25 | (apply list
26 | (cond-> `[do]
27 | as (conj `(create-ns (quote ~as))
28 | `(require (quote [~as ~@references]))))))))
29 | (comment
30 | (*require-python '[torch.optim.lr_scheduler :as lr_scheduler]))
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
4 |
5 | 1. DEFINITIONS
6 |
7 | "Contribution" means:
8 |
9 | a) in the case of the initial Contributor, the initial code and
10 | documentation distributed under this Agreement, and
11 |
12 | b) in the case of each subsequent Contributor:
13 |
14 | i) changes to the Program, and
15 |
16 | ii) additions to the Program;
17 |
18 | where such changes and/or additions to the Program originate from and are
19 | distributed by that particular Contributor. A Contribution 'originates' from
20 | a Contributor if it was added to the Program by such Contributor itself or
21 | anyone acting on such Contributor's behalf. Contributions do not include
22 | additions to the Program which: (i) are separate modules of software
23 | distributed in conjunction with the Program under their own license
24 | agreement, and (ii) are not derivative works of the Program.
25 |
26 | "Contributor" means any person or entity that distributes the Program.
27 |
28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
29 | necessarily infringed by the use or sale of its Contribution alone or when
30 | combined with the Program.
31 |
32 | "Program" means the Contributions distributed in accordance with this
33 | Agreement.
34 |
35 | "Recipient" means anyone who receives the Program under this Agreement,
36 | including all Contributors.
37 |
38 | 2. GRANT OF RIGHTS
39 |
40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
42 | reproduce, prepare derivative works of, publicly display, publicly perform,
43 | distribute and sublicense the Contribution of such Contributor, if any, and
44 | such derivative works, in source code and object code form.
45 |
46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
49 | transfer the Contribution of such Contributor, if any, in source code and
50 | object code form. This patent license shall apply to the combination of the
51 | Contribution and the Program if, at the time the Contribution is added by the
52 | Contributor, such addition of the Contribution causes such combination to be
53 | covered by the Licensed Patents. The patent license shall not apply to any
54 | other combinations which include the Contribution. No hardware per se is
55 | licensed hereunder.
56 |
57 | c) Recipient understands that although each Contributor grants the licenses
58 | to its Contributions set forth herein, no assurances are provided by any
59 | Contributor that the Program does not infringe the patent or other
60 | intellectual property rights of any other entity. Each Contributor disclaims
61 | any liability to Recipient for claims brought by any other entity based on
62 | infringement of intellectual property rights or otherwise. As a condition to
63 | exercising the rights and licenses granted hereunder, each Recipient hereby
64 | assumes sole responsibility to secure any other intellectual property rights
65 | needed, if any. For example, if a third party patent license is required to
66 | allow Recipient to distribute the Program, it is Recipient's responsibility
67 | to acquire that license before distributing the Program.
68 |
69 | d) Each Contributor represents that to its knowledge it has sufficient
70 | copyright rights in its Contribution, if any, to grant the copyright license
71 | set forth in this Agreement.
72 |
73 | 3. REQUIREMENTS
74 |
75 | A Contributor may choose to distribute the Program in object code form under
76 | its own license agreement, provided that:
77 |
78 | a) it complies with the terms and conditions of this Agreement; and
79 |
80 | b) its license agreement:
81 |
82 | i) effectively disclaims on behalf of all Contributors all warranties and
83 | conditions, express and implied, including warranties or conditions of title
84 | and non-infringement, and implied warranties or conditions of merchantability
85 | and fitness for a particular purpose;
86 |
87 | ii) effectively excludes on behalf of all Contributors all liability for
88 | damages, including direct, indirect, special, incidental and consequential
89 | damages, such as lost profits;
90 |
91 | iii) states that any provisions which differ from this Agreement are offered
92 | by that Contributor alone and not by any other party; and
93 |
94 | iv) states that source code for the Program is available from such
95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
96 | or through a medium customarily used for software exchange.
97 |
98 | When the Program is made available in source code form:
99 |
100 | a) it must be made available under this Agreement; and
101 |
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 |
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 |
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 |
111 | 4. COMMERCIAL DISTRIBUTION
112 |
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering. The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor to control, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 |
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 |
144 | 5. NO WARRANTY
145 |
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement , including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 |
157 | 6. DISCLAIMER OF LIABILITY
158 |
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 |
168 | 7. GENERAL
169 |
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 |
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 |
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 |
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 |
210 | This Agreement is governed by the laws of the State of New York and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # libpython-clj-examples
2 |
3 | ### Overview
4 |
5 | This repo contains some examples of using [libpython-clj](https://github.com/clj-python/libpython-clj) with various Python libraries.
6 | So far, these are source-code examples meant to be walked through in the REPL:
7 |
8 | - [GPT2 text generation from hugging-face](https://github.com/huggingface/transformers)
9 | - [MXNet MNIST classification using the Module API](https://mxnet.apache.org/)
10 | - [Pytorch MNIST](https://github.com/pytorch/examples/tree/master/mnist)
11 | - [Matplotlib PyPlot](https://github.com/matplotlib/matplotlib)
12 | - [NLTK](https://www.nltk.org/)
13 | - [SpaCy](https://github.com/explosion/spaCy)
14 | - [Sci SpaCy](https://github.com/allenai/scispacy)
15 | - [Seaborn](https://github.com/mwaskom/seaborn)
16 | - [UMAP](https://github.com/lmcinnes/umap)
17 | - [TRIMAP](https://pypi.org/project/trimap/)
18 | - [Igraph](https://igraph.org/)
19 | - [Leiden](https://github.com/vtraag/leidenalg)
20 | - [Sklearn](https://github.com/scikit-learn/scikit-learn)
21 | - [Facebook Prophet](https://github.com/facebook/prophet)
22 | - [Pygal](http://www.pygal.org/en/latest/index.html#)
23 | - [Bokeh](https://docs.bokeh.org/en/latest/index.html)
24 | - [OpenCV](https://opencv.org/)
25 | - [psutil](https://psutil.readthedocs.io/en/latest/)
26 | - [diffprivlib](https://github.com/IBM/differential-privacy-library)
27 |
28 | In general, you will need a Python 3 environment and to `pip install` the
29 | relevant packages before running each example.
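
As a rough sketch (illustrative only; the exact Python package and namespace vary from example to example), a typical REPL session looks like this:

```clojure
;; assumes the Python library in question (here numpy) is already pip-installed
(require '[libpython-clj.require :refer [require-python]])

;; expose a Python module as a Clojure namespace
(require-python '[numpy :as np])

;; then call into it like ordinary Clojure
(np/linspace 0 1 5)
```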
30 |
31 | ### Nextjournal Posts
32 |
33 | - [Pyplot](https://nextjournal.com/kommen/parens-for-polyglot)
34 | - [GPT2 & MXNet](https://nextjournal.com/kommen/gigasquids-libpython-clj-examples)
35 | - [Seaborn Visualizations](https://nextjournal.com/gigasquid/parens-for-python---seaborn-visualizations)
36 | - [UMAP and Trimap](https://nextjournal.com/gigasquid/parens-for-python---umap-trimap)
37 | - [Network Analysis with IGraph and Leiden](https://nextjournal.com/gigasquid/parens-for-python---network-analysis-and-visualization)
38 | - [Sci Spacy tutorial](https://nextjournal.com/gigasquid/parens-for-python---sci-spacy)
39 | - [Facebook Prophet](https://nextjournal.com/gigasquid/parens-for-python---predicting-sportsball-ufos)
40 |
41 | ## License
42 |
43 | Copyright © 2020 Carin Meier
44 |
45 | Distributed under the Eclipse Public License either version 1.0 or (at
46 | your option) any later version.
47 |
--------------------------------------------------------------------------------
/deps.edn:
--------------------------------------------------------------------------------
1 | {:paths ["src" "resources"]
2 | :mvn/repos {"central" {:url "https://repo1.maven.org/maven2/"}
3 | "clojars" {:url "https://clojars.org/repo"}}
4 | :deps {org.clojure/clojure {:mvn/version "1.10.1"}
5 | clj-python/libpython-clj {:mvn/version "1.37"}
6 | org.clojure/data.csv {:mvn/version "0.1.4"}}
7 | :aliases
8 | {:test {:extra-paths ["test"]
9 | :extra-deps {org.clojure/test.check {:mvn/version "0.10.0"}}}
10 | :runner
11 | {:extra-deps {com.cognitect/test-runner
12 | {:git/url "https://github.com/cognitect-labs/test-runner"
13 | :sha "f7ef16dc3b8332b0d77bc0274578ad5270fbfedd"}}
14 | :main-opts ["-m" "cognitect.test-runner"
15 | "-d" "test"]}
16 | :uberjar {:extra-deps {seancorfield/depstar {:mvn/version "0.5.1"}}
17 | :main-opts ["-m" "hf.depstar.uberjar" "libpython-clj-examples.jar"
18 | "-C" "-m" "gigasquid.libpython-clj-examples"]}}}
19 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |   <modelVersion>4.0.0</modelVersion>
4 |   <groupId>gigasquid</groupId>
5 |   <artifactId>libpython-clj-examples</artifactId>
6 |   <version>0.1.0-SNAPSHOT</version>
7 |   <name>gigasquid/libpython-clj-examples</name>
8 |   <description>FIXME: my new application.</description>
9 |   <url>https://github.com/gigasquid/libpython-clj-examples</url>
10 |   <licenses>
11 |     <license>
12 |       <name>Eclipse Public License</name>
13 |       <url>http://www.eclipse.org/legal/epl-v10.html</url>
14 |     </license>
15 |   </licenses>
16 |   <developers>
17 |     <developer>
18 |       <name>Cmeier</name>
19 |     </developer>
20 |   </developers>
21 |   <scm>
22 |     <url>https://github.com/gigasquid/libpython-clj-examples</url>
23 |     <connection>scm:git:git://github.com/gigasquid/libpython-clj-examples.git</connection>
24 |     <developerConnection>scm:git:ssh://git@github.com/gigasquid/libpython-clj-examples.git</developerConnection>
25 |     <tag>HEAD</tag>
26 |   </scm>
27 |   <dependencies>
28 |     <dependency>
29 |       <groupId>org.clojure</groupId>
30 |       <artifactId>clojure</artifactId>
31 |       <version>1.10.1</version>
32 |     </dependency>
33 |   </dependencies>
34 |   <build>
35 |     <sourceDirectory>src</sourceDirectory>
36 |   </build>
37 |   <repositories>
38 |     <repository>
39 |       <id>clojars</id>
40 |       <url>https://repo.clojars.org/</url>
41 |     </repository>
42 |     <repository>
43 |       <id>sonatype</id>
44 |       <url>https://oss.sonatype.org/content/repositories/snapshots/</url>
45 |     </repository>
46 |   </repositories>
47 |   <distributionManagement>
48 |     <repository>
49 |       <id>clojars</id>
50 |       <name>Clojars repository</name>
51 |       <url>https://clojars.org/repo</url>
52 |     </repository>
53 |   </distributionManagement>
54 | </project>
55 |
--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
1 | (defproject gigasquid/libpython-clj-examples "0.1.0"
2 | :description "Example codes that use libpython-clj with various Python libraries"
3 | :url "https://github.com/gigasquid/libpython-clj-examples"
4 | :license {:name "Eclipse Public License - v 1.0"
5 | :url "http://www.eclipse.org/legal/epl-v10.html"
6 | :distribution :repo}
7 |
8 | :jvm-opts ["-Djdk.attach.allowAttachSelf"
9 | "-XX:+UnlockDiagnosticVMOptions"
10 | "-XX:+DebugNonSafepoints"]
11 | :plugins [[lein-tools-deps "0.4.5"]]
12 | :middleware [lein-tools-deps.plugin/resolve-dependencies-with-deps-edn]
13 | :lein-tools-deps/config {:config-files [:project]
14 | :resolve-aliases []}
15 |
16 | :repl-options {:init-ns gigasquid.-configure})
17 |
--------------------------------------------------------------------------------
/resources/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/.keep
--------------------------------------------------------------------------------
/resources/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/cat.jpg
--------------------------------------------------------------------------------
/resources/generated_godzilla_plot_summaries.md:
--------------------------------------------------------------------------------
1 |
2 | ## The Following Godzilla Plot Summaries Created By GPT2
3 |
4 | 
5 |
6 | Prompts from the first sentences of : [https://www.empireonline.com/movies/features/godzilla-movie-guide/](https://www.empireonline.com/movies/features/godzilla-movie-guide/)
7 |
8 |
9 | ### Movie: GODZILLA (1954)
10 |
11 | The disappearance of some fishing boats revives the legend of an ancient sea monster, and heralds the arrival of Godzilla himself for the first time.
12 |
13 | Why? Because the creatures lifeforms have always been talking to each other. And what happens when we begin to learn about the reptiles as well?
14 |
15 | In INTRODUCING THE FISHING SEA, archaeologist Paul Goldman, an expert on extinct reptiles, explains why their existence is so important. He found that only a third of the species of the very elusive fishes that claimed to be the life forms of ancient species disappeared from the oceans of the Great Barrier Reef
16 |
17 |
18 | ### Movie: GODZILLA RAIDS AGAIN (1955)
19 |
20 | Crash-landing on Iwato Island, two pilots stumble upon Godzilla fighting the Stegosaurus-thing Anguirus.
21 |
22 | The fourth finale, Sinosha 3, ends with Abuma and Gero, the scientists, helping Manchin to survive the destruction that will engulf the world. In the fourth book, Manchin and his crew escape from a factory in the Baku prefecture and they discover the battle was won by a deadly robot called the Hawk. For the past 20 years, Manchin has been rescuing the others as they battle with the Hawk, and he was once a Baku fighter.
23 |
24 |
25 | ### Movie: KING KONG VS. GODZILLA (1962)
26 |
27 | Godzilla, frozen at the end of the last film, thaws out, sinks a submarine and, having been prevented from entering Tokyo by a Japanese military officer who was barking orders from his superior's squadron, begins to land at the base of the Yamashita plant. However, a mob of enraged and murderous Japanese troops arrive and begin to storm the base. In the middle of this, a group of K-1 Bear helicopters arrive and fight off the Japanese, who attempt to put the helicopter down. The helicopters take the Hatomi clan and lead them on a savage rampage. The helicopter explodes, killing three of the Hatomi.
28 |
29 | ### Movie: MOTHRA VS. GODZILLA (1964)
30 |
31 | Scientists discover a big egg in some typhoon wreckage, and learn that it comes from Infant Island, where lives the lepidopterous god Mothra on the island. The Lepidoptera (and the Phyllobionidae) are the only known species of monsters as far as modern humans know, and although there are a few of them, they do exist in all oceans of the world.
32 |
33 |
34 | ### Movie: GHIDORAH, THE THREE-HEADED MONSTER (1964)
35 |
36 | Following his debut in his own 1956 film, Rodan emerges from the crater of Mt. Aso, while Godzilla emerges from the deep once again. As Godzilla is the only monster of his kind left in Japan, his father is under attack by Godzilla's forces. The battle between Godzilla and the forces of nature has become so ferocious that only a few survivors survive, while the remaining (namely the garden santa) are bitten and killed by their own juices.
37 |
38 | The battle ends with Godzilla being sliced open by a spider-like claw. Godzilla is defeated and sent to prison for his crimes, later leaving Earth.
39 |
40 | ### Movie: INVASION OF ASTRO-MONSTER (1965)
41 |
42 | A joint Japanese / American space mission rocks up behind Jupiter at a newly discovered planet the Earth has dubbed 'Planet X' (pictured, Jupiter's rings that were once linked to Earth early in its journey to the star). The newly discovered planet's rings are two times larger than Earth's
43 |
44 | The discovery of this mysterious planet, dubbed Planet X, has laid the foundation for another space mission into the solar system.
45 |
46 | The solar system's giant moon, Europa, is about 100 million miles in diameter and is one of the brightest stars in the solar system.
47 | It was discovered in 2010 by Japanese scientists.
48 |
49 | ### Movie: EBIRAH, HORROR OF THE DEEP (1966)
50 |
51 | Terrorist ne’er-do-wells The Red Bamboo are manufacturing heavy water on a secret island, employing slave labour shipped in from Infant Island, the home of Mothra. Their abilities to create mass quantities and elasticity are just as the first pioneers in the field. The insect is able to grow up to be large, heavy and impressive, yet often selfish and vile. One of the most infamous are the Red Bamboo, perhaps the most infamous of their kind. Those able to harness this arcane power are the most dangerous of all the Red Bamboo.
52 |
53 | ### Movie: MONSTER ISLAND'S DECISIVE BATTLE: GODZILLA'S SON (1967)
54 |
55 | Scientists working on a weather control system are hampered by the Kamacuras, two giant praying mantises. In the center of the face of the mantis hangs a single statue of a human god, while the other's head, which bears three orange eyes, hangs in a circle.
56 |
57 | This is the first time that a group of scientists have been able to study the face of a giant praying mantise,\" said Carpio Bendaño, an ecologist who studies the effects of earthquakes on creatures like the swarm of giant goo, which is the only known creature.
58 |
59 | ### Movie: DESTROY ALL MONSTERS (1968)
60 |
61 | Toho’s twentieth kaiju film was a celebratory affair, intended to mark Godzilla’s final appearance in the early '90s. Although many people were thrilled with the two-day, homecoming, many were disappointed.
62 |
63 | In the mid-'90s, both Japan and North America became obsessed with Godzilla. The animators wanted to convey the fatherly themes of our culture (the Japan of the period)—he was a giant ape, a monster, a man and his kids. The film was in triumph.
64 |
65 | ...
66 |
67 | ### Movie: GODZILLA: KING OF THE MONSTERS (2019)
68 | The first direct sequel to the 2014 Godzilla once again sees Monarch-connected humans standing by ineffectually as Gojira protects the world from a new era of monster gods.
69 |
70 | Gojira, who is Catholic, took the name Godzilla in part to honor his papal sainthood. Her character was named after Queen Elizabeth II, the monarch who was also Catholic.
71 |
72 | The sequel, which will be released worldwide on September 20th, is slated to be the sequel to the 2013 A Godzilla film, which directed by Tetsuya Nomura.
73 |
74 |
75 |
--------------------------------------------------------------------------------
/resources/generated_godzilla_plot_summaries.txt:
--------------------------------------------------------------------------------
1 | The Following Godzilla Plot Summaries have been Created By GPT2
2 |
3 | Prompts from the first sentences of :https://www.empireonline.com/movies/features/godzilla-movie-guide/
4 |
5 | Movie: GODZILLA (1954)
6 |
7 | The disappearance of some fishing boats revives the legend of an ancient sea monster, and heralds the arrival of Godzilla himself for the first time.
8 |
9 | Why? Because the creatures lifeforms have always been talking to each other. And what happens when we begin to learn about the reptiles as well?
10 |
11 | In INTRODUCING THE FISHING SEA, archaeologist Paul Goldman, an expert on extinct reptiles, explains why their existence is so important. He found that only a third of the species of the very elusive fishes that claimed to be the life forms of ancient species disappeared from the oceans of the Great Barrier Reef
12 |
13 |
14 | Movie: GODZILLA RAIDS AGAIN (1955)
15 |
16 | Crash-landing on Iwato Island, two pilots stumble upon Godzilla fighting the Stegosaurus-thing Anguirus.
17 |
18 | The fourth finale, Sinosha 3, ends with Abuma and Gero, the scientists, helping Manchin to survive the destruction that will engulf the world. In the fourth book, Manchin and his crew escape from a factory in the Baku prefecture and they discover the battle was won by a deadly robot called the Hawk. For the past 20 years, Manchin has been rescuing the others as they battle with the Hawk, and he was once a Baku fighter.
19 |
20 |
21 | Movie: KING KONG VS. GODZILLA (1962)
22 |
23 | Godzilla, frozen at the end of the last film, thaws out, sinks a submarine and, having been prevented from entering Tokyo by a Japanese military officer who was barking orders from his superior's squadron, begins to land at the base of the Yamashita plant. However, a mob of enraged and murderous Japanese troops arrive and begin to storm the base. In the middle of this, a group of K-1 Bear helicopters arrive and fight off the Japanese, who attempt to put the helicopter down. The helicopters take the Hatomi clan and lead them on a savage rampage. The helicopter explodes, killing three of the Hatomi.
24 |
25 | Movie: MOTHRA VS. GODZILLA (1964)
26 |
27 | Scientists discover a big egg in some typhoon wreckage, and learn that it comes from Infant Island, where lives the lepidopterous god Mothra on the island. The Lepidoptera (and the Phyllobionidae) are the only known species of monsters as far as modern humans know, and although there are a few of them, they do exist in all oceans of the world.
28 |
29 |
30 | Movie: GHIDORAH, THE THREE-HEADED MONSTER (1964)
31 |
32 | Following his debut in his own 1956 film, Rodan emerges from the crater of Mt. Aso, while Godzilla emerges from the deep once again. As Godzilla is the only monster of his kind left in Japan, his father is under attack by Godzilla's forces. The battle between Godzilla and the forces of nature has become so ferocious that only a few survivors survive, while the remaining (namely the garden santa) are bitten and killed by their own juices.
33 |
34 | The battle ends with Godzilla being sliced open by a spider-like claw. Godzilla is defeated and sent to prison for his crimes, later leaving Earth.
35 |
36 | Movie: INVASION OF ASTRO-MONSTER (1965)
37 |
38 | A joint Japanese / American space mission rocks up behind Jupiter at a newly discovered planet the Earth has dubbed 'Planet X' (pictured, Jupiter's rings that were once linked to Earth early in its journey to the star). The newly discovered planet's rings are two times larger than Earth's
39 |
40 | The discovery of this mysterious planet, dubbed Planet X, has laid the foundation for another space mission into the solar system.
41 |
42 | The solar system's giant moon, Europa, is about 100 million miles in diameter and is one of the brightest stars in the solar system.
43 | It was discovered in 2010 by Japanese scientists.
44 |
45 | Movie: EBIRAH, HORROR OF THE DEEP (1966)
46 |
47 | Terrorist ne’er-do-wells The Red Bamboo are manufacturing heavy water on a secret island, employing slave labour shipped in from Infant Island, the home of Mothra. Their abilities to create mass quantities and elasticity are just as the first pioneers in the field. The insect is able to grow up to be large, heavy and impressive, yet often selfish and vile. One of the most infamous are the Red Bamboo, perhaps the most infamous of their kind. Those able to harness this arcane power are the most dangerous of all the Red Bamboo.
48 |
49 | Movie: MONSTER ISLAND'S DECISIVE BATTLE: GODZILLA'S SON (1967)
50 |
51 | Scientists working on a weather control system are hampered by the Kamacuras, two giant praying mantises. In the center of the face of the mantis hangs a single statue of a human god, while the other's head, which bears three orange eyes, hangs in a circle.
52 |
53 | This is the first time that a group of scientists have been able to study the face of a giant praying mantise,\" said Carpio Bendaño, an ecologist who studies the effects of earthquakes on creatures like the swarm of giant goo, which is the only known creature.
54 |
55 | Movie: DESTROY ALL MONSTERS (1968)
56 |
57 | Toho’s twentieth kaiju film was a celebratory affair, intended to mark Godzilla’s final appearance in the early '90s. Although many people were thrilled with the two-day, homecoming, many were disappointed.
58 |
59 | In the mid-'90s, both Japan and North America became obsessed with Godzilla. The animators wanted to convey the fatherly themes of our culture (the Japan of the period)—he was a giant ape, a monster, a man and his kids. The film was in triumph.
60 |
61 | ...
62 |
63 | Movie: GODZILLA: KING OF THE MONSTERS (2019)
64 | "The first direct sequel to the 2014 Godzilla once again sees Monarch-connected humans standing by ineffectually as Gojira protects the world from a new era of monster gods.
65 |
66 | Gojira, who is Catholic, took the name Godzilla in part to honor his papal sainthood. Her character was named after Queen Elizabeth II, the monarch who was also Catholic.
67 |
68 | The sequel, which will be released worldwide on September 20th, is slated to be the sequel to the 2013 A Godzilla film, which directed by Tetsuya Nomura.
69 |
70 |
71 |
--------------------------------------------------------------------------------
/resources/opencv-gray-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv-gray-logo.png
--------------------------------------------------------------------------------
/resources/opencv/cat-cartoonize-color.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/cat-cartoonize-color.png
--------------------------------------------------------------------------------
/resources/opencv/cat-cartoonize-gray.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/cat-cartoonize-gray.png
--------------------------------------------------------------------------------
/resources/opencv/cat-sketch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/cat-sketch.png
--------------------------------------------------------------------------------
/resources/opencv/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/cat.jpg
--------------------------------------------------------------------------------
/resources/opencv/lenna.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/lenna.png
--------------------------------------------------------------------------------
/resources/opencv/opencv-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/opencv/opencv-logo.png
--------------------------------------------------------------------------------
/resources/pytorch/data/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/pytorch/data/.keep
--------------------------------------------------------------------------------
/resources/pytorch/models/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gigasquid/libpython-clj-examples/f151c00415c82a144a13959ff7b56f58704ac6f2/resources/pytorch/models/.keep
--------------------------------------------------------------------------------
/resources/ufosightings-since-2010.csv:
--------------------------------------------------------------------------------
1 | ds,y
2 | 2010-01-01,296
3 | 2010-02-01,194
4 | 2010-03-01,265
5 | 2010-04-01,296
6 | 2010-05-01,334
7 | 2010-06-01,392
8 | 2010-07-01,859
9 | 2010-08-01,538
10 | 2010-09-01,457
11 | 2010-10-01,480
12 | 2010-11-01,369
13 | 2010-12-01,310
14 | 2011-01-01,332
15 | 2011-02-01,278
16 | 2011-03-01,336
17 | 2011-04-01,319
18 | 2011-05-01,323
19 | 2011-06-01,418
20 | 2011-07-01,780
21 | 2011-08-01,647
22 | 2011-09-01,562
23 | 2011-10-01,647
24 | 2011-11-01,451
25 | 2011-12-01,539
26 | 2012-01-01,589
27 | 2012-02-01,399
28 | 2012-03-01,540
29 | 2012-04-01,506
30 | 2012-05-01,526
31 | 2012-06-01,775
32 | 2012-07-01,955
33 | 2012-08-01,911
34 | 2012-09-01,784
35 | 2012-10-01,684
36 | 2012-11-01,783
37 | 2012-12-01,680
38 | 2013-01-01,401
39 | 2013-02-01,285
40 | 2013-03-01,401
41 | 2013-04-01,431
42 | 2013-05-01,542
43 | 2013-06-01,656
44 | 2013-07-01,990
45 | 2013-08-01,928
46 | 2013-09-01,799
47 | 2013-10-01,802
48 | 2013-11-01,814
49 | 2013-12-01,774
50 | 2014-01-01,719
51 | 2014-02-01,560
52 | 2014-03-01,526
53 | 2014-04-01,673
54 | 2014-05-01,661
55 | 2014-06-01,791
56 | 2014-07-01,1116
57 | 2014-08-01,937
58 | 2014-09-01,842
59 | 2014-10-01,802
60 | 2014-11-01,551
61 | 2014-12-01,531
62 | 2015-01-01,542
63 | 2015-02-01,347
64 | 2015-03-01,483
65 | 2015-04-01,463
66 | 2015-05-01,468
67 | 2015-06-01,494
68 | 2015-07-01,745
69 | 2015-08-01,630
70 | 2015-09-01,735
71 | 2015-10-01,664
72 | 2015-11-01,906
73 | 2015-12-01,440
74 | 2016-01-01,423
75 | 2016-02-01,436
76 | 2016-03-01,414
77 | 2016-04-01,390
78 | 2016-05-01,388
79 | 2016-06-01,437
80 | 2016-07-01,697
81 | 2016-08-01,560
82 | 2016-09-01,596
83 | 2016-10-01,493
84 | 2016-11-01,498
85 | 2016-12-01,357
86 | 2017-01-01,339
87 | 2017-02-01,422
88 | 2017-03-01,349
89 | 2017-04-01,417
90 | 2017-05-01,365
91 | 2017-06-01,423
92 | 2017-07-01,517
93 | 2017-08-01,414
94 | 2017-09-01,453
95 | 2017-10-01,489
96 | 2017-11-01,358
97 | 2017-12-01,509
98 | 2018-01-01,315
99 | 2018-02-01,241
100 | 2018-03-01,238
101 | 2018-04-01,234
102 | 2018-05-01,251
103 | 2018-06-01,238
104 | 2018-07-01,427
105 | 2018-08-01,371
106 | 2018-09-01,310
107 | 2018-10-01,278
108 | 2018-11-01,252
109 | 2018-12-01,245
110 | 2019-01-01,342
111 | 2019-02-01,213
112 | 2019-03-01,325
113 | 2019-04-01,385
114 | 2019-05-01,540
115 | 2019-06-01,473
116 | 2019-07-01,597
117 | 2019-08-01,469
118 | 2019-09-01,649
119 | 2019-10-01,719
120 | 2019-11-01,695
121 | 2019-12-01,753
122 | 2020-01-01,572
123 | 2020-02-01,153
--------------------------------------------------------------------------------
/src/gigasquid/_configure.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.-configure
2 | (:require
3 | [libpython-clj.python :as py]))
4 |
5 | ; local install
6 | ; (py/initialize! :python-executable "/usr/bin/python3.8"
7 | ; :library-path "/usr/lib/libpython3.8.so.1.0")
8 |
9 | ; virtualenv @ "env" directory
10 | ; (py/initialize! :python-executable "env/bin/python3.8"
11 | ; :library-path "/usr/lib/libpython3.so")
12 |
--------------------------------------------------------------------------------
/src/gigasquid/bokeh/README.md:
--------------------------------------------------------------------------------
1 | # Bokeh
2 |
3 | Bokeh is an interactive visualization library for modern web browsers.
4 | It provides elegant, concise construction of versatile graphics, and affords high-performance interactivity over large or streaming datasets. Bokeh can help anyone who would like to quickly and easily make interactive plots, dashboards, and data applications.
5 |
6 | - Github - [bokeh/bokeh](https://github.com/bokeh/bokeh)
7 | - Official [documentation](https://docs.bokeh.org/en/latest/)
8 |
9 | ## Initial Examples
10 |
11 | These are based on the following examples:
12 |
13 | - [Quick Start Guide](https://docs.bokeh.org/en/latest/docs/user_guide/quickstart.html#userguide-quickstart)
14 | - [MultiPolygons](https://github.com/bokeh/bokeh/blob/1.4.0/examples/plotting/notebook/MultiPolygons.ipynb)
15 | - [A line with 10k points to show off the WebGL line implementation](https://github.com/bokeh/bokeh/blob/1.4.0/examples/webgl/line10k.py)
16 |
17 | ## Basic installation
18 |
19 | - Python
20 |
21 | ```shell
22 | pip3 install bokeh
23 | pip3 install numpy ## used in the 3rd example above
24 | ```
25 |
26 | - Clojure
27 |
28 | Just run your favorite `cider-jack-in` if you are on Emacs.
29 | For other editors, run the equivalent REPL/jack-in command.
30 |
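Once the REPL is up, a minimal smoke test looks like the sketch below (it simply mirrors the first example in `core.clj`, and assumes the `bokeh` package above is installed and libpython-clj can find your Python):

```clojure
(require '[libpython-clj.require :refer [require-python]]
         '[libpython-clj.python :refer [py.]])

(require-python '[bokeh.plotting :as bkp])

;; draw a simple line chart and open it in the browser
(let [p (bkp/figure :title "Simple line example"
                    :x_axis_label "x"
                    :y_axis_label "y")]
  (py. p line [1 2 3 4 5] [6 7 2 4 5] :legend "Temp." :line_width 2)
  (bkp/show p))
```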
--------------------------------------------------------------------------------
/src/gigasquid/bokeh/core.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.bokeh.core
2 | (:require [libpython-clj.require
3 | :refer [require-python]]
4 | [libpython-clj.python
5 | :as py
6 | :refer [py.
7 | py..
8 | py.-
9 | att-type-map
10 | ->python
11 | ->jvm
12 | ]]
13 | [clojure.java.shell :as sh]
14 | [clojure.pprint :refer [pprint]])
15 | (:import [java.io File]))
16 |
17 | ;;; Python installation
18 | ;;; pip3 install bokeh
19 |
20 | (comment
21 | (require-python '[sklearn.svm :as svm])
22 | (require-python '[numpy :as np])
23 | #_ (require-python '[pandas :as pd])
24 | )
25 |
26 | ;; First require the basic package
27 | (require-python '[bokeh.plotting :as bkp])
28 |
29 | (comment
30 |
31 | ;; Quick getting start guide
32 | ;; https://docs.bokeh.org/en/latest/docs/user_guide/quickstart.html#getting-started
33 | (py/from-import bokeh.plotting figure output_file show curdoc)
34 |
35 | (let [x [1 2 3 4 5]
36 | y [6 7 2 4 5]
37 | p (bkp/figure :title "Simple line example"
38 | :x_axis_label "x"
39 | :y_axis_label "y")]
40 | (py. p line x y :legend "Temp." :line_width 2)
41 | (bkp/show p))
42 |
43 | ;; More plotting example
44 | (let [x [0.1, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
45 | y0 (into [] (map (fn [i] (Math/pow i 2)) x))
46 | y1 (into [] (map (fn [i] (Math/pow 10 i)) x))
47 | y2 (into [] (map (fn [i] (Math/pow 10 (Math/pow i 2))) x))
48 | p (bkp/figure :tools "pan,box_zoom,reset,save"
49 | :y_axis_type "log"
50 | :y_range [0.001 (Math/pow 10 11)]
51 | :title "log axis example"
52 | :x_axis_label "sections"
53 | :y_axis_label "particles")]
54 | (py. p line x x
55 | :legend "y=x")
56 |
57 | (py. p circle x x
58 | :legend "y=x"
59 | :fill_color "white"
60 | :size 8)
61 |
62 | (py. p line x y0
63 | :legend "y=x^2"
64 | :line_width 3)
65 |
66 | (py. p line x y1
67 | :legend "y=10^x"
68 | :line_color "red")
69 |
70 | (py. p circle x y1
71 | :legend "y=10^x"
72 | :fill_color "red"
73 | :line_color "red"
74 | :size 6)
75 |
76 | (py. p line x y2
77 | :legend "y=10^x^2"
78 | :line_color "orange"
79 | :line_dash "4 4")
80 |
81 | (bkp/show p))
82 |
83 | )
84 |
85 | (comment
86 | ;; More example
87 | (let [p (bkp/figure
88 | :plot_width 300
89 | :plot_height 300
90 | :tools "pan,reset,save")]
91 | (py. p
92 | circle
93 | [1 2.5 3 2]
94 | [2 3 1 1.6]
95 | :radius 0.3
96 | :alpha 0.5)
97 | (bkp/show p))
98 |
99 | )
100 |
101 |
102 | ;; Providing Data
103 | ;; https://docs.bokeh.org/en/latest/docs/user_guide/data.html
104 |
105 | (comment
106 | (require-python '[bokeh.plotting :as bkp]) ;;=> :ok
107 | (require-python '[bokeh.models :as bkm]) ;;=> :ok
108 |
109 | (let [data {:x_values [1 2 3 4 5]
110 | :y_values [6 7 2 3 6]}
111 | source (bkm/ColumnDataSource :data data)
112 | p (bkp/figure)]
113 | (py. p circle
114 | :x "x_values"
115 | :y "y_values"
116 | :source source)
117 | (bkp/show p))
118 |
119 | )
120 |
--------------------------------------------------------------------------------
/src/gigasquid/bokeh/line10k.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.bokeh.line10k
2 | (:require [libpython-clj.require
3 | :refer [require-python]]
4 | [libpython-clj.python
5 | :as py
6 | :refer [py.
7 | py..
8 | py.-
9 | att-type-map
10 | ->python
11 | ->jvm
12 | ]]
13 | [clojure.java.shell :as sh]
14 | [clojure.pprint :refer [pprint]])
15 | (:import [java.io File]))
16 |
17 | ;;; Python installation
18 | ;;; pip3 install bokeh
19 | ;;; pip3 install numpy
20 |
21 | ;; Based on: https://github.com/bokeh/bokeh/blob/1.4.0/examples/webgl/line10k.py
22 |
23 | ;; First require the basic package
24 | (py/from-import bokeh.plotting figure output_file show curdoc)
25 | (require-python '[bokeh.plotting :as bkp])
26 | (require-python '[numpy :as np])
27 | (require-python '[numpy.random :as np-random])
28 | (require-python '[builtins :as python])
29 |
30 | ;; Note:
31 | #_(= np/pi Math/PI) ;;=> true
32 |
33 | (let [N 10000
34 | x (np/linspace 0 (* 10 np/pi) N)
35 | y (np/add
36 | (np/cos x)
37 | (np/sin (np/add (np/multiply 2 x) 1.25))
38 | (np-random/normal 0 0.001 (python/tuple [N])))
39 | p (bkp/figure :title "A line consisting of 10k points"
40 | :output_backend "webgl")]
41 |
42 | (py. p line x y :color "#22aa22" :line_width 3)
43 | ;; Tips:
44 | ;; To save the output to a file you can use the next line
45 | #_(output_file "line10.html" :title "line10k example")
46 |
47 | ;; Or simply show it immediately via your browser
48 | (bkp/show p))
49 |
--------------------------------------------------------------------------------
/src/gigasquid/bokeh/multi_polygons.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.bokeh.multi-polygons
2 | (:require [libpython-clj.require
3 | :refer [require-python]]
4 | [libpython-clj.python
5 | :as py
6 | :refer [py.
7 | py..
8 | py.-
9 | att-type-map
10 | ->python
11 | ->jvm
12 | ]]
13 | [clojure.java.shell :as sh]
14 | [clojure.pprint :refer [pprint]])
15 | (:import [java.io File]))
16 |
17 | ;;; Python installation
18 | ;;; pip3 install bokeh
19 |
20 | ;; First require the basic package
21 | (require-python '[bokeh.plotting :as bkp])
22 |
23 | (comment
24 | ;; https://github.com/bokeh/bokeh/blob/1.4.0/examples/plotting/notebook/MultiPolygons.ipynb
25 |
26 | (py/from-import bokeh.plotting figure output_file show curdoc)
27 |
28 | (let [p (bkp/figure :title "Polygons with no holes"
29 | :plot_width 300
30 | :plot_height 300
31 | :tools "hover,tap,wheel_zoom,pan,reset,help")]
32 | (py. p multi_polygons
33 | :xs [[[[1, 2, 2, 1, 1]]]]
34 | :ys [[[[3, 3, 4, 4, 3]]]])
35 | (bkp/show p))
36 |
37 | ;; Polygons with holes
38 | (let [p (bkp/figure :title "Polygons with holes"
39 | :plot_width 300
40 | :plot_height 300
41 | :tools "hover,tap,wheel_zoom,pan,reset,help")]
42 | (py. p multi_polygons
43 | :xs [[[[1, 2, 2, 1], [1.2, 1.6, 1.6], [1.8, 1.8, 1.6]]]]
44 | :ys [[[[3, 3, 4, 4], [3.2, 3.6, 3.2], [3.4, 3.8, 3.8]]]])
45 | (bkp/show p))
46 |
47 | )
48 |
49 | (comment
50 | ;; https://github.com/bokeh/bokeh/blob/1.4.0/examples/plotting/notebook/MultiPolygons.ipynb
51 | ;; Now we'll examine a MultiPolygon.
52 | ;; A MultiPolygon is composed of different parts each of which is a Polygon and each of which can have or not have holes.
53 |
54 | ;; In python:
55 | ;; p = figure(plot_width=300, plot_height=300, tools='hover,tap,wheel_zoom,pan,reset,help')
56 | ;; p.multi_polygons(xs=[[[ [1, 1, 2, 2], [1.2, 1.6, 1.6], [1.8, 1.8, 1.6] ], [ [3, 4, 3] ]]],
57 | ;; ys=[[[ [4, 3, 3, 4], [3.2, 3.2, 3.6], [3.4, 3.8, 3.8] ], [ [1, 1, 3] ]]])
58 | ;; show(p)
59 |
60 | (let [p (bkp/figure
61 | :title "Multi-Polygons 1"
62 | :plot_width 300
63 | :plot_height 300
64 | :tools "hover,tap,wheel_zoom,pan,reset,help")
65 | xs [[[[1, 1, 2, 2] [1.2, 1.6, 1.6] [1.8, 1.8, 1.6]] [[3, 4, 3]]]]
66 | ys [[[[4, 3, 3, 4] [3.2, 3.2, 3.6] [3.4, 3.8, 3.8]] [[1, 1, 3]]]]]
67 | (py. p multi_polygons :xs xs :ys ys)
68 | (bkp/show p))
69 |
70 | ;; More example
71 | (let [p (bkp/figure
72 | :title "Multi-Polygons 2"
73 | :plot_width 300
74 | :plot_height 300
75 | :tools "hover,tap,wheel_zoom,pan,reset,help")
76 | xs [[[[1 1 2 2] [1.2 1.6 1.6] [1.8 1.8 1.6]] [[3 3 4]]]
77 | [[[1 2 2 1] [1.3 1.3 1.7 1.7]]]]
78 | ys [[[[4 3 3 4] [3.2 3.2 3.6] [3.4 3.8 3.8]] [[1 3 1]]],
79 | [[[1 1 2 2] [1.3 1.7 1.7 1.3]]]]]
80 | (py. p multi_polygons :xs xs :ys ys)
81 | (bkp/show p))
82 |
83 |
84 | ;; ===================================== ;;
85 | ;; Using multi-polygons glyph directly
86 | ;; TODO: revisit this code
87 | #_
88 | (comment
89 | (py/from-import bokeh.models ColumnDataSource Plot LinearAxis Grid)
90 | (py/from-import bokeh.models.glyphs MultiPolygons)
91 | (py/from-import bokeh.models.tools TapTool WheelZoomTool ResetTool HoverTool)
92 | (py/from-import bokeh.plotting figure output_file show curdoc)
93 |
94 | ;; Note: for this we need to use dict
95 | (require-python '[builtins :as python])
96 |
97 | (let [source (ColumnDataSource(python/dict
98 | :xs [[[[1, 1, 2, 2]
99 | [1.2, 1.6, 1.6]
100 | [1.8, 1.8, 1.6]]
101 | [[3, 3, 4]]]
102 | [[[1, 2, 2, 1]
103 | [1.3, 1.3, 1.7, 1.7]]]]
104 | :ys [[[[4, 3, 3, 4]
105 | [3.2, 3.2, 3.6]
106 | [3.4, 3.8, 3.8]]
107 | [[1, 3, 1]]]
108 | [[[1, 1, 2, 2]
109 | [1.3, 1.7, 1.7, 1.3]]]]
110 | :color ["blue" "red"],
111 | :label ["A" "B"]))
112 | plot (Plot :plot_width 300
113 | :plot_height 300
114 | ;;:tools [(HoverTool) (TapTool) (WheelZoomTool)]
115 | )
116 | glyph (py. MultiPolygons :xs "xs" :ys "ys" :fill_color "color")]
117 | plot
118 | #_(py. plot add_glyph source glyph)
119 | )
120 | )
121 | ;; ===================================== ;;
122 |
123 | ;; TODO:
124 | ;; Using numpy array with MultiPolygons
125 |
--------------------------------------------------------------------------------
/src/gigasquid/diffprivlib.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.diffprivlib
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 | ;;; From https://github.com/IBM/differential-privacy-library
7 |
8 | ;;; Install: pip install diffprivlib
9 |
10 | (require-python '[sklearn.datasets :as datasets])
11 | (require-python '[sklearn.model_selection :as model-selection])
12 | (require-python '[matplotlib.pyplot :as pyplot])
13 | (require-python '[numpy :as np])
14 | (require-python '[diffprivlib.models :as models])
15 | (require-python '[sklearn.metrics :as metrics])
16 | (require-python '[builtins :as python])
17 |
18 | ;;; Using the iris dataset - load with 80/20 split
19 |
20 | (def dataset (datasets/load_iris))
21 | (def iris-data (let [[X-train X-test y-train y-test]
22 | (model-selection/train_test_split (py.- dataset data)
23 | (py.- dataset target)
24 | :test_size 0.2)]
25 | {:X-train X-train :X-test X-test
26 | :y-train y-train :y-test y-test}))
27 |
28 | ;; Now, let's train a differentially private naive Bayes classifier. Our classifier runs just like an sklearn classifier, so you can get up and running quickly.
29 |
30 | ;; diffprivlib.models.GaussianNB can be run without any parameters, although this will throw a warning (we need to specify the bounds parameter to avoid this). The privacy level is controlled by the parameter epsilon, which is passed to the classifier at initialisation (e.g. GaussianNB(epsilon=0.1)). The default is epsilon = 1.0.
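;; For instance (illustrative only; `bounds` is defined a bit further below), the
;; Clojure equivalent of GaussianNB(epsilon=0.1) via libpython-clj would look like:
;; (models/GaussianNB :epsilon 0.1 :bounds bounds)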
31 |
32 | (def clf (models/GaussianNB))
33 | (py. clf fit (:X-train iris-data) (:y-train iris-data))
34 |
35 | ;; We can now classify unseen examples, knowing that the trained model is differentially private and preserves the privacy of the 'individuals' in the training set (flowers are entitled to their privacy too!).
36 |
37 | (py. clf predict (:X-test iris-data))
38 |
39 | ;;=> [1 0 1 1 1 2 1 0 2 2 2 2 1 0 0 2 1 0 1 0 0 1 0 1 2 2 0 2 1 1]
40 |
41 | ;;We can easily evaluate the accuracy of the model for various epsilon values and plot it with matplotlib.
42 |
43 | (def epsilons (np/logspace -2 2 50))
44 | (def bounds (python/list [(python/tuple [4.3 7.9]) (python/tuple [2.0 4.4])
45 | (python/tuple [1.1 6.9]) (python/tuple [0.1 2.5])]))
46 |
47 | (def accuracy (mapv (fn [epsilon]
48 | (let [clf (models/GaussianNB :bounds bounds :epsilon epsilon)
49 | _ (py. clf fit (:X-train iris-data) (:y-train iris-data))
50 | predictions (->> (:X-test iris-data)
51 | (py. clf predict))]
52 | (metrics/accuracy_score (:y-test iris-data) predictions)))
53 | epsilons))
54 |
55 | accuracy
56 | ;;=> [0.3333333333333333 0.36666666666666664 0.36666666666666664 0.36666666666666664 0.36666666666666664 0.2 0.3333333333333333 0.3 0.3333333333333333 0.3333333333333333 0.3 0.3 0.6 0.5666666666666667 0.2 0.7 0.6 0.1 0.6666666666666666 0.9 0.6666666666666666 0.6666666666666666 1.0 0.6 0.8 0.7666666666666667 0.8666666666666667 0.8333333333333334 0.9333333333333333 0.8666666666666667 0.9 1.0 0.9333333333333333 0.9333333333333333 0.9 0.9333333333333333 0.8333333333333334 1.0 0.8 0.8 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0]
57 |
58 | (plot/with-show-one
59 | (pyplot/semilogx epsilons accuracy)
60 | (pyplot/title "Differentially private Naive Bayes accuracy")
61 | (pyplot/xlabel "epsilon")
62 | (pyplot/ylabel "Accuracy"))
63 |
--------------------------------------------------------------------------------
/src/gigasquid/facebook_prophet.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.facebook-prophet
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 | ;;; sudo pip3 install fbprophet
7 | ;;; sudo pip3 install holidays==0.9.12
8 |
9 | ;;; tutorial https://facebook.github.io/prophet/docs/quick_start.html#python-api
10 |
11 | (require-python '[pandas :as pd])
12 | (require-python '[fbprophet :as fbprophet])
13 | (require-python '[matplotlib.pyplot :as pyplot])
14 |
15 | ;; The input to Prophet is always a dataframe with two columns: ds and y. The ds (datestamp) column should be of a format expected by Pandas, ideally YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp. The y column must be numeric, and represents the measurement we wish to forecast.
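;; A minimal sketch (illustrative only; the examples below read real CSV files) of
;; building such a two-column dataframe directly from Clojure:
;; (pd/DataFrame {"ds" ["2020-01-01" "2020-02-01" "2020-03-01"]
;;                "y"  [10 12 11]})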
16 |
17 | ;; As an example, let’s look at a time series of the log daily page views for the Wikipedia page for Peyton Manning. We scraped this data using the Wikipediatrend package in R. Peyton Manning provides a nice example because it illustrates some of Prophet’s features, like multiple seasonality, changing growth rates, and the ability to model special days (such as Manning’s playoff and superbowl appearances). The CSV is available here.
18 |
19 | (def csv-file (slurp "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv"))
20 | (spit "manning.csv" csv-file)
21 | (def df (pd/read_csv "manning.csv"))
22 | (py.- df head)
23 | ;; (dataframe preview output elided)
37 |
38 | ;; We fit the model by instantiating a new Prophet object. Any settings to the forecasting procedure are passed into the constructor. Then you call its fit method and pass in the historical dataframe. Fitting should take 1-5 seconds.
39 |
40 | (def m (fbprophet/Prophet))
41 | (py. m fit df)
42 |
43 | ;; Predictions are then made on a dataframe with a column ds containing the dates for which a prediction is to be made. You can get a suitable dataframe that extends into the future a specified number of days using the helper method Prophet.make_future_dataframe. By default it will also include the dates from the history, so we will see the model fit as well.
44 |
45 | (def future (py. m make_future_dataframe :periods 365))
46 | (py.- future tail)
47 | ;; (dataframe tail output elided)
61 |
62 | ;; The predict method will assign each row in future a predicted value which it names yhat. If you pass in historical dates, it will provide an in-sample fit. The forecast object here is a new dataframe that includes a column yhat with the forecast, as well as columns for components and uncertainty intervals.
63 |
64 | (def forecast (py. m predict future))
65 | (py.- forecast yhat_lower)
66 | (py/att-type-map forecast)
67 | (def vals (py. forecast __array__ ["ds" "yhat" "yhat_lower" "yhat_upper"]))
68 | (py/python-type vals) ;=> :ndarray
69 | ;; [[Timestamp('2007-12-10 00:00:00') 8.041238819642132 8.219483670063799
70 | ;; ... 0.0 0.0 8.844169826770502]
71 | ;; [Timestamp('2007-12-11 00:00:00') 8.039694770587365 8.037913913183381
72 | ;; ... 0.0 0.0 8.592697395711903]
73 | ;; [Timestamp('2007-12-12 00:00:00') 8.038150721532599 7.768551313613439
74 | ;; ... 0.0 0.0 8.388514099061501]
75 | ;; ...
76 | ;; [Timestamp('2017-01-17 00:00:00') 7.186504354691647 7.597836987450301
77 | ;; ... 0.0 0.0 8.318929898087168]
78 | ;; [Timestamp('2017-01-18 00:00:00') 7.18547676307155 7.496134175329733 ...
79 | ;; 0.0 0.0 8.151543221567003]
80 | ;; [Timestamp('2017-01-19 00:00:00') 7.184449171451455 7.447042010204286
81 | ;; ... 0.0 0.0 8.163477149645047]]
82 |
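;; The upstream tutorial narrows the forecast down with
;; forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(); a rough
;; equivalent sketch from Clojure, using pandas column indexing:
(comment
  (py. (py/get-item forecast ["ds" "yhat" "yhat_lower" "yhat_upper"]) tail))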
83 |
84 | (plot/with-show
85 | (py. m plot forecast))
86 |
87 | (plot/with-show
88 | (py. m plot_components forecast))
89 |
90 |
91 | ;;;; We can also work with monthly data - let's take an example of UFO sightings
92 | ;;; From 2010 to the present from http://www.nuforc.org/webreports/ndxevent.html
93 |
94 | (def df (pd/read_csv "resources/ufosightings-since-2010.csv"))
95 | (def m (fbprophet/Prophet :seasonality_mode "multiplicative")) ;;; use multiplicative seasonality for this data
96 | (py. m fit df)
97 | (def future (py. m make_future_dataframe :periods 48 :freq "M")) ;;; note Monthly prediction
98 | (def forecast (py. m predict future))
99 |
100 | (plot/with-show
101 | (py. m plot forecast))
102 |
103 | (plot/with-show
104 | (py. m plot_components forecast))
105 |
--------------------------------------------------------------------------------
/src/gigasquid/gpt2.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.gpt2
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]))
4 |
5 | ;;; sudo pip3 install torch
6 | ;;; sudo pip3 install transformers
7 |
8 | ;https://huggingface.co/transformers/quickstart.html - OpenAI GPT-2
9 |
10 | (require-python '[transformers :as transformers])
11 | (require-python '[torch :as torch])
12 |
13 |
14 | ;;; Load pre-trained model tokenizer (vocabulary)
15 |
16 | (def tokenizer (py. transformers/GPT2Tokenizer "from_pretrained" "gpt2"))
17 | (def text "Who was Jim Henson ? Jim Henson was a")
18 | ;; encode text input
19 | (def indexed-tokens (py. tokenizer encode text))
20 | indexed-tokens ;=>[8241, 373, 5395, 367, 19069, 5633, 5395, 367, 19069, 373, 257]
21 |
22 | ;; convert indexed tokens to pytorch tensor
23 | (def tokens-tensor (torch/tensor [indexed-tokens]))
24 | tokens-tensor
25 | ;; ([[ 8241, 373, 5395, 367, 19069, 5633, 5395, 367, 19069, 373,
26 | ;; 257]])
27 |
28 | ;;; Load pre-trained model (weights)
29 | ;;; Note: this will take a few minutes to download everything
30 | (def model (py. transformers/GPT2LMHeadModel from_pretrained "gpt2"))
31 |
32 | ;;; Set the model in evaluation mode to deactivate the DropOut modules
33 | ;;; This is IMPORTANT to have reproducible results during evaluation!
34 | (py. model eval)
35 |
36 |
37 | ;;; Predict all tokens
38 | (def predictions (py/with [r (torch/no_grad)]
39 | (first (model tokens-tensor))))
40 |
41 | ;;; get the predicted next sub-word
42 | (def predicted-index (let [last-word-predictions (-> predictions first last)
43 | arg-max (torch/argmax last-word-predictions)]
44 | (py. arg-max item)))
45 |
46 | predicted-index ;=>582
47 |
48 | (py. tokenizer decode (-> (into [] indexed-tokens)
49 | (conj predicted-index)))
50 |
51 | ;=> "Who was Jim Henson? Jim Henson was a man"
52 |
53 |
54 | ;;;; ===========================
55 |
56 | ;; GPT-2 as well as some other models (GPT, XLNet, Transfo-XL, CTRL) make use of a past or mems attribute which can be used to prevent re-computing the key/value pairs when using sequential decoding. It is useful when generating sequences as a big part of the attention mechanism benefits from previous computations.
57 |
58 | ;; Here is a fully-working example using the past with GPT2LMHeadModel and argmax decoding (which should only be used as an example, as argmax decoding introduces a lot of repetition):
59 |
60 | (def tokenizer (py. transformers/GPT2Tokenizer from_pretrained "gpt2"))
61 | (def model (py. transformers/GPT2LMHeadModel from_pretrained "gpt2"))
62 |
63 | (def generated (into [] (py. tokenizer encode "The Manhattan bridge")))
64 | (def context (torch/tensor [generated]))
65 |
66 |
67 | (defn generate-sequence-step [{:keys [generated-tokens context past]}]
68 | (let [[output past] (model context :past past)
69 | token (torch/argmax (first output))
70 | new-generated (conj generated-tokens (py. token tolist))]
71 | {:generated-tokens new-generated
72 | :context (py. token unsqueeze 0)
73 | :past past
74 | :token token}))
75 |
76 | (defn decode-sequence [{:keys [generated-tokens]}]
77 | (py. tokenizer decode generated-tokens))
78 |
79 | (loop [step {:generated-tokens generated
80 | :context context
81 | :past nil}
82 | i 10]
83 | (if (pos? i)
84 | (recur (generate-sequence-step step) (dec i))
85 | (decode-sequence step)))
86 |
87 | ;=> "The Manhattan bridge\n\nThe Manhattan bridge is a major artery for"
88 |
89 |
90 | ;;; Let's make a nice function to generate text
91 |
92 | (defn generate-text [starting-text num-of-words-to-predict]
93 | (let [tokens (into [] (py. tokenizer encode starting-text))
94 | context (torch/tensor [tokens])
95 | result (reduce
96 | (fn [r i]
97 | (println i)
98 | (generate-sequence-step r))
99 |
100 | {:generated-tokens tokens
101 | :context context
102 | :past nil}
103 |
104 | (range num-of-words-to-predict))]
105 | (decode-sequence result)))
106 |
107 | (generate-text "Natural language processing tasks are typically approached with"
108 | 100)
109 |
110 | ;=> "Clojure is a dynamic, general purpose programming language, combining the approachability and interactive. It is a language that is easy to learn and use, and is easy to use for anyone"
111 |
112 |
113 |
114 | ;;;;;; Better sequence generation
115 | ;;; With temperature sampling to reduce repetitions
116 |
117 | ;;; from https://github.com/huggingface/transformers/issues/1725
118 |
119 | (require-python 'torch.nn.functional)
120 |
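;; A quick sketch of what the temperature does (toy logits, not model output):
;; dividing by a temperature below 1 sharpens the softmax distribution, while a
;; temperature above 1 flattens it; sampling from that distribution instead of
;; taking the argmax is what breaks up the repetition.
(comment
  (torch.nn.functional/softmax (torch/div (torch/tensor [1.0 2.0 3.0]) 0.5) :dim -1)
  (torch.nn.functional/softmax (torch/div (torch/tensor [1.0 2.0 3.0]) 2.0) :dim -1))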
121 | (defn sample-sequence-step [{:keys [generated-tokens context past temp]
122 | :or {temp 0.8}}]
123 | (let [[output past] (py/with [r (torch/no_grad)]
124 | (model context :past past))
125 | next-token-logits (torch/div (-> output first last)
126 | (if (pos? temp) temp 1))
127 | token (torch/multinomial
128 | (torch.nn.functional/softmax next-token-logits :dim -1) :num_samples 1)
129 | new-generated (conj generated-tokens (first (py/$a token tolist)))]
130 | {:generated-tokens new-generated
131 | :context (py/$a token unsqueeze 0)
132 | :past past
133 | :token token}))
134 |
135 | (defn generate-text2 [starting-text num-of-words-to-predict temp]
136 | (let [tokens (into [] (py/$a tokenizer encode starting-text))
137 | context (torch/tensor [tokens])
138 | result (reduce
139 | (fn [r i]
140 | (println i)
141 | (sample-sequence-step (assoc r :temp temp)))
142 |
143 | {:generated-tokens tokens
144 | :context context
145 | :past nil}
146 |
147 | (range num-of-words-to-predict))]
148 | (decode-sequence result)))
149 |
150 | (generate-text2 "Natural language processing tasks are typically approached with"
151 | 100
152 | 0.8)
153 |
154 | ;>"Natural language processing tasks are typically approached with distress signals and pleasurable stimuli.\n\n7.2.3. Structural networks\n\nStructural networks are comprised of various layers of information that are coupled with instructions for performing behavioral tasks. Such networks can be used for e.g., associating individual groups with special differential activities (e.g., listening to music, studying a subject's handwriting), or for performing complex tasks such as reading and writing a chart. The presence of structures that are familiar to the participant may also help"
155 |
156 | (generate-text2 "It is thought that cheese was first discovered around 8000 BC around the time when sheep were first domesticated"
157 | 100
158 | 0.8)
159 | ;=>"It is thought that cheese was first discovered around 8000 BC around the time when sheep were first domesticated as sheep. Native American plants and animals associated with such plants are described as being \"mushy, leafy and musky\" from having \"powder-like stalks and narrow niche-like leaves.\" They are believed to have been found in the Cauca Chaco area of South America and northern Mexico. The earliest known cases of cheese in the Americas could be traced back to around 160 BC, when the deposits of the Cauca Chaco were discovered in Colombia, Peru and Argentina"
160 |
161 |
162 | (generate-text2 "Rich Hickey developed Clojure because he wanted a modern Lisp for functional programming, symbiotic with the established Java platform"
163 | 100
164 | 0.8)
165 | "Rich Hickey developed Clojure because he wanted a modern Lisp for functional programming, symbiotic with the established Java platform. He knew that Clojure would make it hard to access any memory through Java, and code a good amount of Lisp. He had much to learn about programming at the time, and Clojure was perfect for him. It was important to understand the dominant language of Lisp, which was Clojure and JVM. Because of this, JVM was named 'Snack: No Slobs in Clojure'. This was a very important order of things, for JVM. Clojure had a major advantage over JVM in"
166 |
167 | (generate-text2 "What is the average rainfall in Florida?"
168 | 100
169 | 0.8)
170 |
--------------------------------------------------------------------------------
/src/gigasquid/igraph.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.igraph
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]))
4 |
5 | ;;; https://igraph.org/python/doc/tutorial/tutorial.html#creating-a-graph-from-scratch
6 |
7 | ;;; igraph is a Python graph library
8 |
9 | ;;; sudo pip3 install python-igraph
10 | ;;; sudo pip3 install pycairo
11 |
12 |
13 | (require-python '[igraph :as igraph])
14 |
15 | (def g (igraph/Graph))
16 | (py. g add_vertices 3)
17 | (py. g add_edges [[0 1] [1 2]])
18 |
19 | ;;; it's very stateful from here, but doto keeps the mutations tidy
20 | (doto g
21 | (py. add_edges [[2 0]])
22 | (py. add_vertices 3)
23 | (py. add_edges [[2 3] [3 4] [4 5] [5 3]]))
24 |
25 | (igraph/summary g)
26 | ;;; IGRAPH U--- 6 7 --
27 |
28 |
29 | (def g2 (py. (igraph/Graph) Famous "petersen"))
30 | ;;; this actually works fine once I installed everything
31 | ;;; the image will show up
32 | (def plot (igraph/plot g2))
33 | ;;; save the image to disk
34 | (py. plot save "myplot.png")
35 |
--------------------------------------------------------------------------------
/src/gigasquid/lieden.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.lieden
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [clojure.java.shell :as sh]))
5 |
6 | ;;;sudo pip3 install leidenalg
7 |
8 | ;;; you also need to make sure igraph is working and installed too (see igraph.clj)
9 |
10 | ;;; What is leidenalg? https://github.com/vtraag/leidenalg
11 | ;; Implementation of the Leiden algorithm for various quality functions to be used with igraph in Python.
12 | ;;; sudo pip3 install pycairo
13 |
14 | (require-python '[igraph :as ig])
15 | (require-python '[leidenalg :as la])
16 |
17 | ;;https://leidenalg.readthedocs.io/en/latest/intro.html
18 |
19 | ;;Let us then look at one of the most famous examples of network science: the Zachary karate club (it even has a prize named after it):
20 | (def G (py. (ig/Graph) Famous "Zachary"))
21 |
22 | ;;;Now detecting communities with modularity is straightforward
23 |
24 |
25 | (def partition (la/find_partition G la/ModularityVertexPartition))
26 |
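;;; a sketch of inspecting the result (assuming the usual leidenalg partition API):
;;; membership gives the community index for each vertex, and quality reports the
;;; value of the optimised quality function (modularity here)
(py.- partition membership)
(py. partition quality)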
27 | ;;; plotting results
28 |
29 | (def plot (ig/plot partition))
30 |
31 | ;;; save the plot png
32 |
33 | (py. plot save "zach.png")
34 |
35 |
36 |
--------------------------------------------------------------------------------
/src/gigasquid/mxnet.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.mxnet
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [clojure.string :as string]))
5 |
6 | ;;; sudo pip3 install mxnet
7 | ;;; sudo pip3 install opencv-python
8 |
9 | (require-python '[mxnet :as mxnet])
10 | (require-python '[mxnet.ndarray :as ndarray])
11 | (require-python '[mxnet.module :as module])
12 | (require-python '[mxnet.io :as io])
13 | (require-python '[mxnet.test_utils :as test-utils])
14 | (require-python '[mxnet.initializer :as initializer])
15 | (require-python '[mxnet.metric :as metric])
16 | (require-python '[mxnet.symbol :as sym])
17 |
18 |
19 | ;;; get the mnist data and format it
20 |
21 | (def mnist (test-utils/get_mnist))
22 | (def train-x (ndarray/array (py. (py/get-item mnist "train_data") "reshape" -1 784)))
23 | (def train-y (ndarray/array (py/get-item mnist "train_label")))
24 | (def test-x (ndarray/array (py. (py/get-item mnist "test_data") "reshape" -1 784)))
25 | (def test-y (ndarray/array (py/get-item mnist "test_label")))
26 |
27 | (def batch-size 100)
28 |
29 | (def train-dataset (io/NDArrayIter :data train-x
30 | :label train-y
31 | :batch_size batch-size
32 | :shuffle true))
33 | (def test-dataset (io/NDArrayIter :data test-x
34 | :label test-y
35 | :batch_size batch-size))
36 |
37 |
38 | (def data-shapes (py.- train-dataset "provide_data"))
39 | (def label-shapes (py.- train-dataset "provide_label"))
40 |
41 | data-shapes ;=> [DataDesc[data,(10, 784),,NCHW]]
42 | label-shapes ;=> [DataDesc[softmax_label,(10,),,NCHW]]
43 |
44 |
45 | ;;;; Setting up the model and initializing it
46 |
47 | (def data (sym/Variable "data"))
48 |
49 | (def net (-> (sym/Variable "data")
50 | (sym/FullyConnected :name "fc1" :num_hidden 128)
51 | (sym/Activation :name "relu1" :act_type "relu")
52 | (sym/FullyConnected :name "fc2" :num_hidden 64)
53 | (sym/Activation :name "relu2" :act_type "relu")
54 | (sym/FullyConnected :name "fc3" :num_hidden 10)
55 | (sym/SoftmaxOutput :name "softmax")))
56 |
57 |
58 |
59 | (def model (py/call-kw mxnet.module/Module [] {:symbol net :context (mxnet/cpu)}))
60 | (py. model bind :data_shapes data-shapes :label_shapes label-shapes)
61 | (py. model init_params)
62 | (py. model init_optimizer :optimizer "adam")
63 | (def acc-metric (mxnet.metric/Accuracy))
64 |
65 |
66 | (defn end-of-data-error? [e]
67 | (string/includes? (.getMessage e) "StopIteration"))
68 |
69 | (defn reset [iter]
70 | (py. iter reset))
71 |
72 | (defn next-batch [iter]
73 | (try (py. iter next)
74 | (catch Exception e
75 | (when-not (end-of-data-error? e)
76 | (throw e)))))
77 |
78 | (defn get-metric [metric]
79 | (py. metric get))
80 |
81 | (defn train-epoch [model dataset metric]
82 | (reset dataset)
83 | (loop [batch (next-batch dataset)
84 | i 0]
85 | (if batch
86 | (do
87 | (py. model forward batch :is_train true)
88 | (py. model backward)
89 | (py. model update)
90 | (py. model update_metric metric (py/get-attr batch "label"))
91 | (when (zero? (mod i 100)) (println "i-" i " Training Accuracy " (py/$a metric get)))
92 | (recur (next-batch dataset) (inc i)))
93 | (println "Final Training Accuracy " (get-metric metric)))))
94 |
95 | (defn test-accuracy [model dataset metric]
96 | (reset dataset)
97 | (loop [batch (next-batch dataset)
98 | i 0]
99 | (if batch
100 | (do
101 | (py. model forward batch)
102 | (py. model update_metric metric (py/get-attr batch "label"))
103 | (when (zero? (mod i 100)) (println "i-" i " Test Accuracy " (py/$a metric get)))
104 | (recur (next-batch dataset) (inc i)))
105 | (println "Final Test Accuracy " (get-metric metric)))))
106 |
107 |
108 | (comment
109 |
110 |
111 | ;;;training
112 | (dotimes [i 3]
113 | (println "========= Epoch " i " ============")
114 | (train-epoch model train-dataset acc-metric))
115 | (get-metric acc-metric) ;=> ('accuracy', 0.9483555555555555)
116 |
117 | ;;;;
118 | (test-accuracy model test-dataset acc-metric)
119 | (get-metric acc-metric) ;=> ('accuracy', 0.9492052631578948)
120 |
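;; a small sketch: Module also exposes a predict method that runs a whole iterator
;; and returns the raw softmax outputs (the shape is illustrative, not a recorded run)
(def preds (py. model predict test-dataset))
(py.- preds shape) ;=> e.g. (10000, 10) - one row of class probabilities per test image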
121 | ;;visualization
122 |
123 | (py. train-dataset "reset")
124 | (def bd (next-batch train-dataset))
125 | (def data (first (py.- bd "data")))
126 |
127 | (def image (ndarray/slice data :begin 0 :end 1))
128 | (def image2 (py. image "reshape" [28 28]))
129 | (def image3 (-> (ndarray/multiply image2 256)
130 | (ndarray/cast :dtype "uint8")))
131 | (def npimage (py. image3 "asnumpy"))
132 |
133 |
134 | (require-python '[cv2 :as cv2])
135 | (cv2/imwrite "number.jpg" npimage)
136 |
137 |
138 | )
139 |
140 |
141 |
142 |
143 |
144 |
145 |
--------------------------------------------------------------------------------
/src/gigasquid/nltk.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.nltk
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]))
4 |
5 | ;;; What is NLTK ?
6 | ;;; https://www.nltk.org/
7 | ;; NLTK is a leading platform for building Python programs to work with human language data.
8 | ;; It provides easy-to-use interfaces to over 50 corpora and lexical resources such as WordNet,
9 | ;; along with a suite of text processing libraries for classification, tokenization, stemming,
10 | ;; tagging, parsing, and semantic reasoning, wrappers for industrial-strength NLP libraries
11 | ;; and an active discussion forum.
12 |
13 | (require-python '[nltk :as nltk])
14 | (comment
15 |
16 | ;; We will follow some examples from here first
17 | ;; https://www.nltk.org/book/ch01.html
18 |
19 | ;;; you can download individual packages using a parameter
20 | ;(nltk/download "wordnet")
21 | ;;; you can install just what you need for the examples
22 | ;(nltk/download "book")
23 | ;;;; install just the corpora, no grammars or trained models using
24 | ;(nltk/download "all-corpora")
25 | ;;;; or a popular subset
26 | ;(nltk/download "popular")
27 | ;;; or you can download everything with "all"
28 | ;(nltk/download "all") ;;; BEWARE it downloads lots!
29 |
30 | ;;;; Book datasets
31 | (nltk/download "book")
32 | (require-python '[nltk.book :as book])
33 |
34 | (book/texts)
35 | ;;; prints out in repl
36 | ;; text1: Moby Dick by Herman Melville 1851
37 | ;; text2: Sense and Sensibility by Jane Austen 1811
38 | ;; text3: The Book of Genesis
39 | ;; text4: Inaugural Address Corpus
40 | ;; text5: Chat Corpus
41 | ;; text6: Monty Python and the Holy Grail
42 | ;; text7: Wall Street Journal
43 | ;; text8: Personals Corpus
44 | ;; text9: The Man Who Was Thursday by G . K . Chesterton 1908
45 |
46 | book/text1 ;=>
47 | book/text2 ;=>
48 |
49 | ;;; a concordance view of a given word gives every occurrence
50 |
51 | (py. book/text1 concordance "monstrous")
52 | ;;; displays in repl
53 | ;Displaying 11 of 11 matches:
54 | ;; ong the former , one was of a most monstrous size . ... This came towards us ,
55 | ;; ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
56 | ;; ll over with a heathenish array of monstrous clubs and spears . Some were thick
57 | ;; d as you gazed , and wondered what monstrous cannibal and savage could ever hav
58 | ;; that has survived the flood ; most monstrous and most mountainous ! That Himmal
59 | ;; they might scout at Moby Dick as a monstrous fable , or still worse and more de
60 | ;; th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l
61 | ;; ing Scenes . In connexion with the monstrous pictures of whales , I am strongly
62 | ;; ere to enter upon those still more monstrous stories of them which are to be fo
63 | ;; ght have been rummaged out of this monstrous cabinet there is no telling . But
64 | ;; of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u
65 |
66 |
67 |
68 | ;;; What other words appear in a similar range of contexts
69 | (py. book/text1 similar "monstrous")
70 | ;;; displays in repl
71 | ;; contemptible christian abundant few part mean careful puzzled
72 | ;; mystifying passing curious loving wise doleful gamesome singular
73 | ;; delightfully perilous fearless
74 |
75 | (py. book/text2 similar "monstrous")
76 | ;; displays in repl
77 | ;; delightfully perilous fearless
78 | ;; very so exceedingly heartily a as good great extremely remarkably
79 | ;; sweet vast amazingly
80 |
81 | ;;; see what sort of methods this "Text" object has
82 | (py/att-type-map book/text3)
83 | ;;; get the length of the book of Genesis
84 | (py/len book/text3) ;=> 44764
85 | ;; or get the tokens and count them in clojure
86 | (count (py.- book/text3 tokens)) ;=> 44764
87 |
88 | ;;; get the sorted set of tokens
89 | (-> (py.- book/text3 tokens) set count) ;=> 2789
90 |
91 | ;;; lexical diversity (a measure of the richness of a text)
92 | (defn lexical-diversity [text]
93 | (let [tokens (py.- text tokens)]
94 | (/ (-> tokens set count)
95 | (* 1.0 (count tokens)))))
96 |
97 | (lexical-diversity book/text3) ;=> 0.06230453042623537
98 | (lexical-diversity book/text5) ;=> 0.13477005109975562
99 |
100 |
101 | ;;; Moving onto Chapter 2 https://www.nltk.org/book/ch02.html
102 |
103 | ;;; Accessing Text Corpora
104 |
105 | (require-python '[nltk.corpus :as corpus])
106 |
107 | ;; NLTK includes a small selection of texts from the Project Gutenberg electronic text archive, which contains some 25,000 free electronic books, hosted at http://www.gutenberg.org/. We begin by getting the Python interpreter to load the NLTK package, then ask to see nltk.corpus.gutenberg.fileids(), the file identifiers in this corpus:
108 |
109 | (py. corpus/gutenberg fileids)
110 | ;=> ['austen-emma.txt', 'austen-persuasion.txt', 'austen-sense.txt', 'bible-kjv.txt', 'blake-poems.txt', 'bryant-stories.txt', 'burgess-busterbrown.txt', 'carroll-alice.txt', 'chesterton-ball.txt', 'chesterton-brown.txt', 'chesterton-thursday.txt', 'edgeworth-parents.txt', 'melville-moby_dick.txt', 'milton-paradise.txt', 'shakespeare-caesar.txt', 'shakespeare-hamlet.txt', 'shakespeare-macbeth.txt', 'whitman-leaves.txt']
111 |
112 | ;;; let's pick out emma
113 | (def emma (py. corpus/gutenberg words "austen-emma.txt"))
114 | (py/len emma) ;=>192427
115 |
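;; a sketch of the other standard corpus accessors: raw gives characters,
;; words gives tokens, and sents gives sentences
(py/len (py. corpus/gutenberg raw "austen-emma.txt")) ;; number of characters
(py/len (py. corpus/gutenberg sents "austen-emma.txt")) ;; number of sentences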
116 | ;;;;;; Switching over to another tutorial
117 | ;; https://www.datacamp.com/community/tutorials/text-analytics-beginners-nltk
118 |
119 |
120 | ;;; Sentence tokenization
121 | (require-python '[nltk.tokenize :as tokenize])
122 |
123 | (def text "Hello Mr. Smith, how are you doing today? The weather is great, and city is awesome.
124 | The sky is pinkish-blue. You shouldn't eat cardboard")
125 | (def tokenized-sent (tokenize/sent_tokenize text))
126 | tokenized-sent
127 | ;;=> ['Hello Mr. Smith, how are you doing today?', 'The weather is great, and city is awesome.', 'The sky is pinkish-blue.', "You shouldn't eat cardboard"]
128 |
129 |
130 | (def tokenized-word (tokenize/word_tokenize text))
131 | tokenized-word
132 | ;;=> ['Hello', 'Mr.', 'Smith', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The', 'weather', 'is', 'great', ',', 'and', 'city', 'is', 'awesome', '.', 'The', 'sky', 'is', 'pinkish-blue', '.', 'You', 'should', "n't", 'eat', 'cardboard']
133 |
134 | ;;; Frequency Distribution
135 |
136 | (require-python '[nltk.probability :as probability])
137 |
138 | (def fdist (probability/FreqDist tokenized-word))
139 | fdist ;=>
140 |
141 | (py. fdist most_common)
142 | ;=> [('is', 3), (',', 2), ('The', 2), ('.', 2), ('Hello', 1), ('Mr.', 1), ('Smith', 1), ('how', 1), ('are', 1), ('you', 1), ('doing', 1), ('today', 1), ('?', 1), ('weather', 1), ('great', 1), ('and', 1), ('city', 1), ('awesome', 1), ('sky', 1), ('pinkish-blue', 1), ('You', 1), ('should', 1), ("n't", 1), ('eat', 1), ('cardboard', 1)]
143 |
144 |
145 | ;;; stopwords (considered noise in text)
146 |
147 | (require-python '[nltk.corpus :as corpus])
148 |
149 | (def stop-words (into #{} (py. corpus/stopwords words "english")))
150 | stop-words
151 | ;=> #{"d" "itself" "more" "didn't" "ain" "won" "hers" "ours" "further" "shouldn" "his" "him" "hasn't" "s" "doesn" "are" "didn" "don't" "very" "you'd" "under" "who" "which" "isn" "of" "this" "after" "once" "up" "off" "she" "shan't" "nor" "does" "theirs" "ll" "yours" "not" "mustn't" "it" "over" "by" "she's" "it's" "hasn" "is" "few" "shouldn't" "why" "doing" "mightn't" "about" "they" "you" "its" "than" "those" "where" "just" "for" "needn" "should" "my" "again" "themselves" "should've" "ourselves" "whom" "yourselves" "because" "any" "most" "you've" "mustn" "you're" "can" "were" "weren" "ma" "did" "was" "that" "mightn" "if" "same" "both" "doesn't" "don" "had" "what" "an" "or" "have" "couldn't" "am" "couldn" "won't" "their" "a" "so" "them" "weren't" "wouldn" "on" "shan" "own" "above" "but" "when" "until" "be" "haven" "t" "having" "out" "aren't" "that'll" "herself" "and" "do" "myself" "i" "down" "hadn" "here" "too" "y" "between" "such" "needn't" "against" "each" "how" "other" "from" "these" "while" "no" "with" "now" "some" "will" "himself" "all" "you'll" "wouldn't" "re" "then" "isn't" "through" "yourself" "has" "haven't" "being" "our" "during" "wasn" "ve" "before" "only" "your" "to" "into" "m" "aren" "we" "as" "wasn't" "he" "me" "at" "below" "o" "the" "her" "been" "there" "in" "hadn't"}
152 |
153 | ;;; removing stopwords
154 |
155 | (def filtered-sent (->> tokenized-sent
156 | (map tokenize/word_tokenize)
157 | (map #(remove stop-words %))))
158 | filtered-sent
159 | ;; (("Hello" "Mr." "Smith" "," "today" "?")
160 | ;; ("The" "weather" "great" "," "city" "awesome" ".")
161 | ;; ("The" "sky" "pinkish-blue" ".")
162 | ;; ("You" "n't" "eat" "cardboard"))
163 |
164 |
165 | ;;;; Lexicon Normalization
166 | ;;stemming
167 |
168 | (require-python '[nltk.stem :as stem])
169 |
170 | (let [ps (stem/PorterStemmer)]
171 | (->> filtered-sent
172 | (map (fn [sent] (map #(py. ps stem %) sent)))))
173 | ;;=> (("hello" "mr." "smith" "," "today" "?")
174 | ;; ("the" "weather" "great" "," "citi" "awesom" ".")
175 | ;; ("the" "sky" "pinkish-blu" ".") ("you" "n't" "eat" "cardboard")
176 |
177 |
178 | ;;; Lemmatization
179 |
180 | (require-python '[nltk.stem.wordnet :as wordnet])
181 |
182 | (let [lem (wordnet/WordNetLemmatizer)
183 | stem (stem/PorterStemmer)
184 | word "flying"]
185 | {:lemmatized-word (py. lem lemmatize word "v")
186 | :stemmed-word (py. stem stem word)})
187 | ;=> {:lemmatized-word "fly", :stemmed-word "fli"}
188 |
189 | ;;; POS Tagging
190 | (let [sent "Albert Einstein was born in Ulm, Germany in 1879."
191 | tokens (nltk/word_tokenize sent)]
192 | {:tokens tokens
193 | :pos-tag (nltk/pos_tag tokens)})
194 | ;; {:tokens
195 | ;; ['Albert', 'Einstein', 'was', 'born', 'in', 'Ulm', ',', 'Germany', 'in', '1879', '.'],
196 | ;; :pos-tag
197 | ;; [('Albert', 'NNP'), ('Einstein', 'NNP'), ('was', 'VBD'), ('born', 'VBN'), ('in', 'IN'), ('Ulm', 'NNP'), (',', ','), ('Germany', 'NNP'), ('in', 'IN'), ('1879', 'CD'), ('.', '.')]}
198 |
199 |
200 |
201 |
202 | )
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
--------------------------------------------------------------------------------
/src/gigasquid/numpy_plot.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.numpy-plot
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 | (require-python '[matplotlib.pyplot :as pyplot])
7 | (require-python '[numpy :as numpy])
8 |
9 | ;;;; you will need matplotlib, numpy, and pillow installed to run this in python3
10 |
11 | ;;; This uses a macro from the plot namespace that shells out to the "open" command
12 | ;;; to show a saved image from pyplot. If you are not on a Mac, you will need to change that
13 | ;;; to whatever image-viewer shell command your system provides.
14 |
15 | (comment
16 | (def x (numpy/linspace 0 2 50))
17 |
18 | (plot/with-show (matplotlib.pyplot/plot [[1 2 3 4 5] [1 2 3 4 10]] :label "linear"))
19 |
20 | (plot/with-show
21 | (pyplot/plot [x x] :label "linear")
22 | (pyplot/plot [x (py. x "__pow__" 2)] :label "quadratic")
23 | (pyplot/plot [x (py. x "__pow__" 3)] :label "cubic")
24 | (pyplot/xlabel "x label")
25 | (pyplot/ylabel "y label")
26 | (pyplot/title "Simple Plot"))
27 |
28 |
29 | ;;; numpy printing tutorial http://cs231n.github.io/python-numpy-tutorial/#matplotlib-plotting
30 | (let [x (numpy/arange 0 (* 3 numpy/pi) 0.1)
31 | y (numpy/sin x)]
32 | (plot/with-show
33 | (pyplot/plot x y)))
34 |
35 | (let [x (numpy/arange 0 (* 3 numpy/pi) 0.1)
36 | y-sin (numpy/sin x)
37 | y-cos (numpy/cos x)]
38 | (plot/with-show
39 | (pyplot/plot x y-sin)
40 | (pyplot/plot x y-cos)
41 | (pyplot/xlabel "x axis label")
42 | (pyplot/ylabel "y axis label")
43 | (pyplot/title "Sine and Cosine")
44 | (pyplot/legend ["Sine" "Cosine"])))
45 |
46 | ;;;; Subplots
47 |
48 | (let [x (numpy/arange 0 (* 3 numpy/pi) 0.1)
49 | y-sin (numpy/sin x)
50 | y-cos (numpy/cos x)]
51 | (plot/with-show
52 | ;;; set up a subplot grid that has a height of 2 and width of 1
53 | ;; and set the first such subplot as active
54 | (pyplot/subplot 2 1 1)
55 | (pyplot/plot x y-sin)
56 | (pyplot/title "Sine")
57 |
58 | ;;; set the second subplot as active and make the second plot
59 | (pyplot/subplot 2 1 2)
60 | (pyplot/plot x y-cos)
61 | (pyplot/title "Cosine")))
62 |
63 | ;;;;; Images
64 |
65 | (let [img (pyplot/imread "resources/cat.jpg")
66 | img-tinted (numpy/multiply img [1 0.95 0.9])]
67 | (plot/with-show
68 | (pyplot/subplot 1 2 1)
69 | (pyplot/imshow img)
70 | (pyplot/subplot 1 2 2)
71 | (pyplot/imshow (numpy/uint8 img-tinted))))
72 |
73 |
74 | ;;;;; pie chart
75 | ;;;; from https://matplotlib.org/3.1.1/gallery/pie_and_polar_charts/pie_features.html
76 |
77 |
78 | (let [labels ["Frogs" "Hogs" "Dogs" "Logs"]
79 | sizes [15 30 45 10]
80 | explode [0 0.1 0 0] ; only explode the 2nd slice (Hogs)
81 | ]
82 | (plot/with-show
83 | (let [[fig1 ax1] (pyplot/subplots)]
84 | (py. ax1 "pie" sizes :explode explode :labels labels :autopct "%1.1f%%"
85 | :shadow true :startangle 90)
86 | (py. ax1 "axis" "equal")) ;equal aspec ration ensures that pie is drawn as circle
87 | ))
88 | )
89 |
--------------------------------------------------------------------------------
/src/gigasquid/opencv/README.md:
--------------------------------------------------------------------------------
1 | # OpenCV
2 |
3 | - [OpenCV](https://opencv.org/)
4 | - Official OpenCV [documentation](https://opencv.org/)
5 |
6 | ## Basic installation
7 |
8 | - Python
9 |
10 | ```shell
11 | pip3 install numpy matplotlib opencv-contrib-python-headless
12 | ```
13 |
14 | - Clojure
15 |
16 | Just run your favorite `cider-jack-in` if you are on Emacs.
17 | For other editors, use the equivalent command for your editor.
18 |
19 | ## Outputs
20 |
21 | 
22 |
23 | - Simple Sketch
24 |
25 | ```clojure
26 | ;; Process image as sketch
27 | (process-image {:input-file "resources/opencv/cat.jpg"
28 | :output-file "resources/opencv/cat-sketch.png"
29 | :tx-fns sketch-image})
30 | ```
31 |
32 | 
33 |
34 | - Cartoonize Image (color)
35 |
36 | ```clojure
37 | (process-image {:input-file "resources/opencv/cat.jpg"
38 | :output-file "resources/opencv/cat-cartoonize-color.png"
39 | :tx-fns cartoonize-image})
40 | ```
41 |
42 | 
43 |
44 | - Cartoonize Image (gray-scale)
45 |
46 | ```clojure
47 | (process-image {:input-file "resources/opencv/cat.jpg"
48 | :output-file "resources/opencv/cat-cartoonize-gray.png"
49 | :tx-fns cartoonize-image-gray})
50 | ```
51 |
52 | 
53 |
--------------------------------------------------------------------------------
/src/gigasquid/opencv/core.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.opencv.core
2 | (:require
3 | [clojure.string :as string]
4 | [gigasquid.utils :refer [display-image]]
5 | [libpython-clj.require
6 | :refer [require-python]]
7 | [libpython-clj.python
8 | :as py
9 | :refer [py.
10 | py..
11 | py.-
12 | att-type-map
13 | ->python
14 | ->jvm
15 | as-map
16 | as-list]]
17 | [clojure.java.shell :as sh]
18 | [clojure.pprint :refer [pprint]])
19 | (:import [java.io File]))
20 |
21 | ;;; Python installation
22 | ;;; sudo pip3 install numpy matplotlib opencv-contrib-python-headless
23 |
24 | (require-python
25 | '[cv2
26 | :as cv2]
27 | '[matplotlib.pyplot
28 | :as pyplot]
29 | '[builtins
30 | :as python
31 | :refer [slice tuple]]
32 | '[numpy
33 | :as np
34 | :refer [array]]
35 | '[operator
36 | :as operator
37 | :refer [getitem]])
38 |
39 | ;; ====================================== ;;
40 | ;; Basic exploration to learn the api
41 | ;; ====================================== ;;
42 | (comment
43 |
44 | (def img (cv2/imread "resources/opencv/opencv-logo.png"))
45 |
46 | (-> img
47 | att-type-map)
48 |
49 | ;; Note: how we destructure Python's tuple into a Clojure vector
50 | (let [img (cv2/imread "resources/opencv/opencv-logo.png")
51 | [h w c] (py.- img shape)]
52 | [h w c])
53 | ;;=> (99, 82, 3)
54 |
55 | ;; Total number of elements is obtained by img.size
56 | (py.- img size) ;;=> 24354
57 |
58 | (py.- img dtype) ;;=> uint8
59 |
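;; The builtins/operator refers above (tuple, getitem) let us index into the
;; numpy image directly - a sketch with arbitrary in-range coordinates:
(getitem img (tuple [50 40])) ;; BGR value of the pixel at row 50, col 40
(getitem img (tuple [50 40 0])) ;; just the blue channel of that pixel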
60 | (def img2 (cv2/cvtColor img cv2/COLOR_BGR2GRAY))
61 |
62 | ;; Save the result to the file
63 | (cv2/imwrite "resources/opencv/opencv-gray-logo.png" img2) ;;=> true
64 |
65 | )
66 |
67 | ;; ====================================== ;;
68 | ;; Useful transformation function
69 | ;; ====================================== ;;
70 | (defn ^:private read-input
71 | [input-file]
72 | (let [input-image (cv2/imread input-file)
73 | temp-file (File/createTempFile "opencv-temp" ".png")]
74 | [input-image temp-file]))
75 |
76 | ;; sketch image
77 | (defn sketch-image
78 | [img]
79 | (let [img-gray (cv2/cvtColor img cv2/COLOR_BGR2GRAY)
80 | img-gray (cv2/medianBlur img-gray 5)
81 | edges (cv2/Laplacian img-gray cv2/CV_8U :ksize 5)
82 | [_ thresholded] (cv2/threshold edges 70 255 cv2/THRESH_BINARY_INV)]
83 | thresholded))
84 |
85 | (comment
86 | ;; Sketch the cat image
87 | (let [img (cv2/imread "resources/opencv/cat.jpg")]
88 | (sketch-image img))
89 |
90 | )
91 |
92 | (defn cartoonize-image
93 | ([image]
94 | (cartoonize-image image false))
95 | ([image gray-mode]
96 | (let [thresholded (sketch-image image)
97 | filtered (cv2/bilateralFilter image 10 250 250)
98 | cartoonized (cv2/bitwise_and filtered filtered :mask thresholded)]
99 | (if gray-mode
100 | (cv2/cvtColor cartoonized cv2/COLOR_BGR2GRAY)
101 | cartoonized))))
102 |
103 | (defn cartoonize-image-gray
104 | [image]
105 | (cartoonize-image image true))
106 |
107 | (comment
108 | ;; gray-mode true
109 | (let [image (cv2/imread "resources/opencv/cat.jpg")]
110 | (cartoonize-image image true))
111 |
112 | ;; Or use the wrapper function
113 | (let [image (cv2/imread "resources/cat.jpg")]
114 | (cartoonize-image-gray image))
115 |
116 | ;; color mode
117 | (let [image (cv2/imread "resources/cat.jpg")]
118 | (cartoonize-image image))
119 | )
120 |
121 | ;; Re-usable function for exercising the above functions
122 |
123 | (defn process-image
124 | "Apply opencv function to a given image and optionally show it.
125 |
126 | (process-image {:input-file \"resources/opencv/cat.jpg\"
127 | :output-file \"resources/opencv/cat-sketch.png\"
128 | :tx-fns sketch-image
129 | :open? true})"
130 | [& [{:keys [input-file
131 | output-file
132 | tx-fns
133 | open?]
134 | :or {input-file "resources/opencv/cat.jpg"
135 | open? true}}]]
136 | (pyplot/figure :figsize (python/tuple [14 6]))
137 | (pyplot/suptitle "Example Sketch"
138 | :fontsize 14
139 | :fontweight "bold")
140 | (let [image-src (cv2/imread input-file)
141 | ;; TODO: allow arguments to the function to avoid overload
142 | image-dst (tx-fns image-src)]
143 | (cv2/imwrite output-file image-dst)
144 | (if open?
145 | (display-image output-file)
146 | (println (format "Your output file : %s" output-file)))))
147 |
148 | (comment
149 |
150 | ;; Process image as sketch
151 | (process-image {:input-file "resources/opencv/cat.jpg"
152 | :output-file "resources/opencv/cat-sketch.png"
153 | :tx-fns sketch-image})
154 |
155 | (process-image {:input-file "resources/opencv/cat.jpg"
156 | :output-file "resources/opencv/cat-cartoonize-color.png"
157 | :tx-fns cartoonize-image})
158 |
159 | (process-image {:input-file "resources/opencv/cat.jpg"
160 | :output-file "resources/opencv/cat-cartoonize-gray.png"
161 | :tx-fns cartoonize-image-gray})
162 |
163 | )
164 |
165 | (defn -main
166 | [& args]
167 | (process-image {:input-file "resources/opencv/cat.jpg"
168 | :output-file "resources/opencv/cat-sketch.png"
169 | :tx-fns sketch-image}))
170 |
171 | ;; We can also run it via main function
172 | #_(-main)
173 |
--------------------------------------------------------------------------------
/src/gigasquid/plot.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.plot
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.utils :refer [display-image create-tmp-file]]
5 | [clojure.java.shell :as sh]))
6 |
7 | ;;; This uses the headless version of matplotlib to generate a graph then copy it to the JVM
8 | ;;; where we can then print it
9 |
10 | ;;; have to set the headless mode before requiring pyplot
11 | (def mplt (py/import-module "matplotlib"))
12 | (py. mplt "use" "Agg")
13 |
14 | (require-python '[matplotlib.pyplot :as pyplot])
15 | (require-python 'matplotlib.backends.backend_agg)
16 | (require-python 'numpy)
17 |
18 | (defmacro with-show
19 | "Takes forms with mathplotlib.pyplot to then show locally"
20 | [& body]
21 | `(let [_# (pyplot/clf)
22 | fig# (pyplot/figure)
23 | agg-canvas# (matplotlib.backends.backend_agg/FigureCanvasAgg fig#)
24 | temp-file# (create-tmp-file "tmp-image" ".png")
25 | temp-image# (.getAbsolutePath temp-file#)]
26 | ~(cons 'do body)
27 | (py. agg-canvas# "draw")
28 | (pyplot/savefig temp-image#)
29 | (display-image temp-image#)
30 | (.deleteOnExit temp-file#)))
31 |
32 | ;;;; If you run into memory problems with temporary files, try this one
33 |
34 | (defmacro with-show-one
35 | "Takes forms with mathplotlib.pyplot to then show locally"
36 | [& body]
37 | `(let [_# (pyplot/clf)
38 | fig# (pyplot/figure)
39 | agg-canvas# (matplotlib.backends.backend_agg/FigureCanvasAgg fig#)]
40 | ~(cons 'do body)
41 | (py. agg-canvas# "draw")
42 | (pyplot/savefig "temp.png")
43 | (sh/sh "open" "temp.png")))
44 |
45 | (defmacro with-save
46 | "Takes forms with mathplotlib.pyplot to then show locally"
47 | [fname & body]
48 | `(let [_# (pyplot/clf)
49 | fig# (pyplot/figure)
50 | agg-canvas# (matplotlib.backends.backend_agg/FigureCanvasAgg fig#)]
51 | ~(cons 'do body)
52 | (py. agg-canvas# "draw")
53 | (pyplot/savefig ~fname)))
54 |
55 |
56 |
57 |
58 | (comment
59 |
60 | (def x (numpy/linspace 0 2 100))
61 |
62 | (with-show
63 | (pyplot/plot [x x] :label "linear")
64 | (pyplot/plot [x (py. x "__pow__" 2)] :label "quadratic")
65 | (pyplot/plot [x (py. x "__pow__" 3)] :label "cubic")
66 | (pyplot/xlabel "x label")
67 | (pyplot/ylabel "y label")
68 | (pyplot/title "Simple Plot"))
69 |
70 | (with-show (pyplot/plot [[1 2 3 4 5] [1 2 3 4 10]] :label "linear"))
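;; with-save (defined above) writes the figure straight to a file instead of
;; opening a viewer; the filename here is just an example
(with-save "sine.png"
  (pyplot/plot (numpy/sin x)))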
71 |
72 | )
73 |
--------------------------------------------------------------------------------
/src/gigasquid/psutil/README.md:
--------------------------------------------------------------------------------
1 | # psutil
2 |
3 | Cross-platform lib for process and system monitoring in Python
4 |
5 | Official [Documentation](https://psutil.readthedocs.io/en/latest/)
6 |
7 | ## Basic installation
8 |
9 | - Python
10 |
11 | See [installation](https://psutil.readthedocs.io/en/latest/#id1)
12 |
13 | ```shell
14 | # Or just
15 | pip3 install psutil
16 | ```
17 |
18 | - Clojure
19 |
20 | Just run your favorite `cider-jack-in` if you are on Emacs.
21 | For other editors, use the equivalent command for your editor.
22 |
--------------------------------------------------------------------------------
/src/gigasquid/psutil/core.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.psutil.core
2 | (:require [libpython-clj.require
3 | :refer [require-python]]
4 | [libpython-clj.python
5 | :as py
6 | :refer [py.
7 | py..
8 | py.-
9 | att-type-map
10 | ->python
11 | ->jvm]]
12 | [clojure.java.shell :as sh]
13 | [clojure.string :as str]
14 | [clojure.pprint :refer [pprint]])
15 | (:import [java.io File]))
16 |
17 | ;; https://psutil.readthedocs.io/en/latest/
18 | ;; https://psutil.readthedocs.io/en/latest/#filtering-and-sorting-processes
19 |
20 | ;; Python
21 | ;; pip3 install psutil
22 |
23 | (require-python '[builtins :as python])
24 | (require-python '[psutil :as psu])
25 |
26 | (comment
27 | ;; Get information about the process
28 | (def p (psu/Process))
29 |
30 | (py.. p username)
31 | ;;=> "bchoomnuan"
32 |
33 | (py.. p cpu_times)
34 | ;;=> pcputimes(user=52.755533824, system=1.542032128, children_user=0.0, children_system=0.0)
35 |
36 | ;; And the list goes on
37 | (py.. p
38 | ;;cpu_percent
39 | ;;create_time
40 | ;;ppid
41 | ;;status
42 | ;;cmdline ;; interesting one!
43 | ;;as_dict
44 | ;;parents
45 | ;;cwd ;;=> "/Users/bchoomnuan/github/libpython-clj-examples"
46 | ;;uids
47 | ;;gids
48 | ;;terminal
49 | ;;memory_info
50 | ;;memory_full_info
51 | ;;(memory_percent :memtype "rss")
52 |
53 | ;; More fun to try these [I will skip this for now :)]
54 | ;; (send_signal ..)
55 | ;; (suspend)
56 | ;; (resume)
57 | ;; (terminate)
58 | ;; (kill)
59 | ;; (wait :timeout ..)
60 | )
61 | )
62 |
63 | (comment
64 | ;; List all running process name
65 | (def process-names
66 | (let [procs (psu/process_iter ["name"])]
67 | (map (fn [p]
68 | (py. p name)) procs)))
69 |
70 | (count process-names) ;;=> 475
71 |
72 | ;; Take a peek at the first few items
73 | (take 5 process-names)
74 | ;;=> ("kernel_task" "launchd" "syslogd" "UserEventAgent" "uninstalld")
75 |
76 | ;; How about finding out all processes that have the word "sys" in them?
77 |
78 | (filter (fn [x] (str/index-of (str/lower-case x) "sys"))
79 | process-names)
80 |
81 | ;;=>
82 | #_
83 | ("syslogd" "systemstats" "syspolicyd" "sysmond" "systemstats" "systemsoundserve" "UIKitSystem" "SystemUIServer" "system_installd" "sysextd" "sysdiagnose")
84 |
85 | ;; You can do more of course, see the documentation for ideas
86 | ;; https://psutil.readthedocs.io/en/latest/#filtering-and-sorting-processes
87 |
88 | )
89 |
90 | (comment
91 | ;; There are many functions that we can use like getting information about cpu
92 | (def cpu-times (psu/cpu_times))
93 |
94 | cpu-times ;;=> scputimes(user=67053.19, nice=0.0, system=52277.51, idle=1399764.29)
95 |
96 | ;; Note: the return value is a Python object that we can inspect like
97 | (py.- cpu-times user)
98 | (py.- cpu-times system)
99 | (py.- cpu-times idle)
100 |
101 | ;; How about printing out the percentage of cpu usage at a given interval
102 | (dotimes [x 3]
103 | (println (psu/cpu_percent :interval 1)))
104 |
105 | ;;=> ;; in your REPL
106 | ;; 3.4
107 | ;; 3.5
108 | ;; 3.4
109 |
110 | (def cpu-info
111 | (for [x (range 3)]
112 | (psu/cpu_percent :interval 1
113 | :percpu true)))
114 |
115 | (type cpu-info)
116 | ;;=> clojure.lang.LazySeq
117 |
118 | (pprint cpu-info)
119 | ;;=> ;; in your REPL
120 | #_
121 | ([57.4, 1.0, 18.0, 0.0, 10.9, 0.0, 5.1, 0.0, 6.1, 0.0, 4.0, 0.0]
122 | [60.0, 0.0, 17.8, 1.0, 8.0, 1.0, 5.9, 0.0, 5.0, 0.0, 4.0, 0.0]
123 | [57.0, 1.0, 24.8, 3.0, 11.9, 0.0, 7.1, 0.0, 5.0, 0.0, 2.0, 0.0])
124 |
125 | (-> cpu-info first type)
126 | ;;=> :pyobject
127 |
128 | (-> cpu-info first first type) ;;=> java.lang.Double
129 |
130 | (def cpu-info
131 | (for [x (range 3)]
132 | (psu/cpu_times_percent :interval 1
133 | :percpu false)))
134 |
135 | (-> cpu-info
136 | pprint)
137 |
138 | ;;=> in your REPL
139 | ;; (scputimes(user=1.5, nice=0.0, system=2.0, idle=96.5)
140 | ;; scputimes(user=1.0, nice=0.0, system=2.8, idle=96.2)
141 | ;; scputimes(user=0.7, nice=0.0, system=1.8, idle=97.4))
142 |
143 | (psu/cpu_count) ;;=> 12
144 |
145 | (psu/cpu_count :logical false) ;;=> 6
146 |
147 | (psu/cpu_stats)
148 | ;;=> scpustats(ctx_switches=148596, interrupts=866048, soft_interrupts=579676465, syscalls=1635282)
149 |
150 | (psu/cpu_freq)
151 | ;;=> scpufreq(current=2200, min=2200, max=2200)
152 |
153 | (psu/getloadavg)
154 | ;;=> (3.3349609375, 2.94970703125, 2.6689453125)
155 | )
156 |
157 | ;; Memory
158 | (comment
159 |
160 | (psu/virtual_memory)
161 | ;;=> svmem(total=17179869184, available=7311126528, percent=57.4, used=8922701824, free=184188928, active=5441355776, inactive=6698086400, wired=3481346048)
162 |
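;; like cpu_times above, this is a named tuple we can poke at with py.-
;; (a small sketch; the numbers depend on the machine)
(py.- (psu/virtual_memory) percent)
(py.- (psu/virtual_memory) available)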
163 | (psu/swap_memory)
164 | ;;=> sswap(total=3221225472, used=1549008896, free=1672216576, percent=48.1, sin=206163009536, sout=310902784)
165 |
166 | ;; Disks
167 | (psu/disk_partitions)
168 |
169 | #_ [sdiskpart(device='/dev/disk1s6', mountpoint='/', fstype='apfs', opts='ro,local,rootfs,dovolfs,journaled,multilabel'), sdiskpart(device='/dev/disk1s1', mountpoint='/System/Volumes/Data', fstype='apfs', opts='rw,local,dovolfs,dontbrowse,journaled,multilabel'), sdiskpart(device='/dev/disk1s4', mountpoint='/private/var/vm', fstype='apfs', opts='rw,local,dovolfs,dontbrowse,journaled,multilabel'), sdiskpart(device='/dev/disk1s5', mountpoint='/Volumes/Macintosh HD', fstype='apfs', opts='rw,local,dovolfs,journaled,multilabel'), sdiskpart(device='/dev/disk1s3', mountpoint='/Volumes/Recovery', fstype='apfs', opts='rw,local,dovolfs,dontbrowse,journaled,multilabel')]
170 |
171 | (def du (psu/disk_usage "/"))
172 |
173 | ;;=> sdiskusage(total=250685575168, used=10963034112, free=7645040640, percent=58.9)
174 |
175 | (py.- du total)
176 | (py.- du used)
177 | (py.- du percent)
178 |
179 | (psu/disk_io_counters :perdisk false)
180 | ;;=> sdiskio(read_count=15153434, write_count=5535766, read_bytes=278249762816, write_bytes=100455395328, read_time=6554143, write_time=2819768)
181 |
182 | ;; Network
183 | (psu/net_io_counters :pernic true)
184 |
185 | (psu/net_if_addrs)
186 |
187 | (psu/net_if_stats)
188 |
189 | )
190 |
191 | (comment
192 | ;; Sensors!
193 | (psu/sensors_battery)
194 | ;;=> sbattery(percent=97, secsleft=23400, power_plugged=False)
195 |
196 | (psu/swap_memory)
197 | ;;=> sswap(total=3221225472, used=1414791168, free=1806434304, percent=43.9, sin=219395137536, sout=312922112)
198 |
199 | ;; Others
200 | (psu/users)
201 | ;;=> [suser(name='bchoomnuan', terminal='console', host=None, started=1582210432.0, pid=199), suser(name='bchoomnuan', terminal='ttys001', host=None, started=1582428288.0, pid=24455)]
202 |
203 | )
204 |
205 | (comment
206 | ;; Process management
207 | (count (psu/pids)) ;;=> 501
208 |
209 | (last (psu/pids))
210 |
211 | ;; Take random process object
212 | (def p (psu/Process (last (psu/pids))))
213 |
214 | (py. p name) ;;=> "microstackshot"
215 |
216 | (py. p exe) ;;=> "/usr/libexec/microstackshot"
217 | (py. p ppid) ;;=> 1
218 |
219 | (py.. p (children :recursive true))
220 |
221 | )
222 |
223 | (comment
224 |
225 | ;; Take a peek at 5 processes
226 | (doseq [proc (take 5 (psu/process_iter ["pid" "name"]))]
227 | (println proc)
228 | )
229 |
230 | ;;=> Your REPL
231 | ;; psutil.Process(pid=0, name='kernel_task', started='2020-02-20 09:53:34')
232 | ;; psutil.Process(pid=1, name='launchd', started='2020-02-20 09:53:34')
233 | ;; psutil.Process(pid=120, name='syslogd', started='2020-02-20 09:53:41')
234 | ;; psutil.Process(pid=121, name='UserEventAgent', started='2020-02-20 09:53:41')
235 | ;; psutil.Process(pid=124, name='uninstalld', started='2020-02-20 09:53:41')
236 |
237 | (psu/pid_exists 99532) ;;=> true
238 |
239 | )
240 |
241 | ;; There is much more you can do; just go ahead and look at the
242 | ;; official documentation to see nice and practical usage of the library.
243 | ;; https://psutil.readthedocs.io/en/latest/#
244 |
--------------------------------------------------------------------------------
/src/gigasquid/pygal/README.md:
--------------------------------------------------------------------------------
1 | # Pygal - Sexy Python Charting
2 |
3 | - Github - [Kozea/pygal](https://github.com/Kozea/pygal)
4 | - Official [documentation](http://www.pygal.org/en/latest/documentation/index.html) page
5 | - Based on the following examples from [pygal.org](http://pygal.org).
6 |
7 | ## Basic installation
8 |
9 | - Python
10 |
11 | ```shell
12 | pip install pygal
13 |
14 | ## Optionally you should also install the following library
15 | pip install lxml # improve rendering speed (except on pypy)
16 |
17 | ## To render png output
18 | pip install cairosvg
19 | pip install tinycss
20 | pip install cssselect
21 | ```
22 |
23 | - Clojure
24 |
25 | Just run your favorite `cider-jack-in` if you are on Emacs.
26 | For other editors, use the equivalent command for your editor.
27 |
28 | ### SVG Viewer (if you are on Mac OS)
29 |
30 | To render SVG on the Mac, you may also want to install [Gapplin](http://gapplin.wolfrosch.com/) via the [Mac App Store](https://apps.apple.com/us/app/gapplin/id768053424?mt=12)
31 |
32 | This will make the `open some-file.svg` work properly.
33 |
--------------------------------------------------------------------------------
/src/gigasquid/pygal/core.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.pygal.core
2 | (:require [libpython-clj.require
3 | :refer [require-python]]
4 | [libpython-clj.python
5 | :as py
6 | :refer [py.
7 | py..
8 | py.-
9 | att-type-map
10 | ->python
11 | ->jvm]]
12 | [clojure.java.shell :as sh]
13 | [gigasquid.utils :refer [create-tmp-file
14 | display-image]]
15 | [clojure.pprint :refer [pprint]])
16 | (:import [java.io File]))
17 |
18 | ;;; Python installation
19 | ;;; sudo pip3 install pygal lxml cairosvg tinycss cssselect
20 |
21 | (require-python '[pygal :as pygal])
22 | ;;=> :ok
23 |
24 | ;; http://www.pygal.org/en/latest/documentation/first_steps.html#
25 |
26 | (comment
27 |
28 | ;; For list of configuration see
29 | ;; http://www.pygal.org/en/latest/documentation/configuration/chart.html
30 | (def config (pygal/Config
31 | :pretty_print true
32 | :title "My Pygal Chart"))
33 |
34 | ;; Some configurable settings
35 | (py.- config title) ;;=> "My Pygal Chart"
36 | (py.- config width) ;;=> 800
37 | (py.- config height) ;;=> 600
38 | (py.- config show_legend) ;;=> true
39 | (py.- config fill) ;;=> false
40 | (py.- config style) ;;=> pygal.style.Style
41 | (py.- config legend_at_bottom) ;;=> false
42 | (py.- config legend_box_size) ;;=> 12
43 | (py.- config margin) ;;=> 20
44 | (py.- config max_scale) ;;=> 16
45 | (py.- config min_scale) ;;=> 4
46 | (py.- config pretty_print) ;;=> true
47 |
48 | ;; For full list of options try
49 | (-> config
50 | att-type-map
51 | pprint)
52 |
53 | ;;=> see the useful list in your REPL
54 | #_
55 | {
56 | "__call__" :method,
57 | "__class__" :meta-config,
58 | ;; ...
59 | "_update" :method,
60 | "allow_interruptions" :bool,
61 | "box_mode" :str,
62 | "classes" :list,
63 | "copy" :method,
64 | "css" :list,
65 | "defs" :list,
66 | "disable_xml_declaration" :bool,
67 | "dots_size" :float,
68 | "dynamic_print_values" :bool,
69 | "explicit_size" :bool,
70 | "fill" :bool,
71 | "force_uri_protocol" :str,
72 | "formatter" :none-type,
73 | "half_pie" :bool,
74 | "height" :int,
75 | "include_x_axis" :bool,
76 | "inner_radius" :int,
77 | "interpolate" :none-type,
78 | "interpolation_parameters" :dict,
79 | "interpolation_precision" :int,
80 | "inverse_y_axis" :bool,
81 | "js" :list,
82 | "legend_at_bottom" :bool,
83 | "legend_at_bottom_columns" :none-type,
84 | "legend_box_size" :int,
85 | "logarithmic" :bool,
86 | "margin" :int,
87 | "margin_bottom" :none-type,
88 | "margin_left" :none-type,
89 | "margin_right" :none-type,
90 | "margin_top" :none-type,
91 | "max_scale" :int,
92 | "min_scale" :int,
93 | "missing_value_fill_truncation" :str,
94 | "no_data_text" :str,
95 | "no_prefix" :bool,
96 | "order_min" :none-type,
97 | "pretty_print" :bool,
98 | "print_labels" :bool,
99 | "print_values" :bool,
100 | "print_values_position" :str,
101 | "print_zeroes" :bool,
102 | "range" :none-type,
103 | "rounded_bars" :none-type,
104 | "secondary_range" :none-type,
105 | "show_dots" :bool,
106 | "show_legend" :bool,
107 | "show_minor_x_labels" :bool,
108 | "show_minor_y_labels" :bool,
109 | "show_only_major_dots" :bool,
110 | "show_x_guides" :bool,
111 | "show_x_labels" :bool,
112 | "show_y_guides" :bool,
113 | "show_y_labels" :bool,
114 | "spacing" :int,
115 | "stack_from_top" :bool,
116 | "strict" :bool,
117 | "stroke" :bool,
118 | "stroke_style" :none-type,
119 | "style" :type,
120 | "title" :str,
121 | "to_dict" :method,
122 | "tooltip_border_radius" :int,
123 | "tooltip_fancy_mode" :bool,
124 | "truncate_label" :none-type,
125 | "truncate_legend" :none-type,
126 | "value_formatter" :default,
127 | "width" :int,
128 | "x_label_rotation" :int,
129 | "x_labels" :none-type,
130 | "x_labels_major" :none-type,
131 | "x_labels_major_count" :none-type,
132 | "x_labels_major_every" :none-type,
133 | "x_title" :none-type,
134 | "x_value_formatter" :default,
135 | "xrange" :none-type,
136 | "y_label_rotation" :int,
137 | "y_labels" :none-type,
138 | "y_labels_major" :none-type,
139 | "y_labels_major_count" :none-type,
140 | "y_labels_major_every" :none-type,
141 | "y_title" :none-type,
142 | "zero" :int}
143 | )
144 |
145 | ;; For bar-chart
146 |
147 | (comment
148 |
149 | ;; http://www.pygal.org/en/latest/documentation/configuration/chart.html
150 | (def barchart (pygal/Bar))
151 |
152 | (py. barchart add "Fibonacci" [0 1 1 2 3 5 8 13 21 34 55])
153 | ;;=>
154 |
155 | (py. barchart add "Padovan" [1 1 1 2 2 3 4 5 7 9 12])
156 | ;;=>
157 |
158 | ;; Render will just return the object
159 | (def result (py. barchart render))
160 |
161 | (type result)
162 | ;;=> :pyobject
163 |
164 | ;; To render the result in the browser try
165 | (py. barchart render_in_browser)
166 |
167 | ;; To render the result to file (svg)
168 | (py. barchart render_to_file "bar_chart.svg")
169 | ;;=> you should have the file on your system
170 |
171 | ;; To render the result as png
172 | (py. barchart render_to_png "bar_chart.png")
173 | ;;=> You should have the file on your system
174 |
175 | )
176 |
177 | ;; As we may like to try out different flavors of graphs,
178 | ;; let's create a simple function to make them easier to explore.
179 |
180 | (defn pg-plot
181 | "Plot a specific type of graph using Pygal.
182 |
183 | Examples:
184 | (pg-plot (pygal/Bar :show_legend true
185 | :title \"Pygal Bar Chart\"
186 | :x_title \"x title\"
187 | :y_title \"y title\"
188 | :fill true)
189 | \"Fibonacci\" [0 1 1 2 3 5 8 13 21 34 55]
190 | \"Padovan\" [1 1 1 2 2 3 4 5 7 9 12])"
191 | [graph & xs]
192 | (let [tmp-file (create-tmp-file "tmp-output" ".svg")
193 | output (.getAbsolutePath tmp-file)]
194 | (doseq [[x y]
195 | (partition 2 xs)]
196 | (py. graph add x y))
197 | (py. graph render_to_file output)
198 | (display-image output)
199 | (.deleteOnExit tmp-file)))
200 |
201 | (comment
202 | ;; Simple bar-graph
203 | (pg-plot (pygal/Bar :show_legend true
204 | :title "Bar Chart Example"
205 | :x_title "x title"
206 | :y_title "y title"
207 | :fill true)
208 | "Fibonacci" [0 1 1 2 3 5 8 13 21 34 55]
209 | "Padovan" [1 1 1 2 2 3 4 5 7 9 12])
210 |
211 | ;; Simple line-graph
212 | (pg-plot (pygal/Line :show_legend true
213 | :title "Line Chart Example")
214 | "Fibonacci" [0 1 1 2 3 5 8 13 21 34 55]
215 | "Padovan" [1 1 1 2 2 3 4 5 7 9 12])
216 |
217 | ;; http://www.pygal.org/en/latest/documentation/types/histogram.html
218 | (pg-plot (pygal/Histogram :show_legend true
219 | :title "Histogram Example")
220 | "Wide Bars" [[5 0 10]
221 | [4 5 13]
222 | [2 0 15]]
223 | "Narrow Bars" [[10 1 2]
224 | [12 4 4.5]
225 | [8 11 13]])
226 |
227 | )
228 |
229 | ;; XY - http://www.pygal.org/en/latest/documentation/types/xy.html
230 |
231 | (comment
232 | ;; Basic
233 | (py/from-import math cos)
234 |
235 | (map (fn [x] [(cos (/ x 10.0)) (/ x 10.0)]) (range -50 50 5))
236 | (map (fn [x] [(/ x 10.0) (cos (/ x 10.0))]) (range -50 50 5))
237 |
238 | ;; ## Python Code:
239 | ;; from math import cos
240 | ;; xy_chart = pygal.XY()
241 | ;; xy_chart.title = 'XY Cosinus'
242 | ;; xy_chart.add('x = cos(y)', [(cos(x / 10.), x / 10.) for x in range(-50, 50, 5)])
243 | ;; xy_chart.add('y = cos(x)', [(x / 10., cos(x / 10.)) for x in range(-50, 50, 5)])
244 | ;; xy_chart.add('x = 1', [(1, -5), (1, 5)])
245 | ;; xy_chart.add('x = -1', [(-1, -5), (-1, 5)])
246 | ;; xy_chart.add('y = 1', [(-5, 1), (5, 1)])
247 | ;; xy_chart.add('y = -1', [(-5, -1), (5, -1)])
248 |
249 | ;; ## Clojure Code - beautiful first-class functions, compare to Python's list comprehensions!
250 | ;; ## I am obviously biased :)
251 | (pg-plot (pygal/XY :title "XY Cosinus Example")
252 | "x = cos(y)" (map (fn [x] [(cos (/ x 10.0)) (/ x 10.0)]) (range -50 50 5))
253 | "y = cos(x)" (map (fn [x] [(/ x 10.0) (cos (/ x 10.0))]) (range -50 50 5))
254 |            "x = 1" [[1 -5] [1 5]]
255 | "x = -1" [[-1 -5] [-1 5]]
256 | "y = 1" [[-5 1] [5 1]]
257 | "y = -1" [[-5 -1] [5 -1]])
258 |
259 | ;; Scatter Plot
260 | (pg-plot (pygal/XY :stroke false
261 | :title "Correlation")
262 | "A" [[0 0] [0.1 0.2] [0.3 0.1] [0.5 1.0] [0.8 0.6] [1.0 1.08] [1.3 1.1] [2, 3.23] [2.43, 2]]
263 | "B" [[0.1 0.15] [0.12 0.23] [0.4 0.3] [0.6 0.4] [0.21 0.21] [0.5 0.3] [0.6 0.8]
264 | [0.7 0.8]]
265 | "C" [[0.05 0.01] [0.13 0.02] [1.5 1.7] [1.52 1.6] [1.8 1.63] [1.5 1.82] [1.7 1.23] [2.1 2.23] [2.3 1.98]]
266 | )
267 |
268 | ;; Time
269 | (py/from-import datetime)
270 |
271 | ;; DateTime
272 | (pg-plot (pygal/DateTimeLine
273 | :title "DateTime Example"
274 | :x_label_rotation 35
275 | :truncate_label -1)
276 | "Series" [[(datetime 2013 1 2 12 0) 300]
277 | [(datetime 2013 1 12 14 30 45) 412]
278 | [(datetime 2013 2 2 6) 823]
279 | [(datetime 2013 2 22 9 45) 672]])
280 |
281 | ;; Date
282 | (py/from-import datetime date)
283 | (pg-plot (pygal/DateLine
284 | :title "Date Example"
285 | :x_label_rotation 25
286 | :x_labels [(date 2013 1 1)
287 | (date 2013 7 1)
288 | (date 2014 1 1)
289 | (date 2015 1 1)
290 | (date 2015 7 1)])
291 | "Series" [[(date 2013 1 2) 213]
292 | [(date 2013 8 2) 281]
293 | [(date 2014 12 7) 198]
294 | [(date 2015 3 21) 120]])
295 |
296 | ;; Time
297 | (py/from-import datetime time)
298 | (pg-plot (pygal/TimeLine
299 | :title "Time Example"
300 | :x_label_rotation 25)
301 | "Series" [[(time) 0]
302 | [(time 6) 5]
303 | [(time 8 30) 12]
304 | [(time 11 59 59) 4]
305 | [(time 18) 10]
306 | [(time 23 30) -1]])
307 |
308 | ;; TimeDelta
309 | (py/from-import datetime timedelta)
310 |
311 | (pg-plot (pygal/TimeDeltaLine
312 | :title "Time Delta Example"
313 | :x_label_rotation 25)
314 | "Series" [[(timedelta) 0]
315 | [(timedelta :seconds 6) 5]
316 | [(timedelta :minutes 11 :seconds 59) 4]
317 | [(timedelta :days 3 :microseconds 30) 12]
318 | [(timedelta :weeks 1) 10]])
319 |
320 | )
321 |
322 | ;; Pie
323 | ;; http://www.pygal.org/en/latest/documentation/types/pie.html#
324 | (comment
325 | ;; Basic
326 | (pg-plot (pygal/Pie :show_legend true
327 | :title "Browser usage in Feb 2012 (in %)")
328 | "IE" 19.5
329 | "Chrome" 36.3
330 | "Safari" 4.5
331 | "Opera" 2.3)
332 |
333 | ;; Multi-series
334 | (pg-plot (pygal/Pie :show_legend true
335 | :title "Browser usage in Feb 2012 (in %)")
336 | "IE" [5.7 10.2 2.6 1]
337 | "Firefox" [0.6 16.8 7.4 2.2 1.2 1 1 1.1 4.3 1]
338 | "Chrome" [0.3 0.9 17.1 15.3 0.6 0.5 1.6]
339 | "Safari" [4.4 0.1]
340 | "Opera" [0.1 1.6 0.1 0.5])
341 |
342 | ;; Donut
343 | (pg-plot (pygal/Pie
344 | :inner_radius 0.4
345 | :show_legend true
346 | :title "Browser usage in Feb 2012 (in %)")
347 | "IE" 19.5
348 | "Chrome" 36.3
349 | "Safari" 4.5
350 | "Opera" 2.3)
351 |
352 | ;; Or a ring
353 | (pg-plot (pygal/Pie
354 | :inner_radius 0.75
355 | :show_legend true
356 | :title "Browser usage in Feb 2012 (in %)")
357 | "IE" 19.5
358 | "Chrome" 36.3
359 | "Safari" 4.5
360 | "Opera" 2.3)
361 |
362 | ;; Or Half pie
363 | (pg-plot (pygal/Pie
364 | :half_pie true
365 | :show_legend true
366 | :title "Browser usage in Feb 2012 (in %)")
367 | "IE" 19.5
368 | "Chrome" 36.3
369 | "Safari" 4.5
370 | "Opera" 2.3)
371 |
372 | )
373 |
374 | ;; Radar http://www.pygal.org/en/latest/documentation/types/radar.html
375 | (comment
376 |
377 | (pg-plot (pygal/Radar
378 | :title "V8 Benchmark Results"
379 | :x_labels ["Richards"
380 | "DeltaBlue"
381 | "Crypto"
382 | "RayTrace"
383 | "EarleyBoyer"
384 | "RegExp"
385 | "Splay"
386 | "NavierStokes"])
387 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607]
388 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450]
389 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669]
390 | "IE", [43, 41, 59, 79, 144, 136, 34, 102])
391 |
392 | )
393 |
394 | ;; Box - http://www.pygal.org/en/latest/documentation/types/box.html#extremes-default
395 | (comment
396 |
397 |   ;; Extremes (default)
398 | (pg-plot (pygal/Box
399 | :title "V8 Benchmark Results"
400 | :x_labels ["Richards"
401 | "DeltaBlue"
402 | "Crypto"
403 | "RayTrace"
404 | "EarleyBoyer"
405 | "RegExp"
406 | "Splay"
407 | "NavierStokes"])
408 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607]
409 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450]
410 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669]
411 | "IE", [43, 41, 59, 79, 144, 136, 34, 102])
412 |
413 |   ;; 1.5 IQR (interquartile range)
414 | (pg-plot (pygal/Box
415 | :box_mode "1.5IQR"
416 | :title "V8 Benchmark Results"
417 | :x_labels ["Richards"
418 | "DeltaBlue"
419 | "Crypto"
420 | "RayTrace"
421 | "EarleyBoyer"
422 | "RegExp"
423 | "Splay"
424 | "NavierStokes"])
425 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607]
426 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450]
427 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669]
428 | "IE", [43, 41, 59, 79, 144, 136, 34, 102])
429 |
430 | ;; Tukey
431 | (pg-plot (pygal/Box
432 | :box_mode "tukey"
433 | :title "V8 Benchmark Results"
434 | :x_labels ["Richards"
435 | "DeltaBlue"
436 | "Crypto"
437 | "RayTrace"
438 | "EarleyBoyer"
439 | "RegExp"
440 | "Splay"
441 | "NavierStokes"])
442 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607]
443 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450]
444 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669]
445 | "IE", [43, 41, 59, 79, 144, 136, 34, 102])
446 |
447 | ;; Standard deviation
448 | (pg-plot (pygal/Box
449 | :box_mode "stdev"
450 | :title "V8 Benchmark Results"
451 | :x_labels ["Richards"
452 | "DeltaBlue"
453 | "Crypto"
454 | "RayTrace"
455 | "EarleyBoyer"
456 | "RegExp"
457 | "Splay"
458 | "NavierStokes"])
459 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607]
460 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450]
461 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669]
462 | "IE", [43, 41, 59, 79, 144, 136, 34, 102])
463 |
464 | ;; Population Standard Deviation
465 | (pg-plot (pygal/Box
466 | :box_mode "pstdev"
467 | :title "V8 Benchmark Results"
468 | :x_labels ["Richards"
469 | "DeltaBlue"
470 | "Crypto"
471 | "RayTrace"
472 | "EarleyBoyer"
473 | "RegExp"
474 | "Splay"
475 | "NavierStokes"])
476 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607]
477 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450]
478 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669]
479 | "IE", [43, 41, 59, 79, 144, 136, 34, 102])
480 |
481 | )
482 |
483 | ;; Dot - http://www.pygal.org/en/latest/documentation/types/dot.html#
484 |
485 | (comment
486 | ;; Basic
487 | (pg-plot (pygal/Dot
488 | :x_label_rotation 30
489 | :title "V8 Benchmark Results"
490 | :x_labels ["Richards"
491 | "DeltaBlue"
492 | "Crypto"
493 | "RayTrace"
494 | "EarleyBoyer"
495 | "RegExp"
496 | "Splay"
497 | "NavierStokes"])
498 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607]
499 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450]
500 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669]
501 | "IE", [43, 41, 59, 79, 144, 136, 34, 102])
502 |
503 | ;; Negative
504 | (pg-plot (pygal/Dot
505 | :x_label_rotation 30)
506 | "Normal" [10 50 76 80 25]
507 | "With negatives" [0 -34 -29 39 -75]
508 | )
509 |
510 | )
511 |
512 | ;; Funnel - http://www.pygal.org/en/latest/documentation/types/funnel.html
513 | (comment
514 |
515 | ;; Basic
516 | (pg-plot (pygal/Funnel
517 | :title "V8 Benchmark Results"
518 | :x_labels ["Richards"
519 | "DeltaBlue"
520 | "Crypto"
521 | "RayTrace"
522 | "EarleyBoyer"
523 | "RegExp"
524 | "Splay"
525 | "NavierStokes"])
526 | "Chrome", [6395, 8212, 7520, 7218, 12464, 1660, 2123, 8607]
527 | "Firefox", [7473, 8099, 11700, 2651, 6361, 1044, 3797, 9450]
528 | "Opera", [3472, 2933, 4203, 5229, 5810, 1828, 9013, 4669]
529 | "IE", [43, 41, 59, 79, 144, 136, 34, 102])
530 | )
531 |
532 | ;; SolidGauge - http://www.pygal.org/en/latest/documentation/types/solidgauge.html
533 |
534 | (comment
535 |
536 | ;; Normal
537 | (pg-plot (pygal/SolidGauge
538 | :inner_radius 0.70
539 |              :title "Solid Gauge Normal Example"
540 | :value_formatter (fn [x] (format "%s %%" x)))
541 | "Series 1" [{:value 225000 :max_value 1275000}]
542 | "Series 2" [{:value 110 :max_value 100}]
543 | "Series 3" [{:value 3}]
544 | "Series 4" [{:value 51 :max_value 100}
545 | {:value 12 :max_value 100}]
546 | "Series 5" [{:value 79 :max_value 100}]
547 | "Series 6" [{:value 99}]
548 | "Series 7" [{:value 100 :max_value 100}])
549 |
550 |
551 | ;; Half
552 | (let [style (py.- (pygal/Config :value_font_size 10) style)]
553 | (pg-plot (pygal/SolidGauge :half_pie true
554 | :inner_radius 0.70
555 |                              :title "Solid Gauge Half Example"
556 | :style style
557 | :value_formatter (fn [x] (format "%s %%" x)))
558 | "Series 1" [{:value 225000 :max_value 1275000}]
559 | "Series 2" [{:value 110 :max_value 100}]
560 | "Series 3" [{:value 3}]
561 | "Series 4" [{:value 51 :max_value 100}
562 | {:value 12 :max_value 100}]
563 | "Series 5" [{:value 79 :max_value 100}]
564 | "Series 6" [{:value 99}]
565 | "Series 7" [{:value 100 :max_value 100}]))
566 |
567 | )
568 |
569 | ;; TODO: add example for Pyramid
570 | ;; http://www.pygal.org/en/latest/documentation/types/pyramid.html
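;; A minimal sketch toward that TODO, reusing the pg-plot helper above.
;; The age-group labels and counts below are made up purely for illustration.
(comment
  (pg-plot (pygal/Pyramid
            :title "Population Pyramid Example (illustrative data)"
            :x_labels ["0-19" "20-39" "40-59" "60-79" "80+"])
           "Females" [980 1020 950 620 280]
           "Males" [1030 1050 930 560 190])
  )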
571 |
572 | ;; Treemap
573 | ;; http://www.pygal.org/en/latest/documentation/types/treemap.html
574 | (comment
575 | (pg-plot (pygal/Treemap
576 | :title "Binary Treemap Example")
577 | "A" [2, 1, 12, 4, 2, 1, 1, 3, 12, 3, 4, nil, 9]
578 | "B" [4, 2, 5, 10, 3, 4, 2, 7, 4, -10, nil, 8, 3, 1]
579 | "C" [3, 8, 3, 3, 5, 3, 3, 5, 4, 12]
580 | "D" [23, 18]
581 | "E" [1, 2, 1, 2, 3, 3, 1, 2, 3,4, 3, 1, 2, 1, 1, 1, 1, 1]
582 | "F" [31]
583 | "G" [5, 9.3, 8.1, 12, 4, 3, 2]
584 | "H" [12, 3, 3])
585 | )
586 |
587 | ;; Value configuration
588 | ;; http://www.pygal.org/en/stable/documentation/configuration/value.html
589 | (comment
590 |
591 | ;; Labels
592 | (pg-plot (pygal/Bar :title "Labels Example")
593 | "First" [{:value 2 :label "This is the first"}]
594 | "Second" [{:value 4 :label "This is the second"}]
595 | "Third" 7
596 | "Fourth" [{:value 5}]
597 | "Fifth" [{:value 3 :label "This is the fifth"}])
598 |
599 | ;; Style
600 | ;; You can force the color of a value by specifying a color key
601 | (pg-plot (pygal/Bar :title "Style with Color Example")
602 | "Series" [{:value 2} 3 4
603 | {:value 10, :color :blue}
604 |            {:value 11, :color "rgba(255, 45, 20, 0.6)"} 4 2])
605 |
606 |   ;; The color key sets both the fill and the stroke style. You can also set the CSS style manually:
607 | (pg-plot (pygal/Bar :title "Style with custom stroke Example")
608 | "Series" [{:value 2} 3 4
609 | {:value 10,
610 | :style "fill: red; stroke: black; stroke-width: 4"}
611 | {:value 11,
612 |                      :style "fill: rgba(255, 45, 20, 0.6); stroke: black; stroke-dasharray: 15, 10, 5, 10, 15"} 4 2]
613 | )
614 |
615 | ;; Value formatting
616 |
617 | ;; You can add `formatter` metadata for a specific value.
618 | ;; Note: we can't use the `pg-plot` helper here, because the `add` calls take additional keyword arguments.
619 | ;; Python Code:
620 | ;; chart = pygal.Bar(print_values=True, value_formatter=lambda x: '{}$'.format(x))
621 | ;; chart.add('bar', [.0002, .0005, .00035], formatter=lambda x: '<%s>' % x)
622 | ;; chart.add('bar', [.0004, {'value': .0009, 'formatter': lambda x: '«%s»' % x}, .001])
623 | ;; chart.render()
624 |
625 | ;; Clojure Code: trying to keep it the same as Python above.
626 | (let [tmp-file (File/createTempFile "tmp-output" ".svg")
627 | output (.getAbsolutePath tmp-file)
628 | graph (pygal/Bar :print_values true
629 | :title "Value Formatting"
630 | :value_formatter (fn [x] (format "%.4f $" x)))]
631 | (py. graph add "bar" [0.0002 0.0005 0.00035]
632 | :formatter (fn [x] (format "<%.4f>" x)))
633 | (py. graph add "baz" [0.0004 {:value 0.0009
634 | :formatter (fn [x] (format "<<%.4f>>" x))}])
635 | (py. graph render_to_file output)
636 | (sh/sh "open" output)
637 | (.deleteOnExit tmp-file))
638 |
639 |   ;; Node attributes:
640 |   ;; It is possible to pass SVG attributes to the node representing a value.
641 | (pg-plot (pygal/Line
642 | :title "Node Attributes Example")
643 | "Series" [{:value 1 :node {:r 2}}
644 | {:value 2 :node {:r 4}}
645 | {:value 3 :node {:r 6}}
646 | {:value 4 :node {:r 8}}])
647 |
648 | )
649 |
650 | ;; Links
651 | ;; http://www.pygal.org/en/stable/documentation/configuration/value.html#links
652 |
653 | (comment
654 | ;; Basic
655 |   ;; Add hyperlinks
656 | (pg-plot (pygal/Bar
657 | :title "Link - Basic Example")
658 | "First" [{:value 2
659 | :label "This is the first"
660 | :xlink "http://en.wikipedia.org/wiki/First"}]
661 | "Second" [{:value 4
662 | :label "This is the second"
663 | :xlink "http://en.wikipedia.org/wiki/Second"}]
664 | "Third" 7
665 | "Fourth" [{:value 5
666 | :xlink "http://en.wikipedia.org/wiki/Fourth"}]
667 | "Fifth" [{:value 3
668 | :label "This is the fifth"
669 | :xlink "http://en.wikipedia.org/wiki/Fifth"}])
670 |
671 | ;; Advanced
672 |   ;; Pass a map as the xlink with all the link attributes
673 | (pg-plot (pygal/Bar
674 | :title "Link - Advanced Example")
675 | "First" [{:value 2
676 | :label "This is the first"
677 | :xlink "http://en.wikipedia.org/wiki/First"}]
678 | "Second" [{:value 4
679 | :label "This is the second"
680 | :xlink {:href "http://en.wikipedia.org/wiki/Second"
681 | :target "_top"}}]
682 | "Third" 7
683 | "Fourth" [{:value 5
684 | :xlink {:href "http://en.wikipedia.org/wiki/Fourth"
685 | :target "_blank"}}]
686 | "Fifth" [{:value 3
687 | :label "This is the fifth"
688 | :xlink {:href "http://en.wikipedia.org/wiki/Fifth"
689 | :target "_self"}}])
690 |
691 | ;; TODO: http://www.pygal.org/en/stable/documentation/configuration/value.html#legend
692 | ;; TODO: http://www.pygal.org/en/stable/documentation/configuration/value.html#confidence-intervals
693 | )
694 |
695 | ;; Sparklines - http://www.pygal.org/en/stable/documentation/sparks.html#
696 | ;; TODO:
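;; A minimal sketch toward that TODO. pygal can render any chart as a sparkline:
;; render_sparktext gives a tiny unicode chart and render_sparkline a small SVG.
;; Here we just print the text version, so no temp file or browser is needed.
(comment
  (let [chart (pygal/Line)]
    (py. chart add "" [1 3 5 16 13 3 7])
    (println (py. chart render_sparktext))))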
697 |
698 | ;; Maps
699 | (comment
700 | ;; Require
701 | ;; pip install pygal_maps_world
702 | (require-python '[pygal_maps_world :as pygal-mw])
703 | ;; TODO: continue world map example
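  ;; A minimal sketch toward that TODO. It assumes the plugin registers the
  ;; pygal.maps.world module as described in the pygal docs; values are keyed
  ;; by ISO country code and the numbers below are purely illustrative.
  (require-python '[pygal.maps.world :as pygal-world])
  (pg-plot (pygal-world/World :title "Illustrative values by country")
           "Series A" {"fr" 42 "us" 31 "jp" 18}
           "Series B" {"de" 27 "br" 12})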
704 | )
705 |
706 | ;; http://www.pygal.org/en/latest/documentation/types/maps/index.html
707 | ;; - World Map
708 | ;; - French Map
709 | ;; - Department
710 | ;; - Regions
711 | ;; - Department list
712 | ;; - Region list
713 | ;; - Swiss Map
714 | ;; - Canton
715 | ;; - Canton list
716 |
--------------------------------------------------------------------------------
/src/gigasquid/pytorch_mnist.clj:
--------------------------------------------------------------------------------
1 | ;; This example was ported from the MNIST example in pytorch/examples:
2 | ;; https://github.com/pytorch/examples.git
3 | (ns gigasquid.pytorch-mnist
4 | (:require
5 | [libpython-clj.python :as py
6 | :refer [py* py** py. py.. py.- $a $.
7 | as-jvm with-gil-stack-rc-context
8 | stack-resource-context
9 | import-module
10 | get-attr get-item att-type-map call call-attr]]
11 | [libpython-clj.require :refer [require-python]]))
12 |
13 | ;;; sudo pip3 install torch
14 | ;;; sudo pip3 install torchvision
15 |
16 | (require-python
17 | '[torch :as torch]
18 | '[torch.cuda :as cuda]
19 | '[torch.onnx :as onnx]
20 | '[torch.nn :as nn :refer [Conv2d Dropout2d Linear]]
21 | '[torch.optim :as optim]
22 | '[torch.utils.data :as tud]
23 | '[torch.nn.functional :as F]
24 | '[torchvision.datasets :as datasets]
25 | '[torchvision.transforms :as transforms]
26 | '[torch.optim.lr_scheduler :as lr_scheduler])
27 |
28 | (def enumerate (-> (py/import-module "builtins")
29 | (get-attr "enumerate")))
30 |
31 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32 |
33 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34 | ;; If you have CUDA but do not want to use it, set this to false
35 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36 | (def ^:dynamic *use-cuda* (cuda/is_available))
37 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
38 |
39 | (def log-interval 100)
40 |
41 | ;; Yann LeCun:
42 | ;; Training with large minibatches is bad for your health.
43 | ;; More importantly, it's bad for your test error.
44 | ;; Friends dont let friends use minibatches larger than 32.
45 | ;; https://twitter.com/ylecun/status/989610208497360896
46 | ;;
47 | ;; input batch size for training (default: 64)
48 | (def batch-size 32)
49 | ;; input batch size for testing (default: 1000)
50 | (def test-batch-size 1000)
51 | ;; number of epochs to train (default: 14)
52 | (def epochs 14)
53 | ;; learning rate (default: 1.0)
54 | (def learning-rate 1.0)
55 | ;; Learning rate step gamma (default: 0.7)
56 | (def gamma 0.7)
57 | ;; random seed (default: 1)
58 | (def seed 42)
59 |
60 | (def mnist-mean [0.1307])
61 | (def mnist-std [0.3081])
62 |
63 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
64 |
65 | (defonce device (atom nil))
66 | (defonce train-data (atom nil))
67 | (defonce train-loader (atom nil))
68 | (defonce test-data (atom nil))
69 | (defonce test-loader (atom nil))
70 | (defonce model (atom nil))
71 | (defonce optimizer (atom nil))
72 |
73 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
74 |
75 | ;;; load MNIST data from the internet
76 | (defn load-data! []
77 | (let [gpu-opts (if *use-cuda*
78 | {:num_workers 1 :pin_memory true}
79 | {})
80 | mnist-transform (transforms/Compose
81 | [(transforms/ToTensor)
82 | (transforms/Normalize mnist-mean mnist-std)])]
83 | ;; training data and loader
84 | (reset! train-data
85 | (datasets/MNIST "./resources/pytorch/data"
86 | :train true :download true :transform mnist-transform))
87 | (let [kwargs (merge {:batch_size batch-size :shuffle true}
88 | gpu-opts)
89 | args (into [@train-data] (mapcat identity kwargs))]
90 | (reset! train-loader (apply tud/DataLoader args)))
91 |
92 | ;; test data and loader
93 | (reset! test-data
94 | (datasets/MNIST "./resources/pytorch/data"
95 | :train false :download true :transform mnist-transform))
96 | (let [kwargs (merge {:batch_size test-batch-size :shuffle true}
97 | gpu-opts)
98 | args (into [@test-data] (mapcat identity kwargs))]
99 | (reset! test-loader (apply tud/DataLoader args))))
100 |
101 | nil)
102 |
103 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
104 |
105 |
106 | ;;; neural network definition, uses convolutional neural nets (CNNs)
107 | (def MyNet
108 | (py/create-class
109 | "MyNet" [nn/Module]
110 | {"__init__"
111 | (py/make-tuple-instance-fn
112 | (fn [self]
113 | (py. nn/Module __init__ self)
114 | (py/set-attrs!
115 | self
116 | {"conv1" (Conv2d 1 32 3 1)
117 | "conv2" (Conv2d 32 64 3 1)
118 | "dropout1" (Dropout2d 0.25)
119 | "dropout2" (Dropout2d 0.5)
120 | "fc1" (Linear 9216 128)
121 | "fc2" (Linear 128 10)})
122 |
123 | ;; __init__ must return nil
124 | nil))
125 | "forward"
126 | (py/make-tuple-instance-fn
127 | (fn [self x]
128 | (let [x (py. self conv1 x)
129 | x (F/relu x)
130 | x (py. self conv2 x)
131 | x (F/max_pool2d x 2)
132 | x (py. self dropout1 x)
133 | x (torch/flatten x 1)
134 | x (py. self fc1 x)
135 | x (F/relu x)
136 | x (py. self dropout2 x)
137 | x (py. self fc2 x)
138 | output (F/log_softmax x :dim 1)]
139 | output))
140 | :arg-converter as-jvm
141 | :method-name "forward")}))
142 |
143 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
144 |
145 | (defn setup! []
146 | (py/gc!)
147 | (torch/manual_seed seed)
148 | (reset! device (if *use-cuda*
149 | (torch/device "cuda")
150 | (torch/device "cpu")))
151 | (load-data!)
152 | (reset! model
153 | (let [inst (MyNet)]
154 | (py. inst "to" @device)))
155 | (reset! optimizer
156 | (optim/Adadelta (py. @model "parameters")
157 | :lr learning-rate))
158 | nil)
159 |
160 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
161 |
162 | (defn train [args model device train-loader optimizer epoch]
163 | (py. model train)
164 | (dorun
165 | (for [[batch-idx [data target]] (enumerate train-loader)]
166 | (with-gil-stack-rc-context
167 | (let [data (py. data to device)
168 | target (py. target to device)]
169 | (py. optimizer zero_grad)
170 | (let [output (py. model __call__ data)
171 | loss (F/nll_loss output target)]
172 | (py. loss backward)
173 | (py. optimizer step)
174 | (when (= 0 (mod batch-idx log-interval))
175 | (println
176 | (format "Train Epoch: %d [%d/%d (%.1f%%)]\tLoss: %.6f"
177 | epoch
178 | (* batch-idx (int (py. data "__len__")))
179 | (py. (py.- train-loader dataset) "__len__")
180 | (/ (* 100.0 batch-idx) (int (py. train-loader "__len__")))
181 | (py. loss item))))))))))
182 |
183 | (defn test-model [args model device test-loader]
184 | (py. model eval)
185 |   (let [test-loss (atom 0)
186 | correct (atom 0)]
187 | (letfn [(test-batch [data target]
188 | (let [data (py. data to device)
189 | target (py. target to device)
190 | output (py. model __call__ data)]
191 |               (swap! test-loss +
192 | (py. (F/nll_loss output target :reduction "sum") item))
193 | (let [pred (py. output argmax :dim 1 :keepdim true)]
194 | (swap! correct +
195 | (-> (py. pred eq (py. target view_as pred))
196 | (py. sum)
197 | (py. item))))))]
198 |
199 | ; pytorch crash with "python error in flight"
200 | ; (py/with [ng torch/no_grad]
201 | ; (dorun
202 | ; (for [[data target] test-loader]
203 | ; (with-gil-stack-rc-context
204 | ; (test-batch data target)))))
205 |
206 | ; pytorch crash with "python error in flight"
207 | ; (py/with [ng torch/no_grad]
208 | ; (dorun
209 | ; (for [[data target] test-loader]
210 | ; (stack-resource-context
211 | ; (test-batch data target)))))
212 |
213 | (let [no-grad (torch/no_grad)]
214 | (try
215 | (py. no-grad __enter__)
216 | (dorun
217 | (for [[data target] test-loader]
218 | (with-gil-stack-rc-context
219 | (test-batch data target))))
220 | (finally
221 |           (py. no-grad __exit__ nil nil nil)))))
222 |
223 | (let [data-set (py.- test-loader dataset)
224 | n (py. data-set __len__)]
225 |     (swap! test-loss / (py. data-set __len__))
226 | (println
227 | (format "\nTest set: Average loss: %.4f, Accuracy %d/%d (%.1f%%)\n"
228 |              @test-loss @correct
229 | n
230 | (/ (* 100. @correct) (int n)))))))
231 |
232 | (defn train-test-loop!
233 | "RUN THIS IN A CONSOLE REPL IF YOUR EDITOR REPL DOESN'T HAVE STREAMING"
234 | []
235 | (let [scheduler (lr_scheduler/StepLR @optimizer :step_size 1 :gamma gamma)
236 | args {}]
237 | (dorun
238 | (for [epoch (range epochs)]
239 | (do
240 | (train args @model @device @train-loader @optimizer epoch)
241 | (test-model args @model @device @test-loader)
242 | (py. scheduler step))))))
243 |
244 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
245 |
246 | ;;; save the model to the universal ONNX format
247 | ;;; you can use NETRON at https://github.com/lutzroeder/netron to visualize
248 | ;;; this model.
249 | (defn save-model! []
250 | (let [tensor (first (first (seq @train-loader)))
251 | size (vec (py. tensor size))
252 | args (into size [:device "cuda"])
253 | dummy-input (apply torch/randn args)]
254 | (onnx/export @model dummy-input "resources/pytorch/models/mnist_cnn.onnx"
255 | :verbose true
256 | :output_names ["digit_from_0_to_9"])
257 | nil))
258 |
259 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
260 |
261 | (defn --profile-cuda []
262 | (binding [*use-cuda* true]
263 | (setup!)
264 | (train-test-loop!)))
265 |
266 | (defn --profile-no-cuda []
267 | (binding [*use-cuda* false]
268 | (setup!)
269 | (train-test-loop!)))
270 |
271 | (comment
272 | (setup!)
273 | (train-test-loop!)
274 | (save-model!))
275 |
--------------------------------------------------------------------------------
/src/gigasquid/sci_spacy.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.sci-spacy
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [clojure.java.shell :as sh]))
5 |
6 | ;;;; You need to pip install the model
7 | ;; sudo pip3 install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz
8 |
9 | ;;; pip install scispacy
10 |
11 | ;;; tutorial from https://allenai.github.io/scispacy/
12 |
13 | (require-python '[spacy :as spacy])
14 | (require-python '[scispacy :as scispacy])
15 |
16 | (def nlp (spacy/load "en_core_sci_sm"))
17 | (def text "Myeloid derived suppressor cells (MDSC) are immature
18 | myeloid cells with immunosuppressive activity.
19 | They accumulate in tumor-bearing mice and humans
20 | with different types of cancer, including hepatocellular
21 | carcinoma (HCC).")
22 |
23 | (def doc (nlp text))
24 |
25 | (py/python-type (py.- doc sents)) ;=> :generator
26 | (py/python-type (py.- doc ents)) ;=> :tuple
27 |
28 | ;;; basically you should map over these things
29 |
30 | (map (fn [ent] (py.- ent text)) (py.- doc ents))
31 | ;=> ("Myeloid" "suppressor cells" "MDSC" "immature" "myeloid cells" "immunosuppressive activity" "accumulate" "tumor-bearing mice" "humans" "cancer" "hepatocellular \n carcinoma" "HCC")
32 |
33 | ;;; what sort of things can you look at on the ent?
34 | (-> (py.- doc ents) first py/att-type-map) ;;; lots!
35 |
36 | ;; {"_" :underscore,
37 | ;; "__class__" :type,
38 | ;; "__delattr__" :method-wrapper,
39 | ;; "__dir__" :builtin-function-or-method,
40 | ;; "__doc__" :str,
41 | ;; "__eq__" :method-wrapper,
42 | ;; "__format__" :builtin-function-or-method,
43 | ;; "__ge__" :method-wrapper,
44 | ;; "__getattribute__" :method-wrapper,
45 | ;; "__getitem__" :method-wrapper,
46 | ;; "__gt__" :method-wrapper,
47 | ;; "__hash__" :method-wrapper,
48 | ;; "__init__" :method-wrapper,
49 | ;; "__init_subclass__" :builtin-function-or-method,
50 | ;; "__iter__" :method-wrapper,
51 | ;; "__le__" :method-wrapper,
52 | ;; "__len__" :method-wrapper,
53 | ;; "__lt__" :method-wrapper,
54 | ;; "__ne__" :method-wrapper,
55 | ;; "__new__" :builtin-function-or-method,
56 | ;; "__pyx_vtable__" :py-capsule,
57 | ;; "__reduce__" :builtin-function-or-method,
58 | ;; "__reduce_ex__" :builtin-function-or-method,
59 | ;; "__repr__" :method-wrapper,
60 | ;; "__setattr__" :method-wrapper,
61 | ;; "__sizeof__" :builtin-function-or-method,
62 | ;; "__str__" :method-wrapper,
63 | ;; "__subclasshook__" :builtin-function-or-method,
64 | ;; "_fix_dep_copy" :builtin-function-or-method,
65 | ;; "_recalculate_indices" :builtin-function-or-method,
66 | ;; "_vector" :none-type,
67 | ;; "_vector_norm" :none-type,
68 | ;; "as_doc" :builtin-function-or-method,
69 | ;; "conjuncts" :tuple,
70 | ;; "doc" :doc,
71 | ;; "end" :int,
72 | ;; "end_char" :int,
73 | ;; "ent_id" :int,
74 | ;; "ent_id_" :str,
75 | ;; "ents" :list,
76 | ;; "get_extension" :builtin-function-or-method,
77 | ;; "get_lca_matrix" :builtin-function-or-method,
78 | ;; "has_extension" :builtin-function-or-method,
79 | ;; "has_vector" :bool,
80 | ;; "kb_id" :int,
81 | ;; "kb_id_" :str,
82 | ;; "label" :int,
83 | ;; "label_" :str,
84 | ;; "lefts" :generator,
85 | ;; "lemma_" :str,
86 | ;; "lower_" :str,
87 | ;; "merge" :builtin-function-or-method,
88 | ;; "n_lefts" :int,
89 | ;; "n_rights" :int,
90 | ;; "noun_chunks" :generator,
91 | ;; "orth_" :str,
92 | ;; "remove_extension" :builtin-function-or-method,
93 | ;; "rights" :generator,
94 | ;; "root" :token,
95 | ;; "sent" :span,
96 | ;; "sentiment" :float,
97 | ;; "set_extension" :builtin-function-or-method,
98 | ;; "similarity" :builtin-function-or-method,
99 | ;; "start" :int,
100 | ;; "start_char" :int,
101 | ;; "string" :str,
102 | ;; "subtree" :generator,
103 | ;; "tensor" :ndarray,
104 | ;; "text" :str,
105 | ;; "text_with_ws" :str,
106 | ;; "to_array" :builtin-function-or-method,
107 | ;; "upper_" :str,
108 | ;; "vector" :ndarray,
109 | ;; "vector_norm" :float-32,
110 | ;; "vocab" :vocab}
111 |
112 | ;;; same with sentences
113 | (map (fn [sent] (py.- sent text)) (py.- doc sents))
114 | ;; ("Myeloid derived suppressor cells (MDSC) are immature \n myeloid cells with immunosuppressive activity. \n "
115 | ;; "They accumulate in tumor-bearing mice and humans \n with different types of cancer, including hepatocellular \n carcinoma (HCC).")
116 |
117 |
118 | (require-python '[spacy.displacy :as displacy])
119 | (spit "my-pic.svg" (displacy/render (first (py.- doc sents)) :style "dep"))
120 | (sh/sh "open" "-a" "Google Chrome" "my-pic.svg")
121 |
122 |
--------------------------------------------------------------------------------
/src/gigasquid/seaborn.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.seaborn
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 | (require-python '[seaborn :as sns])
7 | (require-python '[matplotlib.pyplot :as pyplot])
8 |
9 | ;;; What is seaborn? Really cool statistical plotting
10 |
11 | ;;; sudo pip3 install seaborn
12 |
13 | (sns/set) ;;; set default style
14 |
15 | ;;; code tutorial from https://seaborn.pydata.org/introduction.html
16 |
17 | (def dots (sns/load_dataset "dots"))
18 | (py. dots head)
19 | ;; align ... firing_rate
20 | ;; 0 dots ... 33.189967
21 | ;; 1 dots ... 31.691726
22 | ;; 2 dots ... 34.279840
23 | ;; 3 dots ... 32.631874
24 | ;; 4 dots ... 35.060487
25 |
26 | ;; [5 rows x 5 columns]
27 |
28 | (take 5 dots) ;=> ("align" "choice" "time" "coherence" "firing_rate")
29 | ;; seaborn will be most powerful when your datasets have a particular organization. This format is alternately called “long-form” or “tidy” data and is described in detail in Hadley Wickham's "Tidy Data" paper. The rules can be simply stated:
30 |
31 | ;; Each variable is a column
32 |
33 | ;; Each observation is a row
34 |
35 | ;;;; statistical relationship plotting
36 |
37 | (plot/with-show
38 | (sns/relplot :x "time" :y "firing_rate" :col "align"
39 | :hue "choice" :size "coherence" :style "choice"
40 | :facet_kws {:sharex false} :kind "line"
41 | :legend "full" :data dots))
42 |
43 | ;;;; statistical estimation and error bars
44 |
45 | (def fmri (sns/load_dataset "fmri"))
46 |
47 | (plot/with-show
48 | (sns/relplot :x "timepoint" :y "signal" :col "region"
49 | :hue "event" :style "event" :kind "line"
50 | :data fmri))
51 |
52 | ;;; enhance a scatter plot to include a linear regression model
53 |
54 | (def tips (sns/load_dataset "tips"))
55 | (plot/with-show
56 | (sns/lmplot :x "total_bill" :y "tip" :col "time" :hue "smoker" :data tips))
57 |
58 | ;;; data analysis with categorical values
59 |
60 | (plot/with-show
61 | (sns/catplot :x "day" :y "total_bill" :hue "smoker" :kind "swarm" :data tips))
62 |
63 | (plot/with-show
64 | (sns/catplot :x "day" :y "total_bill" :hue "smoker" :kind "bar" :data tips))
65 |
66 | ;;; visualizing dataset structure
67 |
68 | (def iris (sns/load_dataset "iris"))
69 | (plot/with-show
70 | (sns/jointplot :x "sepal_length" :y "petal_length" :data iris))
71 |
72 | (plot/with-show
73 | (sns/pairplot :data iris :hue "species"))
74 |
--------------------------------------------------------------------------------
/src/gigasquid/sk_learn/datasets_estimators.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.sk-learn.datasets-estimators
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 | (require-python '[sklearn.datasets :as datasets])
7 | (require-python '[matplotlib.pyplot :as pyplot])
8 | (require-python '[matplotlib.pyplot.cm :as pyplot-cm])
9 |
10 | ;;;; From https://scikit-learn.org/stable/tutorial/statistical_inference/settings.html
11 |
12 | ;;; Taking a look at the standard iris dataset
13 |
14 | (def iris (datasets/load_iris))
15 | (def data (py.- iris data))
16 | (py.- data shape);-> (150, 4)
17 |
18 | ;;; It is made of 150 observations of irises, each described by 4 features: their sepal and petal length and width
19 |
20 | ;;; An example of reshaping is with the digits dataset
21 | ;;; The digits dataset is made of 1797 8x8 images of hand-written digits
22 |
23 | (def digits (datasets/load_digits))
24 | (def digit-images (py.- digits images))
25 | (py.- digit-images shape) ;=> (1797, 8, 8)
26 |
27 | (plot/with-show
28 | (pyplot/imshow (last digit-images) :cmap pyplot-cm/gray_r))
29 |
30 | ;;; To use this dataset we transform each 8x8 image into a feature vector of length 64
31 |
32 | (def data (py. digit-images reshape (first (py.- digit-images shape)) -1))
33 |
34 | (py.- data shape) ;=> (1797, 64)
35 |
36 |
37 | ;;;; Estimator objects
38 |
39 | ;; An estimator is any object that learns from data:
40 | ;; it may be a classification, regression or clustering algorithm, or a transformer that extracts/filters useful features from raw data.
41 |
42 | ;; All estimator objects expose a fit method that takes a dataset (usually a 2-d array); a minimal sketch follows below.
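;; An illustrative sketch of the fit pattern on the reshaped digits data above.
;; KMeans and the n_clusters value are arbitrary choices here, not part of the tutorial.
(comment
  (require-python '[sklearn.cluster :as cluster])
  (let [estimator (cluster/KMeans :n_clusters 10)]
    (py. estimator fit data)               ;; learn from the (1797, 64) feature matrix
    (take 10 (py.- estimator labels_))))   ;; cluster assignments for the first few digits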
43 |
44 |
--------------------------------------------------------------------------------
/src/gigasquid/sk_learn/info.txt:
--------------------------------------------------------------------------------
1 | This is based on the statistical learning for scientific data processing
2 |
3 | https://scikit-learn.org/stable/tutorial/statistical_inference/index.html
4 |
--------------------------------------------------------------------------------
/src/gigasquid/sk_learn/model_selection.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.sk-learn.model-selection
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 | ;;; from https://scikit-learn.org/stable/tutorial/statistical_inference/model_selection.html
7 |
8 | (require-python '[sklearn.datasets :as datasets])
9 | (require-python '[sklearn.model_selection :as model-selection])
10 | (require-python '[sklearn.linear_model :as linear-model])
11 | (require-python '[sklearn.svm :as svm])
12 | (require-python '[numpy :as np])
13 |
14 | (def digits (datasets/load_digits :return_X_y true))
15 | (def x-digits (first digits))
16 | (def y-digits (last digits))
17 | (def svc (svm/SVC :C 1 :kernel "linear"))
18 | (def slice-x-digits (->> x-digits (take 100) (into []) (np/array)))
19 | (def slice-y-digits (->> y-digits (take 100) (into []) (np/array)))
20 | (def slice-x2-digits (->> x-digits (drop 100) (take 100) (into []) (np/array)))
21 | (def slice-y2-digits (->> y-digits (drop 100) (take 100) (into []) (np/array)))
22 | (-> svc
23 | (py. fit slice-x-digits slice-y-digits)
24 | (py. score slice-x2-digits slice-y2-digits)) ;=> 0.93
25 |
26 |
27 | ;;;; We can split the data into folds to use for training and testing
28 | ;;; Note that here we are doing it in a Clojure way - but we can use the split method with
29 | ;;; indexes later on
30 |
31 | (def x-folds (np/array_split x-digits 3))
32 | (def y-folds (np/array_split y-digits 3))
33 |
34 | (for [k (range 1 4)]
35 | (let [[test-x train-x1 train-x2 ](take 3 (drop (dec k) (cycle x-folds)))
36 | [test-y train-y1 train-y2] (take 3 (drop (dec k) (cycle y-folds)))
37 | train-x (np/concatenate [train-x1 train-x2])
38 | train-y (np/concatenate [train-y1 train-y2])]
39 | (-> svc
40 | (py. fit train-x train-y)
41 | (py. score test-x test-y))))
42 | ;=>(0.9348914858096828 0.9565943238731218 0.9398998330550918)
43 |
44 | ;;; Cross Validation generators
45 | ;; Scikit-learn has a collection of classes which can be used to generate lists of train/test indices for popular cross-validation strategies.
46 |
47 | ;; They expose a split method which accepts the input dataset to be split and yields the train/test set indices for each iteration of the chosen cross-validation strategy.
48 |
49 | (def X ["a" "a" "a" "b" "b" "c" "c" "c" "c" "c"])
50 | (def k-fold (model-selection/KFold :n_splits 5))
51 | (map (fn [[x y]] {:train x :test y})
52 | (py. k-fold split X))
53 | ;; ({:train [2 3 4 5 6 7 8 9], :test [0 1]}
54 | ;; {:train [0 1 4 5 6 7 8 9], :test [2 3]}
55 | ;; {:train [0 1 2 3 6 7 8 9], :test [4 5]}
56 | ;; {:train [0 1 2 3 4 5 8 9], :test [6 7]}
57 | ;; {:train [0 1 2 3 4 5 6 7], :test [8 9]})
58 |
59 |
60 | ;;; let's understand the generator for the split and how to use indexes on numpy arrays
61 | (def try-x (first (py. k-fold split x-digits)))
62 | (def indexes (first try-x))
63 | (py.- x-digits shape) ;-> (1797, 64)
64 | (py.- indexes shape) ;=> (1437,)
65 | ;;;; You can use py/get-item to select rows from a numpy array by index
66 | (def test-items (py/get-item x-digits indexes))
67 | (py.- test-items shape) ;=> (1437, 64)
68 |
69 |
70 |
71 | (map (fn [[train-indexes test-indexes]]
72 | (-> svc
73 | (py. fit (py/get-item x-digits train-indexes)
74 | (py/get-item y-digits train-indexes))
75 | (py. score (py/get-item x-digits test-indexes)
76 | (py/get-item y-digits test-indexes))))
77 | (py. k-fold split x-digits))
78 | ;=>(0.9638888888888889 0.9222222222222223 0.9637883008356546 0.9637883008356546 0.9303621169916435)
79 |
80 | ;; The cross-validation score can be directly calculated using the cross_val_score helper. Given an estimator, the cross-validation object and the input dataset, the cross_val_score splits the data repeatedly into a training and a testing set, trains the estimator using the training set and computes the scores based on the testing set for each iteration of cross-validation
81 |
82 | ;;; n_jobs=-1 means the computation will use all CPUs
83 | (model-selection/cross_val_score svc x-digits y-digits :cv k-fold :n_jobs -1)
84 | ;=>[0.96388889 0.92222222 0.9637883 0.9637883 0.93036212]
85 |
86 | ;; Alternatively, the scoring argument can be provided to specify an alternative scoring method.
87 | (model-selection/cross_val_score svc x-digits y-digits :cv k-fold
88 | :scoring "precision_macro")
89 | ;=>[0.96578289 0.92708922 0.96681476 0.96362897 0.93192644]
90 |
91 |
92 | ;;;; Grid search
93 | ;;scikit-learn provides an object that, given data, computes the score during the fit of an estimator on a parameter grid and chooses the parameters to maximize the cross-validation score. This object takes an estimator during the construction and exposes an estimator API:
94 |
95 | (def Cs (np/logspace -6 -1 10))
96 | (def clf (model-selection/GridSearchCV :estimator svc
97 | :param_grid {:C Cs}
98 | :n_jobs -1))
99 | (def slice-x-digits (->> x-digits (take 1000) (into []) (np/array)))
100 | (def slice-y-digits (->> y-digits (take 1000) (into []) (np/array)))
101 | (def slice-x2-digits (->> x-digits (drop 1000) (take 1000) (into []) (np/array)))
102 | (def slice-y2-digits (->> y-digits (drop 1000) (take 1000) (into []) (np/array)))
103 | (py. clf fit slice-x-digits slice-y-digits)
104 | (py.- clf best_score_) ;=> 0.95
105 | (-> clf (py.- best_estimator_) (py.- C)) ;=> 0.0021544346900318843
106 | (py. clf score slice-x2-digits slice-y2-digits) ;=> 0.946047678795483
107 |
108 |
109 | ;;; Nested cross validation
110 | (model-selection/cross_val_score clf x-digits y-digits)
111 | ;;=>[0.94722222 0.91666667 0.96657382 0.97493036 0.93593315]
112 |
113 |
114 | ;; Cross-validated estimators
115 |
116 | (def lasso (linear-model/LassoCV))
117 | (def diabetes (datasets/load_diabetes :return_X_y true))
118 | (def x-diabetes (first diabetes))
119 | (def y-diabetes (last diabetes))
120 | (py. lasso fit x-diabetes y-diabetes)
121 | ;;; The estimator automatically chooses its lambda (regularization parameter):
122 | (py.- lasso alpha_);=> 0.003753767152692203
123 |
124 |
--------------------------------------------------------------------------------
/src/gigasquid/sk_learn/pipelining.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.sk-learn.pipelining
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 | ;; We have seen that some estimators can transform data and that some estimators can predict variables. We can also create combined estimators:
7 |
8 | ;;; https://scikit-learn.org/stable/tutorial/statistical_inference/putting_together.html
9 |
10 | (require-python '[sklearn.datasets :as datasets])
11 | (require-python '[sklearn.decomposition :as decomposition])
12 | (require-python '[sklearn.linear_model :as linear-model])
13 | (require-python '[sklearn.pipeline :as pipeline])
14 | (require-python '[sklearn.model_selection :as model-selection])
15 | (require-python '[numpy :as np])
16 | (require-python '[matplotlib.pyplot :as pyplot])
17 |
18 | ;; Define a pipeline to search for the best combination of PCA truncation
19 | ;; and classifier regularization.
20 | (def pca (decomposition/PCA))
21 | (def logistic (linear-model/LogisticRegression :max_iter 10000 :tol 0.1))
22 | (def pipe (pipeline/Pipeline :steps [ ["pca" pca] ["logistic" logistic]]))
23 |
24 | (def digits (datasets/load_digits :return_X_y true))
25 | (def X-digits (first digits))
26 | (def y-digits (last digits))
27 |
28 | ;;; Parameters of pipelines can be set using ‘__’ separated parameter names:
29 |
30 | (def logspace (np/logspace -4 4 4))
31 | (def param-grid {"pca__n_components" [5 15 30 45 64]
32 | "logistic__C" logspace})
33 |
34 |
35 | (def search (model-selection/GridSearchCV :estimator pipe
36 | :param_grid param-grid
37 | :n_jobs -1))
38 | (py. search fit X-digits y-digits)
39 | (py.- search best_score_);=> 0.9198885793871865
40 | (py.- search best_params_)
41 | ;=>{'logistic__C': 0.046415888336127774, 'pca__n_components': 45}
42 |
43 | ;;; Plot the PCA Spectrum
44 | (py. pca fit X-digits)
45 |
46 | (plot/with-show
47 | (let [[fig axes] (pyplot/subplots :nrows 2 :sharex true :figsize [6 6])
48 | val1 (np/arange 1 (inc (py.- pca n_components_)))
49 | val2 (py.- pca explained_variance_ratio_)
50 | ax0 (first axes)
51 | ax1 (last axes)
52 | val3 (-> (py.- search best_estimator_)
53 | (py.- named_steps)
54 | (py/get-item "pca")
55 | (py.- n_components))]
56 | (py. ax0 plot val1 val2 "+" :linewidth 2)
57 | (py. ax0 set_ylabel "PCA explained variance ratio")
58 | (py. ax0 axvline val3 :linestyle ":" :label "n_components chosen")
59 | (py. ax0 legend :prop {"size" 12}))
60 | )
61 |
62 |
--------------------------------------------------------------------------------
/src/gigasquid/sk_learn/supervised_learning.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.sk-learn.supervised-learning
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 |
7 | ;;; From https://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html
8 |
9 | ;; Classifying irises
10 |
11 | (require-python '[numpy :as np])
12 | (require-python '[numpy.random :as np-random])
13 | (require-python '[sklearn.datasets :as datasets])
14 | (require-python '[matplotlib.pyplot :as pyplot])
15 |
16 | (def iris (datasets/load_iris :return_X_y true))
17 | (def iris-x (first iris))
18 | (def iris-y (last iris))
19 | (take 2 iris-x) ;=> ([5.1 3.5 1.4 0.2] [4.9 3. 1.4 0.2])
20 | (take 2 iris-y) ;=> (0 0)
21 | (np/unique iris-y) ;=> [0 1 2]
22 |
23 |
24 | ;;; K-Nearest neighbors classifier
25 |
26 | ;;; The simplest possible classifier is the nearest neighbor: given a new observation X_test, find in the training set (i.e. the data used to train the estimator) the observation with the closest feature vector.
27 |
28 | ;;Split iris data in train and test data
29 | ;; A random permutation, to split the data randomly
30 |
31 | ;;; here instead of following the python example we are going to use
32 | ;; shuffle and take instead
33 |
34 | (py.- iris-x shape) ;=> (150, 4)
35 | (py.- iris-y shape) ;=> (150,)
36 | (def shuffled-data (->> (map (fn [x y] {:x x :y y}) iris-x iris-y)
37 | (shuffle)))
38 | (def train-data (take 140 shuffled-data))
39 | (def test-data (drop 140 shuffled-data))
40 | (count train-data) ;-> 140
41 | (count test-data) ;-> 10
42 | (def iris-x-train (mapv :x train-data))
43 | (def iris-y-train (mapv :y train-data))
44 | (def iris-x-test (mapv :x test-data))
45 | (def iris-y-test (mapv :y test-data))
46 |
47 |
48 | (require-python '[sklearn.neighbors :as neighbors])
49 | (def knn (neighbors/KNeighborsClassifier))
50 | (py. knn fit iris-x-train iris-y-train)
51 | ;;; predict
52 | (py. knn predict iris-x-test) ;=> [0 0 1 2 2 0 2 2 0 2]
53 | ;;; actual test
54 | iris-y-test ;=> [0 0 1 2 2 0 2 1 0 2]
55 |
56 |
57 | ;;; Linear model - From regression to sparsity
58 | ;; Diabetes dataset
59 |
60 | ;;The diabetes dataset consists of 10 physiological variables (age, sex, weight, blood pressure) measured on 442 patients, and an indication of disease progression after one year:
61 |
62 | (require-python '[sklearn.linear_model :as linear-model])
63 |
64 | (def diabetes (datasets/load_diabetes :return_X_y true))
65 | (def diabetes-x (first diabetes))
66 | (def diabetes-y (last diabetes))
67 | (py.- diabetes-x shape);=> (442, 10)
68 | (- 442 20) ;=> 422
69 | (def diabetes-x-train (->> diabetes-x (take 422) (into []) (np/array)))
70 | (def diabetes-y-train (->> diabetes-y (take 422) (into []) (np/array)))
71 | (def diabetes-x-test (->> diabetes-x (drop 422) (into []) (np/array)))
72 | (def diabetes-y-test (->> diabetes-y (drop 422) (into []) (np/array)))
73 |
74 |
75 | ;;LinearRegression, in its simplest form, fits a linear model to the data set by adjusting a set of parameters in order to make the sum of the squared residuals of the model as small as possible.
76 |
77 | (py/python-type diabetes-x-train);=> :ndarray
78 | (py.- diabetes-x shape);=> (442, 10)
79 | (py.- diabetes-x-train shape);=> (422, 10)
80 |
81 | (def regr (linear-model/LinearRegression))
82 | (py. regr fit diabetes-x-train diabetes-y-train)
83 | (py.- regr coef_)
84 |
85 | ;; [ 3.03499549e-01 -2.37639315e+02 5.10530605e+02 3.27736980e+02
86 | ;; -8.14131709e+02 4.92814588e+02 1.02848452e+02 1.84606489e+02
87 | ;; 7.43519617e+02 7.60951722e+01]
88 |
89 | ;;; The mean square error
90 | (np/mean
91 | (np/square
92 | (np/subtract (py. regr predict diabetes-x-test) diabetes-y-test)));=> 13.41173112391975
93 | (py. regr score diabetes-x diabetes-y);=> 0.5175336599402476
94 |
95 | ;;; shrinkage
96 | ;;If there are few data points per dimension, noise in the observations induces high variance:
97 | (def X [[0.5] [1]])
98 | (def Y [0.5 1])
99 | (def test [[0] [2]])
100 | (def regr (linear-model/LinearRegression))
101 |
102 | (np-random/seed 0)
103 | (plot/with-show
104 | (dotimes [i 6]
105 | (let [this-x (np/multiply 0.1
106 | (np/add
107 | (np-random/normal :size [2 1]) X))
108 | _ (py. regr fit this-x Y)
109 | prediction (py. regr predict test)]
110 | (pyplot/plot test prediction)
111 | (pyplot/scatter this-x Y :s 3))))
112 |
113 | ;;A solution in high-dimensional statistical learning is to shrink the regression coefficients to zero: any two randomly chosen sets of observations are likely to be uncorrelated. This is called Ridge regression:
114 |
115 | (def regr (linear-model/Ridge :alpha 1))
116 | (plot/with-show
117 | (dotimes [i 6]
118 | (let [this-x (np/multiply 0.1
119 | (np/add
120 | (np-random/normal :size [2 1]) X))
121 | _ (py. regr fit this-x Y)
122 | prediction (py. regr predict test)]
123 | (pyplot/plot test prediction)
124 | (pyplot/scatter this-x Y :s 3))))
125 |
126 | ;; This is an example of bias/variance tradeoff: the larger the ridge alpha parameter, the higher the bias and the lower the variance.
127 |
128 | ;; We can choose alpha to minimize left out error, this time using the diabetes dataset rather than our synthetic data:
129 |
130 | (def alphas (np/logspace -4 -1 6))
131 | (mapv #(-> regr
132 | (py. set_params :alpha %)
133 | (py. fit diabetes-x-train diabetes-y-train)
134 | (py. score diabetes-x-test diabetes-y-test))
135 | alphas)
136 | ;-=>[0.5851110683883531 0.5852073015444674 0.585467754069849 0.5855512036503915 0.5830717085554161 0.570589994372801]
137 |
138 |
139 | ;;; Sparsity
140 | (def regr (linear-model/Lasso))
141 | (def scores (map #(-> regr
142 | (py. set_params :alpha %)
143 | (py. fit diabetes-x-train diabetes-y-train)
144 | (py. score diabetes-x-test diabetes-y-test))
145 | alphas))
146 | (def best-alpha (->> (map (fn [a s] {:alpha a :score s}) alphas scores)
147 | (sort-by :score)
148 | last))
149 | (-> regr
150 |     (py. set_params :alpha (:alpha best-alpha))
151 | (py. fit diabetes-x-train diabetes-y-train)
152 | (py.- coef_))
153 |
154 | ;; [ 0. -212.43764548 517.19478111 313.77959962 -160.8303982
155 | ;; -0. -187.19554705 69.38229038 508.66011217 71.84239008]
156 |
157 |
158 | ;;;; Classification
159 | ;; For classification, as in the labeling iris task, linear regression is not the right approach as it will give too much weight to data far from the decision frontier. A linear approach is to fit a sigmoid function or logistic function:
160 |
161 | (def log (linear-model/LogisticRegression :C 1e5))
162 | ;;The C parameter controls the amount of regularization in the LogisticRegression object: a large value for C results in less regularization. penalty="l2" gives Shrinkage (i.e. non-sparse coefficients), while penalty="l1" gives Sparsity.
163 | (py. log fit iris-x-train iris-y-train)
164 | (py. log score iris-x-test iris-y-test);=> 1.0
165 |
166 |
167 | ;;;; Support Vector Machines
168 |
169 | (require-python '[sklearn.svm :as svm])
170 |
171 | (def svc (svm/SVC :kernel "linear"))
172 | (py. svc fit iris-x-train iris-y-train)
173 | ;;=> SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
174 | ;; decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
175 | ;; max_iter=-1, probability=False, random_state=None, shrinking=True,
176 | ;; tol=0.001, verbose=False)
177 |
178 |
--------------------------------------------------------------------------------
/src/gigasquid/sk_learn/unsupervised_learning.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.sk-learn.unsupervised-learning
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 | ;; from https://scikit-learn.org/stable/tutorial/statistical_inference/unsupervised_learning.html
7 |
8 | (require-python '[sklearn.datasets :as datasets])
9 | (require-python '[sklearn.cluster :as cluster])
10 | (require-python '[sklearn.feature_extraction :as feature-extraction])
11 | (require-python '[sklearn.decomposition :as decomposition])
12 | (require-python '[numpy :as np])
13 | (require-python '[numpy.random :as np-random])
14 | (require-python '[scipy.signal :as signal])
15 |
16 | ;;; K-means clustering
17 | (def iris (datasets/load_iris :return_X_y true))
18 | (def iris-x (first iris))
19 | (def iris-y (last iris))
20 |
21 | (def k-means (cluster/KMeans :n_clusters 3))
22 | (py. k-means fit iris-x)
23 | (take-last 10 (py.- k-means labels_));=> (2 2 0 2 2 2 0 2 2 0)
24 | (take-last 10 iris-y) ;=> (2 2 2 2 2 2 2 2 2 2)
25 |
26 | ;;; Feature agglomeration
27 | ;; We have seen that sparsity could be used to mitigate the curse of dimensionality, i.e an insufficient amount of observations compared to the number of features. Another approach is to merge together similar features: feature agglomeration. This approach can be implemented by clustering in the feature direction, in other words clustering the transposed data.
28 | (def digits (datasets/load_digits))
29 | (def images (py.- digits images))
30 | (def X (np/reshape images [(py/len images) -1]))
31 | (py.- (first images) shape) ;=> (8, 8)
32 | (def connectivity (feature-extraction/grid_to_graph 8 8))
33 | (def agglo (cluster/FeatureAgglomeration :connectivity connectivity :n_clusters 32))
34 | (py. agglo fit X)
35 | (def X-reduced (py. agglo transform X))
36 | (def X-approx (py. agglo inverse_transform X-reduced))
37 | (def images-shape (py.- images shape))
38 | images-shape ;=> (1797, 8, 8)
39 | (def images-approx (np/reshape X-approx images-shape))
40 |
41 | ;;; Principal component analysis: PCA
42 |
43 | ;; Create a signal with only 2 useful dimensions
44 | (def x1 (np-random/normal :size 100))
45 | (def x2 (np-random/normal :size 100))
46 | (def x3 (np/add x1 x2))
47 | (def X (np/column_stack [x1 x2 x3]))
48 | (def pca (decomposition/PCA))
49 | (py. pca fit X)
50 | (py.- pca explained_variance_) ;=> [2.90691814e+00 9.90171666e-01 2.83277241e-31]
51 |
52 | ;; As we can see, only the 2 first components are useful
53 | (py/att-type-map pca)
54 | (py/set-attr! pca "n_components" 2)
55 | (py.- pca n_components) ;=>2
56 | (def X-reduced (py. pca fit_transform X))
57 | (py.- X-reduced shape);=> (100, 2)
58 |
59 | ;;;Independent Component Analysis: ICA
60 | ;;Independent component analysis (ICA) selects components so that the distribution of their loadings carries a maximum amount of independent information. It is able to recover non-Gaussian independent signals:
61 |
62 | ;; Generate the sample data
63 | (def time (np/linspace 0 10 2000))
64 | (def s1 (np/sin (np/multiply 2 time)))
65 | (def s2 (np/sign (np/sin (np/multiply 3 time))))
66 | (def s3 (signal/sawtooth (np/multiply 2 np/pi time)))
67 | (def S (np/column_stack [s1 s2 s3]))
68 | (def S (np/add S (np/multiply 0.2 (np-random/normal :size (py.- S shape))))) ;; add scaled noise
69 | (def S (np/divide S (py. S std :axis 0)))
70 | ;;; Mix data
71 | (def A (np/array [[1 1 1] [0.5 2 1] [1.5 1 2]])) ;; mixing matrix
72 | (def X (np/dot S (py.- A T)))
73 |
74 | ;; Compute ICA
75 | (def ica (decomposition/FastICA))
76 | (def S_ (py. ica fit_transform X)) ;; get the estimated sources
77 | (def A_ (-> ica
78 | (py.- mixing_)
79 | (py.- T)))
80 | (np/allclose X (np/add (np/dot S_ A_) (py.- ica mean_))) ;=> true
81 |
--------------------------------------------------------------------------------
/src/gigasquid/slicing.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.slicing
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]))
4 |
5 |
6 | (require-python '[builtins :as python])
7 |
8 | ;;https://data-flair.training/blogs/python-slice/
9 |
10 | (def l (py/->py-list [1 2 3 4]))
11 |
12 | ;;; slice object slice(stop) or slice(start, stop, step)
13 |
14 | ;;; sub elements 2 3 4
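;; For instance, the sub elements 2 3 4 mentioned above are l[1:4],
;; i.e. a slice from index 1 up to (but not including) index 4:
(py/get-item l (python/slice 1 4)) ;=> [2, 3, 4]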
15 |
16 |
17 | (python/slice 3) ;=> slice(None, 3, None)
18 | (py/get-item l (python/slice 3)) ;=> [1, 2, 3]
19 |
20 |
21 | ;;; with a specified start and stop interval
22 | (py/get-item l (python/slice 1 3)) ;=> [2, 3]
23 |
24 | ;;; negative indices go from right to left
25 | (py/get-item l (python/slice -3 -1)) ;=> [2, 3]
26 |
27 |
28 | ;;; python slicing also works on tuples (we reuse a list here for convenience)
29 |
30 | (def t (py/->py-list [1 2 3 4 5]))
31 | (py/get-item t (python/slice 2 4)) ;=> [3, 4]
32 |
33 | (py/get-item t (python/slice -1 -5 -2)) ;=> [5, 3]
34 |
35 | ;;; is equivalent to t[-1:-5:-2]
36 |
37 |
38 | ;;; t[:3] From 0 to 2
39 | ;;; is the same as
40 | (py/get-item t (python/slice nil 3)) ;=> [1, 2, 3]
41 |
42 |
43 | ;;; t[3:] From 3 to the end
44 | ;; is the same as
45 | (py/get-item t (python/slice 3 nil)) ;=> [4, 5]
46 |
47 | ;;; t[:] From beginning to the end
48 | ;;; is the same as
49 | (py/get-item t (python/slice nil nil)) ;=>[1, 2, 3, 4, 5]
50 |
51 |
52 | ;;;; Extended Python slices with step value
53 |
54 | ;;; t[::-1] reverse
55 | (py/get-item t (python/slice nil nil -1)) ;=> [5, 4, 3, 2, 1]
56 |
57 |
58 | ;;; t[::-2] Reverse with step=2
59 | (py/get-item t (python/slice nil nil -2)) ;=> [5, 3, 1]
60 |
61 |
62 | ;; t[:5:-1] goes right to left but must stop before index 5, which is already past the last index (4), so the result is empty
63 | (py/get-item t (python/slice nil 5 -1)) ;=> []
64 |
--------------------------------------------------------------------------------
/src/gigasquid/spacy.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.spacy
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]))
4 |
5 | ;;;; What is SpaCy?
6 |
7 | ;;; another natural language toolkit: https://spacy.io/usage/spacy-101#whats-spacy
8 | ;;; an opinionated library, more object-oriented than NLTK, with word vector support
9 | ;;; and better performance for tokenization and POS tagging (source https://medium.com/@akankshamalhotra24/introduction-to-libraries-of-nlp-in-python-nltk-vs-spacy-42d7b2f128f2)
10 |
11 | ;;; Install: pip3 install spacy
12 | ;;; Model:   python3 -m spacy download en_core_web_sm
13 |
14 | (require-python '[spacy :as spacy])
15 |
16 |
17 | ;;; Following this tutorial https://spacy.io/usage/spacy-101#annotat
18 |
19 | ;;; linguistic annotations
20 |
21 |
22 | (def nlp (spacy/load "en_core_web_sm"))
23 |
24 | (let [doc (nlp "Apple is looking at buying U.K. startup for $1 billion")]
25 | (map (fn [token]
26 | [(py.- token text) (py.- token pos_) (py.- token dep_)])
27 | doc))
28 | ;; (["Apple" "PROPN" "nsubj"]
29 | ;; ["is" "AUX" "aux"]
30 | ;; ["looking" "VERB" "ROOT"]
31 | ;; ["at" "ADP" "prep"]
32 | ;; ["buying" "VERB" "pcomp"]
33 | ;; ["U.K." "PROPN" "compound"]
34 | ;; ["startup" "NOUN" "dobj"]
35 | ;; ["for" "ADP" "prep"]
36 | ;; ["$" "SYM" "quantmod"]
37 | ;; ["1" "NUM" "compound"]
38 | ;; ["billion" "NUM" "pobj"])
39 |
40 |
41 | (let [doc (nlp "Apple is looking at buying U.K. startup for $1 billion")]
42 | (map (fn [token]
43 | {:text (py.- token text)
44 | :lemma (py.- token lemma_)
45 | :pos (py.- token pos_)
46 | :tag (py.- token tag_)
47 | :dep (py.- token dep_)
48 | :shape (py.- token shape_)
49 | :alpha (py.- token is_alpha)
50 | :is_stop (py.- token is_stop)} )
51 | doc))
52 |
53 | ;; ({:text "Apple",
54 | ;; :lemma "Apple",
55 | ;; :pos "PROPN",
56 | ;; :tag "NNP",
57 | ;; :dep "nsubj",
58 | ;; :shape "Xxxxx",
59 | ;; :alpha true,
60 | ;; :is_stop false}
61 | ;; {:text "is",
62 | ;; :lemma "be",
63 | ;; :pos "AUX",
64 | ;; :tag "VBZ",
65 | ;; :dep "aux",
66 | ;; :shape "xx",
67 | ;; :alpha true,
68 | ;; :is_stop true}
69 | ;; ...
70 |
71 |
72 | ;;; Named entities
73 |
74 | (let [doc (nlp "Apple is looking at buying U.K. startup for $1 billion")]
75 | (map (fn [ent]
76 | {:text (py.- ent text)
77 | :start-char (py.- ent start_char)
78 | :end-char (py.- ent end_char)
79 | :label (py.- ent label_)} )
80 | (py.- doc ents)))
81 |
82 | ;; ({:text "Apple", :start-char 0, :end-char 5, :label "ORG"}
83 | ;; {:text "U.K.", :start-char 27, :end-char 31, :label "GPE"}
84 | ;; {:text "$1 billion", :start-char 44, :end-char 54, :label "MONEY"})
85 |
86 |
87 | ;;; Word Vectors
88 |
89 | ;; To make them compact and fast, spaCy’s small models (all packages that end in sm) don’t ship with word vectors, and only include context-sensitive tensors. This means you can still use the similarity() methods to compare documents, spans and tokens – but the result won’t be as good, and individual tokens won’t have any vectors assigned. So in order to use real word vectors, you need to download a larger model:
90 |
91 | ;;;python -m spacy download en_core_web_md (medium one)
92 |
93 | ;;; then restart cider to pick up changes
94 |
95 | (require-python '[spacy :as spacy])
96 | (def nlp (spacy/load "en_core_web_md"))
97 |
98 | (let [tokens (nlp "dog cat banana afskfsd")]
99 | (map (fn [token]
100 | {:text (py.- token text)
101 | :has-vector (py.- token has_vector)
102 | :vector_norm (py.- token vector_norm)
103 | :is_oov (py.- token is_oov)} )
104 | tokens))
105 |
106 | ;; ({:text "dog",
107 | ;; :has-vector true,
108 | ;; :vector_norm 7.033673286437988,
109 | ;; :is_oov false}
110 | ;; {:text "cat",
111 | ;; :has-vector true,
112 | ;; :vector_norm 6.680818557739258,
113 | ;; :is_oov false}
114 | ;; {:text "banana",
115 | ;; :has-vector true,
116 | ;; :vector_norm 6.700014114379883,
117 | ;; :is_oov false}
118 | ;; {:text "afskfsd", :has-vector false, :vector_norm 0.0, :is_oov true})
119 |
120 |
121 | ;;; finding similarity
122 |
123 | (let [tokens (nlp "dog cat banana")]
124 | (for [token1 tokens
125 | token2 tokens]
126 | {:token1 (py.- token1 text)
127 | :token2 (py.- token2 text)
128 | :similarity (py. token1 similarity token2)}))
129 |
130 | ;; ({:token1 "dog", :token2 "dog", :similarity 1.0}
131 | ;; {:token1 "dog", :token2 "cat", :similarity 0.8016854524612427}
132 | ;; {:token1 "dog", :token2 "banana", :similarity 0.2432764321565628}
133 | ;; {:token1 "cat", :token2 "dog", :similarity 0.8016854524612427}
134 | ;; {:token1 "cat", :token2 "cat", :similarity 1.0}
135 | ;; {:token1 "cat", :token2 "banana", :similarity 0.28154364228248596}
136 | ;; {:token1 "banana", :token2 "dog", :similarity 0.2432764321565628}
137 | ;; {:token1 "banana", :token2 "cat", :similarity 0.28154364228248596}
138 | ;; {:token1 "banana", :token2 "banana", :similarity 1.0})
139 |
140 |
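141 | ;;; The same similarity API also works on whole docs (Doc.similarity in spaCy).
142 | ;;; A minimal sketch, assuming the en_core_web_md model is still loaded as nlp above;
143 | ;;; the two example sentences come from the spaCy 101 guide:
144 | (let [doc1 (nlp "I like salty fries and hamburgers.")
145 |       doc2 (nlp "Fast food tastes very good.")]
146 |   (py. doc1 similarity doc2))
147 | ;; => a single cosine-similarity score for the pair of documents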
--------------------------------------------------------------------------------
/src/gigasquid/trimap.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.trimap
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 | ;;;; you will need all of the libraries below installed with pip
7 |
8 | ;;; What is TriMap? It is a dimensionality reduction library (like umap) that uses a different algorithm
9 | ;;https://pypi.org/project/trimap/
10 |
11 | ;;; also see the umap.clj example
12 |
13 | (require-python '[trimap :as trimap])
14 | (require-python '[sklearn.datasets :as sk-data])
15 | (require-python '[matplotlib.pyplot :as pyplot]) (require-python '[numpy :as numpy]) ;; numpy is needed for the colorbar ticks below
16 |
17 | (def digits (sk-data/load_digits))
18 | (def digits-data (py.- digits data))
19 |
20 | (def embedding (py. (trimap/TRIMAP) fit_transform digits-data))
21 | (py.- embedding shape) ;=> (1797, 2)
22 |
23 |
24 | ;; We now have a dataset with 1797 rows (one for each hand-written digit sample), but only 2 columns. We can now plot the resulting embedding, coloring the data points by the class that they belong to (i.e. the digit they represent).
25 |
26 | (plot/with-show
27 | (let [x (mapv first embedding)
28 | y (mapv last embedding)
29 | colors (py.- digits target)
30 | bounds (numpy/subtract (numpy/arange 11) 0.5)
31 | ticks (numpy/arange 10)]
32 | (pyplot/scatter x y :c colors :cmap "Spectral" :s 5)
33 | (py. (pyplot/gca) set_aspect "equal" "datalim")
34 | (py. (pyplot/colorbar :boundaries bounds) set_ticks ticks)
35 | (pyplot/title "TriMap projection of the Digits dataset" :fontsize 24)))
36 |
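37 | ;;; TriMap's triplet-based algorithm exposes a few hyper-parameters. The keyword names
38 | ;;; below (n_inliers, n_outliers, n_random) are an assumption taken from the TriMap
39 | ;;; README, so treat this as an untested sketch rather than a verified call:
40 | (comment
41 |   (def embedding-tuned (py. (trimap/TRIMAP :n_inliers 12 :n_outliers 4 :n_random 3)
42 |                             fit_transform digits-data))
43 |   (py.- embedding-tuned shape)) ;=> (1797, 2)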
--------------------------------------------------------------------------------
/src/gigasquid/umap.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.umap
2 | (:require [libpython-clj.require :refer [require-python]]
3 | [libpython-clj.python :as py :refer [py. py.. py.-]]
4 | [gigasquid.plot :as plot]))
5 |
6 | ;;;; you will need all these things below installed
7 | ;;; with pip or something else
8 |
9 | ;;; What is umap? - dimensionality reduction library
10 |
11 |
12 | (require-python '[seaborn :as sns])
13 | (require-python '[matplotlib.pyplot :as pyplot])
14 | (require-python '[sklearn.datasets :as sk-data])
15 | (require-python '[sklearn.model_selection :as sk-model])
16 | (require-python '[numpy :as numpy])
17 | (require-python '[pandas :as pandas])
18 | (require-python '[umap :as umap])
19 |
20 |
21 | ;;; Code walkthrough from here https://umap-learn.readthedocs.io/en/latest/basic_usage.html
22 |
23 |
24 | ;;; set the defaults for plotting
25 | (sns/set)
26 |
27 | ;;; IRIS data
28 |
29 | ;; The next step is to get some data to work with. To ease us into things we’ll start with the iris dataset. It isn’t very representative of what real data would look like, but it is small both in number of points and number of features, and will let us get an idea of what the dimension reduction is doing. We can load the iris dataset from sklearn.
30 |
31 | (def iris (sk-data/load_iris))
32 | (print (py.- iris DESCR))
33 |
34 | ;; Iris plants dataset
35 | ;; --------------------
36 |
37 | ;; **Data Set Characteristics:**
38 |
39 | ;; :Number of Instances: 150 (50 in each of three classes)
40 | ;; :Number of Attributes: 4 numeric, predictive attributes and the class
41 | ;; :Attribute Information:
42 | ;; - sepal length in cm
43 | ;; - sepal width in cm
44 | ;; - petal length in cm
45 | ;; - petal width in cm
46 | ;; - class:
47 | ;; - Iris-Setosa
48 | ;; - Iris-Versicolour
49 | ;; - Iris-Virginica
50 |
51 | ;; :Summary Statistics:
52 |
53 | ;; ============== ==== ==== ======= ===== ====================
54 | ;; Min Max Mean SD Class Correlation
55 | ;; ============== ==== ==== ======= ===== ====================
56 | ;; sepal length: 4.3 7.9 5.84 0.83 0.7826
57 | ;; sepal width: 2.0 4.4 3.05 0.43 -0.4194
58 | ;; petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
59 | ;; petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
60 | ;; ============== ==== ==== ======= ===== ====================
61 |
62 | (def iris-df (pandas/DataFrame (py.- iris data) :columns (py.- iris feature_names)))
63 | (py/att-type-map iris-df)
64 |
65 | (def iris-name-series (let [iris-name-map (zipmap (range 3) (py.- iris target_names))]
66 | (pandas/Series (map (fn [item]
67 | (get iris-name-map item))
68 | (py.- iris target)))))
69 |
70 | (py. iris-df __setitem__ "species" iris-name-series)
71 | (py/get-item iris-df "species")
72 | ;; 0 setosa
73 | ;; 1 setosa
74 | ;; 2 setosa
75 | ;; 3 setosa
76 | ;; 4 setosa
77 | ;; ...
78 | ;; 145 virginica
79 | ;; 146 virginica
80 | ;; 147 virginica
81 | ;; 148 virginica
82 | ;; 149 virginica
83 | ;; Name: species, Length: 150, dtype: object
84 |
85 |
86 | (plot/with-show
87 | (sns/pairplot iris-df :hue "species"))
88 |
89 |
90 | ;; Time to reduce dimensions
91 | (def reducer (umap/UMAP))
92 |
93 | ;;; we need to train the reducer to learn about the manifold
94 | ;; fit_transform first fits the data and then transforms it into a numpy array
95 |
96 | (def embedding (py. reducer fit_transform (py.- iris data)))
97 | (py.- embedding shape) ;=> (150, 2)
98 |
99 | ;;; 150 samples with 2 columns. Each row of the array is a 2-dimensional representation of the corresponding flower. Thus we can plot the embedding as a standard scatterplot and color by the target array (since it applies to the transformed data which is in the same order as the original).
100 |
101 | (first embedding) ;=> [12.449954 -6.0549345]
102 |
103 |
104 | (let [colors (mapv #(py/get-item (sns/color_palette) %)
105 | (py.- iris target))
106 | x (mapv first embedding)
107 | y (mapv last embedding)]
108 | (plot/with-show
109 | (pyplot/scatter x y :c colors)
110 | (py. (pyplot/gca) set_aspect "equal" "datalim")
111 | (pyplot/title "UMAP projection of the Iris dataset" :fontsize 24)))
112 |
113 |
114 | ;;;; Digits Data
115 |
116 | (def digits (sk-data/load_digits))
117 | (print (py.- digits DESCR))
118 |
119 | ;; .. _digits_dataset:
120 |
121 | ;; Optical recognition of handwritten digits dataset
122 | ;; --------------------------------------------------
123 |
124 | ;; **Data Set Characteristics:**
125 |
126 | ;; :Number of Instances: 5620
127 | ;; :Number of Attributes: 64
128 | ;; :Attribute Information: 8x8 image of integer pixels in the range 0..16.
129 | ;; :Missing Attribute Values: None
130 | ;; :Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)
131 | ;; :Date: July; 1998
132 |
133 | ;;; Plot the images to get an idea of what we are looking at
134 |
135 | (plot/with-show
136 | (let [[fig ax-array] (pyplot/subplots 20 20)
137 | axes (py. ax-array flatten)]
138 | (doall (map-indexed (fn [i ax]
139 | (py. ax imshow (py/get-item (py.- digits images) i) :cmap "gray_r"))
140 | axes))
141 | (pyplot/setp axes :xticks [] :yticks [] :frame_on false)
142 | (pyplot/tight_layout :h_pad 0.5 :w_pad 0.01)))
143 |
144 | ;;; Try a pairplot (scatterplot matrix) of the first 10 of the 64 grayscale-pixel dimensions
145 |
146 | (def digits-df (pandas/DataFrame (mapv #(take 10 %) (py.- digits data))))
147 | (def digits-target-series (pandas/DataFrame (mapv #(str "Digit " %) (py.- digits target))))
148 | (py. digits-df __setitem__ "digit" digits-target-series)
149 |
150 | (plot/with-show
151 | (sns/pairplot digits-df :hue "digit" :palette "Spectral"))
152 |
153 | ;;;; use umap with the fit instead
154 |
155 | (def reducer (umap/UMAP :random_state 42))
156 | (py. reducer fit (py.- digits data))
157 |
158 | ;;; now we can look at the embedding attribute on the reducer or call transform on the original data
159 | (def embedding (py. reducer transform (py.- digits data)))
160 | (py.- embedding shape) ;=>(1797, 2)
161 |
162 |
163 | ;; We now have a dataset with 1797 rows (one for each hand-written digit sample), but only 2 columns. As with the Iris example we can now plot the resulting embedding, coloring the data points by the class that they belong to (i.e. the digit they represent).
164 |
165 | (plot/with-show
166 | (let [x (mapv first embedding)
167 | y (mapv last embedding)
168 | colors (py.- digits target)
169 | bounds (numpy/subtract (numpy/arange 11) 0.5)
170 | ticks (numpy/arange 10)]
171 | (pyplot/scatter x y :c colors :cmap "Spectral" :s 5)
172 | (py. (pyplot/gca) set_aspect "equal" "datalim")
173 | (py. (pyplot/colorbar :boundaries bounds) set_ticks ticks)
174 | (pyplot/title "UMAP projection of the Digits dataset" :fontsize 24)))
175 |
176 | ;;;; Whooo!
177 |
178 |
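179 | ;;; Related to the earlier note about the embedding attribute: after fit, umap-learn
180 | ;;; stores the training embedding in the reducer's embedding_ attribute, so for the
181 | ;;; training data it should line up with what transform returned above. Minimal sketch:
182 | (comment
183 |   (-> reducer (py.- embedding_) (py.- shape)) ;=> (1797, 2)
184 |   (first (py.- reducer embedding_))) ;; first training point in 2-d, same row order as embedding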
--------------------------------------------------------------------------------
/src/gigasquid/utils.clj:
--------------------------------------------------------------------------------
1 | (ns gigasquid.utils
2 | (:require
3 | [clojure.string :as string]
4 | [clojure.java.shell :as sh]
5 | [clojure.pprint :refer [pprint]])
6 | (:import [java.io File]))
7 |
8 | (def is-linux?
9 | (= "linux"
10 | (-> "os.name"
11 | System/getProperty
12 | string/lower-case)))
13 |
14 | (def is-mac?
15 | (-> "os.name"
16 | System/getProperty
17 | string/lower-case
18 | (string/starts-with? "mac")))
19 |
20 | (defn display-image
21 | "Display image on OSX or on Linux based system"
22 | [image-file]
23 | (cond
24 | is-mac?
25 | (sh/sh "open" image-file)
26 |
27 | is-linux?
28 | (sh/sh "display" image-file)))
29 |
30 | (defn create-tmp-file
31 | "Return full path of temporary file.
32 |
33 | Example:
34 | (create-tmp-file \"tmp-image\" \".png\") "
35 | [prefix ext]
36 | (File/createTempFile prefix ext))
37 |
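38 | ;; A small usage sketch showing how the helpers above compose: create a temp file,
39 | ;; render or copy image bytes into it (elided here), then open it with the platform
40 | ;; viewer. The "tmp-image" name is just an illustration.
41 | (comment
42 |   (let [tmp (create-tmp-file "tmp-image" ".png")]
43 |     ;; ... write image data to tmp ...
44 |     (display-image (.getAbsolutePath tmp))))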
--------------------------------------------------------------------------------