├── doc
└── intro.md
├── examples
├── clj
│ ├── clj-español
│ │ ├── doc
│ │ │ └── intro.md
│ │ ├── .gitignore
│ │ ├── test
│ │ │ └── clj_español
│ │ │ │ └── core_test.clj
│ │ ├── README.md
│ │ ├── project.clj
│ │ ├── src
│ │ │ └── clj_español
│ │ │ │ └── core.clj
│ │ └── LICENSE
│ ├── clj-spanish
│ │ ├── doc
│ │ │ └── intro.md
│ │ ├── .gitignore
│ │ ├── test
│ │ │ └── clj_spanish
│ │ │ │ └── core_test.clj
│ │ ├── README.md
│ │ ├── project.clj
│ │ ├── src
│ │ │ └── clj_spanish
│ │ │ │ └── core.clj
│ │ └── LICENSE
│ ├── ஆமை-தமிழ்
│ │ ├── .gitignore
│ │ ├── project.clj
│ │ └── src
│ │ │ └── ஆமை_தமிழ்
│ │ │ └── கரு.clj
│ └── turtle-thamil
│ │ ├── project.clj
│ │ └── src
│ │ └── turtle_thamil
│ │ └── core.clj
├── js
│ ├── setup.sh
│ ├── test01.html
│ ├── test03.html
│ └── test02.html
└── java
│ └── java-examples
│ ├── README.md
│ ├── src
│ └── main
│ │ └── java
│ │ └── clj-thamil
│ │ └── examples
│ │ └── java
│ │ ├── WordSort01.java
│ │ └── WordSort02.java
│ └── pom.xml
├── .gitignore
├── src
└── clj_thamil
│ ├── java
│ └── api
│ │ └── format.cljc
│ ├── subprograms.cljc
│ ├── js
│ └── api
│ │ └── convert.cljs
│ ├── main.clj
│ ├── format
│ ├── analysis.cljc
│ └── convert.cljc
│ ├── core.cljc
│ ├── மொழியியல்.cljc
│ └── format.cljc
├── test
└── clj_thamil
│ ├── format
│ ├── analysis_test.cljc
│ └── convert_test.cljc
│ ├── demo
│ └── trans_demo_01.cljc
│ ├── core_test.cljc
│ ├── மொழியியல்_test.cljc
│ └── format_test.cljc
├── CHANGELOG.md
├── project.clj
├── README.md
└── emacs
└── clojure-mode.el
/doc/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction to clj-thamil-test
2 |
3 | TODO: write [great documentation](http://jacobian.org/writing/great-documentation/what-to-write/)
4 |
--------------------------------------------------------------------------------
/examples/clj/clj-español/doc/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction to clj-español
2 |
3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)
4 |
--------------------------------------------------------------------------------
/examples/clj/clj-spanish/doc/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction to clj-spanish
2 |
3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)
4 |
--------------------------------------------------------------------------------
/examples/clj/ஆமை-தமிழ்/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /classes
3 | /checkouts
4 | pom.xml
5 | pom.xml.asc
6 | *.jar
7 | *.class
8 | /.lein-*
9 | /.nrepl-port
10 | .hgignore
11 | .hg/
12 |
--------------------------------------------------------------------------------
/examples/clj/clj-español/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /classes
3 | /checkouts
4 | pom.xml
5 | pom.xml.asc
6 | *.jar
7 | *.class
8 | /.lein-*
9 | /.nrepl-port
10 | .hgignore
11 | .hg/
12 |
--------------------------------------------------------------------------------
/examples/clj/clj-spanish/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /classes
3 | /checkouts
4 | pom.xml
5 | pom.xml.asc
6 | *.jar
7 | *.class
8 | /.lein-*
9 | /.nrepl-port
10 | .hgignore
11 | .hg/
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /lib
3 | /classes
4 | /checkouts
5 | pom.xml
6 | pom.xml.asc
7 | *.jar
8 | *.class
9 | .lein-deps-sum
10 | .lein-failures
11 | .lein-plugins
12 | .lein-repl-history
13 | /examples/js/js
14 | /resources/public/js
15 |
--------------------------------------------------------------------------------
/src/clj_thamil/java/api/format.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.java.api.format
2 |   (:require [clj-thamil.format :as fmt])
3 |   #?@(:clj [(:import java.util.Comparator) ; one spliced :clj branch -- a second :clj branch in #?(...) is never read
4 |             (:gen-class
5 |              :methods [#^{:static true} [word_comp [] java.util.Comparator]])]))
6 |
7 | (defn -word_comp [] fmt/word-comp) ; body of the static word_comp method named in :gen-class; returns the value of fmt/word-comp
8 |
--------------------------------------------------------------------------------
/examples/clj/clj-español/test/clj_español/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns clj-español.core-test
2 | (:require [clojure.test :refer :all]
3 | [clj-español.core :refer :all]))
4 |
5 | (deftest core-test
6 | (let [numbers [2 3 5 7 11]]
7 | (testing "Clojure en español"
8 | (is (= 11 (último numbers))))))
9 |
--------------------------------------------------------------------------------
/examples/clj/clj-spanish/test/clj_spanish/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns clj-spanish.core-test
2 | (:require [clojure.test :refer :all]
3 | [clj-spanish.core :refer :all]))
4 |
5 | (deftest core-test
6 | (let [numbers [2 3 5 7 11]]
7 | (testing "Clojure en español"
8 | (is (= 11 (último numbers))))))
9 |
--------------------------------------------------------------------------------
/examples/js/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e  # abort on the first failing step so a broken build is never copied into ./js
3 | mkdir -p js  # destination directory, relative to the current working directory
4 | lein do clean, compile, cljsbuild once
5 | #cp -r ../../resources/public/js/clj-thamil.js ./js
6 | #cp -r ../../resources/public/js/out/* ./js
7 | rsync --recursive ../../resources/public/js/clj-thamil.js ./js
8 | rsync --recursive ../../resources/public/js/out/ ./js
9 |
--------------------------------------------------------------------------------
/examples/clj/clj-español/README.md:
--------------------------------------------------------------------------------
1 | # clj-español
2 |
3 | A Clojure library designed to ... well, that part is up to you.
4 |
5 | ## Usage
6 |
7 | FIXME
8 |
9 | ## License
10 |
11 | Copyright © 2016 FIXME
12 |
13 | Distributed under the Eclipse Public License either version 1.0 or (at
14 | your option) any later version.
15 |
--------------------------------------------------------------------------------
/examples/clj/clj-spanish/README.md:
--------------------------------------------------------------------------------
1 | # clj-spanish
2 |
3 | A Clojure library designed to ... well, that part is up to you.
4 |
5 | ## Usage
6 |
7 | FIXME
8 |
9 | ## License
10 |
11 | Copyright © 2016 FIXME
12 |
13 | Distributed under the Eclipse Public License either version 1.0 or (at
14 | your option) any later version.
15 |
--------------------------------------------------------------------------------
/examples/clj/clj-español/project.clj:
--------------------------------------------------------------------------------
1 | (defproject clj-español "0.1.0-SNAPSHOT"
2 | :description "FIXME: write description"
3 | :url "http://example.com/FIXME"
4 | :license {:name "Eclipse Public License"
5 | :url "http://www.eclipse.org/legal/epl-v10.html"}
6 | :dependencies [[org.clojure/clojure "1.7.0"]
7 | [clj-thamil "0.1.2"]])
8 |
--------------------------------------------------------------------------------
/examples/clj/clj-spanish/project.clj:
--------------------------------------------------------------------------------
1 | (defproject clj-spanish "0.1.0-SNAPSHOT"
2 | :description "FIXME: write description"
3 | :url "http://example.com/FIXME"
4 | :license {:name "Eclipse Public License"
5 | :url "http://www.eclipse.org/legal/epl-v10.html"}
6 | :dependencies [[org.clojure/clojure "1.7.0"]
7 | [clj-thamil "0.1.2"]])
8 |
--------------------------------------------------------------------------------
/examples/clj/ஆமை-தமிழ்/project.clj:
--------------------------------------------------------------------------------
1 | (defproject ஆமை-தமிழ் "0.1.0-SNAPSHOT"
2 | :description "FIXME: write description"
3 | :url "http://example.com/FIXME"
4 | :license {:name "Eclipse Public License"
5 | :url "http://www.eclipse.org/legal/epl-v10.html"}
6 | :dependencies [[org.clojure/clojure "1.7.0"]
7 | [clj-thamil "0.1.2"]
8 | [com.google/clojure-turtle "0.2.0"]])
9 |
--------------------------------------------------------------------------------
/examples/clj/turtle-thamil/project.clj:
--------------------------------------------------------------------------------
1 | (defproject turtle-thamil "0.1.0-SNAPSHOT"
2 | :description "FIXME: write description"
3 | :url "http://example.com/FIXME"
4 | :license {:name "Eclipse Public License"
5 | :url "http://www.eclipse.org/legal/epl-v10.html"}
6 | :dependencies [[org.clojure/clojure "1.7.0"]
7 | [clj-thamil "0.1.2"]
8 | [com.google/clojure-turtle "0.2.0"]])
9 |
--------------------------------------------------------------------------------
/src/clj_thamil/subprograms.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.subprograms
2 |   (:require #?(:clj [clojure.java.io :as jio]) ; JVM-only namespace; guarded so this .cljc still loads under ClojureScript
3 |             [clj-thamil.format :as fmt]))
4 |
5 | #?(:clj ; *in* and clojure.java.io are only available on the JVM
6 |    (defn print-as-phonemes
7 |      "Reads lines from *in* and prints, for each line, (apply str (fmt/str->phonemes line)). args are ignored."
8 |      [& args]
9 |      (with-open [rdr (jio/reader *in*)]
10 |        (doall ; force the lazy seq while the reader is still open
11 |         (for [line (line-seq rdr)]
12 |           (println (apply str (fmt/str->phonemes line))))))))
13 |
--------------------------------------------------------------------------------
/src/clj_thamil/js/api/convert.cljs:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.js.api.convert
2 | (:require [clj-thamil.format.convert :as cvt]))
3 | ;; ASCII-named aliases for the conversion vars in clj-thamil.format.convert, one to-thamil/from-thamil pair per scheme.
4 | (def romanized-to-thamil cvt/romanized->தமிழ்)
5 | (def thamil-to-romanized cvt/தமிழ்->romanized)
6 |
7 | (def tab-to-thamil cvt/tab->தமிழ்)
8 | (def thamil-to-tab cvt/தமிழ்->tab)
9 |
10 | (def bamini-to-thamil cvt/bamini->தமிழ்)
11 | (def thamil-to-bamini cvt/தமிழ்->bamini)
12 |
13 | (def tscii-to-thamil cvt/tscii->தமிழ்)
14 | (def thamil-to-tscii cvt/தமிழ்->tscii)
15 |
16 | (def webulagam-to-thamil cvt/webulagam->தமிழ்)
17 | (def thamil-to-webulagam cvt/தமிழ்->webulagam)
18 |
19 |
20 |
--------------------------------------------------------------------------------
/test/clj_thamil/format/analysis_test.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.format.analysis-test
2 | (:require [clj-thamil.format :as fmt])
3 | (:use clj-thamil.format.analysis
4 | clojure.test))
5 |
6 | (deftest letters-plus-grantha-test
7 | (let [letters-plus-grantha-trie (fmt/make-trie (flatten letters-plus-grantha))
8 | str->letters-plus-grantha (fn [s] (fmt/str->elems letters-plus-grantha-trie s))]
9 | (testing "string fns also working on grantha letters"
10 | (is (= ["ஜி" "மி" "க்" "கி"] (str->letters-plus-grantha "ஜிமிக்கி")))
11 | (is (= ["கு" "ஷி"] (str->letters-plus-grantha "குஷி"))))
12 | (testing "trie-elem-freqs"
13 | (let [s "ஜோடி"]
14 | (is (= (trie-elem-freqs letters-plus-grantha-trie s)
15 | (trie-elem-freqs letters-plus-grantha-trie (str s " abc 123 a3"))))))))
16 |
--------------------------------------------------------------------------------
/examples/clj/turtle-thamil/src/turtle_thamil/core.clj:
--------------------------------------------------------------------------------
1 | (ns turtle-thamil.core
2 | (:require [clojure-turtle.core :as turtle])
3 | (:use clj-thamil.core))
4 |
5 | (def turtle-fns-map '{turtle/forward முன்னால்
6 | turtle/back பின்னால்
7 | turtle/right வலது
8 | turtle/left இடது
9 | turtle/translate இடம்பெயர்
10 | turtle/penup எழுதுகோலெடு
11 | turtle/pendown எழுதுகோல்வை
12 | turtle/clean துப்புரவு
13 | ;; setxy ???
14 | turtle/setheading திசைவை
15 | turtle/home வீடு})
16 |
17 | (def turtle-forms-map '{turtle/repeat மீண்டும்
18 | turtle/all எல்லாம்
19 | turtle/new-window புது-சாளரம்})
20 |
21 | (translate-fns turtle-fns-map)
22 | (translate-forms turtle-forms-map)
23 |
--------------------------------------------------------------------------------
/examples/clj/ஆமை-தமிழ்/src/ஆமை_தமிழ்/கரு.clj:
--------------------------------------------------------------------------------
1 | (ns ஆமை-தமிழ்.கரு
2 | (:require [clojure-turtle.core :as turtle])
3 | (:use clj-thamil.core))
4 |
5 | (translate-forms '{translate-fns மொழிப்பெயர்-செயல்கூறுகள்
6 | translate-forms மொழிப்பெயர்-வடிவங்கள்})
7 |
8 | (வரையறு ஆமை-செயல்கூறுகள்
9 | '{turtle/forward முன்னால்
10 | turtle/back பின்னால்
11 | turtle/right வலது
12 | turtle/left இடது
13 | turtle/translate இடம்பெயர்
14 | turtle/penup எழுதுகோலெடு
15 | turtle/pendown எழுதுகோல்வை
16 | turtle/clean துப்புரவு
17 | ;; setxy ???
18 | turtle/setheading திசைவை
19 | turtle/home வீடு})
20 |
21 | (வரையறு ஆமை-வடிவங்கள்
22 | '{turtle/repeat மீண்டும்
23 | turtle/all எல்லாம்
24 | turtle/new-window புது-சாளரம்})
25 |
26 | (மொழிப்பெயர்-செயல்கூறுகள் ஆமை-செயல்கூறுகள்)
27 | (மொழிப்பெயர்-வடிவங்கள் ஆமை-வடிவங்கள்)
28 |
--------------------------------------------------------------------------------
/src/clj_thamil/main.clj:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.main
2 |   (:require [clojure.string :as string] ; must be the :require keyword -- a bare `require` symbol fails ns validation on Clojure 1.9+
3 |             [clj-thamil.format [analysis :as analysis] [convert :as convert]]
4 |             [clj-thamil.subprograms :as subprog])
5 |   (:gen-class))
6 |
7 | (def ^{:doc "a map that specifies what sub-program to run based on the first arg passed in"}
8 | main-fns
9 | {"freqs" analysis/-main
10 | "osxkeyb" convert/-main
11 | "phonemes" subprog/print-as-phonemes})
12 |
13 | (defn -main [& args] ; dispatches on the first argument; the remaining arguments are passed to the chosen sub-program
14 |   (assert (pos? (count args)) "Running clj-thamil as an executable requires arguments")
15 |   (let [[subprog & new-args] args
16 |         ;; fallback used when the name is not a key of main-fns: fail, listing the known names in sorted order
17 |         unknown-fn (fn [& _] (throw (Exception. (str "The specified clj-thamil sub-program is misspelled or does not exist. Available sub-programs: [" (->> main-fns keys sort (string/join ", ")) "]"))))
18 |         subprog-fn (get main-fns subprog unknown-fn)]
19 |     (apply subprog-fn new-args)))
20 |
--------------------------------------------------------------------------------
/test/clj_thamil/demo/trans_demo_01.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.demo.trans-demo-01
2 | (:use clj-thamil.core))
3 |
4 | (வரையறு-செயல்கூறு தன்னால்-பெருக்கு
5 | [அ]
6 | (* அ அ))
7 |
8 | (வரையறு எண்கள் [108 1008 18 63 6 12 247])
9 |
10 | ;; 1-ஆம் சுற்று
11 |
12 | (வரையறு-செயல்கூறு சுற்று-01
13 | []
14 | (வரி-அச்சிடு "எண்களையும் அவற்றின் சதுர ஆக்கங்களையும்(?) அச்சிடுவது:")
15 | (செய்வரிசை [எண் எண்கள்]
16 | (வரி-அச்சிடு (தொடை "[" எண் "] -> [" (தன்னால்-பெருக்கு எண்) "]"))))
17 |
18 | (வரையறு சதுர-எண்கள் (விவரி தன்னால்-பெருக்கு எண்கள்))
19 |
20 | ;; 2-ஆம் சுற்று
21 |
22 | (வரையறு-செயல்கூறு சுற்று-02
23 | []
24 | (வரி-அச்சிடு "மீண்டும் எண்களையும் சதுர எண்களையும் அச்சிடுவது:")
25 | (செய்யோட்டம்
26 | (விவரி (செயல்கூறு [எ சஎ] (வரி-அச்சிடு எ "*" எ "=" சஎ)) எண்கள் சதுர-எண்கள்)))
27 |
28 | ;; எல்லாச் சுற்றும்
29 |
30 | (வரையறு-செயல்கூறு எல்லாவற்றையும்-ஓட்டு
31 | []
32 | (வைத்துக்கொள் [செயல்கூறுகள் [சுற்று-01 சுற்று-02]
33 | ஓட்டும்-செயல்கூறுகள் (இடைபொருத்து வரி-அச்சிடு செயல்கூறுகள்)]
34 | (செய்வரிசை [செ ஓட்டும்-செயல்கூறுகள்]
35 | (செ))))
36 |
--------------------------------------------------------------------------------
/examples/java/java-examples/README.md:
--------------------------------------------------------------------------------
1 | # Java Examples for clj-thamil
2 |
3 | ## Requirements
4 |
5 | The Java example code requires the clj-thamil artifact to be built and installed. Refer to the [Building](../../../README.md) section on how to build and install the artifact.
6 |
7 | ## Building
8 |
9 | All of the Java examples can be built together by
10 | ```
11 | lein clean
12 | lein install
13 | cd examples/java/java-examples
14 | mvn clean package
15 | ```
16 |
17 | Building the Java examples is separate from the clj-thamil artifact that they depend on.
18 |
19 | ## Usage
20 |
21 | After following the build instructions above, a shaded jar/uberjar will be in the `target` subdirectory, but is not
22 | in itself executable since it contains multiple main methods.
23 | Instead, the uberjar should be provided in the classpath followed by
24 | the class name of the example being run:
25 | ```
26 | java -cp target/java-examples-1.0.jar clj_thamil.examples.java.WordSort01
27 | java -cp target/java-examples-1.0.jar clj_thamil.examples.java.WordSort02
28 | ```
29 |
30 | ## Overview
31 |
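32 | * WordSort01 - sorts words in Thamil alphabetical order, looking up the `clj-thamil.format/word-comp` comparator at run time through the Clojure Java API (`Clojure.var`)
33 | * WordSort02 - sorts the same words in Thamil alphabetical order, obtaining the comparator from the static method `clj_thamil.java.api.format.word_comp()`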
34 |
--------------------------------------------------------------------------------
/examples/js/test01.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/test/clj_thamil/core_test.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.core-test
2 | (:use clojure.test
3 | clj-thamil.core))
4 |
5 | (defn demo-print-1
6 | []
7 | (println "hello"))
8 |
9 | (defn demo-print-2
10 | []
11 | (println "வணக்கம்"))
12 |
13 | (defn demo-print-3
14 | []
15 | (வரி-அச்சிடு "வணக்கம்"))
16 |
17 | (defn demo-add-1
18 | []
19 | (if (= 4 (+ 2 2))
20 | (println "true")
21 | (println "false")))
22 |
23 | (defn demo-add-2
24 | []
25 | (if (= 4 (+ 2 2))
26 | (println "true")
27 | (println "false"))
28 | (if (= 5 (+ 2 2))
29 | (println "true")
30 | (println "false")))
31 |
32 | (defn demo-add-3
33 | []
34 | (எனில் (= 4 (+ 2 2))
35 | (வரி-அச்சிடு "வாய்மை")
36 | (வரி-அச்சிடு "பொய்மை"))
37 | (எனில் (= 5 (+ 2 2))
38 | (வரி-அச்சிடு "வாய்மை")
39 | (வரி-அச்சிடு "பொய்மை")))
40 |
41 | (வரையறு-செயல்கூறு demo-add-4
42 | []
43 | (எனில் (= 4 (+ 2 2))
44 | (வரி-அச்சிடு "வாய்மை")
45 | (வரி-அச்சிடு "பொய்மை"))
46 | (எனில் (= 5 (+ 2 2))
47 | (வரி-அச்சிடு "வாய்மை")
48 | (வரி-அச்சிடு "பொய்மை")))
49 |
50 | (வரையறு-செயல்கூறு மாதிரி-கூட்டு-5
51 | []
52 | (எனில் (= 4 (+ 2 2))
53 | (வரி-அச்சிடு "வாய்மை")
54 | (வரி-அச்சிடு "பொய்மை"))
55 | (எனில் (= 5 (+ 2 2))
56 | (வரி-அச்சிடு "வாய்மை")
57 | (வரி-அச்சிடு "பொய்மை")))
58 |
59 | (def demo-fns [demo-print-1 demo-print-2 demo-print-3
60 | demo-add-1 demo-add-2 demo-add-3 demo-add-4 மாதிரி-கூட்டு-5])
61 |
62 | (deftest a-test
63 | (testing "FIXME, I fail."
64 | (let [s "வணக்கம்"
65 | hello (fn []
66 | (str "Hello, and " s))
67 | ஒன்று 1
68 | இரண்டு 2
69 | v [ஒன்று இரண்டு "மூன்று"]]
70 | (is (= v [1 2 "மூன்று"]))
71 | (is (= (hello) "Hello, and வணக்கம்")))))
72 |
73 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 | All notable changes to this project will be documented in this file.
3 |
4 | ## [Unreleased][unreleased]
5 |
6 | ## [0.2.0] - 2016-07-30
7 | ### Added
8 | - Examples of Clojure in Spanish, Logo in Thamil
9 | ### Changed
10 | - Using reader conditionals instead of cljx (for compiling common Clojure code to Java and JS targets)
11 | ### Fixed
12 | - JS examples
13 |
14 | ## [0.1.2] - 2015-02-19
15 | ### Added
16 | - Generative testing for font conversion functions
17 | - Java examples using clj-thamil jar artifact
18 | - Doc for clj-thamil and Java examples
19 |
20 | ### Changed
21 | - Font conversion information represented as map instead of seq
22 | - தமிழ்->romanized transliteration for certain consonant clusters
23 |
24 | ### Fixed
25 | - Test file namespace name
26 | - Cljx configs for dirs for source and target for clj, cljs
27 |
28 | ## [0.1.1] - 2015-02-04
29 | ### Added
30 | - 'Translations' of Clojure core library form names via a couple of maps and handful of macros
31 | - Trie functions (create, get-in)
32 | - Function using a trie to extract/convert a string into its elements
33 | - Fns to split a string into a sequence of Thamil letters/phonemes
34 | - Fn to create a Thamil string from a sequence of phonemes
35 | - Sorting fns and comparators for single- and multi-letter Thamil strings
36 | - Seq fns generalized from string functions (index-of, prefix)
37 | - Function to adjust the cursor position in Thamil text
38 | - Functions written in Thamil to perform Thamil grammatical operations
39 | - Pluralize, add suffixes generally, add noun case suffixes
40 | - Letter frequency analysis and result output functions
41 | - Functions to convert between old Thamil font character sets and Unicode
42 | - Using cljx to be forward-compatible with compilation to JS via ClojureScript
43 | - Configs for deploying GPG-signed releases to Clojars
44 |
45 | [unreleased]: https://github.com/echeran/clj-thamil/compare/0.2.0...master
46 | [0.2.0]: https://github.com/echeran/clj-thamil/tree/0.2.0
47 | [0.1.2]: https://github.com/echeran/clj-thamil/tree/0.1.2
48 | [0.1.1]: https://github.com/echeran/clj-thamil/tree/0.1.1
49 |
--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
1 | (defproject clj-thamil "0.2.0"
2 | :description "A project encompassing various Thamil language-specific computing ideas"
3 | :url "https://github.com/echeran/clj-thamil"
4 | :license {:name "Eclipse Public License"
5 | :url "http://www.eclipse.org/legal/epl-v10.html"}
6 | :scm {:name "git"
7 | :url "https://github.com/echeran/clj-thamil"}
8 | :repositories [["releases" {:url "https://clojars.org/repo/"}]]
9 | :deploy-repositories [["clojars" {:creds :gpg}]]
10 | :pom-addition [:developers [:developer
11 | [:name "Elango Cheran"]
12 | [:url "http://www.elangocheran.com"]
13 | [:email "elango.cheran@gmail.com"]
14 | [:timezone "-8"]]]
15 |
16 | :dependencies [[org.clojure/clojure "1.8.0"]
17 | [org.clojure/data.csv "0.1.2"]
18 | [org.clojure/algo.generic "0.1.2"]
19 | [org.clojure/test.check "0.9.0"]
20 | [org.clojure/clojurescript "1.9.89"]]
21 |
22 | :jar-exclusions [#"\.cljx|\.swp|\.swo|\.DS_Store"]
23 |
24 | :aot [clj-thamil.main
25 | clj-thamil.java.api.format
26 | clj-thamil.format.analysis
27 | clj-thamil.format.convert]
28 |
29 | :main clj-thamil.main
30 |
31 | :lein-release {:deploy-via :shell
32 | :shell ["lein" "deploy"]}
33 |
34 | :profiles {:provided {:dependencies []}
35 | :dev {:plugins [[lein-cljsbuild "1.1.3"]]
36 | ;; :cljsbuild {:test-commands {"node" ["node" :node-runner "target/testable.js"]}
37 | ;; :builds [{:source-paths ["target/classes" "target/test-classes"]
38 | ;; :compiler {:output-to "target/testable.js"
39 | ;; :optimizations :advanced
40 | ;; :pretty-print true}}]}
41 |
42 | :cljsbuild {:builds {:app {:source-paths ["src"]
43 | :compiler {:output-to "resources/public/js/clj-thamil.js"
44 | :output-dir "resources/public/js/out"
45 | :externs []
46 | :optimizations :none
47 | :pretty-print true}}}
48 | :test-commands {
49 | ;; no cljs test configured yet
50 | }}}})
51 |
--------------------------------------------------------------------------------
/examples/js/test03.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
24 |
25 |
26 |
27 | | Unsorted தமிழ் words (input) |
28 | Sorted தமிழ் words (output) |
29 |
30 |
31 | |
32 |
33 | |
34 |
35 |
36 | |
37 |
38 |
39 |
40 |
43 |
44 |
67 |
68 |
69 |
70 |
--------------------------------------------------------------------------------
/examples/js/test02.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
23 |
24 |
25 |
26 | | English transliterated input |
27 | தமிழ் output |
28 |
29 |
30 | |
31 |
32 | |
33 |
34 |
35 | |
36 |
37 |
38 | |
39 | |
40 |
41 |
42 | |
43 |
44 |
45 |
46 |
48 |
49 |
82 |
83 |
84 |
85 |
--------------------------------------------------------------------------------
/examples/java/java-examples/src/main/java/clj-thamil/examples/java/WordSort01.java:
--------------------------------------------------------------------------------
1 | package clj_thamil.examples.java;
2 |
3 | import clojure.java.api.Clojure;
4 | import clojure.lang.IFn;
5 | import java.io.File;
6 | import java.io.FileNotFoundException;
7 | import java.io.FileOutputStream;
8 | import java.io.IOException;
9 | import java.io.PrintWriter;
10 | import java.io.UnsupportedEncodingException;
11 | import java.nio.charset.Charset;
12 | import java.util.ArrayList;
13 | import java.util.Arrays;
14 | import java.util.Collections;
15 | import java.util.Comparator;
16 | import java.util.List;
17 | import org.apache.commons.lang3.StringUtils;
18 |
19 | public class WordSort01 { // sorts sample Thamil words with clj-thamil.format/word-comp, looked up via the Clojure Java API
20 |     // Encodes s to UTF-8 bytes and decodes those bytes back into a String; not called from main.
21 |     public static String utf8String(String s) throws UnsupportedEncodingException {
22 |         byte[] array = s.getBytes("UTF-8");
23 |         return new String(array, Charset.forName("UTF-8"));
24 |     }
25 |
26 |     public static void main(String[] args) throws UnsupportedEncodingException,
27 |                                                   FileNotFoundException,
28 |                                                   IOException {
29 |         // (require 'clj-thamil.format)
30 |         IFn require = Clojure.var("clojure.core", "require");
31 |         require.invoke(Clojure.read("clj-thamil.format"));
32 |         // access clj-thamil.format/word-comp, a non-fn var
33 |         IFn wordCompVar = Clojure.var("clj-thamil.format", "word-comp");
34 |         IFn deref = Clojure.var("clojure.core", "deref");
35 |         @SuppressWarnings("unchecked") // invoke() returns Object; the comparator is only ever applied to Strings here
36 |         Comparator<String> wordComp = (Comparator<String>) deref.invoke(wordCompVar);
37 |         List<String> strs = Arrays.asList(
38 |
39 |                                   "மடம்",
40 |                                   "மட்டம்",
41 |                                   "மட்டும்",
42 |                                   "மடக்கு",
43 |                                   "முடக்கு",
44 |                                   "முடுக்கு",
45 |                                   "படம்",
46 |                                   "குடம்",
47 |                                   "தடம்",
48 |                                   "தடி",
49 |                                   "திட்டம்"
50 |
51 |                                   );
52 |         // sort a copy so that strs keeps the input order for the "Original list" output
53 |         List<String> strs2 = new ArrayList<String>(strs);
54 |         Collections.sort(strs2, wordComp);
55 |         String list1 = StringUtils.join(strs, "\n");
56 |         String list2 = StringUtils.join(strs2, "\n");
57 |
58 |         System.out.println("Original list of strings:");
59 |         System.out.println(list1);
60 |         System.out.println("Sorted list of strings:");
61 |         System.out.println(list2);
62 |
63 |         // String fileName = "out.txt";
64 |         // FileOutputStream fos = new FileOutputStream(new File(fileName));
65 |         // PrintWriter pw = new PrintWriter(fos);
66 |         // pw.println("Original list of strings:");
67 |         // pw.println(list1);
68 |         // pw.println("Sorted list of strings:");
69 |         // pw.println(list2);
70 |         // pw.close();
71 |         // fos.close();
72 |
73 |     }
74 | }
75 |
--------------------------------------------------------------------------------
/examples/java/java-examples/src/main/java/clj-thamil/examples/java/WordSort02.java:
--------------------------------------------------------------------------------
1 | package clj_thamil.examples.java;
2 |
3 | import clojure.java.api.Clojure;
4 | import clojure.lang.IFn;
5 | import java.io.File;
6 | import java.io.FileNotFoundException;
7 | import java.io.FileOutputStream;
8 | import java.io.IOException;
9 | import java.io.PrintWriter;
10 | import java.io.UnsupportedEncodingException;
11 | import java.nio.charset.Charset;
12 | import java.util.ArrayList;
13 | import java.util.Arrays;
14 | import java.util.Collections;
15 | import java.util.Comparator;
16 | import java.util.List;
17 | import org.apache.commons.lang3.StringUtils;
18 |
19 | import clj_thamil.java.api.format;
20 |
21 | public class WordSort02 { // sorts sample Thamil words with the comparator returned by clj_thamil.java.api.format.word_comp()
22 |     // Encodes s to UTF-8 bytes and decodes those bytes back into a String; not called from main.
23 |     public static String utf8String(String s) throws UnsupportedEncodingException {
24 |         byte[] array = s.getBytes("UTF-8");
25 |         return new String(array, Charset.forName("UTF-8"));
26 |     }
27 |
28 |     public static void main(String[] args) throws UnsupportedEncodingException,
29 |                                                   FileNotFoundException,
30 |                                                   IOException {
31 |         @SuppressWarnings("unchecked") // word_comp() is declared with the raw java.util.Comparator type
32 |         Comparator<String> wordComp = format.word_comp();
33 |         List<String> strs = Arrays.asList(
34 |
35 |                                   "மடம்",
36 |                                   "மட்டம்",
37 |                                   "மட்டும்",
38 |                                   "மடக்கு",
39 |                                   "முடக்கு",
40 |                                   "முடுக்கு",
41 |                                   "படம்",
42 |                                   "குடம்",
43 |                                   "தடம்",
44 |                                   "தடி",
45 |                                   "திட்டம்"
46 |
47 |                                   // "\u0bae\u0b9f\u0bae\u0bcd",
48 |                                   // "\u0bae\u0b9f\u0bcd\u0b9f\u0bae\u0bcd",
49 |                                   // "\u0bae\u0b9f\u0bcd\u0b9f\u0bc1\u0bae\u0bcd",
50 |                                   // "\u0bae\u0b9f\u0b95\u0bcd\u0b95\u0bc1",
51 |                                   // "\u0bae\u0bc1\u0b9f\u0b95\u0bcd\u0b95\u0bc1",
52 |                                   // "\u0bae\u0bc1\u0b9f\u0bc1\u0b95\u0bcd\u0b95\u0bc1",
53 |                                   // "\u0baa\u0b9f\u0bae\u0bcd",
54 |                                   // "\u0b95\u0bc1\u0b9f\u0bae\u0bcd",
55 |                                   // "\u0ba4\u0b9f\u0bae\u0bcd",
56 |                                   // "\u0ba4\u0b9f\u0bbf",
57 |                                   // "\u0ba4\u0bbf\u0b9f\u0bcd\u0b9f\u0bae\u0bcd"
58 |
59 |                                   );
60 |         // sort a copy so that strs keeps the input order for the "Original list" output
61 |         List<String> strs2 = new ArrayList<String>(strs);
62 |         Collections.sort(strs2, wordComp);
63 |         String list1 = StringUtils.join(strs, "\n");
64 |         String list2 = StringUtils.join(strs2, "\n");
65 |
66 |         System.out.println("Original list of strings:");
67 |         System.out.println(list1);
68 |         System.out.println("Sorted list of strings:");
69 |         System.out.println(list2);
70 |     }
71 | }
72 |
--------------------------------------------------------------------------------
/examples/java/java-examples/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 | clj-thamil
5 | java-examples
6 | jar
7 | 1.0
8 | java-examples
9 | https://github.com/echeran/clj-thamil-examples
10 |
11 |
12 | central
13 | https://repo1.maven.org/maven2/
14 |
15 | false
16 |
17 |
18 | true
19 |
20 |
21 |
22 | clojars
23 | https://clojars.org/repo/
24 |
25 | true
26 |
27 |
28 | true
29 |
30 |
31 |
32 | releases
33 | https://clojars.org/repo/
34 |
35 | true
36 |
37 |
38 | true
39 |
40 |
41 |
42 |
43 |
44 | junit
45 | junit
46 | 3.8.1
47 | test
48 |
49 |
50 | clj-thamil
51 | clj-thamil
52 | 0.2.0
53 |
54 |
55 | org.apache.commons
56 | commons-lang3
57 | 3.3.2
58 |
59 |
60 | org.apache.maven.plugins
61 | maven-compiler-plugin
62 | 3.2
63 | maven-plugin
64 |
65 |
66 |
67 |
68 |
69 | org.apache.maven.plugins
70 | maven-compiler-plugin
71 | 3.2
72 |
73 | 1.6
74 | 1.6
75 | ${project.build.sourceEncoding}
76 |
77 |
78 |
79 | org.apache.maven.plugins
80 | maven-shade-plugin
81 | 2.3
82 |
83 |
84 |
85 | package
86 |
87 | shade
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 | UTF-8
103 |
104 |
105 |
--------------------------------------------------------------------------------
/examples/clj/clj-español/src/clj_español/core.clj:
--------------------------------------------------------------------------------
1 | (ns clj-español.core
2 | (:require [clj-thamil.core :refer [translate-fns translate-forms]]))
3 |
4 | (def fns-map '{ ;; quoted map of Clojure name -> Spanish name; handed to translate-fns at the bottom of this file
5 | take toma
6 | drop baja
7 | inc carga ;; translated to "load" in english
8 | dec extracto ;; based solely on translation. need more info/context
9 | ;; to decide what connotations/denotations would be best
10 | range gama
11 | take-while toma-mientras
12 | drop-while baja-mientras
13 | interleave பின்னு ;; NOTE(review): பின்னு is Tamil script, not Spanish -- presumably a leftover placeholder; the translator
14 | ;; was not sure what interleave means or what purpose it serves, and thus could not decide how to label it
15 | ;; reduce reduce
16 | ;; reducer reductor
17 | map mapa
18 | hash-map hachís-mapa
19 | ;; vector vector
20 | list enumera
21 | set pone
22 | hash-set hachís-pone ;; could use fijo or colocar as "set"
23 | atom átomo
24 | agent agencia ;; or agente
25 | first primero
26 | second segundo
27 | last último
28 | butlast pero-último ;; not sure of what this should communicate.
29 | rest lodemás ;; should we separate the words into "lo demás"
30 | ;; or shorten it to "demás"? Could also use "el resto"
31 | next próximo ;; this is used in the present, but "siguiente" is used
32 | ;; in the past. Not sure which makes more sense.
33 | true cierto
34 | false falso
35 | print imprime
36 | println imprimeln ;; ln means "line" in English, and
37 | ;; line in Spanish is simply "linea" so
38 | ;; I thought it appropriate to keep it.
39 | filter forma
40 | remove quita
41 | keep guardar
42 | slurp ventosa;; if this should be a verb, use "sorber"
43 | spit escupe ;; could be "saliva" if it's not an action
44 | seq sec ;; short for "secuenciar"
45 | dorun hazcorrer ;; could also just use "haz" meaning "do"
46 | doall haztodo ;; literally means do it all
47 | str crd ;; short for "cuerda" which translates to string
48 | interpose interpone
49 | find encuentra
50 | get consigue
51 | apply aplica
52 | count cuenta
53 | every? cada?
54 | true? cierto?
55 | false? falso?
56 | concat social
57 | identity identidad
58 | reverse invierte
59 | some alguno
60 | flatten aplana
61 |
62 | boolean booleano ;; sounds like English but couldn't find
63 | ;; a more specific word
64 | })
65 |
66 | (def forms-map '{ ;; English macro / special-form name -> Spanish alias; passed to translate-forms at the bottom of this file
67 | if si
68 | when cuando
69 | if-not si-no
70 | when-not cuando-no
71 | ;; def def ;; short for "definir" = define
72 | ;; fn fn ;; short for "función"
73 | ;; defn defn ;; again, it still makes sense to keep the
74 | ;; English equivalents because romance languages
75 | ;; can sometimes have the same abbreviations
76 | let deja
77 | and y
78 | or o
79 | not no
80 | else más ;; or "otro". NOTE(review): clojure.core has no `else` form (cond uses the keyword :else) -- confirm this entry is wanted
81 | loop darvuelta ;; actually two words "dar vuelta"
82 | doseq hazsec
83 | for para ;; could be por, but I think para fits the function better
84 | cond dependela ;; means "depending on the ..." --> dependiendo de la
85 | do haz
86 |
87 | ;; clojure.test
88 | deftest def-prueba ;; not sure if I should hyphenate all the
89 | ;; double words, or keep as a compound?
90 | testing probando
91 | is es
92 | are son
93 | })
94 |
95 | ;; do the actual "translation" for bindings, fns, and any other value
96 | (translate-fns fns-map)
97 |
98 | ;; do the actual "translation" for macros and special forms
99 | (translate-forms forms-map)
100 |
--------------------------------------------------------------------------------
/examples/clj/clj-spanish/src/clj_spanish/core.clj:
--------------------------------------------------------------------------------
1 | (ns clj-spanish.core
2 | (:require [clj-thamil.core :refer [translate-fns translate-forms]]))
3 |
4 | (def fns-map '{ ;; English clojure.core name -> Spanish alias; passed to translate-fns at the bottom of this file
5 | take toma
6 | drop baja
7 | inc carga ;; translated to "load" in English
8 | dec extracto ;; based solely on translation. need more info/context
9 | ;; to decide what connotations/denotations would be best
10 | range gama
11 | take-while toma-mientras
12 | drop-while baja-mientras
13 | interleave பின்னு ;; NOTE(review): still the Tamil name, not Spanish. Translator's note: not sure what
14 | ;; `interleave` does, and thus couldn't decide how to label it
15 | ;; reduce reduce
16 | ;; reducer reductor
17 | map mapa
18 | hash-map hachís-mapa
19 | ;; vector vector
20 | list enumera
21 | set pone
22 | hash-set hachís-pone ;; could use fijo or colocar as "set"
23 | atom átomo
24 | agent agencia ;; or agente
25 | first primero
26 | second segundo
27 | last último
28 | butlast pero-último ;; not sure of what this should communicate.
29 | rest lodemás ;; should we separate the words into "lo demás"
30 | ;; or shorten it to "demás"? Could also use "el resto"
31 | next próximo ;; this is used in the present, but "siguiente" is used
32 | ;; in the past. Not sure which makes more sense.
33 | true cierto
34 | false falso
35 | print imprime
36 | println imprimeln ;; ln means "line" in English, and
37 | ;; line in Spanish is simply "línea" so
38 | ;; I thought it appropriate to keep it.
39 | filter forma
40 | remove quita
41 | keep guardar
42 | slurp ventosa;; if this should be a verb, use "sorber"
43 | spit escupe ;; could be "saliva" if it's not an action
44 | seq sec ;; short for "secuenciar"
45 | dorun hazcorrer ;; could also just use "haz" meaning "do"
46 | doall haztodo ;; literally means do it all
47 | str crd ;; short for "cuerda" which translates to string
48 | interpose interpone
49 | find encuentra
50 | get consigue
51 | apply aplica
52 | count cuenta
53 | every? cada?
54 | true? cierto?
55 | false? falso?
56 | concat social
57 | identity identidad
58 | reverse invierte
59 | some alguno
60 | flatten aplana
61 |
62 | boolean booleano ;; sounds like English but couldn't find
63 | ;; a more specific word
64 | })
65 |
66 | (def forms-map '{ ;; English macro / special-form name -> Spanish alias; passed to translate-forms at the bottom of this file
67 | if si
68 | when cuando
69 | if-not si-no
70 | when-not cuando-no
71 | ;; def def ;; short for "definir" = define
72 | ;; fn fn ;; short for "función"
73 | ;; defn defn ;; again, it still makes sense to keep the
74 | ;; English equivalents because romance languages
75 | ;; can sometimes have the same abbreviations
76 | let deja
77 | and y
78 | or o
79 | not no
80 | else más ;; or "otro". NOTE(review): clojure.core has no `else` form (cond uses the keyword :else) -- confirm this entry is wanted
81 | loop darvuelta ;; actually two words "dar vuelta"
82 | doseq hazsec
83 | for para ;; could be por, but I think para fits the function better
84 | cond dependela ;; means "depending on the ..." --> dependiendo de la
85 | do haz
86 |
87 | ;; clojure.test
88 | deftest def-prueba ;; not sure if I should hyphenate all the
89 | ;; double words, or keep as a compound?
90 | testing probando
91 | is es
92 | are son
93 | })
94 |
95 | ;; do the actual "translation" for bindings, fns, and any other value
96 | (translate-fns fns-map)
97 |
98 | ;; do the actual "translation" for macros and special forms
99 | (translate-forms forms-map)
100 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # clj-thamil
2 |
3 | clj-thamil is a Clojure library designed to be a multi-purpose library for Thamil
4 | computing.
5 |
6 | It can be used for natural language processing, designing input
7 | methods, the UI for text editing, etc.
8 |
9 | It can also be used as a basis for programming in Clojure in the
10 | Thamil language.
11 |
12 | Currently, it can support the following:
13 | * programming in Clojure (programming language) in Thamil (natural language)
14 | * natural language processing for Thamil language text
15 | * split a string into Thamil letters (not characters) and phonemes
16 | * combine a sequence of Thamil phonemes back into a proper Thamil string
17 | * sort letters, words, etc. by Thamil alphabetical order
18 | * convert a string between Thamil (Unicode format) and:
19 | * English-transliterated formats
20 | * TAB format
21 | * TSCII format
22 | * Bamini format
23 | * Webulagam format
24 | * basic grammar functions - pluralize, add suffixes, and add noun
25 | case suffixes
26 | * perform a letter frequency analysis on input Thamil text
27 |
28 | For examples of programming in Thamil (natural language), see:
29 | * `test/clj_thamil/core_test.cljc` - a gradual
30 | replacement of English with Thamil
31 | * `test/clj_thamil/demo/trans_demo_01.cljc` - just about entirely
32 | in Thamil. Demonstrates squaring numbers differently.
33 | * `src/clj_thamil/மொழியியல்.cljc` - just about
34 | entirely in Thamil. Contains functions for basic grammar in Thamil
35 | (making plurals, adding suffixes, adding noun case suffixes)
36 |
37 | ## Building
38 |
39 | For both Clojure and ClojureScript, compiling, testing, and starting a
40 | REPL can be done with the usual Leiningen commands `lein compile`,
41 | `lein test`, and `lein repl`.
42 |
43 | Compile the source into an executable JAR file (runnable on the JVM,
44 | based on Clojure sources) using the command `lein uberjar`. This is
45 | all you need to do to run one of the standalone processes or
46 | create a jar artifact. `lein install` will install the artifact and
47 | pom.xml into your local Maven cache.
48 |
49 | ## Usage
50 |
51 | ### Examples
52 |
53 | #### Java Examples
54 |
55 | The Java example code is in the Maven project in this repository at
56 | [`examples/java/java-examples`](examples/java/java-examples/README.md).
57 | See that page for instructions on building and running.
58 |
59 | #### JavaScript Examples
60 |
61 | The Clojure/-Script code in clj-thamil must be compiled using the
62 | ClojureScript compiler into JavaScript before it can be used in JS
63 | runtimes (e.g. webpages). In the JS examples directory
64 | [`examples/js`](examples/js), first run `sh setup.sh`. Then load the
65 | HTML files located in the same directory from within a browser that
66 | has JS enabled.
67 |
68 | * test01.html - transliteration of English into Thamil on a JS popup box
69 | * test02.html - transliterates English into Thamil and splits Thamil into letters
70 | * test03.html - sorting Thamil words by Thamil alphabetical order
71 |
72 | ### Command-line processes
73 |
74 | #### Letter frequencies
75 |
76 | The frequency analysis program (`freqs`) can be
77 | run by
78 | ```
79 | cat input | lein run freqs > output
80 | ```
81 |
82 | The frequency analysis program
83 | takes input from the standard input stream and outputs to the standard
84 | output stream.
85 |
86 | The code behind the letter frequency analysis can be found in the namespace
87 | `clj-thamil.format.analysis`.
88 |
89 | #### Phonemes
90 |
91 | The text to phoneme converter program (`phonemes`) can be run by
92 | ```
93 | cat input | lein run phonemes > output
94 | ```
95 |
96 | The phonemes program
97 | takes input from the standard input stream and outputs to the standard
98 | output stream.
99 |
100 | The code behind the phoneme conversion can be found in the namespaces `clj-thamil.subprograms` and `clj-thamil.format`.
101 |
102 | ## Editing
103 |
104 | For programming in Thamil, if using a computer running Mac OS X, use
105 | the Aquamacs program (a Mac OS X-friendly version of Emacs) to ensure that support for Thamil letters works
106 | correctly. If installing the `clojure-mode` package for Clojure
107 | support in Aquamacs, find the `clojure-mode.el` file in your Aquamacs
108 | MELPA/ELPA repository, and replace it with the file
109 | `emacs/clojure-mode.el` in this repository. Then load the newly-saved
110 | `clojure-mode.el` file in the Aquamacs package repository into
111 | Aquamacs, and run the command `M-x emacs-lisp-byte-compile-and-load`.
112 |
113 | ## Mailing List
114 |
115 | Join the [clj-thamil mailing list](https://groups.google.com/forum/#!forum/clj-thamil) to post questions and receive announcements.
116 |
117 | ## License
118 |
119 | Distributed under the Eclipse Public License, the same as Clojure.
120 |
--------------------------------------------------------------------------------
/src/clj_thamil/format/analysis.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.format.analysis
2 | (:require #?(:clj [clojure.java.io :as jio])
3 | [clojure.string :as string]
4 | [clj-thamil.format :as fmt])
5 | #?(:clj (:gen-class)))
6 |
7 | (def letters-plus-grantha ;; letter grid used by -main: the rows of fmt/letters, then five more consonant rows (13 entries each), then a final one-entry row
8 | (concat fmt/letters
9 | [["ஜ்" "ஜ" "ஜா" "ஜி" "ஜீ" "ஜு" "ஜூ" "ஜெ" "ஜே" "ஜை" "ஜொ" "ஜோ" "ஜௌ"]
10 | ["ஷ்" "ஷ" "ஷா" "ஷி" "ஷீ" "ஷு" "ஷூ" "ஷெ" "ஷே" "ஷை" "ஷொ" "ஷோ" "ஷௌ"]
11 | ["ஸ்" "ஸ" "ஸா" "ஸி" "ஸீ" "ஸு" "ஸூ" "ஸெ" "ஸே" "ஸை" "ஸொ" "ஸோ" "ஸௌ"]
12 | ["ஹ்" "ஹ" "ஹா" "ஹி" "ஹீ" "ஹு" "ஹூ" "ஹெ" "ஹே" "ஹை" "ஹொ" "ஹோ" "ஹௌ"]
13 | ["க்ஷ்" "க்ஷ" "க்ஷா" "க்ஷி" "க்ஷீ" "க்ஷு" "க்ஷூ" "க்ஷெ" "க்ஷே" "க்ஷை" "க்ஷொ" "க்ஷோ" "க்ஷௌ"]
14 | ["ஶ்ரீ"]])) ;; the short last row is why the printing code filters on rows of exactly 13 entries
15 |
16 | (defn trie-elem-freqs
17 |   "Splits the input string `s` into elements with `fmt/str->elems` using `trie`, and returns a frequency map (element -> count) of only those elements for which `fmt/in-trie?` is truthy."
18 |   [trie s]
19 |   (->> (fmt/str->elems trie s)
20 |        ;; discard elements that the trie does not contain
21 |        (keep (fn [elem]
22 |                (when (fmt/in-trie? trie elem)
23 |                  elem)))
24 |        frequencies))
25 |
26 | (defn trie-elem-string-seq-freqs
27 |   "Given a trie and a sequence of input strings, computes `trie-elem-freqs` for each string and adds the per-string counts together into one frequency map. Returns nil when `strs` is empty."
28 |   [trie strs]
29 |   (->> strs (map #(trie-elem-freqs trie %)) (apply merge-with +)))
30 |
31 | (defn freq-grid
32 |   "Looks up every letter of `letter-seq` (a flattened letter grid) in `freq-map`, using 0 where the lookup yields nil, and returns the counts regrouped into rows of 13 (the last row may be shorter)."
33 |   [letter-seq freq-map]
34 |   (->> letter-seq
35 |        (map (fn [letter] (or (get freq-map letter) 0)))
36 |        (partition-all 13)))
37 |
38 | ;;
39 | ;; printing functions
40 | ;;
41 |
42 | #?(:clj
43 |    (defn print-freq-grid
44 |      "Prints `freq-grid` and then `letter-grid` to standard output, one row per line with cells separated by tabs; each grid is preceded by a heading line. Returns nil."
45 |      [freq-grid letter-grid]
46 |      (let [emit-rows (partial run! #(println (string/join "\t" %)))]
47 |        (println "the letter grid's frequencies:")
48 |        (println "")
49 |        (emit-rows freq-grid)
50 |        (println "")
51 |        (println "the letter grid used:")
52 |        (emit-rows letter-grid))))
53 | #?(:clj
54 |    (defn print-consonant-row-sums
55 |      "Given a frequency grid in the shape of a letter grid, and the letter grid itself, prints one line per row after the first, pairing the first letter of that letter-grid row with the sum of the matching frequency row. Returns nil."
56 |      [freq-grid letter-grid]
57 |      ;; `rest` drops the first row of each grid (the vowel row)
58 |      (let [row-names (->> letter-grid (map first) rest)
59 |            row-sums (->> freq-grid (map #(apply + %)) rest)]
60 |        (doseq [[rn rs] (map vector row-names row-sums)]
61 |          (println "For consonant:" rn ", there are" rs "instances of it in a C or C+V letter")))))
62 |
63 | #?(:clj
64 |    (defn print-vowel-col-sums
65 |      "Given a frequency grid in the shape of a letter grid, and the letter grid itself, prints one line per column with the sum of that column over the full-width (13-entry) rows after the first row. The first row of `letter-grid` supplies the column names and is left out of the sums, so pure vowels are not counted. Prints nothing when there are no full-width rows. Returns nil."
66 |      [freq-grid letter-grid]
67 |      (let [;; skip the first (vowel) row and any row that is not exactly 13 wide
68 |            full-rows (filter #(= 13 (count %)) (rest freq-grid))
69 |            col-names (first letter-grid)
70 |            ;; transpose rows into columns; guard the empty case because
71 |            ;; (apply map list ()) is (map list), which returns a transducer
72 |            ;; instead of a sequence and would make the summing step throw
73 |            freq-cols (if (seq full-rows)
74 |                        (apply map list full-rows)
75 |                        [])
76 |            col-sums (map #(apply + %) freq-cols)]
77 |        (doseq [[cn cs] (map vector col-names col-sums)]
78 |          (println "For vowel/ஃ:" cn ", there are" cs "instances of it in a C or C+V letter")))))
79 |
80 | ;;
81 | ;; umbrella printing fn
82 | ;;
83 |
84 | #?(:clj
85 |    (defn print-letter-grid-stats-on-strs
86 |      "For a given letter grid and a sequence of strings, counts how often each grid letter occurs across the strings and prints the per-column sums, the per-row sums, and both grids. The flattened counts are regrouped in rows of 13 by `freq-grid`, so every row of `letter-grid` except possibly the last is expected to have 13 entries."
87 |      [letter-grid strs]
88 |      (let [letter-seq (flatten letter-grid)
89 |            letter-trie (fmt/make-trie letter-seq)
90 |            ;; total count of every grid letter across all input strings
91 |            fmap (trie-elem-string-seq-freqs letter-trie strs)
92 |            fgrid (freq-grid letter-seq fmap)]
93 |        (print-vowel-col-sums fgrid letter-grid)
94 |        (println "")
95 |        (print-consonant-row-sums fgrid letter-grid)
96 |        (println "")
97 |        (print-freq-grid fgrid letter-grid))))
98 |
99 | ;;
100 | ;; main fn
101 | ;;
102 |
103 | #?(:clj
104 |    (defn -main
105 |      "Reads lines from standard input and prints the letter-frequency statistics for them using the `letters-plus-grantha` grid. `args` is ignored."
106 |      [& args]
107 |      (with-open [in (jio/reader *in*)]
108 |        (->> (line-seq in)
109 |             (print-letter-grid-stats-on-strs letters-plus-grantha)))))
110 |
--------------------------------------------------------------------------------
/test/clj_thamil/மொழியியல்_test.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.மொழியியல்-test
2 | (:use clojure.test
3 | clj-thamil.மொழியியல்
4 | clj-thamil.core))
5 |
6 | (வரையறு-பரிசோதனை அசை-உயிரெழுத்து-பரிசோதனை
7 | (பரிசோதிக்கும் "குறில், நெடில் செயல்கூறுகள்"
8 | (பரிசோதிக்கும் "குறில் செயல்கூறுகள்"
9 | (ஆகும் (உண்மையா? (குறிலா? "அ")))
10 | (ஆகும் (உண்மையா? (குறிலா? "இ")))
11 | (ஆகும் (உண்மையா? (குறிலா? "உ")))
12 | (ஆகும் (உண்மையா? (குறிலா? "எ")))
13 | (ஆகும் (உண்மையா? (குறிலா? "ஒ")))
14 | (ஆகும் (பொய்மையா? (குறிலா? "ஆ")))
15 | (ஆகும் (பொய்மையா? (குறிலா? "ஈ")))
16 | (ஆகும் (பொய்மையா? (குறிலா? "ஊ")))
17 | (ஆகும் (பொய்மையா? (குறிலா? "ஏ")))
18 | (ஆகும் (பொய்மையா? (குறிலா? "ஓ")))
19 | (ஆகும் (பொய்மையா? (குறிலா? "ஐ")))
20 | (ஆகும் (பொய்மையா? (குறிலா? "ஔ")))
21 | (ஆகும் (பொய்மையா? (குறிலா? nil)))
22 | (ஆகும் (பொய்மையா? (குறிலா? ""))))
23 | (பரிசோதிக்கும் "நெடில் செயல்கூறுகள்"
24 | (ஆகும் (பொய்மையா? (நெடிலா? "அ")))
25 | (ஆகும் (பொய்மையா? (நெடிலா? "இ")))
26 | (ஆகும் (பொய்மையா? (நெடிலா? "உ")))
27 | (ஆகும் (பொய்மையா? (நெடிலா? "எ")))
28 | (ஆகும் (பொய்மையா? (நெடிலா? "ஒ")))
29 | (ஆகும் (உண்மையா? (நெடிலா? "ஆ")))
30 | (ஆகும் (உண்மையா? (நெடிலா? "ஈ")))
31 | (ஆகும் (உண்மையா? (நெடிலா? "ஊ")))
32 | (ஆகும் (உண்மையா? (நெடிலா? "ஏ")))
33 | (ஆகும் (உண்மையா? (நெடிலா? "ஓ")))
34 | (ஆகும் (பொய்மையா? (நெடிலா? "ஐ")))
35 | (ஆகும் (பொய்மையா? (நெடிலா? "ஔ")))
36 | (ஆகும் (பொய்மையா? (நெடிலா? nil)))
37 | (ஆகும் (பொய்மையா? (நெடிலா? ""))))))
38 |
39 | (வரையறு-பரிசோதனை பன்மை-பரிசோதனை
40 | (பரிசோதிக்கும் "பன்மை"
41 | (ஆகும் (= "கற்கள்" (பன்மை "கல்")))
42 | (ஆகும் (= "முட்கள்" (பன்மை "முள்")))
43 | (ஆகும் (= "பற்கள்" (பன்மை "பல்")))
44 | (ஆகும் (= "தீக்கள்" (பன்மை "தீ")))
45 | (ஆகும் (= "பூக்கள்" (பன்மை "பூ")))
46 | (ஆகும் (= "பசுக்கள்" (பன்மை "பசு")))
47 | (ஆகும் (= "காடுகள்" (பன்மை "காடு")))
48 | (ஆகும் (= "மரங்கள்" (பன்மை "மரம்")))
49 | (ஆகும் (= "நாள்கள்" (பன்மை "நாள்")))))
50 |
51 | (வரையறு-பரிசோதனை சந்தி-பொது-விதி-பரிசோதனை
52 | (பரிசோதிக்கும் "சந்தி பொது விதிகள்"
53 | (ஆகும் (= "தமிழ்நாடு" (சந்தி "தமிழ்" "நாடு")))
54 | (ஆகும் (= "தமிழீழம்" (சந்தி "தமிழ்" "ஈழம்")))
55 | (ஆகும் (= "செய்யளம்" (சந்தி "செய்" "அளம்")))
56 | (ஆகும் (= "கல்லாறு" (சந்தி "கல்" "ஆறு")))
57 | (ஆகும் (= "தேயிலை" (சந்தி "தே" "இலை")))
58 | (ஆகும் (= "மலையகம்" (சந்தி "மலை" "அகம்")))
59 | (ஆகும் (= "குடியுரிமை" (சந்தி "குடி" "உரிமை")))
60 | (ஆகும் (= "கையோடு" (சந்தி "கை" "ஓடு")))
61 | (ஆகும் (= "வேருடன்" (சந்தி "வேர்" "உடன்")))
62 | (ஆகும் (= "புழுவுக்கு" (சந்தி "புழு" "உக்கு")))
63 | (ஆகும் (= "புல்லை" (சந்தி "புல்" "ஐ")))
64 | (ஆகும் (= "பிடிக்கும்" (சந்தி "பிடி" "க்கும்")))
65 | (ஆகும் (= "பூவெல்லாம்" (சந்தி "பூ" "எல்லாம்")))
66 | (ஆகும் (= "சிலம்பாட்டம்" (சந்தி "சிலம்பு" "ஆட்டம்")))
67 | (ஆகும் (= "தூண்கள்" (சந்தி "தூண்" "கள்")))))
68 |
69 | (வரையறு-பரிசோதனை வேற்றுமை-பரிசோதனை
70 | (பரிசோதிக்கும் "வேற்றுமை"
71 | (ஆகும் (= "மரத்தை" (வேற்றுமை "மரம்" "ஐ")))
72 | (ஆகும் (= "மரத்துக்கு" (வேற்றுமை "மரம்" "உக்கு")))
73 | (ஆகும் (= "மரத்தில்" (வேற்றுமை "மரம்" "இல்")))
74 | (ஆகும் (= "மரத்தால்" (வேற்றுமை "மரம்" "ஆல்")))
75 | (ஆகும் (= "கெடுவை" (வேற்றுமை "கெடு" "ஐ")))
76 | (ஆகும் (= "கெடுவுக்கு" (வேற்றுமை "கெடு" "உக்கு")))
77 | (ஆகும் (= "கெடுவில்" (வேற்றுமை "கெடு" "இல்")))
78 | (ஆகும் (= "கெடுவால்" (வேற்றுமை "கெடு" "ஆல்")))
79 | (ஆகும் (= "காட்டை" (வேற்றுமை "காடு" "ஐ")))
80 | (ஆகும் (= "காட்டுக்கு" (வேற்றுமை "காடு" "உக்கு")))
81 | (ஆகும் (= "காட்டில்" (வேற்றுமை "காடு" "இல்")))
82 | (ஆகும் (= "காட்டால்" (வேற்றுமை "காடு" "ஆல்")))
83 | (ஆகும் (= "பறுவை" (வேற்றுமை "பறு" "ஐ")))
84 | (ஆகும் (= "பறுவுக்கு" (வேற்றுமை "பறு" "உக்கு")))
85 | (ஆகும் (= "பறுவில்" (வேற்றுமை "பறு" "இல்")))
86 | (ஆகும் (= "பறுவால்" (வேற்றுமை "பறு" "ஆல்")))
87 | (ஆகும் (= "கயிற்றை" (வேற்றுமை "கயிறு" "ஐ")))
88 | (ஆகும் (= "கயிற்றுக்கு" (வேற்றுமை "கயிறு" "உக்கு")))
89 | (ஆகும் (= "கயிற்றில்" (வேற்றுமை "கயிறு" "இல்")))
90 | (ஆகும் (= "கயிற்றால்" (வேற்றுமை "கயிறு" "ஆல்")))
91 | (ஆகும் (= "எழுதுகோலை" (வேற்றுமை "எழுதுகோல்" "ஐ")))
92 | (ஆகும் (= "எழுதுகோலுக்கு" (வேற்றுமை "எழுதுகோல்" "உக்கு")))
93 | (ஆகும் (= "எழுதுகோலில்" (வேற்றுமை "எழுதுகோல்" "இல்")))
94 | (ஆகும் (= "எழுதுகோலால்" (வேற்றுமை "எழுதுகோல்" "ஆல்")))
95 | (பரிசோதிக்கும் "4-ஆம் வேற்றுமை - (உ)க்கு"
96 | (ஆகும் (= "தீயை" (வேற்றுமை "தீ" "ஐ")))
97 | (ஆகும் (= "தீக்கு" (வேற்றுமை "தீ" "உக்கு")))
98 | (ஆகும் (= "தீயில்" (வேற்றுமை "தீ" "இல்")))
99 | (ஆகும் (= "தீயால்" (வேற்றுமை "தீ" "ஆல்")))
100 | (ஆகும் (= "காயை" (வேற்றுமை "காய்" "ஐ")))
101 | (ஆகும் (= "காய்க்கு" (வேற்றுமை "காய்" "உக்கு")))
102 | (ஆகும் (= "பொய்யை" (வேற்றுமை "பொய்" "ஐ")))
103 | (ஆகும் (= "தொலைபேசியை" (வேற்றுமை "தொலைபேசி" "ஐ")))
104 | (ஆகும் (= "தொலைபேசிக்கு" (வேற்றுமை "தொலைபேசி" "உக்கு")))
105 | (ஆகும் (= "தேனீயை" (வேற்றுமை "தேனீ" "ஐ")))
106 | (ஆகும் (= "தேனீக்கு" (வேற்றுமை "தேனீ" "உக்கு"))))))
107 |
--------------------------------------------------------------------------------
/src/clj_thamil/core.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.core)
2 |
3 | (defmacro translate-fn
4 |   "Defines `new-name` in the current namespace as a var bound to the value of `old-name`, e.g. (translate-fn inc ஏற்று). Both arguments are given as bare (unquoted) symbols. The operands of `def` were previously swapped, which tried to redefine the old name from the not-yet-defined new one."
5 |   [old-name new-name]
6 |   `(def ~new-name ~old-name))
7 | ;; Defines the symbol named by `new-name` as a var bound to the value of the symbol named by `old-name`. Both arguments arrive as quoted symbols (translate-fns emits them that way); `eval` at macro-expansion time strips the quote so `def` receives bare symbols.
8 | (defmacro translate-fn-symbol
9 | [old-name new-name]
10 | `(def ~(eval new-name) ~(eval old-name)))
11 |
12 | (defmacro translate-fns
13 |   "Takes a symbol that names a map of {old-symbol new-symbol}, evaluates it at macro-expansion time, and expands into a `do` holding one `translate-fn-symbol` call per map entry."
14 |   [symb-map]
15 |   (let [entry->call (fn [[old-sym new-sym]]
16 |                       `(translate-fn-symbol '~old-sym '~new-sym))]
17 |     `(do ~@(map entry->call (eval symb-map)))))
18 |
19 | ;; info on macro-writing macros based on info at
20 | ;; http://amalloy.hubpages.com/hub/Clojure-macro-writing-macros
21 |
22 | (defmacro translate-form
23 | "Does the effective translation of a special form or macro from its old name to its new name. In other words, generalizes the 'manual' process of defining something like:
24 | (defmacro எனில்
25 | [& body]
26 | `(if ~@body))"
27 | [old-name new-name]
28 | `(defmacro ~new-name ;; outer syntax-quote: builds the defmacro form for the new name
29 | [~'& body#] ;; ~'& emits a bare & (syntax-quote would otherwise namespace-qualify it); body# becomes a generated symbol
30 | `(~'~old-name ~@body#))) ;; inner syntax-quote: the generated macro expands to the old form applied to its arguments; ~'~old-name inserts the old symbol unqualified
31 |
32 | ;; not sure if/how to shorten செயல்கூறு, வரையறு-செயல்கூறு, வைத்துக்கொள்
33 |
34 | (defmacro translate-form-symbol
35 | "Does the effective translation of a special form or macro from its old name to its new name, with the names given as symbols. Helper macro for translate-forms macro"
36 | [old-name new-name]
37 | `(defmacro ~(eval new-name) ;; the arguments arrive as quoted symbols; eval at expansion time strips the quote
38 | [~'& body#] ;; ~'& emits a bare & (syntax-quote would otherwise namespace-qualify it)
39 | `(~'~(eval old-name) ~@body#))) ;; the generated macro expands to the old form, inserted unqualified, applied to its arguments
40 |
41 | (defmacro translate-forms
42 |   "Takes a symbol that names a map of {old-symbol new-symbol}, evaluates it at macro-expansion time, and expands into a `do` holding one `translate-form-symbol` call per entry, so that each new symbol (val) becomes a macro expanding to the form of the old symbol (key)."
43 |   [symb-map]
44 |   (let [entry->call (fn [[old-sym new-sym]]
45 |                       `(translate-form-symbol '~old-sym '~new-sym))
46 |         calls (map entry->call (eval symb-map))]
47 |     `(do ~@calls)))
48 |
49 |
50 | (def fns-map '{
51 | take எடு
52 | drop விடு
53 | ;; inc ஏறுமானம்
54 | inc ஏற்று
55 | ;; dec இறங்குமானம்
56 | dec இறக்கு
57 | range வீச்சு
58 | take-while எடு-என்னும்வரை
59 | drop-while விடு-என்னும்வரை
60 | interleave பின்னு
61 | reduce இறுக்கு
62 | ;; reducer இறுக்குவர் ;; ??
63 | map விவரி
64 | hash-map புலவெண்-விவரணையாக்கம்
65 | ;; vector காவி ;; ??
66 | ;; vector நெறியம் ;; ??
67 | list பட்டியல்
68 | set அமைவு
69 | hash-set புலவெண்-அமைவு
70 | atom அணு
71 | agent முகவர்
72 | first முதல்
73 | second இரண்டாம்
74 | last கடைசி
75 | butlast கடைசியின்றி
76 | rest மீதி
77 | next அடுத்த
78 | true வாய்மை ;; should we use வாய்மை, மெய்மை, or உண்மை ? i am
79 | ;; thinking of using வாய்மை or மெய்மை so as to leave உண்மை to continue to
80 | ;; be used in more casual / less formal situations
81 | false பொய்மை
82 | print அச்சிடு
83 | println வரி-அச்சிடு
84 | filter வடி
85 | remove அகற்று
86 | keep கொள்
87 | slurp உறிஞ்சு;; could be சப்பு
88 | spit ஊற்று ;; could be துப்பு
89 | seq வரிசை
90 | dorun செய்யோட்டம்
91 | doall செய்யெல்லாம்
92 | str தொடை
93 | interpose இடைபொருத்து
94 | find கண்டுபிடி
95 | get பெறு
96 | apply செயல்படுத்து
97 | count எண்ணு
98 | every? ஒவ்வொன்றுமா?
99 | true? உண்மையா?
100 | false? பொய்மையா?
101 | concat தொடு
102 | identity அடையாளம்
103 | reverse புரட்டு
104 | some எதாவது
105 | flatten தட்டையாக்கு
106 |
107 | boolean பூலியன்
108 | })
109 |
110 | (def forms-map '{
111 | if எனில்
112 | when என்னும்போது
113 | if-not இல்லெனில்
114 | when-not இல்லென்னும்-போது
115 | def வரையறு
116 | fn செயல்கூறு
117 | defn வரையறு-செயல்கூறு
118 | let வைத்துக்கொள் ;; maybe just கொள்
119 | and மற்றும்
120 | or அல்லது
121 | not அன்று
122 | ;; else அன்றி ?
123 | loop சுற்று
124 | doseq செய்வரிசை
125 | ;; for ஒவ்வொன்றுக்கும்
126 | for ஒன்றொன்றுக்கு
127 | cond பொறுத்து
128 | do செய்
129 |
130 | ;; clojure.test
131 | deftest வரையறு-பரிசோதனை
132 | testing பரிசோதிக்கும்
133 | is ஆகும்
134 | are பல-ஆகும்
135 | })
136 |
137 | ;; do the actual "translation" for bindings, fns, and any other value
138 | (translate-fns fns-map)
139 |
140 | ;; do the actual "translation" for macros and special forms
141 | (translate-forms forms-map)
142 |
--------------------------------------------------------------------------------
/test/clj_thamil/format/convert_test.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.format.convert-test
2 | (:require [clojure.test.check :as sc]
3 | [clojure.test.check.generators :as gen]
4 | [clojure.test.check.properties :as prop :include-macros true]
5 | [clojure.string :as string]
6 | [clj-thamil.format :as fmt]
7 | [clj-thamil.format.convert :as cvt]
8 | [clj-thamil.மொழியியல் :as மொ])
9 | (:use clojure.test
10 | clj-thamil.format.convert))
11 |
12 | (def QCHK-SIZE 100)
13 |
14 | (def A_LOT 100)
15 |
16 | (deftest conversion-test
17 | (testing "romanized -> தமிழ்"
18 | (is (= "தமிழ்" (romanized->தமிழ் "thamiz")))
19 | (is (= "தமிழ்" (romanized->தமிழ் "thamizh")))
20 | (is (= "நீர்" (romanized->தமிழ் "n-iir")))
21 | (is (= "பஃறுளி" (romanized->தமிழ் "paqRuLi")))
22 | (is (= "சின்ன" (romanized->தமிழ் "chinna") (romanized->தமிழ் "sinna")))
23 | (is (= "விகடன்" (romanized->தமிழ் "vikatan") (romanized->தமிழ் "vikadan")))
24 | (is (= "சென்றேன் வென்றேன்" (romanized->தமிழ் "senreen venreen")))
25 | (is (= "வந்தேன்" (romanized->தமிழ் "vantheen")))
26 | (is (= "பாட்டு பாடு" (romanized->தமிழ் "paattu paadu"))))
27 | (testing "தமிழ் -> romanized; translation map inversion"
28 | (is (= "thamizh" (தமிழ்->romanized "தமிழ்")))
29 | (is (not= "thamiz" (தமிழ்->romanized "தமிழ்")))
30 | (is (= "niir" (தமிழ்->romanized "நீர்")))
31 | (is (not= "neer" (தமிழ்->romanized "நீர்")))
32 | (is (= "paambu" (தமிழ்->romanized "பாம்பு")))
33 | (is (not= "paampu" (தமிழ்->romanized "பாம்பு")))
34 | (is (not= "anpu" (தமிழ்->romanized "அன்பு")))
35 | (is (= "anbu" (தமிழ்->romanized "அன்பு")))
36 | (is (not= "panpu" (தமிழ்->romanized "பண்பு")))
37 | (is (= "panbu" (தமிழ்->romanized "பண்பு")))))
38 |
39 | (deftest double-check-test
40 |   (testing "from the test.check / double-check Readme"
41 |     ;; smoke test of the property-testing setup itself: sorting is idempotent
42 |     (let [sort-idempotent (prop/for-all [v (gen/vector gen/int)] (= (sort v) (sort (sort v))))]
43 |       (is (:result (sc/quick-check QCHK-SIZE sort-idempotent))))))
44 |
45 | (deftest convert-fn-invertible ;; round-trip property tests: converting to a format and back should return the input
46 |   (let [thamil-letters fmt/letter-seq
47 |         punct (map str [\. \space \newline])
48 |         all-letters (concat thamil-letters punct)
49 |         lett-gen (gen/such-that identity (gen/elements all-letters))
50 |         thamil-text-gen (gen/fmap string/join (gen/vector lett-gen))
51 |         old-font-no-ambig-combo (fn [s] ;; true when `s` contains none of the phoneme runs listed below
52 |                                   (let [phonemes (fmt/str->phonemes s)
53 |                                         phoneme-triples (partition 3 1 phonemes)
54 |                                         phoneme-doubles (partition 2 1 phonemes)
55 |                                         ambig1 (fn [[a b c]] (and (மொ/மெய்யெழுத்தா? a)
56 |                                                                   (= "எ" b)
57 |                                                                   (= "ள்" c)))
58 |                                         ambig2 (fn [[a b]] (and (= "ஒ" a)
59 |                                                                 (= "ள்" b)))
60 |                                         ambig3 (fn [[a b c]] (and (மொ/மெய்யெழுத்தா? a)
61 |                                                                   (#{"எ" "ஏ" "ஆ"} b)
62 |                                                                   (= "ர்" c)))
63 |                                         no-ambig1 (not-any? ambig1 phoneme-triples) ;; not-any? treats nil like false, unlike (every? false? ...)
64 |                                         no-ambig2 (not-any? ambig2 phoneme-doubles)
65 |                                         no-ambig3 (not-any? ambig3 phoneme-triples)] ;; ambig3 destructures three phonemes, so it must see triples, not pairs
66 |                                     (and no-ambig1 no-ambig2 no-ambig3)))
67 |         ;; old fonts can't distinguish certain character combinations,
68 |         ;; so prevent test cases that could cause that
69 |         non-romanized-thamil-text-gen (gen/such-that old-font-no-ambig-combo lett-gen (* QCHK-SIZE A_LOT)) ;; NOTE(review): this filters lett-gen (single letters), not thamil-text-gen -- confirm that is intended
70 |         ;; applying converters for old fonts followed by their
71 |         ;; inverses should give back the original text
72 |         test-prop (fn [f inv] (prop/for-all [t non-romanized-thamil-text-gen]
73 |                                 (= t (-> t f inv))))
74 |         test-res (fn [f inv]
75 |                    (->> (test-prop f inv)
76 |                         (sc/quick-check QCHK-SIZE)
77 |                         :result))
78 |         ;; only after we've transliterated to romanized can we use
79 |         ;; the rule about applying converter + inverse = input, since
80 |         ;; the தமிழ்->romanized direction has certain overrides
81 |         romanized-test-prop (prop/for-all [t thamil-text-gen]
82 |                               (let [converted-test-txt (-> t cvt/தமிழ்->romanized cvt/romanized->தமிழ்)]
83 |                                 (= converted-test-txt (-> converted-test-txt cvt/தமிழ்->romanized cvt/romanized->தமிழ்))))
84 |         romanized-res (->> romanized-test-prop
85 |                            (sc/quick-check QCHK-SIZE)
86 |                            :result)]
87 |     (testing "convert and inverse fns for all font formats (except romanized)"
88 |       (testing "romanized"
89 |         (is (true? romanized-res)))
90 |       (testing "tab"
91 |         (is (true? (test-res cvt/தமிழ்->tab cvt/tab->தமிழ்))))
92 |       (testing "bamini"
93 |         (is (true? (test-res cvt/தமிழ்->bamini cvt/bamini->தமிழ்))))
94 |       (testing "tscii"
95 |         (is (true? (test-res cvt/தமிழ்->tscii cvt/tscii->தமிழ்))))
96 |       (testing "webulagam"
97 |         (is (true? (test-res cvt/தமிழ்->webulagam cvt/webulagam->தமிழ்)))))))
98 |
--------------------------------------------------------------------------------
/src/clj_thamil/மொழியியல்.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.மொழியியல்
2 | (:require [clj-thamil.format :as fmt])
3 | #?(:clj
4 | (:use clj-thamil.core)
5 | :cljs
6 | (:use-macros [clj-thamil.core :only [வரையறு விவரி மீதி வரையறு-செயல்கூறு பெறு எதாவது பூலியன் என்னும்போது
7 | வைத்துக்கொள் கடைசி பொறுத்து எண்ணு முதல் இரண்டாம் தொடை
8 | கடைசியின்றி அன்று மற்றும் அல்லது தொடு செயல்படுத்து செயல்கூறு]])))
9 |
10 |
11 | (வரையறு மெய்-தொடக்கம்-எழுத்துகள் fmt/c-cv-letters)
12 |
13 | (வரையறு உயிரெழுத்துகள் fmt/vowels)
14 |
15 | (வரையறு மெய்யெழுத்துகள் fmt/consonants)
16 |
17 | (வரையறு உயிர்மெய்யெழுத்துகள் (தட்டையாக்கு (விவரி மீதி மெய்-தொடக்கம்-எழுத்துகள்)))
18 |
19 | (வரையறு தொடை->எழுத்துகள் fmt/str->letters)
20 |
21 | (வரையறு தொடை->ஒலியன்கள் fmt/str->phonemes)
22 |
23 | (வரையறு-செயல்கூறு ஒலியன்கள்->எழுத்து [ஒலியன்கள்] (பெறு fmt/inverse-phoneme-map ஒலியன்கள்))
24 |
25 | ;;;;;;;;
26 | ;; எழுத்து
27 | ;; letters
28 | ;;;;;;;;
29 |
30 | (வரையறு-செயல்கூறு எழுத்தா? [ச] (fmt/in-trie? ச))
31 |
32 | (வரையறு-செயல்கூறு மெய்யெழுத்தா? [எ] (பூலியன் (எதாவது #{எ} மெய்யெழுத்துகள்)))
33 |
34 | (வரையறு-செயல்கூறு உயிரெழுத்தா? [எ] (பூலியன் (எதாவது #{எ} உயிரெழுத்துகள்)))
35 |
36 | (வரையறு-செயல்கூறு உயிர்மெயெழுத்தா? [எ] (பூலியன் (எதாவது #{எ} உயிர்மெய்யெழுத்துகள்)))
37 |
38 | ;;;;;;;;
39 | ;; அசை
40 | ;; syllables
41 | ;;;;;;;;
42 |
43 | (வரையறு குறில்-உயிரெழுத்துகள் #{"அ" "இ" "உ" "எ" "ஒ"})
44 |
45 | (வரையறு நெடில்-உயிரெழுத்துகள் #{"ஆ" "ஈ" "ஊ" "ஏ" "ஓ"})
46 |
47 | (வரையறு-செயல்கூறு நெடிலா?
48 |   "எழுத்து நெடில் எழுத்தா என்பதைத் திருப்பிக் கொடுக்கும்
49 |   Returns true when the argument is a letter (per எழுத்தா?) whose last phoneme is in நெடில்-உயிரெழுத்துகள் (the long vowels); returns false otherwise."
50 |   [எழுத்து]
51 |   (பூலியன்
52 |    (என்னும்போது (எழுத்தா? எழுத்து)
53 |      ;; ஒலியன் = phoneme: split the letter into phonemes, take the last one,
54 |      ;; and look it up in the set of long vowels
55 |      (->> (தொடை->ஒலியன்கள் எழுத்து)
56 |           கடைசி (பெறு நெடில்-உயிரெழுத்துகள்)))))
57 |
58 | (வரையறு-செயல்கூறு குறிலா?
59 |   "எழுத்து குறில் எழுத்தா என்பதைத் திருப்பிக் கொடுக்கும்
60 |   Returns true when the argument is a letter (per எழுத்தா?) whose last phoneme is in குறில்-உயிரெழுத்துகள் (the short vowels); returns false otherwise."
61 |   [எழுத்து]
62 |   (பூலியன்
63 |    ;; both conditions must hold: the input is a letter, and the last phoneme
64 |    ;; of that letter is found in the short-vowel set
65 |    (மற்றும் (எழுத்தா? எழுத்து)
66 |             (பெறு குறில்-உயிரெழுத்துகள் (கடைசி (தொடை->ஒலியன்கள் எழுத்து))))))
67 |
68 | ;;;;;;;;
69 | ;; ஒலியன்
70 | ;; phonemes
71 | ;;;;;;;;
72 |
73 | (வரையறு முன்னொட்டா? fmt/prefix?)
74 |
75 | (வரையறு பின்னொட்டா? fmt/suffix?)
76 |
77 | ;;;;;;;;
78 | ;; விகுதி
79 | ;; suffixes
80 | ;;;;;;;;
81 |
82 | ;; பன்மை
83 | ;; plurals
84 |
85 | (வரையறு-செயல்கூறு பன்மை
86 | "ஒரு சொல்லை அதன் பன்மை வடிவத்தில் ஆக்குதல்
87 | Takes a word (string) and returns its plural form as a string. The rules are tried in order; the first one that matches wins."
88 | [சொல்]
89 | (வைத்துக்கொள் [எழுத்துகள் (தொடை->எழுத்துகள் சொல்)]
90 | (பொறுத்து
91 | ;; when fmt/suffix? (aliased as பின்னொட்டா?) holds for the word and கள், return the word unchanged
92 | ;; (fmt/seq-prefix? (புரட்டு சொல்) (புரட்டு "கள்"))
93 | (பின்னொட்டா? சொல் "கள்")
94 | சொல்
95 | ;; last letter is ம் -> drop it and append ங்கள் (the tests expect மரம் -> மரங்கள்)
96 | (= "ம்" (கடைசி எழுத்துகள்))
97 | (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்) ["ங்கள்"]))
98 | ;; a single letter that is நெடில் (long) -> append க்கள் (tests: தீ -> தீக்கள், பூ -> பூக்கள்)
99 | (மற்றும் (= 1 (எண்ணு எழுத்துகள்))
100 | (நெடிலா? சொல்))
101 | (தொடை சொல் "க்கள்")
102 | ;; two letters, both குறில் (short) -> append க்கள் (tests: பசு -> பசுக்கள்)
103 | (மற்றும் (= 2 (எண்ணு எழுத்துகள்))
104 | (ஒவ்வொன்றுமா? அடையாளம் (விவரி குறிலா? எழுத்துகள்)))
105 | (தொடை சொல் "க்கள்")
106 | ;; two letters, short first letter, second is ல் -> first letter + ற்கள் (tests: கல் -> கற்கள், பல் -> பற்கள்)
107 | (மற்றும் (= 2 (எண்ணு எழுத்துகள்))
108 | (குறிலா? (முதல் எழுத்துகள்))
109 | (= "ல்" (இரண்டாம் எழுத்துகள்)))
110 | (தொடை (முதல் எழுத்துகள்) "ற்கள்")
111 | ;; two letters, short first letter, second is ள் -> first letter + ட்கள் (tests: முள் -> முட்கள்)
112 | (மற்றும் (= 2 (எண்ணு எழுத்துகள்))
113 | (குறிலா? (முதல் எழுத்துகள்))
114 | (= "ள்" (இரண்டாம் எழுத்துகள்)))
115 | (தொடை (முதல் எழுத்துகள்) "ட்கள்")
116 | ;; :அன்றி is simply a truthy keyword acting as the cond default -> append கள் (tests: காடு -> காடுகள், நாள் -> நாள்கள்)
117 | :அன்றி
118 | (தொடை சொல் "கள்"))))
119 |
120 | ;; சந்தி (விதிகள்)
121 | ;; (rules for) joining words/suffixes
122 | (வரையறு-செயல்கூறு சந்தி
123 |   "Joins சொல்1 and சொல்2 (a following word or suffix) into one string, adjusting the letters at the boundary. Every special rule below applies only when சொல்2 begins with a vowel; otherwise the two strings are simply concatenated. The rules are tried in order; the first match wins."
124 |   [சொல்1 சொல்2]
125 |   (வைத்துக்கொள் [எழுத்துகள்1 (தொடை->எழுத்துகள் சொல்1)
126 |                   ஒலியன்கள்1 (தொடை->ஒலியன்கள் சொல்1)
127 |                   ஒலியன்கள்2 (தொடை->ஒலியன்கள் சொல்2)
128 |                   சொ1-கஒ (கடைசி ஒலியன்கள்1)    ;; last phoneme of சொல்1
129 |                   சொ2-முஒ (முதல் ஒலியன்கள்2)]   ;; first phoneme of சொல்2
130 |     ;; (மீதி சொல்2) is `rest` on the string, i.e. everything after its first character -- presumably safe because a word-initial vowel is one character; confirm
131 |     (பொறுத்து
132 |       ;; சொல்1 ends in இ/ஈ/ஏ/ஐ: the glide ய் carries the vowel (tests: தே + இலை -> தேயிலை)
133 |       (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
134 |                (பெறு #{"இ" "ஈ" "ஏ" "ஐ"} சொ1-கஒ))
135 |       (செயல்படுத்து தொடை சொல்1 (ஒலியன்கள்->எழுத்து ["ய்" சொ2-முஒ]) (மீதி சொல்2))
136 |
137 |       ;; சொல்1 ends in அ/ஆ/ஊ/ஒ/ஓ/ஔ: the glide வ் carries the vowel (tests: பூ + எல்லாம் -> பூவெல்லாம்)
138 |       (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
139 |                (பெறு #{"அ" "ஆ" "ஊ" "ஒ" "ஓ" "ஔ"} சொ1-கஒ))
140 |       (செயல்படுத்து தொடை சொல்1 (ஒலியன்கள்->எழுத்து ["வ்" சொ2-முஒ]) (மீதி சொல்2))
141 |
142 |       ;; சொல்1 ends in உ and is two short letters: keep the உ and add வ் (tests: புழு + உக்கு -> புழுவுக்கு)
143 |       (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
144 |                (= "உ" சொ1-கஒ)
145 |                (= 2 (எண்ணு எழுத்துகள்1))
146 |                (ஒவ்வொன்றுமா? குறிலா? எழுத்துகள்1))
147 |       (செயல்படுத்து தொடை சொல்1 (ஒலியன்கள்->எழுத்து ["வ்" சொ2-முஒ]) (மீதி சொல்2))
148 |
149 |       ;; any other சொல்1 ending in உ: drop the உ and attach the vowel to the preceding consonant (tests: சிலம்பு + ஆட்டம் -> சிலம்பாட்டம்).
150 |       ;; No need to re-test the negation of the previous clause: cond only reaches this clause when that one failed.
151 |       (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
152 |                (= "உ" சொ1-கஒ))
153 |       (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்1) (ஒலியன்கள்->எழுத்து [(கடைசி (கடைசியின்றி ஒலியன்கள்1)) சொ2-முஒ]) (மீதி சொல்2)))
154 |
155 |       ;; two-letter சொல்1 made of a short letter plus a consonant: the final consonant is doubled (tests: கல் + ஆறு -> கல்லாறு)
156 |       (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
157 |                (= 2 (எண்ணு எழுத்துகள்1))
158 |                (குறிலா? (முதல் எழுத்துகள்1))
159 |                (மெய்யெழுத்தா? (இரண்டாம் எழுத்துகள்1)))
160 |       (செயல்படுத்து தொடை (தொடு சொல்1 [(ஒலியன்கள்->எழுத்து [சொ1-கஒ சொ2-முஒ])] (மீதி சொல்2)))
161 |
162 |       ;; any other final consonant merges with the vowel (tests: தமிழ் + ஈழம் -> தமிழீழம்)
163 |       (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
164 |                (மெய்யெழுத்தா? சொ1-கஒ))
165 |       (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்1) [(ஒலியன்கள்->எழுத்து [சொ1-கஒ சொ2-முஒ])] (மீதி சொல்2)))
166 |       ;; default (:அன்றி is just a truthy keyword): plain concatenation (tests: தமிழ் + நாடு -> தமிழ்நாடு)
167 |       :அன்றி (தொடை சொல்1 சொல்2))))
168 |
169 | ;; வேற்றுமை
170 | ;; noun cases
171 |
172 | (வரையறு-செயல்கூறு வேற்றுமை-முன்-மாற்றம்
173 |   "Returns சொல் (a noun, as a string) in the stem form that a case suffix is
174 |   attached to; words needing no change are returned as they are."
175 |   [சொல்]
176 |   (வைத்துக்கொள் [எழுத்துகள் (தொடை->எழுத்துகள் சொல்)
177 |                  கஎ (கடைசி எழுத்துகள்)
178 |                  முன்பகுதி (கடைசியின்றி எழுத்துகள்)
179 |                  ;; swaps the last letter of the word for the given ending
180 |                  ஈறு-மாற்று (செயல்கூறு [ஈறு] (செயல்படுத்து தொடை (தொடு முன்பகுதி [ஈறு])))]
181 |     (பொறுத்து
182 |
183 |       (= "ம்" கஎ)
184 |       (ஈறு-மாற்று "த்த்")
185 |
186 |       ;; a final டு/று stays as it is in a word of two குறில் letters, or when
187 |       ;; the letter before it is a consonant
188 |       (மற்றும் (பெறு #{"டு" "று"} கஎ)
189 |                (அல்லது (மற்றும் (= 2 (எண்ணு எழுத்துகள்))
190 |                                 (ஒவ்வொன்றுமா? குறிலா? எழுத்துகள்))
191 |                        (மெய்யெழுத்தா? (கடைசி முன்பகுதி))))
192 |       சொல்
193 |
194 |       ;; every other final டு/று is replaced by its doubled consonant
195 |       (= "டு" கஎ) (ஈறு-மாற்று "ட்ட்")
196 |       (= "று" கஎ) (ஈறு-மாற்று "ற்ற்")
197 |
198 |       :அன்றி சொல்)))
199 |
200 | (வரையறு-செயல்கூறு வேற்றுமை
201 |   "Attaches the case suffix வே to the noun சொல் (both strings) and returns the
202 |   result. The suffix \"உக்கு\" is swapped for \"க்கு\" when சொல் ends in the
203 |   phoneme இ, ஈ or ஐ, or in \"ஆய்\"."
204 |   [சொல் வே]
205 |   (வைத்துக்கொள் [கஒ (கடைசி (தொடை->ஒலியன்கள் சொல்))
206 |                  ;; truthy when சொல் ends with the given string
207 |                  ஈறா? (செயல்கூறு [ஈறு] (பின்னொட்டா? சொல் ஈறு))]
208 |     (எனில் (மற்றும் (= "உக்கு" வே)
209 |                     (அல்லது (பெறு #{"இ" "ஈ" "ஐ"} கஒ)
210 |                             (எதாவது ஈறா? ["ஆய்"])))
211 |       (வேற்றுமை சொல் "க்கு")
212 |       (-> சொல்
213 |           வேற்றுமை-முன்-மாற்றம் (சந்தி வே)))))
214 |
--------------------------------------------------------------------------------
/examples/clj/clj-español/LICENSE:
--------------------------------------------------------------------------------
1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
4 |
5 | 1. DEFINITIONS
6 |
7 | "Contribution" means:
8 |
9 | a) in the case of the initial Contributor, the initial code and
10 | documentation distributed under this Agreement, and
11 |
12 | b) in the case of each subsequent Contributor:
13 |
14 | i) changes to the Program, and
15 |
16 | ii) additions to the Program;
17 |
18 | where such changes and/or additions to the Program originate from and are
19 | distributed by that particular Contributor. A Contribution 'originates' from
20 | a Contributor if it was added to the Program by such Contributor itself or
21 | anyone acting on such Contributor's behalf. Contributions do not include
22 | additions to the Program which: (i) are separate modules of software
23 | distributed in conjunction with the Program under their own license
24 | agreement, and (ii) are not derivative works of the Program.
25 |
26 | "Contributor" means any person or entity that distributes the Program.
27 |
28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
29 | necessarily infringed by the use or sale of its Contribution alone or when
30 | combined with the Program.
31 |
32 | "Program" means the Contributions distributed in accordance with this
33 | Agreement.
34 |
35 | "Recipient" means anyone who receives the Program under this Agreement,
36 | including all Contributors.
37 |
38 | 2. GRANT OF RIGHTS
39 |
40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
42 | reproduce, prepare derivative works of, publicly display, publicly perform,
43 | distribute and sublicense the Contribution of such Contributor, if any, and
44 | such derivative works, in source code and object code form.
45 |
46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
49 | transfer the Contribution of such Contributor, if any, in source code and
50 | object code form. This patent license shall apply to the combination of the
51 | Contribution and the Program if, at the time the Contribution is added by the
52 | Contributor, such addition of the Contribution causes such combination to be
53 | covered by the Licensed Patents. The patent license shall not apply to any
54 | other combinations which include the Contribution. No hardware per se is
55 | licensed hereunder.
56 |
57 | c) Recipient understands that although each Contributor grants the licenses
58 | to its Contributions set forth herein, no assurances are provided by any
59 | Contributor that the Program does not infringe the patent or other
60 | intellectual property rights of any other entity. Each Contributor disclaims
61 | any liability to Recipient for claims brought by any other entity based on
62 | infringement of intellectual property rights or otherwise. As a condition to
63 | exercising the rights and licenses granted hereunder, each Recipient hereby
64 | assumes sole responsibility to secure any other intellectual property rights
65 | needed, if any. For example, if a third party patent license is required to
66 | allow Recipient to distribute the Program, it is Recipient's responsibility
67 | to acquire that license before distributing the Program.
68 |
69 | d) Each Contributor represents that to its knowledge it has sufficient
70 | copyright rights in its Contribution, if any, to grant the copyright license
71 | set forth in this Agreement.
72 |
73 | 3. REQUIREMENTS
74 |
75 | A Contributor may choose to distribute the Program in object code form under
76 | its own license agreement, provided that:
77 |
78 | a) it complies with the terms and conditions of this Agreement; and
79 |
80 | b) its license agreement:
81 |
82 | i) effectively disclaims on behalf of all Contributors all warranties and
83 | conditions, express and implied, including warranties or conditions of title
84 | and non-infringement, and implied warranties or conditions of merchantability
85 | and fitness for a particular purpose;
86 |
87 | ii) effectively excludes on behalf of all Contributors all liability for
88 | damages, including direct, indirect, special, incidental and consequential
89 | damages, such as lost profits;
90 |
91 | iii) states that any provisions which differ from this Agreement are offered
92 | by that Contributor alone and not by any other party; and
93 |
94 | iv) states that source code for the Program is available from such
95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
96 | or through a medium customarily used for software exchange.
97 |
98 | When the Program is made available in source code form:
99 |
100 | a) it must be made available under this Agreement; and
101 |
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 |
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 |
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 |
111 | 4. COMMERCIAL DISTRIBUTION
112 |
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering. The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor to control, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 |
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 |
144 | 5. NO WARRANTY
145 |
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement , including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 |
157 | 6. DISCLAIMER OF LIABILITY
158 |
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 |
168 | 7. GENERAL
169 |
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 |
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 |
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 |
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 |
210 | This Agreement is governed by the laws of the State of New York and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 |
--------------------------------------------------------------------------------
/examples/clj/clj-spanish/LICENSE:
--------------------------------------------------------------------------------
1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
4 |
5 | 1. DEFINITIONS
6 |
7 | "Contribution" means:
8 |
9 | a) in the case of the initial Contributor, the initial code and
10 | documentation distributed under this Agreement, and
11 |
12 | b) in the case of each subsequent Contributor:
13 |
14 | i) changes to the Program, and
15 |
16 | ii) additions to the Program;
17 |
18 | where such changes and/or additions to the Program originate from and are
19 | distributed by that particular Contributor. A Contribution 'originates' from
20 | a Contributor if it was added to the Program by such Contributor itself or
21 | anyone acting on such Contributor's behalf. Contributions do not include
22 | additions to the Program which: (i) are separate modules of software
23 | distributed in conjunction with the Program under their own license
24 | agreement, and (ii) are not derivative works of the Program.
25 |
26 | "Contributor" means any person or entity that distributes the Program.
27 |
28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
29 | necessarily infringed by the use or sale of its Contribution alone or when
30 | combined with the Program.
31 |
32 | "Program" means the Contributions distributed in accordance with this
33 | Agreement.
34 |
35 | "Recipient" means anyone who receives the Program under this Agreement,
36 | including all Contributors.
37 |
38 | 2. GRANT OF RIGHTS
39 |
40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
42 | reproduce, prepare derivative works of, publicly display, publicly perform,
43 | distribute and sublicense the Contribution of such Contributor, if any, and
44 | such derivative works, in source code and object code form.
45 |
46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
49 | transfer the Contribution of such Contributor, if any, in source code and
50 | object code form. This patent license shall apply to the combination of the
51 | Contribution and the Program if, at the time the Contribution is added by the
52 | Contributor, such addition of the Contribution causes such combination to be
53 | covered by the Licensed Patents. The patent license shall not apply to any
54 | other combinations which include the Contribution. No hardware per se is
55 | licensed hereunder.
56 |
57 | c) Recipient understands that although each Contributor grants the licenses
58 | to its Contributions set forth herein, no assurances are provided by any
59 | Contributor that the Program does not infringe the patent or other
60 | intellectual property rights of any other entity. Each Contributor disclaims
61 | any liability to Recipient for claims brought by any other entity based on
62 | infringement of intellectual property rights or otherwise. As a condition to
63 | exercising the rights and licenses granted hereunder, each Recipient hereby
64 | assumes sole responsibility to secure any other intellectual property rights
65 | needed, if any. For example, if a third party patent license is required to
66 | allow Recipient to distribute the Program, it is Recipient's responsibility
67 | to acquire that license before distributing the Program.
68 |
69 | d) Each Contributor represents that to its knowledge it has sufficient
70 | copyright rights in its Contribution, if any, to grant the copyright license
71 | set forth in this Agreement.
72 |
73 | 3. REQUIREMENTS
74 |
75 | A Contributor may choose to distribute the Program in object code form under
76 | its own license agreement, provided that:
77 |
78 | a) it complies with the terms and conditions of this Agreement; and
79 |
80 | b) its license agreement:
81 |
82 | i) effectively disclaims on behalf of all Contributors all warranties and
83 | conditions, express and implied, including warranties or conditions of title
84 | and non-infringement, and implied warranties or conditions of merchantability
85 | and fitness for a particular purpose;
86 |
87 | ii) effectively excludes on behalf of all Contributors all liability for
88 | damages, including direct, indirect, special, incidental and consequential
89 | damages, such as lost profits;
90 |
91 | iii) states that any provisions which differ from this Agreement are offered
92 | by that Contributor alone and not by any other party; and
93 |
94 | iv) states that source code for the Program is available from such
95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
96 | or through a medium customarily used for software exchange.
97 |
98 | When the Program is made available in source code form:
99 |
100 | a) it must be made available under this Agreement; and
101 |
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 |
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 |
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 |
111 | 4. COMMERCIAL DISTRIBUTION
112 |
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering. The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor to control, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 |
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 |
144 | 5. NO WARRANTY
145 |
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement , including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 |
157 | 6. DISCLAIMER OF LIABILITY
158 |
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 |
168 | 7. GENERAL
169 |
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 |
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 |
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 |
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 |
210 | This Agreement is governed by the laws of the State of New York and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 |
--------------------------------------------------------------------------------
/test/clj_thamil/format_test.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.format-test
2 | (:use clojure.test
3 | clj-thamil.format
4 | clj-thamil.core))
5 |
6 | (def words ["பந்து" "பந்தி" "பத்து" "பந்துகள்" "பந்தயம்" "பந்தாடு" "பந்தல்"]) ; fixture words; trie-test builds tries from them
7 |
8 | (deftest trie-test
9 |   (let [w1 (first words)
10 |         w1-w2 (take 2 words)
11 |         short-trie {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} ; holds only "பந்து"
12 |         long-trie {\ப {\ந {\u0BCD {\த {\u0BC1 {\க {\ள {\u0BCD {nil nil}}}}}}}}}] ; holds only "பந்துகள்"
13 |     (testing "creating a trie"
14 |       (testing "creating a trie from a sequence of words (default val is attached to terminus)"
15 |         (testing "boundary case" (is (= {} (make-trie []))))
16 |         (is (= short-trie (make-trie [w1])))
17 |         (is (= (make-trie [w1]) (make-trie (take 1 words))))
18 |         (testing "words that share some prefix"
19 |           (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil} \u0BBF {nil nil}}}}}} (make-trie w1-w2))))
20 |         (testing "words that have no shared prefix"
21 |           (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}} \வ {\u0BC6 {\ற {\u0BCD {\ற {\u0BBF {nil nil}}}}}}} (make-trie [w1 "வெற்றி"])))))
22 |       (testing "creating a trie from a map of word->terminus-attached-val"
23 |         (testing "boundary case" (is (= {} (make-trie {}))))
24 |         (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 1}}}}}} (make-trie {w1 1})))
25 |         (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 0
26 |                                          \க {\ள {\u0BCD {nil 3}}}}
27 |                                  \u0BBF {nil 1}
28 |                                  \ய {\ம {\u0BCD {nil 4}}}
29 |                                  \ல {\u0BCD {nil 6}}
30 |                                  \u0BBE {\ட {\u0BC1 {nil 5}}}}}}
31 |                      \த {\u0BCD {\த {\u0BC1 {nil 2}}}}}} (make-trie (zipmap words (range)))))))
32 |     (testing "trie lookup fns"
33 |       (testing "nil as valued attached to terminus of input sequences"
34 |         (are [trie word found?] (= found? (in-trie? trie word))
35 |           long-trie "பந்துகள்" true
36 |           long-trie "ப" false
37 |           long-trie "பந்துக" false
38 |           long-trie "பந்து" false
39 |           short-trie "பந்துகள்" false
40 |           short-trie "ப" false
41 |           short-trie "பந்துக" false
42 |           short-trie "பந்து" true)
43 |         (is (= false (nil? (trie-prefix-subtree short-trie "பந்து"))))
44 |         (is (= false (nil? (trie-prefix-subtree short-trie "ப"))))
45 |         (is (= true (nil? (trie-prefix-subtree short-trie "பந்துகள்"))))
46 |         (is (= true (nil? (trie-prefix-subtree short-trie "கோடு")))))
47 |       (testing "non-nil values attached to terminus of input sequences"
48 |         (is (= true (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 1}}}}}} "பந்து")))
49 |         (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 3.14159}}}}}} "ப")))
50 |         (is (= false (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil \a}}}}}} "பந்து"))))
51 |         (is (= false (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil true}}}}}} "ப"))))
52 |         (testing "splitting words directly into phonemes using phoneme trie"
53 |           (is (= ["வ்" "அ" "ண்" "அ" "க்" "க்" "அ" "ம்"] (str->phonemes "வணக்கம்")))
54 |           (is (empty? (str->phonemes nil)))
55 |           (is (empty? (str->phonemes "")))
56 |           (is (= ["அ"] (str->phonemes "அ")))
57 |           (is (= ["க்"] (str->phonemes "க்")))
58 |           (is (= ["க்" "அ"] (str->phonemes "க")))
59 |           (is (= ["க்" "ஊ"] (str->phonemes "கூ")))
60 |           (is (= ["வ்" "இ" "ட்" "உ" "த்" "அ" "ல்" "ஐ"] (str->phonemes "விடுதலை")))))
61 |       (testing "inverting maps for creating tries for inverse conversion"
62 |         (is (= "பக்கம்" (phonemes->str "ப்அக்க்அம்")))
63 |         (is (= "தலைய்123ஈடு" (phonemes->str "த்அல்ஐய்123ஈட்உ")))
64 |         (is (= "நடு" (phonemes->str "ந்அடு")))))))
65 |
;; Covers splitting strings into letters, and the letter-level and word-level
;; ordering predicates/comparators.
(deftest word-letter-test
  (let [;; each row is [first-arg second-arg expected-result]
        check-letters (fn [rows]
                        (doseq [[a b expected] rows]
                          (is (= expected (letter-before? a b)))))
        check-words (fn [rows]
                      (doseq [[a b expected] rows]
                        (is (= expected (word-before? a b)))))]
    (testing "splitting strings of தமிழ் characters into constituent தமிழ் characters"
      ;; each row is [input expected-letters]
      (doseq [[input expected] [["" []]
                                [nil []]
                                ["த" ["த"]]
                                [" த" [" " "த"]]
                                ["த " ["த" " "]]
                                ["த்" ["த்"]]
                                ["தமிழ்" ["த" "மி" "ழ்"]]
                                ["தமிழ் " ["த" "மி" "ழ்" " "]]
                                ["software=மென்பொருள்,..."
                                 ["s" "o" "f" "t" "w" "a" "r" "e" "=" "மெ" "ன்" "பொ" "ரு" "ள்" "," "." "." "."]]]]
        (is (= expected (str->letters input)))))
    (testing "letter ordering"
      (testing "boundary cases"
        (check-letters [[nil nil true]
                        [nil "" true]
                        ["" nil false]
                        [nil "அ" true]
                        [nil "a" true]
                        ["a" "அ" true]]))
      (testing "equal inputs"
        (check-letters [["அ" "அ" false]]))
      (testing "தமிழ்"
        (check-letters [["அ" "ஆ" true]
                        ["ஆ" "இ" true]
                        ["அ" "ஔ" true]
                        ["ஔ" "ஃ" true]
                        ["ஃ" "க்" true]
                        ["க்" "க" true]
                        ["க" "கா" true]
                        ["க்" "கௌ" true]
                        ["க்" "ஃ" false]
                        ["கௌ" "ங்" true]
                        ["ங்" "கௌ" false]]))
      (testing "ASCII"
        (check-letters [["a" "z" true]
                        ["A" "Z" true]
                        ["Z" "a" true]
                        ["0" "9" true]
                        ["9" "A" true]]))
      (testing "comparator / sorting"
        (is (= ["அ" "ஆ" "இ" "ஒ" "ஓ" "ஔ" "ஃ" "க்" "க" "ன்" "ன" "னா" "னு" "னௌ"]
               (sort-by identity letter-comp ["இ" "க" "ஃ" "ன" "னு" "னௌ" "னா" "ஆ" "க்" "அ" "ஔ" "ஓ" "ன்" "ஒ"])))))
    (testing "word ordering"
      (testing "equal inputs"
        (check-words [["அ" "அ" false]]))
      (testing "extra letters in one word"
        (check-words [["அது" "அ" false]
                      ["அ" "அது" true]]))
      (testing "Unicode 'consonant' vs. Unicode 'consonant+ligature' - ஒருங்குறியில் தமிழ் மெய்யெழுத்து+அகரம் மற்றும் அதே மெய்யெழுத்து {வெறுமன்; அதோடு வேறொரு உயிரெழுத்து}"
        (check-words [["படம்" "பாடம்" true]
                      ["பாடம்" "படம்" false]
                      ["படம்" "பட்டம்" false]
                      ["பட்டம்" "படம்" true]
                      ["கடமை" "கட்டம்" false]
                      ["கட்டம்" "கடமை" true]
                      ["கட்டு" "கெட்டு" true]
                      ["கெட்டு" "கட்டு" false]
                      ["பைந்தமிழ்" "பந்தல்" false]
                      ["பந்தல்" "பைந்தமிழ்" true]]))
      (testing "order of consonants"
        (check-words [["பாடம்" "பாதம்" true]
                      ["பாதம்" "பாடம்" false]]))
      (testing "order of vowels"
        (check-words [["அப்பம்" "ஆப்பம்" true]
                      ["ஆப்பம்" "அப்பம்" false]]))
      (testing "order of vowel vs. consonant, and order of two உயிர்மெய்யெழுத்துகள்"
        (check-words [["நுளம்பு" "கொசு" false]
                      ["கொசு" "நுளம்பு" true]
                      ["ஈ" "கொசு" true]
                      ["கொசு" "ஈ" false]])))))
135 |
;; Covers the generic sequence helpers (seq-prefix, seq-prefix?, seq-index-of)
;; and the phoneme-aware prefix? predicate.
(deftest util-fn-test
  (let [s "abcqwertyuiop"]
    (testing "seq-prefix"
      ;; each row is [seq1 seq2 expected-shared-prefix]
      (doseq [[a b expected] [[nil nil []]
                              [nil [] []]
                              [[] nil []]
                              [nil [1 2] []]
                              ["abcdefgh" s [\a \b \c]]
                              ["abbb" s [\a \b]]
                              ["zyx" s []]]]
        (is (= expected (seq-prefix a b)))))
    (testing "seq-prefix?"
      ;; each row is [target query expected]
      (doseq [[tgt qry expected] [[nil nil false]
                                  [nil [] false]
                                  [[] nil false]
                                  [nil [1 2] false]
                                  ["abcdefgh" s false]
                                  ["abbb" s false]
                                  ["zyx" s false]
                                  ["abc" s false]
                                  [s "abc" true]
                                  [s "a" true]
                                  [s "" true]
                                  [s [] true]
                                  [s nil true]]]
        (is (= expected (seq-prefix? tgt qry)))))
    (testing "seq-index-of"
      ;; seq-index-of must agree with the host platform's native indexOf
      (doseq [[s1 s2] [["abc" "a"]
                       ["a" "abc"]
                       ["" "abc"]]]
        (is (= (.indexOf s1 s2) (seq-index-of s1 s2)))))
    (testing "prefix?"
      ;; each row is [target query expected]
      (doseq [[tgt qry expected] [["வந்தான்" "" true]
                                  ["வந்தான்" "வ்" true]
                                  ["வந்தான்" "வ" true]
                                  ["வந்தான்" "வந்" true]
                                  ["வந்தான்" "வந" false]
                                  ["வந்தான்" "வந்த்" true]
                                  ["வந்தான்" "வந்து" false]
                                  ["வந்தான்" "வந்தா" true]
                                  ["வந்தான்" "வந்தான்" true]
                                  ["வந்தான்" "வந்தானே" false]
                                  ["வந்தானே" "வந்தான்" true]]]
        (is (= expected (prefix? tgt qry)))))))
178 |
;; Covers the character-class predicates (whitespace?, wordy-char?) and the
;; splitting of a string into wordy chunks (wordy-seq).
(deftest word-char-traits-test
  (testing "word and char traits"
    (testing "char traits"
      (let [ws-chars [\space \tab \newline]
            wordy-chars [\a \Z \0]
            punct-chars [\- \* \^ \$ \+ \. \_ \; ]
            தமிழ்-எழுத்து-unicode-chars [\அ \ஆ \இ \ஔ \ஃ \க \ங \ன]
            தமிழ்-எழுத்து-துணை-குறி-unicode-chars [\u0BCD \u0BBE \u0BBF \u0BC0 \u0BC1 \u0BC2 \u0BC6 \u0BC7 \u0BC8 \u0BCA \u0BCB \u0BCC]]
        (is (= true (every? true? (map whitespace? ws-chars))))
        (is (= true (every? true? (map wordy-char? wordy-chars))))
        (is (= true (every? true? (map wordy-char? தமிழ்-எழுத்து-unicode-chars))))
        ;; the dependent vowel signs / pulli must count as word characters too
        ;; (this assertion used to appear twice; the verbatim duplicate was removed)
        (is (= true (every? true? (map wordy-char? தமிழ்-எழுத்து-துணை-குறி-unicode-chars))))
        (is (= true (every? false? (map wordy-char? punct-chars))))))
    (testing "word boundaries"
      (let [s1 "aldsk சிக்கல் sdfsdf234234lsdflksjdf Zürich"
            s2 " alsfjs"
            s3 ""
            s4 nil]
        (is (= ["aldsk" "சிக்கல்" "sdfsdf234234lsdflksjdf" "Zürich"] (wordy-seq s1)))
        (is (= ["alsfjs"] (wordy-seq s2)))
        (is (= [] (wordy-seq s3)))
        (is (= nil (wordy-seq s4)))))))
202 |
;; Covers locating the wordy chunk under a cursor position and snapping a
;; cursor to letter boundaries.
(deftest cursor-pos-test
  (let [s1 "aldsk சிக்கல் sdfsdf234234lsdflksjdf Zürich"
        ;; NOTE: the runs of whitespace in s2 and s3 are significant: the
        ;; assertions below require index 1 of s2 and index 2 of s3 to sit
        ;; between two whitespace characters.
        s2 "  alsfjs"
        s3 "a  b"
        s4 "சிக்கல்"
        ;; each row is [string cursor-idx expected-chunk expected-pos-in-chunk];
        ;; a nil chunk means the cursor does not touch any wordy chunk
        cases [[s1 0 "aldsk" 0]
               [s1 1 "aldsk" 1]
               [s1 5 "aldsk" 5]
               [s1 6 "சிக்கல்" 0]
               [s1 (count s1) "Zürich" 6]
               [s1 (- (count s1) (count "Zürich")) "Zürich" 0]
               [s2 0 nil nil]
               [s2 1 nil nil]
               [s2 2 "alsfjs" 0]
               [s3 0 "a" 0]
               [s3 1 "a" 1]
               [s3 2 nil nil]]]
    (testing "cursor position"
      (testing "wordy chunk under cursor"
        (doseq [[s idx chunk _] cases]
          (is (= chunk (wordy-chunk-under s idx)))))
      (testing "cursor position within wordy chunk"
        (doseq [[s idx chunk pos] cases]
          (is (= (when chunk [chunk pos])
                 (wordy-chunk-and-cursor-pos s idx)))))
      (testing "cursor adjust"
        ;; letter boundaries of s4 are at 0, 2, 4, 5 and 7;
        ;; each row is [cursor-idx direction expected-idx]
        (doseq [[idx direction expected] [[3 :to-first 2]
                                          [3 :to-last 4]
                                          [3 nil 4]
                                          [2 :to-first 2]
                                          [2 :to-last 2]
                                          [2 nil 2]
                                          [0 :to-first 0]
                                          [0 :to-last 0]
                                          [0 nil 0]
                                          [7 :to-first 7]
                                          [7 :to-last 7]
                                          [7 nil 7]]]
          (is (= expected (cursor-adjust s4 idx direction))))))))
248 |
--------------------------------------------------------------------------------
/src/clj_thamil/format.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.format
2 | (:require [clojure.set :as set])
3 | #?(:clj (:use clj-thamil.core)))
4 |
5 | ;;;;;;;;;;
6 | ;; letters
7 | ;;;;;;;;;;
8 |
;; Grid of தமிழ் letters, one vector per row, 13 entries per row.
;; Row 0 holds ஃ followed by the 12 vowels. Each of the 18 remaining rows
;; holds one bare consonant followed by its 12 consonant+vowel combinations,
;; listed in the same vowel order as row 0 (so column i of a consonant row
;; pairs with column i of row 0).
(def letters [["ஃ" "அ" "ஆ" "இ" "ஈ" "உ" "ஊ" "எ" "ஏ" "ஐ" "ஒ" "ஓ" "ஔ"]
              ["க்" "க" "கா" "கி" "கீ" "கு" "கூ" "கெ" "கே" "கை" "கொ" "கோ" "கௌ"]
              ["ங்" "ங" "ஙா" "ஙி" "ஙீ" "ஙு" "ஙூ" "ஙெ" "ஙே" "ஙை" "ஙொ" "ஙோ" "ஙௌ"]
              ["ச்" "ச" "சா" "சி" "சீ" "சு" "சூ" "செ" "சே" "சை" "சொ" "சோ" "சௌ"]
              ["ஞ்" "ஞ" "ஞா" "ஞி" "ஞீ" "ஞு" "ஞூ" "ஞெ" "ஞே" "ஞை" "ஞொ" "ஞோ" "ஞௌ"]
              ["ட்" "ட" "டா" "டி" "டீ" "டு" "டூ" "டெ" "டே" "டை" "டொ" "டோ" "டௌ"]
              ["ண்" "ண" "ணா" "ணி" "ணீ" "ணு" "ணூ" "ணெ" "ணே" "ணை" "ணொ" "ணோ" "ணௌ"]
              ["த்" "த" "தா" "தி" "தீ" "து" "தூ" "தெ" "தே" "தை" "தொ" "தோ" "தௌ"]
              ["ந்" "ந" "நா" "நி" "நீ" "நு" "நூ" "நெ" "நே" "நை" "நொ" "நோ" "நௌ"]
              ["ப்" "ப" "பா" "பி" "பீ" "பு" "பூ" "பெ" "பே" "பை" "பொ" "போ" "பௌ"]
              ["ம்" "ம" "மா" "மி" "மீ" "மு" "மூ" "மெ" "மே" "மை" "மொ" "மோ" "மௌ"]
              ["ய்" "ய" "யா" "யி" "யீ" "யு" "யூ" "யெ" "யே" "யை" "யொ" "யோ" "யௌ"]
              ["ர்" "ர" "ரா" "ரி" "ரீ" "ரு" "ரூ" "ரெ" "ரே" "ரை" "ரொ" "ரோ" "ரௌ"]
              ["ல்" "ல" "லா" "லி" "லீ" "லு" "லூ" "லெ" "லே" "லை" "லொ" "லோ" "லௌ"]
              ["வ்" "வ" "வா" "வி" "வீ" "வு" "வூ" "வெ" "வே" "வை" "வொ" "வோ" "வௌ"]
              ["ழ்" "ழ" "ழா" "ழி" "ழீ" "ழு" "ழூ" "ழெ" "ழே" "ழை" "ழொ" "ழோ" "ழௌ"]
              ["ள்" "ள" "ளா" "ளி" "ளீ" "ளு" "ளூ" "ளெ" "ளே" "ளை" "ளொ" "ளோ" "ளௌ"]
              ["ற்" "ற" "றா" "றி" "றீ" "று" "றூ" "றெ" "றே" "றை" "றொ" "றோ" "றௌ"]
              ["ன்" "ன" "னா" "னி" "னீ" "னு" "னூ" "னெ" "னே" "னை" "னொ" "னோ" "னௌ"]])
28 |
;; The first row of `letters`, rotated so that its leading ஃ comes last:
;; the 12 vowels followed by ஃ.
(def vowels
  (let [[aytham & uyir] (first letters)]
    (concat uyir [aytham])))
32 |
;; Every row of `letters` except the first (vowel) row, i.e. the consonant
;; and consonant+vowel rows.
(def c-cv-letters (next letters))
34 |
;; The leading entry of each consonant row, i.e. the bare consonants.
(def consonants
  (for [row c-cv-letters]
    (first row)))
36 |
37 | ;;;;;;;;;;;
38 | ;; trie fns
39 | ;;;;;;;;;;;
40 |
(defn- trie-add-seq
  "Add the sequence `s` to `trie-map` (a trie represented as nested maps keyed
  by the elements of the sequences) and return the updated trie.

  The end of a stored sequence is marked by a `nil` key in the map reached by
  walking the sequence; the value under that key is `term-val` (nil when not
  given). Existing branches that share a prefix with `s` are preserved.
  An empty or nil `s` stores `term-val` under the `nil` key of the root."
  ([trie-map s]
   (trie-add-seq trie-map s nil))
  ([trie-map s term-val]
   ;; assoc-in walks/creates the nested maps along the path, so no manual
   ;; search for the longest already-present prefix is needed.
   (assoc-in trie-map (concat s [nil]) term-val)))
57 |
(defn make-trie
  "Build a trie (nested maps) from `sequence`.

  When `sequence` is a map, each key is an input sequence and its value is
  attached to that sequence's terminus in the trie. Otherwise `sequence` is a
  (possibly nested) collection of input sequences; it is flattened first and
  every terminus value is nil."
  [sequence]
  (let [add-entry (fn [trie [k term-val]]
                    (trie-add-seq trie k term-val))]
    (if (map? sequence)
      (reduce add-entry {} sequence)
      (reduce trie-add-seq {} (flatten sequence)))))
65 |
(def ^:private letter-trie
  "a trie that contains all strings representing the individual letters in தமிழ்"
  (make-trie letters))
69 |
(defn trie-prefix-subtree
  "Walk `trie` along the elements of `sq` and return the subtree reached, or
  nil when the path does not exist. An empty `sq` returns `trie` itself."
  [trie sq]
  (reduce get trie sq))
74 |
(defn in-trie?
  "Return true when `sq` is stored in the trie as a complete sequence, i.e. the
  subtree reached by `sq` has a terminus (`nil`) key; false otherwise.
  With one argument, the trie of தமிழ் letters is used."
  ([sq]
   (in-trie? letter-trie sq))
  ([trie sq]
   (let [subtree (trie-prefix-subtree trie sq)]
     (some? (find subtree nil)))))
83 |
(defn get-in-trie
  "Return the value for `sq` from `trie`: the value attached to the terminus of
  `sq` when `sq` is in the trie and that value is non-nil; otherwise the
  elements of `sq` concatenated into a string."
  [trie sq]
  (let [subtree (trie-prefix-subtree trie sq)
        term-val (when (in-trie? trie sq)
                   (get subtree nil))]
    (if (nil? term-val)
      (apply str sq)
      term-val)))
93 |
(defn- backfill-new-chars
  "Helper for str->elems. `new-chars` is a run of characters that formed a path
  in `trie` but could not be extended by the next input character. The run
  itself may not be a complete entry of the trie, so it is split greedily from
  the left: the longest leading chunk that is in the trie is taken, then the
  remainder is processed the same way. A chunk of length 1 is always taken,
  whether or not it is in the trie. Each chunk is converted with get-in-trie.

  This is O(n^2) in the length of `new-chars`, on the assumption that such runs
  are short. It matters when one entry is a prefix of a longer path, e.g.
  telling a 3-element chunk apart from 2 smaller chunks (\"ksh\" vs \"k\" +
  \"sh\"); for the native தமிழ் letters, which need at most 2 codepoints, it is
  probably not required.

  Options map (optional): when `:flat-output` is truthy the collected values
  are returned as-is; otherwise they are passed through `flatten`."
  [trie new-chars & [{:keys [flat-output] :as opts}]]
  (loop [remaining new-chars
         found []
         n (count new-chars)]
    (cond
      ;; everything consumed
      (zero? n)
      (if flat-output found (flatten found))

      ;; a single element is taken unconditionally; longer candidates must be in the trie
      (or (= 1 n) (in-trie? trie (take n remaining)))
      (let [tail (drop n remaining)]
        (recur tail
               (conj found (get-in-trie trie (take n remaining)))
               (count tail)))

      ;; candidate too long: try a shorter leading chunk
      :else
      (recur remaining found (dec n)))))
109 |
(defn str->elems
  "Split the string `s` into chunks according to `trie` (the தமிழ் letter trie
  when only `s` is given). The input is scanned left to right, extending the
  current chunk for as long as it is still a path in the trie. For each chunk,
  the value attached to its terminus is emitted when it exists (useful for
  transliteration / format conversion), otherwise the chunk's own string.

  Returns a sequence of elements (an empty vector for empty or nil input).
  The optional trailing options map is accepted, but its `:transform` key is
  not used by this function."
  ([s]
   (str->elems letter-trie s))
  ([trie s & [{:keys [transform] :as opts}]]
   (let [len (count s)]
     (loop [idx 0
            pending []   ; characters of the chunk currently being built
            elems []]    ; finished output elements
       (if (= idx len)
         ;; input consumed: flush whatever is still pending
         (if (empty? pending)
           elems
           (concat elems (backfill-new-chars trie pending)))
         (let [next-char (.charAt s idx)
               candidate (conj pending next-char)]
           (if (or (empty? pending)
                   (some? (trie-prefix-subtree trie candidate)))
             ;; either the chunk is just starting, or it is still a path in the trie
             (recur (inc idx) candidate elems)
             ;; the chunk cannot be extended: emit it and start over with this char.
             ;; `into` keeps `elems` a vector; wrapping it in `concat` on every
             ;; iteration would nest one lazy seq per element and can overflow the
             ;; stack when the result of a long input is realized.
             (recur (inc idx)
                    [next-char]
                    (into elems (backfill-new-chars trie pending))))))))))
140 |
141 | ;;;;;;;;;;;
142 | ;; letters & phonemes
143 | ;;;;;;;;;;;
144 |
(defn str->letters
  "Split a string into its constituent letters: தமிழ் letters are kept whole
  (a consonant together with its vowel sign or pulli), and any other character
  becomes its own single-character element."
  [s]
  ;; the 1-arity of str->elems uses the தமிழ் letter trie
  (str->elems s))
149 |
(def ^{:doc "a map whose keys are தமிழ் letters and whose values are sequences of the constituent phonemes (represented as strings) of those letters. letters are from the set {உயிர்-, மெய்-, உயிர்மெய்-}எழுத்துகள், phonemes are from the set {உயிர்-,மெய்-}எழுத்துகள்"}
  phoneme-map
  ;; Derived from the `letters` grid instead of being listed by hand, so the
  ;; two tables cannot drift apart. Column i of every consonant row pairs with
  ;; column i of the first (vowel) row.
  (let [[vowel-row & consonant-rows] letters
        ;; ஃ and the vowels are each their own single phoneme
        standalone (for [v vowel-row]
                     [v [v]])
        ;; a bare consonant is its own phoneme; a consonant+vowel letter splits
        ;; into the bare consonant followed by the vowel of its column
        combined (for [[consonant & cv-letters] consonant-rows
                       entry (cons [consonant [consonant]]
                                   (map (fn [cv vowel] [cv [consonant vowel]])
                                        cv-letters
                                        (rest vowel-row)))]
                   entry)]
    (into {} (concat standalone combined))))
399 |
(def phoneme-trie
  "a trie of the individual letters in தமிழ், whose terminus-attached values are the sequences of each letter's phonemes -- pass it to str->elems to split a word directly into its phonemes"
  (make-trie phoneme-map))
402 |
;; phoneme-map with keys and values swapped: phoneme sequence -> letter
(def inverse-phoneme-map
  (into {}
        (map (fn [[letter phonemes]] [phonemes letter]))
        phoneme-map))
404 |
(defn str->phonemes
  "Split a string into its constituent தமிழ் phonemes; characters that are not
  தமிழ் letters are passed through as single-character elements."
  [s]
  (->> s
       (str->elems phoneme-trie)))
409 |
410 | ;; TODO: create a make-inverse-trie fn
411 | ;; TODO: turn str->elem into seq->elem, use that to refactor phonemes->str
412 |
(def ^:private inverse-concat-phoneme-trie
  "a trie keyed by the concatenated phoneme string of each letter (e.g. the two phonemes of a consonant+vowel letter joined into one string), whose terminus-attached value is the combined letter. built once here rather than on every call to phonemes->str."
  (make-trie (into {}
                   (for [[phonemes letter] inverse-phoneme-map]
                     [(apply str phonemes) letter]))))

(defn phonemes->str
  "Given a seq of phonemes (or a string of concatenated phonemes), return a
  string in which the phonemes are combined into their proper letters.
  Anything that is not part of a known phoneme sequence is passed through
  unchanged."
  [phoneme-seq]
  (->> (apply str phoneme-seq)
       (str->elems inverse-concat-phoneme-trie)
       (apply str)))
422 |
423 | ;;;;;;;;;;;;;;
424 | ;; sorting fns
425 | ;;;;;;;;;;;;;;
426 |
(def ^{:private false
       :doc "a flat seq of all தமிழ் letters in lexicographical (alphabetical) order: the vowels, then ஃ, then each consonant row in turn -- put another way, in the order of அகர முதல் னரக இறுவாய் as the 2500 yr old grammatical compendium தொல்காப்பியம் states at its outset"}
  letter-seq
  (apply concat vowels c-cv-letters))
430 |
(def sort-map
  "a map from each தமிழ் letter to a number giving its position in letter-seq, i.e. its relative position in sort order"
  (into {}
        (map-indexed (fn [position letter] [letter position]))
        letter-seq))
433 |
(defn letter-before?
  "2-arg predicate: does the single letter `s1` sort before the single letter
  `s2`? Both arguments are expected to be strings holding exactly one letter.

  - two nils: true
  - neither is a தமிழ் letter (not in sort-map): decided by `compare`
  - only `s1` is not a தமிழ் letter: true (non-தமிழ் sorts first)
  - only `s2` is not a தமிழ் letter: false
  - both are தமிழ் letters: decided by their positions in sort-map"
  [s1 s2]
  (let [rank1 (get sort-map s1)
        rank2 (get sort-map s2)]
    (cond
      (and (nil? s1) (nil? s2)) true
      (and (nil? rank1) (nil? rank2)) (neg? (compare s1 s2))
      (nil? rank1) true
      (nil? rank2) false
      :else (< rank1 rank2))))
442 |
;; Comparator form of the letter-before? predicate (wrapped with
;; clojure.core/comparator), for use with sort / sort-by on single-letter strings.
(def ^{:doc "a comparator for strings that represent a single letter that respects தமிழ் alphabetical order"}
  letter-comp (comparator letter-before?))
445 |
(defn word-before?
  "2-arg predicate: does `str1` come before `str2` lexicographically? Both
  strings are split into letters (தமிழ் letters kept whole, other characters
  one by one) and compared letter by letter with letter-before?. A proper
  prefix sorts before the longer word; equal words yield false."
  [str1 str2]
  (let [letters1 (str->elems str1)
        letters2 (str->elems str2)
        ;; number of leading letters the two words have in common
        shared (count (take-while true? (map = letters1 letters2)))
        tail1 (drop shared letters1)
        tail2 (drop shared letters2)]
    (cond
      (empty? tail1) (boolean (seq tail2))
      (empty? tail2) false
      :else (letter-before? (first tail1) (first tail2)))))
455 |
;; Comparator form of the word-before? predicate (wrapped with
;; clojure.core/comparator), for use with sort / sort-by on whole strings.
(def ^{:doc "a comparator for lexicographical comparisons of arbitrary strings (consisting of தமிழ் letters and letters from 1-to-1 encodings)"}
  word-comp (comparator word-before?))
458 |
459 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
460 | ;; word & character traits fns
461 | ;; position fns
462 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
463 |
(defn whitespace?
  "Return whether the character `ch` is whitespace. On the JVM this defers to
  Character/isWhitespace; in ClojureScript it tests the character against the
  regex \\s. Returns nil when `ch` is nil."
  [ch]
  (if-not ch
    nil
    #?(:clj (Character/isWhitespace ch)
       :cljs (some? (re-find #"\s" (str ch))))))
470 |
(defn wordy-char?
  "Return whether the character `ch` is one that might go into a word or
  identifier -- a Unicode-aware counterpart of what \\w matches for ASCII.
  `$` and `_` are never wordy. On the JVM the remaining decision is
  Character/isJavaIdentifierPart; in ClojureScript every other non-whitespace
  character counts as wordy. Returns nil when `ch` is nil."
  [ch]
  (when ch
    (if (contains? #{\$ \_} ch)
      false
      #?(:clj (Character/isJavaIdentifierPart ch)
         :cljs (not (whitespace? (str ch)))))))
479 |
480 | ;; TODO: DRY on seq-prefix & seq-prefix? -- is there a Clojure implementation?
481 |
(defn seq-prefix
  "Return, as a vector, the longest prefix shared by the two input sequences
  (empty when they have nothing in common or either is empty/nil)."
  [seq1 seq2]
  (->> (map vector seq1 seq2)
       (take-while (fn [[a b]] (= a b)))
       (mapv first)))
495 |
(defn seq-prefix?
  "Return whether the query seq `qry` is a prefix of the target seq `tgt`.
  An empty or nil target has no prefixes (always false); an empty or nil
  query is a prefix of any non-empty target."
  [tgt qry]
  (let [shared (seq-prefix tgt qry)]
    (cond
      (empty? tgt) false
      (empty? qry) (empty? shared)
      :else (= (seq qry) shared))))
504 |
(defn prefix?
  "Return whether `str2` is a prefix of `str1`, comparing the two words
  phoneme by phoneme rather than codepoint by codepoint."
  [str1 str2]
  (seq-prefix? (str->phonemes str1)
               (str->phonemes str2)))
511 |
(defn suffix?
  "Return whether `str2` is a suffix of `str1`, comparing the two words
  phoneme by phoneme: a suffix is a prefix of the reversed phoneme sequence."
  [str1 str2]
  (seq-prefix? (reverse (str->phonemes str1))
               (reverse (str->phonemes str2))))
518 |
519 | ;; TODO: DRY on seq-index-of -- is there already a Clojure implementation?
520 |
(defn seq-index-of
  "Given a target seq `tgt` and a query seq `qry`, return the 0-based index of
  the first occurrence of the query inside the target, or -1 when it does not
  occur. An empty query is found at index 0, including in an empty target,
  matching the behaviour of the native string indexOf.
  Calls seq-prefix? at every index -- the target is only realized as far as
  needed, while the query is counted (and so fully realized) up front."
  [tgt qry]
  (let [qlen (count qry)]
    (if (zero? qlen)
      0
      (loop [ts tgt
             idx 0]
        (cond
          ;; fewer than qlen elements left (this also covers an exhausted target)
          (< (count (take qlen ts)) qlen) -1
          (seq-prefix? ts qry) idx
          :else (recur (rest ts) (inc idx)))))))
534 |
(def index-of
  "index of the first occurrence of `qry` inside `tgt`: the native .indexOf method call on the JVM, seq-index-of in ClojureScript"
  #?(:cljs seq-index-of
     :clj #(.indexOf %1 %2)))
540 |
(defn wordy-seq
  "Take a string and return a seq of its maximal runs of wordy characters (see
  wordy-char?) -- a Unicode-aware version of matching the \\w+ regex pattern.
  Returns nil for nil input. This was originally called word-seq, but 'word'
  is not accurate for languages or periods that have no separation between
  words (ex: Thai, Chinese, Japanese, Latin manuscripts, ancient Thamil stone
  inscriptions, etc.)"
  [s]
  (when s
    (->> s
         (partition-by wordy-char?)
         (filter (fn [chunk] (wordy-char? (first chunk))))
         (map (fn [chunk] (apply str chunk))))))
549 |
(defn wordy-chunk-and-cursor-pos
  "Given a string `s` and a cursor index `idx` (0 <= idx <= (count s); the
  cursor sits just before the character at `idx`), return
  [wordy-chunk cursor-pos-within-chunk] for the wordy chunk that the cursor is
  inside of or directly adjacent to. When wordy characters lie on both sides
  of the cursor they belong to the same chunk. Returns nil when the cursor
  touches no wordy character. Fails an assertion when `idx` is out of range."
  [s idx]
  (assert (<= 0 idx) (str "cursor postiion out of range [idx =" idx "]"))
  (assert (<= idx (count s)) (str "cursor postiion out of range [idx =" idx "], [str len =" (count s) "]"))
  (let [before (subs s 0 idx)
        after (subs s idx)
        ;; turn a run of characters into a string, but only if it is a wordy run
        wordy-run (fn [run]
                    (when (wordy-char? (first run))
                      (apply str run)))
        ;; The run ending exactly at the cursor and the run starting exactly at
        ;; the cursor. Taking the last/first partition directly (instead of
        ;; searching for the chunk's text with index-of) stays correct when the
        ;; same text also occurs earlier in the string.
        prev-chunk (wordy-run (last (partition-by wordy-char? before)))
        next-chunk (wordy-run (first (partition-by wordy-char? after)))]
    (cond
      (and prev-chunk next-chunk) [(str prev-chunk next-chunk) (count prev-chunk)]
      prev-chunk [prev-chunk (count prev-chunk)]
      next-chunk [next-chunk 0]
      :else nil)))
574 |
;; Takes the same arguments as wordy-chunk-and-cursor-pos and returns only
;; the first element of its result (the chunk), or nil when that result is nil.
(def wordy-chunk-under (comp first wordy-chunk-and-cursor-pos))
576 |
(defn cursor-adjust
  "Given a string `s`, a cursor position `idx` and a `direction`, return the
  cursor position moved onto a boundary between actual letters, so that it
  never lands inside a multi-codepoint தமிழ் letter.

  `direction` :to-first / :முதல்-நோக்கி moves back to the start of the letter
  the cursor is inside of; :to-last / :பின்-நோக்கி, or anything else (including
  nil), moves forward to its end. When the cursor is already on a letter
  boundary, or is not touching a wordy chunk at all, `idx` is returned
  unchanged."
  [s idx direction]
  (let [[wordy-chunk rel-idx] (wordy-chunk-and-cursor-pos s idx)]
    (if-not wordy-chunk
      idx
      (let [;; absolute index in s at which the chunk starts
            chunk-start (- idx rel-idx)
            ;; letter boundaries relative to the chunk start: 0, len(l1), len(l1)+len(l2), ...
            boundaries (reductions + 0 (map count (str->letters wordy-chunk)))
            ;; compare against the chunk-relative cursor position, not the absolute idx
            before-rel (last (take-while #(<= % rel-idx) boundaries))
            after-rel (first (drop-while #(< % rel-idx) boundaries))]
        (if (= before-rel after-rel)
          idx
          (+ chunk-start
             (case direction
               (:to-first :முதல்-நோக்கி) before-rel
               (:to-last :பின்-நோக்கி) after-rel
               after-rel)))))))
597 |
--------------------------------------------------------------------------------
/src/clj_thamil/format/convert.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.format.convert
2 | (:require ;; [clojure.algo.generic.functor :as ftor]
3 | [clojure.set :as set]
4 | [clj-thamil.format :as fmt])
5 | #?(:clj (:gen-class)))
6 |
7 | ;; A general note about the conversion and transliteration schemes
8 | ;; defined by the maps in this namespace:
9 | ;;
10 | ;; There may be multiple English letter sequences mapping to the same
11 | ;; தமிழ் letter. Also note that we get the mapping for the reverse conversion
12 | ;; by inverting the map (keys become values, and values become keys).
;; When multiple keys map to the same value and you invert the map,
;; the inverse maps that value to only one of the original keys, and
;; which one is chosen is arbitrary. Therefore, the inverse map may
;; need to be "manually adjusted" in that case to select a default
;; mapping.
18 |
19 |
20 | ;;;;;;;;
21 | ;; தமிழ் <-> Romanized
22 | ;;;;;;;;
23 |
(def romanized-தமிழ்-phoneme-map
  "a map of English strings to their தமிழ் phonemes (and consonant clusters)."
  {;; vowels (a doubled letter and its capital are both accepted for long vowels)
   "a"   "அ"
   "aa"  "ஆ"
   "A"   "ஆ"
   "i"   "இ"
   "ii"  "ஈ"
   "I"   "ஈ"
   "u"   "உ"
   "uu"  "ஊ"
   "U"   "ஊ"
   "e"   "எ"
   "ee"  "ஏ"
   "E"   "ஏ"
   "ai"  "ஐ"
   "o"   "ஒ"
   "oo"  "ஓ"
   "O"   "ஓ"
   "au"  "ஔ"

   ;; aaytham
   "q"   "ஃ"

   ;; consonants
   "k"   "க்"
   "ng"  "ங்"
   "ch"  "ச்"
   "nj"  "ஞ்"
   "t"   "ட்"
   "N"   "ண்"
   "th"  "த்"
   "n-"  "ந்"
   "p"   "ப்"
   "m"   "ம்"
   "y"   "ய்"
   "r"   "ர்"
   "l"   "ல்"
   "v"   "வ்"
   "zh"  "ழ்"
   "L"   "ள்"
   "R"   "ற்"
   "n"   "ன்"

   ;; alternate spellings of single consonants
   "g"   "க்"
   "s"   "ச்"
   "d"   "ட்"
   "w"   "ந்"
   "b"   "ப்"
   "z"   "ழ்"

   ;; consonant clusters
   "mb"  "ம்ப்"
   "nth" "ந்த்"
   "nr"  "ன்ற்"
   "nd"  "ண்ட்"})
73 |
(def தமிழ்-romanized-phoneme-overrides
  "designates specific transliterations of phonemes / phoneme clusters in the தமிழ்->English direction (ex: resolving situations where multiple English sequences map to a single தமிழ் phoneme)"
  {;; long vowels
   "ஆ"   "aa"
   "ஈ"   "ii"
   "ஊ"   "uu"
   "ஏ"   "E"
   "ஓ"   "O"

   ;; single consonants that have more than one English spelling
   "க்"   "k"
   "ச்"   "ch"
   "ட்"   "t"
   "ந்"   "n"
   "ப்"   "p"
   "ழ்"   "zh"

   ;; consonant clusters
   "ங்க்" "ng"
   "ன்ப்" "nb"
   "ண்ப்" "nb"})
90 |
(def தமிழ்-romanized-phoneme-map
  "the inverse of romanized-தமிழ்-phoneme-map, with the entries of தமிழ்-romanized-phoneme-overrides taking precedence. The overrides pick the transliteration for தமிழ் letters that can be input in multiple ways (or whose transliteration into English should be different than how it is input via English)"
  (let [inverted (set/map-invert romanized-தமிழ்-phoneme-map)]
    ;; conj-ing the overrides last lets them win on any shared key
    (into inverted தமிழ்-romanized-phoneme-overrides)))
95 |
;; Trie built (via fmt/make-trie) from the English -> தமிழ் phoneme map;
;; used by romanized->தமிழ் to split an English string into elements.
(def romanized-தமிழ்-phoneme-trie (fmt/make-trie romanized-தமிழ்-phoneme-map))

;; Trie built (via fmt/make-trie) from the தமிழ் -> English phoneme map;
;; used by தமிழ்->romanized.
(def தமிழ்-romanized-phoneme-trie (fmt/make-trie தமிழ்-romanized-phoneme-map))
99 |
(defn romanized->தமிழ்
  "Converts `s`, a string of romanized (English-letter) தமிழ், into the தமிழ்
  text it stands for: the input is split into elements using
  romanized-தமிழ்-phoneme-trie and the result is joined by fmt/phonemes->str."
  [s]
  (->> s
       (fmt/str->elems romanized-தமிழ்-phoneme-trie)
       fmt/phonemes->str))
104 |
(defn தமிழ்->romanized
  "Converts the தமிழ் string `s` into its romanized (English-letter) form."
  [s]
  (let [;; break the text into phonemes and glue them back into one string
        phoneme-str (apply str (fmt/str->phonemes s))
        ;; re-split that string using the தமிழ் -> English trie
        romanized-elems (fmt/str->elems தமிழ்-romanized-phoneme-trie phoneme-str)]
    (apply str romanized-elems)))
112 |
113 | ;;;;;;;;
114 | ;; தமிழ் <-> TAB
115 | ;;;;;;;;
116 |
;; Unicode தமிழ் letter (key) -> the character sequence that represents that
;; letter in TAB-encoded text (value). This map is used as the
;; unicode -> TAB table, and its inverse as the TAB -> unicode table.
;; After the vowels and ஃ, each consonant contributes one row per form:
;; pulli, bare consonant, then the consonant combined with each vowel sign.
(def tab-map
  {"அ" "Ü"
   "ஆ" "Ý"
   "இ" "Þ"
   "ஈ" "ß"
   "உ" "à"
   "ஊ" "á"
   "எ" "â"
   "ஏ" "ã"
   "ஐ" "ä"
   "ஒ" "å"
   "ஓ" "æ"
   "ஔ" "å÷"
   "ஃ" "ç"
   "க்" "è¢"
   "க" "è"
   "கா" "è£"
   "கி" "è¤"
   "கீ" "è¦"
   "கு" "°"
   "கூ" "Ã"
   "கெ" "ªè"
   "கே" "«è"
   "கை" "¬è"
   "கொ" "ªè£"
   "கோ" "«è£"
   "கௌ" "ªè÷"
   "ங்" "é¢"
   "ங" "é"
   "ஙா" "é£"
   "ஙி" "é¤"
   "ஙீ" "é¦"
   "ஙு" "±"
   "ஙூ" "Ä"
   "ஙெ" "ªé"
   "ஙே" "«é"
   "ஙை" "¬é"
   "ஙொ" "ªé£"
   "ஙோ" "«é£"
   "ஙௌ" "ªé÷"
   "ச்" "ê¢"
   "ச" "ê"
   "சா" "ê£"
   "சி" "ê¤"
   "சீ" "ê¦"
   "சு" "²"
   "சூ" "Å"
   "செ" "ªê"
   "சே" "«ê"
   "சை" "¬ê"
   "சொ" "ªê£"
   "சோ" "«ê£"
   "சௌ" "ªê÷"
   "ஞ்" "ë¢"
   "ஞ" "ë"
   "ஞா" "ë£"
   "ஞி" "ë¤"
   "ஞீ" "ë¦"
   "ஞு" "³"
   "ஞூ" "Æ"
   "ஞெ" "ªë"
   "ஞே" "«ë"
   "ஞை" "¬ë"
   "ஞொ" "ªë£"
   "ஞோ" "«ë£"
   "ஞௌ" "ªë÷"
   "ட்" "ì¢"
   "ட" "ì"
   "டா" "ì£"
   "டி" "®"
   "டீ" "ì¦"
   "டு" "´"
   "டூ" "Ç"
   "டெ" "ªì"
   "டே" "«ì"
   "டை" "¬ì"
   "டொ" "ªì£"
   "டோ" "«ì£"
   "டௌ" "ªì÷"
   "ண்" "í¢"
   "ண" "í"
   "ணா" "í£"
   "ணி" "í¤"
   "ணீ" "í¦"
   "ணு" "µ"
   "ணூ" "È"
   "ணெ" "ªí"
   "ணே" "«í"
   "ணை" "¬í"
   "ணொ" "ªí£"
   "ணோ" "«í£"
   "ணௌ" "ªí÷"
   "த்" "î¢"
   "த" "î"
   "தா" "î£"
   "தி" "î¤"
   "தீ" "î¦"
   "து" "¶"
   "தூ" "É"
   "தெ" "ªî"
   "தே" "«î"
   "தை" "¬î"
   "தொ" "ªî£"
   "தோ" "«î£"
   "தௌ" "ªî÷"
   "ந்" "ï¢"
   "ந" "ï"
   "நா" "ï£"
   "நி" "ï¤"
   "நீ" "ï¦"
   "நு" "¸"
   "நூ" "Ë"
   "நெ" "ªï"
   "நே" "«ï"
   "நை" "¬ï"
   "நொ" "ªï£"
   "நோ" "«ï£"
   "நௌ" "ªï÷"
   "ப்" "ð¢"
   "ப" "ð"
   "பா" "ð£"
   "பி" "ð¤"
   "பீ" "ð¦"
   "பு" "¹"
   "பூ" "Ì"
   "பெ" "ªð"
   "பே" "«ð"
   "பை" "¬ð"
   "பொ" "ªð£"
   "போ" "«ð£"
   "பௌ" "ªð÷"
   "ம்" "ñ¢"
   "ம" "ñ"
   "மா" "ñ£"
   "மி" "ñ¤"
   "மீ" "ñ¦"
   "மு" "º"
   "மூ" "Í"
   "மெ" "ªñ"
   "மே" "«ñ"
   "மை" "¬ñ"
   "மொ" "ªñ£"
   "மோ" "«ñ£"
   "மௌ" "ªñ÷"
   "ய்" "ò¢"
   "ய" "ò"
   "யா" "ò£"
   "யி" "ò¤"
   "யீ" "ò¦"
   "யு" "»"
   "யூ" "Î"
   "யெ" "ªò"
   "யே" "«ò"
   "யை" "¬ò"
   "யொ" "ªò£"
   "யோ" "«ò£"
   "யௌ" "ªò÷"
   "ர்" "ó¢"
   "ர" "ó"
   "ரா" "ó£"
   "ரி" "ó¤"
   "ரீ" "ó¦"
   "ரு" "¼"
   "ரூ" "Ï"
   "ரெ" "ªó"
   "ரே" "«ó"
   "ரை" "¬ó"
   "ரொ" "ªó£"
   "ரோ" "«ó£"
   "ரௌ" "ªó÷"
   "ல்" "ô¢"
   "ல" "ô"
   "லா" "ô£"
   "லி" "ô¤"
   "லீ" "ô¦"
   "லு" "½"
   "லூ" "Ö"
   "லெ" "ªô"
   "லே" "«ô"
   "லை" "¬ô"
   "லொ" "ªô£"
   "லோ" "«ô£"
   "லௌ" "ªô÷"
   "வ்" "õ¢"
   "வ" "õ"
   "வா" "õ£"
   "வி" "õ¤"
   "வீ" "õ¦"
   "வு" "¾"
   "வூ" "×"
   "வெ" "ªõ"
   "வே" "«õ"
   "வை" "¬õ"
   "வொ" "ªõ£"
   "வோ" "«õ£"
   "வௌ" "ªõ÷"
   "ழ்" "ö¢"
   "ழ" "ö"
   "ழா" "ö£"
   "ழி" "ö¤"
   "ழீ" "ö¦"
   "ழு" "¿"
   "ழூ" "Ø"
   "ழெ" "ªö"
   "ழே" "«ö"
   "ழை" "¬ö"
   "ழொ" "ªö£"
   "ழோ" "«ö£"
   "ழௌ" "ªö÷"
   "ள்" "÷¢"
   "ள" "÷"
   "ளா" "÷£"
   "ளி" "÷¤"
   "ளீ" "÷¦"
   "ளு" "À"
   "ளூ" "Ù"
   "ளெ" "ª÷"
   "ளே" "«÷"
   "ளை" "¬÷"
   "ளொ" "ª÷£"
   "ளோ" "«÷£"
   "ளௌ" "ª÷÷"
   "ற்" "ø¢"
   "ற" "ø"
   "றா" "ø£"
   "றி" "ø¤"
   "றீ" "ø¦"
   "று" "Á"
   "றூ" "Ú"
   "றெ" "ªø"
   "றே" "«ø"
   "றை" "¬ø"
   "றொ" "ªø£"
   "றோ" "«ø£"
   "றௌ" "ªø÷"
   "ன்" "ù¢"
   "ன" "ù"
   "னா" "ù£"
   "னி" "ù¤"
   "னீ" "ù¦"
   "னு" "Â"
   "னூ" "Û"
   "னெ" "ªù"
   "னே" "«ù"
   "னை" "¬ù"
   "னொ" "ªù£"
   "னோ" "«ù£"
   "னௌ" "ªù÷"})
365 |
366 | ;;;;;;;;
367 | ;; தமிழ் <-> Bamini
368 | ;;;;;;;;
369 |
;; Unicode தமிழ் letter (key) -> the character sequence that represents that
;; letter in Bamini-encoded text (value). This map is used as the
;; unicode -> Bamini table, and its inverse (plus a few extras added in
;; init-charsets) as the Bamini -> unicode table.
;;
;; For a consonant whose bare form is C, the rows follow the pattern
;;   C;  C  Ch  Cp  CP  <u form>  <uu form>  nC  NC  iC  nCh  NCh  nCs
;; i.e. the ௌ form is always "n" + C + "s".
(def bamini-map
  {"அ" "m"
   "ஆ" "M"
   "இ" ","
   "ஈ" "<"
   "உ" "c"
   "ஊ" "C"
   "எ" "v"
   "ஏ" "V"
   "ஐ" "I"
   "ஒ" "x"
   "ஓ" "X"
   "ஔ" "xs"
   "ஃ" "/"
   "க்" "f;"
   "க" "f"
   "கா" "fh"
   "கி" "fp"
   "கீ" "fP"
   "கு" "F"
   "கூ" "$"
   "கெ" "nf"
   "கே" "Nf"
   "கை" "if"
   "கொ" "nfh"
   "கோ" "Nfh"
   "கௌ" "nfs"
   "ங்" "q;"
   "ங" "q"
   "ஙா" "qh"
   "ஙி" "qp"
   "ஙீ" "qP"
   ;; "ஙு" nil
   ;; "ஙூ" nil
   "ஙெ" "nq"
   "ஙே" "Nq"
   "ஙை" "iq"
   "ஙொ" "nqh"
   "ஙோ" "Nqh"
   "ஙௌ" "nqs"
   "ச்" "r;"
   "ச" "r"
   "சா" "rh"
   "சி" "rp"
   "சீ" "rP"
   "சு" "R"
   "சூ" "#"
   "செ" "nr"
   "சே" "Nr"
   "சை" "ir"
   "சொ" "nrh"
   "சோ" "Nrh"
   "சௌ" "nrs"
   "ஞ்" "Q;"
   "ஞ" "Q"
   "ஞா" "Qh"
   "ஞி" "Qp"
   "ஞீ" "QP"
   ;; "ஞு" nil
   ;; "ஞூ" nil
   "ஞெ" "nQ"
   "ஞே" "NQ"
   "ஞை" "iQ"
   "ஞொ" "nQh"
   "ஞோ" "NQh"
   "ஞௌ" "nQs"
   "ட்" "l;"
   "ட" "l"
   "டா" "lh"
   "டி" "b"
   "டீ" "B"
   "டு" "L"
   "டூ" "^"
   "டெ" "nl"
   "டே" "Nl"
   "டை" "il"
   "டொ" "nlh"
   "டோ" "Nlh"
   "டௌ" "nls"
   "ண்" "z;"
   "ண" "z"
   "ணா" "zh"
   "ணி" "zp"
   "ணீ" "zP"
   "ணு" "Z"
   "ணூ" "Z}"
   "ணெ" "nz"
   "ணே" "Nz"
   "ணை" "iz"
   "ணொ" "nzh"
   "ணோ" "Nzh"
   "ணௌ" "nzs"
   "த்" "j;"
   "த" "j"
   "தா" "jh"
   "தி" "jp"
   "தீ" "jP"
   "து" "J"
   "தூ" "J}"
   "தெ" "nj"
   "தே" "Nj"
   "தை" "ij"
   "தொ" "njh"
   "தோ" "Njh"
   "தௌ" "njs"
   "ந்" "e;"
   "ந" "e"
   "நா" "eh"
   "நி" "ep"
   "நீ" "eP"
   "நு" "E"
   "நூ" "E}"
   "நெ" "ne"
   "நே" "Ne"
   "நை" "ie"
   "நொ" "neh"
   "நோ" "Neh"
   "நௌ" "nes"
   "ப்" "g;"
   "ப" "g"
   "பா" "gh"
   "பி" "gp"
   "பீ" "gP"
   "பு" "G"
   "பூ" "G+"
   "பெ" "ng"
   "பே" "Ng"
   "பை" "ig"
   "பொ" "ngh"
   "போ" "Ngh"
   "பௌ" "ngs"
   "ம்" "k;"
   "ம" "k"
   "மா" "kh"
   "மி" "kp"
   "மீ" "kP"
   "மு" "K"
   "மூ" "%"
   "மெ" "nk"
   "மே" "Nk"
   "மை" "ik"
   "மொ" "nkh"
   "மோ" "Nkh"
   "மௌ" "nks"
   "ய்" "a;"
   "ய" "a"
   "யா" "ah"
   "யி" "ap"
   "யீ" "aP"
   "யு" "A"
   "யூ" "A+"
   "யெ" "na"
   "யே" "Na"
   "யை" "ia"
   "யொ" "nah"
   "யோ" "Nah"
   "யௌ" "nas"
   "ர்" "u;"
   "ர" "u"
   "ரா" "uh"
   "ரி" "up"
   "ரீ" "uP"
   "ரு" "U"
   "ரூ" "&"
   "ரெ" "nu"
   "ரே" "Nu"
   "ரை" "iu"
   "ரொ" "nuh"
   "ரோ" "Nuh"
   "ரௌ" "nus"
   "ல்" "y;"
   "ல" "y"
   "லா" "yh"
   "லி" "yp"
   "லீ" "yP"
   "லு" "Y"
   "லூ" "Y}"
   "லெ" "ny"
   "லே" "Ny"
   "லை" "iy"
   "லொ" "nyh"
   "லோ" "Nyh"
   "லௌ" "nys"
   "வ்" "t;"
   "வ" "t"
   "வா" "th"
   "வி" "tp"
   "வீ" "tP"
   "வு" "T"
   "வூ" "T+"
   "வெ" "nt"
   "வே" "Nt"
   "வை" "it"
   "வொ" "nth"
   "வோ" "Nth"
   ;; was "ntt"; the ௌ form is "n" + consonant + "s" like every other row
   "வௌ" "nts"
   "ழ்" "o;"
   "ழ" "o"
   "ழா" "oh"
   "ழி" "op"
   "ழீ" "oP"
   "ழு" "O"
   "ழூ" "*"
   "ழெ" "no"
   "ழே" "No"
   "ழை" "io"
   "ழொ" "noh"
   "ழோ" "Noh"
   ;; was "noo"; the ௌ form is "n" + consonant + "s" like every other row
   "ழௌ" "nos"
   "ள்" "s;"
   "ள" "s"
   "ளா" "sh"
   "ளி" "sp"
   "ளீ" "sP"
   "ளு" "S"
   "ளூ" "Sh"
   "ளெ" "ns"
   "ளே" "Ns"
   "ளை" "is"
   "ளொ" "nsh"
   "ளோ" "Nsh"
   "ளௌ" "nss"
   "ற்" "w;"
   "ற" "w"
   "றா" "wh"
   "றி" "wp"
   "றீ" "wP"
   "று" "W"
   "றூ" "W}"
   "றெ" "nw"
   "றே" "Nw"
   "றை" "iw"
   "றொ" "nwh"
   "றோ" "Nwh"
   "றௌ" "nws"
   "ன்" "d;"
   "ன" "d"
   "னா" "dh"
   "னி" "dp"
   "னீ" "dP"
   "னு" "D"
   "னூ" "D}"
   "னெ" "nd"
   "னே" "Nd"
   "னை" "id"
   "னொ" "ndh"
   "னோ" "Ndh"
   "னௌ" "nds"

   "ஜ்" "[;"
   "ஜ" "["
   "ஜா" "[h"
   "ஜி" "[p"
   "ஜீ" "[P"
   "ஜு" "[{"
   "ஜூ" "[\""
   "ஜெ" "n["
   "ஜே" "N["
   "ஜை" "i["
   "ஜொ" "n[h"
   "ஜோ" "N[h"
   "ஜௌ" "n[s"

   "ஷ்" "\\;"
   "ஷ" "\\"
   "ஷா" "\\h"
   "ஷி" "\\p"
   "ஷீ" "\\P"
   "ஷு" "\\{"
   "ஷூ" "\\\""
   "ஷெ" "n\\"
   "ஷே" "N\\"
   "ஷை" "i\\"
   "ஷொ" "n\\h"
   "ஷோ" "N\\h"
   "ஷௌ" "n\\s"

   "ஸ்" "];"
   "ஸ" "]"
   "ஸா" "]h"
   "ஸி" "]p"
   "ஸீ" "]P"
   "ஸு" "]{"
   "ஸூ" "]\""
   "ஸெ" "n]"
   "ஸே" "N]"
   "ஸை" "i]"
   "ஸொ" "n]h"
   "ஸோ" "N]h"
   "ஸௌ" "n]s"

   "ஹ்" "`;"
   "ஹ" "`"
   "ஹா" "`h"
   "ஹி" "`p"
   "ஹீ" "`P"
   "ஹு" "`{"
   "ஹூ" "`\""
   "ஹெ" "n`"
   "ஹே" "N`"
   "ஹை" "i`"
   "ஹொ" "n`h"
   "ஹோ" "N`h"
   "ஹௌ" "n`s"

   "க்ஷ்" "~;"

   "ஶ்ரீ" "="

   })
680 |
681 | ;;;;;;;;
682 | ;; தமிழ் <-> TSCII
683 | ;;;;;;;;
684 |
;; Unicode தமிழ் letter (key) -> the character sequence that represents that
;; letter in TSCII-encoded text (value). This map is used as the
;; unicode -> TSCII table, and its inverse as the TSCII -> unicode table,
;; so two letters must never share a value.
;;
;; For a consonant whose bare form is C, the rows follow the pattern
;;   <pulli form>  C  C¡  C¢  C£  <u form>  <uu form>  ¦C  §C  ¨C  ¦C¡  §C¡  ¦Cª
(def tscii-map
  {"அ" "«"
   "ஆ" "¬"
   ;; U+00AD (soft hyphen) is invisible in most editors, so it is written
   ;; as an escape to keep the entry from looking like an empty string
   "இ" "\u00AD"
   "ஈ" "®"
   "உ" "¯"
   "ஊ" "°"
   "எ" "±"
   "ஏ" "²"
   "ஐ" "³"
   "ஒ" "´"
   "ஓ" "µ"
   "ஔ" "¶"
   "ஃ" "∙"
   "க்" "ì"
   "க" "¸"
   "கா" "¸¡"
   "கி" "¸¢"
   "கீ" "¸£"
   "கு" "Ì"
   "கூ" "Ü"
   "கெ" "¦¸"
   "கே" "§¸"
   "கை" "¨¸"
   "கொ" "¦¸¡"
   "கோ" "§¸¡"
   "கௌ" "¦¸ª"
   "ங்" "í"
   "ங" "¹"
   "ஙா" "¹¡"
   "ஙி" "¹¢"
   "ஙீ" "¹£"
   "ஙு" "™"
   "ஙூ" "›"
   "ஙெ" "¦¹"
   "ஙே" "§¹"
   "ஙை" "¨¹"
   "ஙொ" "¦¹¡"
   "ஙோ" "§¹¡"
   "ஙௌ" "¦¹ª"
   "ச்" "î"
   "ச" "º"
   "சா" "º¡"
   "சி" "º¢"
   "சீ" "º£"
   "சு" "Í"
   "சூ" "Ý"
   "செ" "¦º"
   "சே" "§º"
   "சை" "¨º"
   "சொ" "¦º¡"
   "சோ" "§º¡"
   "சௌ" "¦ºª"
   "ஞ்" "ï"
   "ஞ" "»"
   "ஞா" "»¡"
   "ஞி" "»¢"
   "ஞீ" "»£"
   ;; NOTE(review): the previous value had no visible character. The
   ;; neighbouring ஙு / ஙூ / ஞூ values (™ › œ) are the Windows-1252 readings
   ;; of bytes 0x99 / 0x9B / 0x9C, so this uses the Windows-1252 reading of
   ;; 0x9A. Confirm against the TSCII code chart.
   "ஞு" "š"
   "ஞூ" "œ"
   "ஞெ" "¦»"
   "ஞே" "§»"
   "ஞை" "¨»"
   "ஞொ" "¦»¡"
   "ஞோ" "§»¡"
   "ஞௌ" "¦»ª"
   "ட்" "ð"
   "ட" "¼"
   "டா" "¼¡"
   "டி" "Ê"
   "டீ" "Ë"
   "டு" "Î"
   "டூ" "Þ"
   "டெ" "¦¼"
   "டே" "§¼"
   "டை" "¨¼"
   "டொ" "¦¼¡"
   "டோ" "§¼¡"
   "டௌ" "¦¼ª"
   "ண்" "ñ"
   "ண" "½"
   "ணா" "½¡"
   "ணி" "½¢"
   "ணீ" "½£"
   "ணு" "Ï"
   "ணூ" "ß"
   "ணெ" "¦½"
   "ணே" "§½"
   "ணை" "¨½"
   "ணொ" "¦½¡"
   "ணோ" "§½¡"
   "ணௌ" "¦½ª"
   "த்" "ò"
   "த" "¾"
   "தா" "¾¡"
   "தி" "¾¢"
   "தீ" "¾£"
   "து" "Ð"
   "தூ" "à"
   "தெ" "¦¾"
   "தே" "§¾"
   "தை" "¨¾"
   "தொ" "¦¾¡"
   "தோ" "§¾¡"
   "தௌ" "¦¾ª"
   "ந்" "ó"
   "ந" "¿"
   "நா" "¿¡"
   "நி" "¿¢"
   "நீ" "¿£"
   "நு" "Ñ"
   "நூ" "á"
   "நெ" "¦¿"
   "நே" "§¿"
   "நை" "¨¿"
   "நொ" "¦¿¡"
   "நோ" "§¿¡"
   "நௌ" "¦¿ª"
   "ப்" "ô"
   "ப" "À"
   "பா" "À¡"
   "பி" "À¢"
   "பீ" "À£"
   "பு" "Ò"
   "பூ" "â"
   "பெ" "¦À"
   "பே" "§À"
   "பை" "¨À"
   "பொ" "¦À¡"
   "போ" "§À¡"
   "பௌ" "¦Àª"
   "ம்" "õ"
   "ம" "Á"
   "மா" "Á¡"
   "மி" "Á¢"
   "மீ" "Á£"
   "மு" "Ó"
   "மூ" "ã"
   "மெ" "¦Á"
   "மே" "§Á"
   "மை" "¨Á"
   "மொ" "¦Á¡"
   "மோ" "§Á¡"
   "மௌ" "¦Áª"
   "ய்" "ö"
   "ய" "Â"
   ;; the next three values were missing the base consonant Â (they were
   ;; just the vowel sign), unlike யொ / யோ below which carry it
   "யா" "Â¡"
   "யி" "Â¢"
   "யீ" "Â£"
   "யு" "Ô"
   "யூ" "ä"
   "யெ" "¦Â"
   "யே" "§Â"
   "யை" "¨Â"
   "யொ" "¦Â¡"
   "யோ" "§Â¡"
   "யௌ" "¦Âª"
   "ர்" "÷"
   "ர" "Ã"
   ;; the next three values had been collapsed into single characters
   ;; (á â ã), which made them collide with நூ, பூ and மூ and left the
   ;; inverted (TSCII -> unicode) map ambiguous
   "ரா" "Ã¡"
   "ரி" "Ã¢"
   "ரீ" "Ã£"
   "ரு" "Õ"
   "ரூ" "å"
   "ரெ" "¦Ã"
   "ரே" "§Ã"
   "ரை" "¨Ã"
   "ரொ" "¦Ã¡"
   "ரோ" "§Ã¡"
   "ரௌ" "¦Ãª"
   "ல்" "ø"
   "ல" "Ä"
   "லா" "Ä¡"
   "லி" "Ä¢"
   "லீ" "Ä£"
   "லு" "Ö"
   "லூ" "æ"
   "லெ" "¦Ä"
   "லே" "§Ä"
   "லை" "¨Ä"
   "லொ" "¦Ä¡"
   "லோ" "§Ä¡"
   "லௌ" "¦Äª"
   "வ்" "ù"
   "வ" "Å"
   "வா" "Å¡"
   "வி" "Å¢"
   "வீ" "Å£"
   "வு" "×"
   "வூ" "ç"
   "வெ" "¦Å"
   "வே" "§Å"
   "வை" "¨Å"
   "வொ" "¦Å¡"
   "வோ" "§Å¡"
   "வௌ" "¦Åª"
   "ழ்" "ú"
   "ழ" "Æ"
   "ழா" "Æ¡"
   "ழி" "Æ¢"
   "ழீ" "Æ£"
   "ழு" "Ø"
   "ழூ" "è"
   "ழெ" "¦Æ"
   "ழே" "§Æ"
   "ழை" "¨Æ"
   "ழொ" "¦Æ¡"
   "ழோ" "§Æ¡"
   "ழௌ" "¦Æª"
   "ள்" "û"
   "ள" "Ç"
   "ளா" "Ç¡"
   "ளி" "Ç¢"
   "ளீ" "Ç£"
   "ளு" "Ù"
   "ளூ" "é"
   "ளெ" "¦Ç"
   "ளே" "§Ç"
   "ளை" "¨Ç"
   "ளொ" "¦Ç¡"
   "ளோ" "§Ç¡"
   "ளௌ" "¦Çª"
   "ற்" "ü"
   "ற" "È"
   "றா" "È¡"
   "றி" "È¢"
   "றீ" "È£"
   "று" "Ú"
   "றூ" "ê"
   "றெ" "¦È"
   "றே" "§È"
   "றை" "¨È"
   "றொ" "¦È¡"
   "றோ" "§È¡"
   "றௌ" "¦Èª"
   "ன்" "ý"
   "ன" "É"
   "னா" "É¡"
   "னி" "É¢"
   "னீ" "É£"
   "னு" "Û"
   "னூ" "ë"
   "னெ" "¦É"
   "னே" "§É"
   "னை" "¨É"
   "னொ" "¦É¡"
   "னோ" "§É¡"
   "னௌ" "¦Éª"})
933 |
934 | ;;;;;;;;
935 | ;; தமிழ் <-> Webulagam
936 | ;;;;;;;;
937 |
;; Unicode தமிழ் letter (key) -> the character sequence that represents that
;; letter in Webulagam-encoded text (value). This map is used as the
;; unicode -> Webulagam table, and its inverse as the Webulagam -> unicode
;; table.
;; After the vowels and ஃ, each consonant contributes one row per form:
;; pulli, bare consonant, then the consonant combined with each vowel sign.
(def webulagam-map
  {"அ" "m"
   "ஆ" "M"
   "இ" "ï"
   "ஈ" "<"
   "உ" "c"
   "ஊ" "C"
   "எ" "v"
   "ஏ" "V"
   "ஐ" "I"
   "ஒ" "x"
   "ஓ" "X"
   "ஔ" "xs"
   "ஃ" "~"
   "க்" "¡"
   "க" "f"
   "கா" "fh"
   "கி" "»"
   "கீ" "Ñ"
   "கு" "F"
   "கூ" "T"
   "கெ" "bf"
   "கே" "nf"
   "கை" "if"
   "கொ" "bfh"
   "கோ" "nfh"
   "கௌ" "bfs"
   "ங்" "§"
   "ங" "‡"
   "ஙா" "‡h"
   "ஙி" "À"
   "ஙீ" "†"
   "ஙு" "¼"
   "ஙூ" "½"
   "ஙெ" "b‡"
   "ஙே" "n‡"
   "ஙை" "i‡"
   "ஙொ" "b‡h"
   "ஙோ" "n‡h"
   "ஙௌ" "b‡s"
   "ச்" "¢"
   "ச" "r"
   "சா" "rh"
   "சி" "á"
   "சீ" "Ó"
   "சு" "R"
   "சூ" "N"
   "செ" "br"
   "சே" "nr"
   "சை" "ir"
   "சொ" "brh"
   "சோ" "nrh"
   "சௌ" "brs"
   "ஞ்" "Š"
   "ஞ" "P"
   "ஞா" "Ph"
   "ஞி" "Á"
   "ஞீ" "Ø"
   "ஞு" "|"
   "ஞூ" "ú"
   "ஞெ" "bP"
   "ஞே" "nP"
   "ஞை" "iP"
   "ஞொ" "bPh"
   "ஞோ" "nPh"
   "ஞௌ" "bPs"
   "ட்" "£"
   "ட" "l"
   "டா" "lh"
   "டி" "o"
   "டீ" "O"
   "டு" "L"
   "டூ" "^"
   "டெ" "bl"
   "டே" "nl"
   "டை" "il"
   "டொ" "blh"
   "டோ" "nlh"
   "டௌ" "bls"
   "ண்" "©"
   "ண" "z"
   "ணா" "zh"
   "ணி" "Â"
   "ணீ" "Ù"
   "ணு" "Q"
   "ணூ" "û"
   "ணெ" "bz"
   "ணே" "nz"
   "ணை" "iz"
   "ணொ" "bzh"
   "ணோ" "nzh"
   "ணௌ" "bzs"
   "த்" "¤"
   "த" "j"
   "தா" "jh"
   "தி" "â"
   "தீ" "Ô"
   "து" "J"
   "தூ" "ö"
   "தெ" "bj"
   "தே" "nj"
   "தை" "ij"
   "தொ" "bjh"
   "தோ" "njh"
   "தௌ" "bjs"
   "ந்" "ª"
   "ந" "e"
   "நா" "eh"
   "நி" "Ã"
   "நீ" "Ú"
   "நு" "E"
   "நூ" "ü"
   "நெ" "be"
   "நே" "ne"
   "நை" "ie"
   "நொ" "beh"
   "நோ" "neh"
   "நௌ" "bes"
   "ப்" "¥"
   "ப" "g"
   "பா" "gh"
   "பி" "ã"
   "பீ" "Õ"
   "பு" "ò"
   "பூ" "ó"
   "பெ" "bg"
   "பே" "ng"
   "பை" "ig"
   "பொ" "bgh"
   "போ" "ngh"
   "பௌ" "bgs"
   "ம்" "«"
   "ம" "k"
   "மா" "kh"
   "மி" "Ä"
   "மீ" "Û"
   "மு" "K"
   "மூ" "_"
   "மெ" "bk"
   "மே" "nk"
   "மை" "ik"
   "மொ" "bkh"
   "மோ" "nkh"
   "மௌ" "bks"
   "ய்" "Œ"
   "ய" "a"
   "யா" "ah"
   "யி" "Æ"
   "யீ" "p"
   "யு" "í"
   "யூ" "ô"
   "யெ" "ba"
   "யே" "na"
   "யை" "ia"
   "யொ" "bah"
   "யோ" "nah"
   "யௌ" "bas"
   "ர்" "®"
   "ர" "u"
   "ரா" "uh"
   "ரி" "Ç"
   "ரீ" "ß"
   "ரு" "U"
   "ரூ" "%"
   "ரெ" "bu"
   "ரே" "nu"
   "ரை" "iu"
   "ரொ" "buh"
   "ரோ" "nuh"
   "ரௌ" "bus"
   "ல்" "š"
   "ல" "y"
   "லா" "yh"
   "லி" "È"
   "லீ" "ä"
   "லு" "Y"
   "லூ" "ÿ"
   "லெ" "by"
   "லே" "ny"
   "லை" "iy"
   "லொ" "byh"
   "லோ" "nyh"
   "லௌ" "bys"
   "வ்" "›"
   "வ" "t"
   "வா" "th"
   "வி" "É"
   "வீ" "å"
   "வு" "î"
   "வூ" "ñ"
   "வெ" "bt"
   "வே" "nt"
   "வை" "it"
   "வொ" "bth"
   "வோ" "nth"
   "வௌ" "bts"
   "ழ்" "œ"
   "ழ" "H"
   "ழா" "Hh"
   "ழி" "Ê"
   "ழீ" "æ"
   "ழு" "G"
   "ழூ" ">"
   "ழெ" "bH"
   "ழே" "nH"
   "ழை" "iH"
   "ழொ" "bHh"
   "ழோ" "nHh"
   "ழௌ" "bHs"
   "ள்" "Ÿ"
   "ள" "s"
   "ளா" "sh"
   "ளி" "Ë"
   "ளீ" "ç"
   "ளு" "S"
   "ளூ" "q"
   "ளெ" "bs"
   "ளே" "ns"
   "ளை" "is"
   "ளொ" "bsh"
   "ளோ" "nsh"
   "ளௌ" "bss"
   "ற்" "‰"
   "ற" "w"
   "றா" "wh"
   "றி" "¿"
   "றீ" "Ö"
   "று" "W"
   "றூ" "ù"
   "றெ" "bw"
   "றே" "nw"
   "றை" "iw"
   "றொ" "bwh"
   "றோ" "nwh"
   "றௌ" "bws"
   "ன்" "‹"
   "ன" "d"
   "னா" "dh"
   "னி" "Å"
   "னீ" "Ü"
   "னு" "D"
   "னூ" "}"
   "னெ" "bd"
   "னே" "nd"
   "னை" "id"
   "னொ" "bdh"
   "னோ" "ndh"
   "னௌ" "bds"})
1186 |
1187 |
1188 | ;;;;;;;;
;; all character sets together
1190 | ;;;;;;;;
1191 |
(defn fill-in-bamini-to-unic-map
  "Add in the entries in the bamini -> unicode conversion map
  that represent the normal way that ர் ரி ரீ get written by hand.

  For every letter in fmt/letters that has a Bamini encoding, three extra
  entries are added: the letter's Bamini encoding followed by \"h;\", \"hp\"
  or \"hP\", each mapping to the letter followed by ர், ரி or ரீ
  respectively. Letters without a Bamini encoding are skipped, since they
  would otherwise produce the bare keys \"h;\" / \"hp\" / \"hP\" pointing at
  meaningless text.

  Returns `to-unic-map` merged with the extra entries (the extra entries win
  on any shared key)."
  [to-unic-map]
  (let [;; unicode ர-letter -> the Bamini suffix used for it here
        r-suffixes [["ர்" "h;"]
                    ["ரி" "hp"]
                    ["ரீ" "hP"]]
        extra-entries (for [letter (flatten fmt/letters)
                            :let [bamini-letter (get bamini-map letter)]
                            ;; e.g. ஙு / ஙூ / ஞு / ஞூ have no entry in bamini-map
                            :when bamini-letter
                            [r-letter r-suffix] r-suffixes]
                        [(str bamini-letter r-suffix) (str letter r-letter)])]
    (merge to-unic-map (into {} extra-entries))))
1208 |
(defn fill-charset-map
  "Given a map holding the two conversion tables of a character set
  (`:from-unic-map` and `:to-unic-map`), returns a map of two string
  conversion fns: `:to-unicode` and `:from-unicode`. Each fn splits its
  input using a trie built from the corresponding table and joins the
  resulting elements back into one string."
  [{:keys [from-unic-map to-unic-map]}]
  (letfn [(make-converter [conversion-map]
            (let [trie (fmt/make-trie conversion-map)]
              (fn [s]
                (apply str (fmt/str->elems trie s)))))]
    (let [from-unicode-fn (make-converter from-unic-map)
          to-unicode-fn (make-converter to-unic-map)]
      {:to-unicode to-unicode-fn
       :from-unicode from-unicode-fn})))
1221 |
(def init-charsets
  "For each supported character set, the pair of conversion tables that
  fill-charset-map turns into conversion fns: `:from-unic-map`
  (unicode -> charset) and `:to-unic-map` (charset -> unicode)."
  (let [;; extra Bamini -> unicode entries layered on top of the inverted
        ;; table and the hand-written ர் ரி ரீ forms
        bamini-extras {">" ","
                       "xsp" "ஒளி"
                       "R+" "சூ"
                       "@" ";"}
        bamini-to-unic (merge (fill-in-bamini-to-unic-map (set/map-invert bamini-map))
                              bamini-extras)]
    {:tab       {:from-unic-map tab-map
                 :to-unic-map   (set/map-invert tab-map)}
     :bamini    {:from-unic-map bamini-map
                 :to-unic-map   bamini-to-unic}
     :tscii     {:from-unic-map tscii-map
                 :to-unic-map   (set/map-invert tscii-map)}
     :webulagam {:from-unic-map webulagam-map
                 :to-unic-map   (set/map-invert webulagam-map)}}))
1235 |
(defn mmap-vals
  "Returns a map with the same keys as `m`, where every value has been
  replaced by the result of calling `f` on it."
  [f m]
  (into {}
        (for [entry m]
          [(first entry) (f (second entry))])))
1242 |
(def charsets
  "Map of character-set keyword -> map of conversion fns. Every entry offers
  `:to-unicode` (charset text -> unicode தமிழ்) and `:from-unicode`
  (unicode தமிழ் -> charset text)."
  (let [;; The :romanized entry used to be reachable only under :to-unic /
        ;; :from-unic, unlike every other charset. It now also answers to
        ;; :to-unicode / :from-unicode; the old keys are kept so existing
        ;; callers still work.
        romanized {:to-unicode   romanized->தமிழ்
                   :from-unicode தமிழ்->romanized
                   :to-unic      romanized->தமிழ்
                   :from-unic    தமிழ்->romanized}]
    (-> (mmap-vals fill-charset-map init-charsets)
        (assoc :romanized romanized))))
1248 |
1249 | ;;;;;;;;
1250 | ;; named fns for convert fns
1251 | ;;;;;;;;
1252 |
1253 | ;; TAB
1254 |
(def தமிழ்->tab
  "convert தமிழ் text from unicode to TAB format"
  (-> charsets :tab :from-unicode))

(def tab->தமிழ்
  "convert தமிழ் text from TAB to unicode format"
  (-> charsets :tab :to-unicode))

;; Bamini

(def தமிழ்->bamini
  "convert தமிழ் text from unicode to Bamini format"
  (-> charsets :bamini :from-unicode))

(def bamini->தமிழ்
  "convert தமிழ் text from Bamini to unicode format"
  (-> charsets :bamini :to-unicode))

;; TSCII

(def தமிழ்->tscii
  "convert தமிழ் text from unicode to TSCII format"
  (-> charsets :tscii :from-unicode))

(def tscii->தமிழ்
  "convert தமிழ் text from TSCII to unicode format"
  (-> charsets :tscii :to-unicode))

;; Webulagam

(def தமிழ்->webulagam
  "convert தமிழ் text from unicode to Webulagam format"
  (-> charsets :webulagam :from-unicode))

(def webulagam->தமிழ்
  "convert தமிழ் text from Webulagam to unicode format"
  (-> charsets :webulagam :to-unicode))
1284 |
1285 | ;;;;;;;;
1286 | ;; main
1287 | ;;;;;;;;
1288 |
(def OSX-INPUT-METHOD-VER
  "version of the Mac OS X input method (keyboard) plugin; emitted on the VERSION line printed by -main"
  "1.0")
1291 |
(defn -main
  "generates the output necessary for a Mac OS X 10.x input method (keyboard) plugin.

  Prints, one line at a time: a header (method, encoding, prompt, delimiter,
  version, maximum input length, valid input keys), then one
  \"<english> <தமிழ்>\" line for every letter that can be typed, then a
  footer. `args` is ignored. Returns nil."
  [& args]
  (let [ஃ-map {"q" "ஃ"}
        ;; ஃ is handled on its own (ஃ-map) so that it never combines with
        ;; other phonemes
        vowels (remove #(= % "ஃ") fmt/vowels)
        vowel-entry? (fn [[_ tha]] (boolean (some #{tha} vowels)))
        phon-kv-parts-by-vowel (group-by vowel-entry? romanized-தமிழ்-phoneme-map)
        vowel-map (into {} (get phon-kv-parts-by-vowel true))
        ;; everything that is not a vowel -- except ஃ, which would otherwise
        ;; land here and produce meaningless "q" + vowel combinations below
        cons-map (into {}
                       (remove (fn [[_ tha]] (= tha "ஃ")))
                       (get phon-kv-parts-by-vowel false))
        ;; every consonant (or cluster) combined with every vowel
        cv-map (into {} (for [[eng-c tha-c] cons-map
                              [eng-v tha-v] vowel-map]
                          [(str eng-c eng-v) (fmt/phonemes->str [tha-c tha-v])]))
        letters-map (merge ஃ-map vowel-map cons-map cv-map)
        letters-lines (map (fn [[eng tha]] (str eng " " tha)) letters-map)
        input-keys (keys letters-map)
        ;; every distinct character that appears in any input sequence
        input-chars-str (->> input-keys
                             (mapcat seq)
                             distinct
                             (apply str))
        ;; length of the longest input sequence
        max-input-code (->> input-keys
                            (map count)
                            (apply max))
        lines1 ["METHOD: TABLE"
                "ENCODE: Unicode"
                "PROMPT: கலை"
                "DELIMITER ,"
                (str "VERSION " OSX-INPUT-METHOD-VER)
                (str "MAXINPUTCODE " max-input-code)
                (str "VALIDINPUTKEY " input-chars-str)
                "BEGINCHARACTER"
                ""]
        lines2 [""
                "ENDCHARACTER"]
        all-lines (concat lines1 letters-lines lines2)]
    (doseq [line all-lines]
      (println line))))
1333 |
--------------------------------------------------------------------------------
/emacs/clojure-mode.el:
--------------------------------------------------------------------------------
1 | ;;; clojure-mode.el --- Major mode for Clojure code -*- lexical-binding: t; -*-
2 |
3 | ;; Copyright © 2007-2014 Jeffrey Chu, Lennart Staflin, Phil Hagelberg
4 | ;; Copyright © 2013-2014 Bozhidar Batsov
5 | ;;
6 | ;; Authors: Jeffrey Chu
7 | ;; Lennart Staflin
8 | ;; Phil Hagelberg
9 | ;; Bozhidar Batsov
10 | ;; URL: http://github.com/clojure-emacs/clojure-mode
11 | ;; Keywords: languages clojure clojurescript lisp
12 | ;; Version: 3.0.0
13 | ;; X-Original-Version: 3.0.0
14 | ;; Package-Requires: ((emacs "24.1"))
15 |
16 | ;; This file is not part of GNU Emacs.
17 |
18 | ;;; Commentary:
19 |
20 | ;; Provides font-lock, indentation, and navigation for the Clojure
21 | ;; programming language (http://clojure.org).
22 |
23 | ;; Using clojure-mode with paredit is highly recommended. Use paredit
24 | ;; as you would with any other minor mode; for instance:
25 | ;;
26 | ;; ;; require or autoload paredit-mode
27 | ;; (add-hook 'clojure-mode-hook 'paredit-mode)
28 |
29 | ;; See CIDER (http://github.com/clojure-emacs/cider) for
30 | ;; better interaction with subprocesses via nREPL.
31 |
32 | ;;; License:
33 |
34 | ;; This program is free software; you can redistribute it and/or
35 | ;; modify it under the terms of the GNU General Public License
36 | ;; as published by the Free Software Foundation; either version 3
37 | ;; of the License, or (at your option) any later version.
38 | ;;
39 | ;; This program is distributed in the hope that it will be useful,
40 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
41 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
42 | ;; GNU General Public License for more details.
43 | ;;
44 | ;; You should have received a copy of the GNU General Public License
45 | ;; along with GNU Emacs; see the file COPYING. If not, write to the
46 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
47 | ;; Boston, MA 02110-1301, USA.
48 |
49 | ;;; Code:
50 |
51 |
52 | ;;; Compatibility
(eval-and-compile
  ;; `setq-local' for Emacs 24.2 and below
  ;; Only defined when the running Emacs does not already provide it.
  (unless (fboundp 'setq-local)
    (defmacro setq-local (var val)
      "Set variable VAR to value VAL in current buffer."
      ;; VAR is quoted (not evaluated); VAL is evaluated.
      `(set (make-local-variable ',var) ,val))))
59 |
;; Value-less `defvar' forms, evaluated only at compile time: they declare
;; these variables as special without giving them a value.
;; NOTE(review): presumably here to silence byte-compiler warnings for
;; variables owned by other packages (font-lock, paredit, lisp-mode).
(eval-when-compile
  (defvar calculate-lisp-indent-last-sexp)
  (defvar font-lock-beg)
  (defvar font-lock-end)
  (defvar paredit-space-for-delimiter-predicates)
  (defvar paredit-version)
  (defvar paredit-mode))
67 |
68 | (require 'cl)
69 | (require 'inf-lisp)
70 | (require 'imenu)
71 |
72 | (declare-function lisp-fill-paragraph "lisp-mode" (&optional justify))
73 |
;; Customization group; the options and faces defined in this file use
;; `:group 'clojure'.
(defgroup clojure nil
  "Major mode for editing Clojure code."
  :prefix "clojure-"
  :group 'languages
  :link '(url-link :tag "Github" "https://github.com/clojure-emacs/clojure-mode")
  :link '(emacs-commentary-link :tag "Commentary" "clojure-mode"))
80 |
;; Faces. Each one simply inherits from a standard font-lock face, so it
;; follows the user's theme unless customized.

;; Inherits `font-lock-constant-face'.
(defface clojure-keyword-face
  '((t (:inherit font-lock-constant-face)))
  "Face used to font-lock Clojure keywords (:something)."
  :group 'clojure
  :package-version '(clojure-mode . "3.0.0"))

;; Inherits `font-lock-string-face'.
(defface clojure-character-face
  '((t (:inherit font-lock-string-face)))
  "Face used to font-lock Clojure character literals."
  :group 'clojure
  :package-version '(clojure-mode . "3.0.0"))

;; Inherits `font-lock-preprocessor-face'.
(defface clojure-interop-method-face
  '((t (:inherit font-lock-preprocessor-face)))
  "Face used to font-lock interop method names (camelCase)."
  :group 'clojure
  :package-version '(clojure-mode . "3.0.0"))
98 |
;; User options. Each declares a `:safe' predicate matching its `:type'.

;; The default wraps the file name in a call to clojure.core/load-file.
(defcustom clojure-load-command "(clojure.core/load-file \"%s\")\n"
  "Format-string for building a Clojure expression to load a file.
This format string should use `%s' to substitute a file name and
should result in a Clojure expression that will command the
inferior Clojure to load that file."
  :type 'string
  :group 'clojure
  :safe 'stringp)

;; Defaults to starting a Leiningen REPL.
(defcustom clojure-inf-lisp-command "lein repl"
  "The command used by `inferior-lisp-program'."
  :type 'string
  :group 'clojure
  :safe 'stringp)

;; Off by default.
(defcustom clojure-defun-style-default-indent nil
  "When non-nil, use default indenting for functions and macros.
Otherwise check `define-clojure-indent' and `put-clojure-indent'."
  :type 'boolean
  :group 'clojure
  :safe 'booleanp)

;; On by default.
(defcustom clojure-use-backtracking-indent t
  "When non-nil, enable context sensitive indentation."
  :type 'boolean
  :group 'clojure
  :safe 'booleanp)

;; Defaults to 3.
(defcustom clojure-max-backtracking 3
  "Maximum amount to backtrack up a list to check for context."
  :type 'integer
  :group 'clojure
  :safe 'integerp)

;; The default is the value `fill-column' has when this form is evaluated.
(defcustom clojure-docstring-fill-column fill-column
  "Value of `fill-column' to use when filling a docstring."
  :type 'integer
  :group 'clojure
  :safe 'integerp)

;; Defaults to 2.
(defcustom clojure-docstring-fill-prefix-width 2
  "Width of `fill-prefix' when filling a docstring.
The default value conforms with the de facto convention for
Clojure docstrings, aligning the second line with the opening
double quotes on the third column."
  :type 'integer
  :group 'clojure
  :safe 'integerp)
147 |
;; Default is the two characters [ and {. The customization type offers
;; four choices: [ { ( and the double quote.
(defcustom clojure-omit-space-between-tag-and-delimiters '(?\[ ?\{)
  "Allowed opening delimiter characters after a reader literal tag.
For example, \[ is allowed in :db/id[:db.part/user]."
  :type '(set (const :tag "[" ?\[)
              (const :tag "{" ?\{)
              (const :tag "(" ?\()
              (const :tag "\"" ?\"))
  :group 'clojure
  ;; A value is considered safe when it is a list containing only characters.
  ;; `every' comes from the `cl' library required at the top of the file.
  :safe (lambda (value)
          (and (listp value)
               (every 'characterp value))))
159 |
(defvar clojure-mode-map
  (let ((map (make-sparse-keymap)))
    ;; Bindings not set here fall through to `lisp-mode-shared-map'.
    (set-keymap-parent map lisp-mode-shared-map)
    ;; Evaluation, loading and inferior-lisp buffer commands.
    (define-key map (kbd "C-M-x") 'lisp-eval-defun)
    (define-key map (kbd "C-x C-e") 'lisp-eval-last-sexp)
    (define-key map (kbd "C-c C-e") 'lisp-eval-last-sexp)
    (define-key map (kbd "C-c C-l") 'clojure-load-file)
    (define-key map (kbd "C-c C-r") 'lisp-eval-region)
    (define-key map (kbd "C-c C-z") 'clojure-display-inferior-lisp-buffer)
    ;; Editing helper.
    (define-key map (kbd "C-:") 'clojure-toggle-keyword-string)
    ;; Menu attached to this keymap. Note that "Run Inferior Lisp" and
    ;; "Display Inferior Lisp Buffer" invoke the same command.
    (easy-menu-define clojure-mode-menu map "Clojure Mode Menu"
      '("Clojure"
        ["Eval Top-Level Expression" lisp-eval-defun]
        ["Eval Last Expression" lisp-eval-last-sexp]
        ["Eval Region" lisp-eval-region]
        "--"
        ["Run Inferior Lisp" clojure-display-inferior-lisp-buffer]
        ["Display Inferior Lisp Buffer" clojure-display-inferior-lisp-buffer]
        ["Load File" clojure-load-file]
        "--"
        ["Toggle between string & keyword" clojure-toggle-keyword-string]
        ["Mark string" clojure-mark-string]
        ["Insert ns form at point" clojure-insert-ns-form-at-point]
        ["Insert ns form at beginning" clojure-insert-ns-form]
        ["Update ns form" clojure-update-ns]
        "--"
        ["Version" clojure-mode-display-version]))
    map)
  "Keymap for Clojure mode. Inherits from `lisp-mode-shared-map'.")
189 |
;; Syntax table for Clojure buffers: a copy of the Emacs Lisp syntax table
;; with the Clojure-specific adjustments below.
(defvar clojure-mode-syntax-table
  (let ((table (copy-syntax-table emacs-lisp-mode-syntax-table)))
    ;; ~ is an expression prefix
    (modify-syntax-entry ?~ "' " table)
    ;; {} and [] are matching open/close delimiter pairs
    (modify-syntax-entry ?\{ "(}" table)
    (modify-syntax-entry ?\} "){" table)
    (modify-syntax-entry ?\[ "(]" table)
    (modify-syntax-entry ?\] ")[" table)
    ;; ^ is an expression prefix
    (modify-syntax-entry ?^ "'" table)
    ;; Make hash a symbol constituent (syntax class `_') carrying the
    ;; `p' (prefix character) flag
    (modify-syntax-entry ?# "_ p" table)
    table))
201 |
(defvar clojure-prev-l/c-dir/file nil
  "Record last directory and file used in loading or compiling.
This holds a cons cell of the form `(DIRECTORY . FILE)'
describing the last `clojure-load-file' or `clojure-compile-file' command.")

(defconst clojure-mode-version "3.0.0"
  "The current version of `clojure-mode'.")

(defconst clojure--prettify-symbols-alist
  '(("fn" . ?λ))
  "Alist mapping symbol names to the character shown in their place.
`clojure-mode' installs it as the buffer-local value of
`prettify-symbols-alist'.")
212 |
(defun clojure-mode-display-version ()
  "Show the value of `clojure-mode-version' in the echo area."
  (interactive)
  (let ((version clojure-mode-version))
    (message "clojure-mode (version %s)" version)))
217 |
(defun clojure-space-for-delimiter-p (endp delim)
  "Prevent paredit from inserting useless spaces.
Return nil only when DELIM is an opening paren, double quote or
curly brace that directly follows a `#' which is itself not
preceded by a word or symbol constituent (as in #(...), #\"...\"
and #{...}).  Return t in every other case, meaning this
predicate has no objection to a space.

See `paredit-space-for-delimiter-predicates' for the meaning of
ENDP and DELIM."
  (or endp
      (not (memq delim '(?\( ?\" ?\{)))
      (not (derived-mode-p 'clojure-mode))
      ;; Nothing precedes point, so there is no `#' to look at.  Checking
      ;; this first avoids a `beginning-of-buffer' error from
      ;; `backward-char' below.
      (bobp)
      (save-excursion
        (backward-char)
        (if (eq (char-after) ?#)
            ;; A `#' at the very start of the buffer, or one that does not
            ;; continue a word/symbol, starts a dispatch form: no space.
            (and (not (bobp))
                 (memq (char-syntax (char-before)) '(?w ?_))
                 t)
          t))))
236 |
(defun clojure-no-space-after-tag (endp delimiter)
  "Decide whether a space may be inserted before DELIMITER.

Return nil, meaning \"do not insert a space\", only when DELIMITER
is an opening delimiter listed in
`clojure-omit-space-between-tag-and-delimiters' and the text
ending at point, on the current line, is a reader-literal tag.
Return t in every other case, including whenever ENDP is non-nil.

This allows you to write things like #db/id[:db.part/user]
without a space being inserted between the tag and the opening
bracket.

See `paredit-space-for-delimiter-predicates' for the meaning of
ENDP and DELIMITER."
  (cond
   (endp t)
   ((not (member delimiter clojure-omit-space-between-tag-and-delimiters))
    t)
   (t
    (let ((here (point)))
      (save-excursion
        ;; A tag only counts when it ends exactly where point was.
        (not (and (re-search-backward
                   "#\\([a-zA-Z0-9._-]+/\\)?[a-zA-Z0-9._-]+"
                   (line-beginning-position)
                   t)
                  (= here (match-end 0)))))))))
260 |
(defun clojure-paredit-setup ()
  "Make `paredit-mode' play nice with `clojure-mode'.
With paredit version 21 or later, bind the curly-brace keys in
`clojure-mode-map' to the paredit commands and register the two
Clojure predicates in `paredit-space-for-delimiter-predicates'.
With an older paredit, do nothing."
  (unless (< paredit-version 21)
    (let ((keymap clojure-mode-map))
      (define-key keymap "{" 'paredit-open-curly)
      (define-key keymap "}" 'paredit-close-curly))
    (add-to-list 'paredit-space-for-delimiter-predicates
                 'clojure-space-for-delimiter-p)
    (add-to-list 'paredit-space-for-delimiter-predicates
                 'clojure-no-space-after-tag)))
270 |
271 | ;;;###autoload
(define-derived-mode clojure-mode prog-mode "Clojure"
  "Major mode for editing Clojure code.

\\{clojure-mode-map}"
  (setq-local imenu-create-index-function
              (lambda ()
                (imenu--generic-function '((nil clojure-match-next-def 0)))))
  (setq-local indent-tabs-mode nil)
  ;; Start from the generic Lisp settings; everything after this call
  ;; overrides them, so the order matters.
  (lisp-mode-variables nil)
  ;; Use `setq-local' like every other setting here, so these values can
  ;; never end up in the global bindings.
  (setq-local fill-paragraph-function 'clojure-fill-paragraph)
  (setq-local adaptive-fill-function 'clojure-adaptive-fill-function)
  (setq-local normal-auto-fill-function 'clojure-auto-fill-function)
  (setq-local comment-start-skip
              "\\(\\(^\\|[^\\\\\n]\\)\\(\\\\\\\\\\)*\\)\\(;+\\|#|\\) *")
  (setq-local indent-line-function 'clojure-indent-line)
  (setq-local lisp-indent-function 'clojure-indent-function)
  (setq-local lisp-doc-string-elt-property 'clojure-doc-string-elt)
  (setq-local inferior-lisp-program clojure-inf-lisp-command)
  (setq-local parse-sexp-ignore-comments t)
  (setq-local prettify-symbols-alist clojure--prettify-symbols-alist)
  (clojure-font-lock-setup)
  (setq-local open-paren-in-column-0-is-defun-start nil)
  ;; The hook is added globally; `add-hook' does not add duplicates, so
  ;; repeating this for every Clojure buffer is harmless.
  (add-hook 'paredit-mode-hook 'clojure-paredit-setup))
295 |
(defsubst clojure-in-docstring-p ()
  "Check whether point is in a docstring.
The test looks at the `face' property of the last character on
the current line and returns non-nil when it is
`font-lock-doc-face'.  Return nil when there is no such
character, as on an empty line at the beginning of the buffer."
  (let ((pos (1- (line-end-position))))
    ;; On an empty first line POS falls before the start of the buffer;
    ;; asking for a text property there would signal an error.
    (and (>= pos (point-min))
         (eq (get-text-property pos 'face)
             'font-lock-doc-face))))
300 |
(defsubst clojure-docstring-fill-prefix ()
  "Return the fill prefix used by `clojure-fill-paragraph'.

The result is a string of `clojure-docstring-fill-prefix-width'
space characters."
  (let ((width clojure-docstring-fill-prefix-width))
    (make-string width ?\s)))
306 |
(defun clojure-adaptive-fill-function ()
  "Adaptive fill function for Clojure buffers.
Return the docstring fill prefix when point is in a docstring and
nil otherwise, so only docstrings get special treatment."
  (if (clojure-in-docstring-p)
      (clojure-docstring-fill-prefix)
    nil))
312 |
(defun clojure-fill-paragraph (&optional justify)
  "Like `fill-paragraph' but handle Clojure docstrings.
JUSTIFY is passed on to the underlying fill commands.

Inside a docstring the paragraph is filled with the docstring
fill prefix and fill column, and the value of `fill-paragraph' is
returned.  Elsewhere a comment paragraph is tried first, then a
plain paragraph, and the return value is always t."
  (cond
   ((clojure-in-docstring-p)
    (let ((paragraph-start
           (concat paragraph-start
                   "\\|\\s-*\\([(;:\"[]\\|~@\\|`(\\|#'(\\)"))
          (paragraph-separate
           (concat paragraph-separate "\\|\\s-*\".*[,\\.]$"))
          (fill-column (or clojure-docstring-fill-column fill-column))
          (fill-prefix (clojure-docstring-fill-prefix)))
      (fill-paragraph justify)))
   (t
    (let ((paragraph-start
           (concat paragraph-start
                   "\\|\\s-*\\([(;:\"[]\\|`(\\|#'(\\)"))
          (paragraph-separate
           (concat paragraph-separate "\\|\\s-*\".*[,\\.[]$")))
      (unless (fill-comment-paragraph justify)
        (fill-paragraph justify))
      ;; Always report success in this branch.
      t))))
332 |
(defun clojure-auto-fill-function ()
  "Clojure auto-fill function.
Do nothing unless the current column is beyond the fill column.
Otherwise call `do-auto-fill' with the docstring fill column and
fill prefix when point is in a docstring, and with the ordinary
`fill-column' elsewhere."
  ;; Check if auto-filling is meaningful.
  (let ((fc (current-fill-column)))
    (when (and fc (> (current-column) fc))
      (let ((fill-column (if (clojure-in-docstring-p)
                             ;; Same fallback as `clojure-fill-paragraph':
                             ;; a nil docstring column means "use the
                             ;; regular one".
                             (or clojure-docstring-fill-column fill-column)
                           fill-column))
            (fill-prefix (clojure-adaptive-fill-function)))
        (do-auto-fill)))))
343 |
(defun clojure-display-inferior-lisp-buffer ()
  "Display the buffer named by `inferior-lisp-buffer'.
When that buffer does not exist, call `run-lisp' with
`inferior-lisp-program' instead."
  (interactive)
  (let ((live (and inferior-lisp-buffer
                   (get-buffer inferior-lisp-buffer))))
    (cond
     (live (pop-to-buffer inferior-lisp-buffer t))
     (t (run-lisp inferior-lisp-program)))))
350 |
(defun clojure-load-file (file-name)
  "Load the Clojure file FILE-NAME into the inferior Clojure process.
The directory and file name are remembered in
`clojure-prev-l/c-dir/file', the command built from
`clojure-load-command' is sent to the process, and the process
buffer is then selected."
  (interactive (comint-get-source "Load Clojure file: "
                                  clojure-prev-l/c-dir/file
                                  '(clojure-mode) t))
  ;; Check to see if buffer needs saved.
  (comint-check-source file-name)
  (let ((directory (file-name-directory file-name))
        (base-name (file-name-nondirectory file-name)))
    (setq clojure-prev-l/c-dir/file (cons directory base-name)))
  (let ((process (inferior-lisp-proc)))
    (comint-send-string process
                        (format clojure-load-command file-name)))
  (switch-to-lisp t))
362 |
363 |
364 |
(defun clojure-match-next-def ()
  "Scan the buffer backwards for the next top-level definition.
Called by `imenu--generic-function'.

Search backwards for a line starting with \"(def\".  When one is
found, set the match data to the bounds of the defined name,
leave point at the start of the form and return that position.
Return nil when there is no earlier definition."
  ;; NOTE(review): only forms starting with "(def" are found; the Tamil
  ;; definition forms handled elsewhere in this file are not -- confirm
  ;; whether that is intended.
  (when (re-search-backward "^(def\\sw*" nil t)
    (save-excursion
      (let (found?
            (start (point)))
        ;; Step inside the form and over its first symbol.
        (down-list)
        (forward-sexp)
        (while (not found?)
          (forward-sexp)
          ;; If we stopped right in front of an argument vector or of the
          ;; closing paren, the sexp just passed is the candidate name,
          ;; so step back onto it.
          (or (if (char-equal ?[ (char-after (point)))
                  (backward-sexp))
              (if (char-equal ?) (char-after (point)))
                  (backward-sexp)))
          (destructuring-bind (def-beg . def-end) (bounds-of-thing-at-point 'sexp)
            (if (char-equal ?^ (char-after def-beg))
                ;; A sexp starting with ^ is metadata, not the name: move
                ;; to the start of the following sexp and try again.
                (progn (forward-sexp) (backward-sexp))
              (setq found? t)
              (set-match-data (list def-beg def-end)))))
        ;; Value of the function: the start of the definition form.
        (goto-char start)))))
386 |
(defconst clojure-font-lock-keywords
  (eval-when-compile
    `(;; Top-level variable definition
      (,(concat "(\\(?:clojure.core/\\)?\\("
                (regexp-opt '("def" "defonce"))
                ;; variable declarations
                "\\)\\>"
                ;; Any whitespace
                "[ \r\n\t]*"
                ;; Possibly type or metadata
                "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*"
                "\\(\\sw+\\)?")
       (1 font-lock-keyword-face)
       (2 font-lock-variable-name-face nil t))
      ;; Type definition
      (,(concat "(\\(?:clojure.core/\\)?\\("
                (regexp-opt '("defstruct" "deftype" "defprotocol"
                              "defrecord"))
                ;; type declarations
                "\\)\\>"
                ;; Any whitespace
                "[ \r\n\t]*"
                ;; Possibly type or metadata
                "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*"
                "\\(\\sw+\\)?")
       (1 font-lock-keyword-face)
       (2 font-lock-type-face nil t))

      ;; clj-thamil
      ;; Function definition (anything that starts with வரையறு and is not
      ;; listed above).  The rest of the defining symbol is matched with
      ;; \sw* rather than [a-z-]* so that a Tamil suffix, as in
      ;; வரையறு-செயல்கூறு, is part of the keyword too.
      (,(concat "(\\(?:[a-z\.-]+/\\)?\\(வரையறு\\sw*\\)"
                ;; Function declarations
                "\\>"
                ;; Any whitespace
                "[ \r\n\t]*"
                ;; Possibly type or metadata
                "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*"
                "\\(\\sw+\\)?")
       (1 font-lock-keyword-face)
       (2 font-lock-function-name-face nil t))

      ;; Function definition (anything that starts with def and is not
      ;; listed above)
      (,(concat "(\\(?:[a-z\.-]+/\\)?\\(def\[a-z\-\]*-?\\)"
                ;; Function declarations
                "\\>"
                ;; Any whitespace
                "[ \r\n\t]*"
                ;; Possibly type or metadata
                "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*"
                "\\(\\sw+\\)?")
       (1 font-lock-keyword-face)
       (2 font-lock-function-name-face nil t))
      ;; (fn name? args ...)
      (,(concat "(\\(?:clojure.core/\\)?\\(fn\\)[ \t]+"
                ;; Possibly type
                "\\(?:#?^\\sw+[ \t]*\\)?"
                ;; Possibly name (any name, not just ones starting with "t")
                "\\(\\sw+\\)?" )
       (1 font-lock-keyword-face)
       (2 font-lock-function-name-face nil t))
      ;; lambda arguments - %, %1, %2, etc
      ("\\<%[1-9]?" (0 font-lock-variable-name-face))
      ;; Special forms & control structures
      (,(concat
         "(\\(?:clojure.core/\\)?"
         (regexp-opt
          '("let" "letfn" "do"
            "case" "cond" "cond->" "cond->>" "condp"
            "for" "loop" "recur"
            "when" "when-not" "when-let" "when-first" "when-some"
            "if" "if-let" "if-not" "if-some"
            "." ".." "->" "->>" "doto"
            "and" "or"
            "dosync" "doseq" "dotimes" "dorun" "doall"
            "load" "import" "unimport" "ns" "in-ns" "refer"
            "try" "catch" "finally" "throw"
            "with-open" "with-local-vars" "binding"
            "gen-class" "gen-and-load-class" "gen-and-save-class"
            "handler-case" "handle" "var" "declare") t)
         "\\>")
       1 font-lock-keyword-face)

      ;; clj-thamil
      ;; Special forms & control structures
      (,(concat
         "(\\(?:clojure.core/\\)?"
         (regexp-opt
          '("வைத்துக்கொள்" "letfn" "செய்"
            "case" "பொறுத்து" "cond->" "cond->>" "condp"
            "ஒவ்வொன்றுக்கும்" "சுற்று" "recur"
            "என்னும்போது" "இல்லென்னும்போது" "when-let" "when-first" "when-some"
            "எனில்" "if-let" "இல்லெனில்" "if-some"
            "." ".." "->" "->>" "doto"
            "மற்றும்" "அல்லது"
            "dosync" "செய்வரிசை" "dotimes" "dorun" "செய்யெல்லாம்"
            "load" "import" "unimport" "ns" "in-ns" "refer"
            "try" "catch" "finally" "throw"
            "with-open" "with-local-vars" "binding"
            "gen-class" "gen-and-load-class" "gen-and-save-class"
            "handler-case" "handle" "var" "declare") t)
         "\\>")
       1 font-lock-keyword-face)

      ;; Built-in dynamic variables
      (,(concat
         "\\<"
         (regexp-opt
          '("*1" "*2" "*3" "*agent*"
            "*allow-unresolved-vars*" "*assert*" "*clojure-version*"
            "*command-line-args*" "*compile-files*"
            "*compile-path*" "*e" "*err*" "*file*" "*flush-on-newline*"
            "*in*" "*macro-meta*" "*math-context*" "*ns*" "*out*"
            "*print-dup*" "*print-length*" "*print-level*"
            "*print-meta*" "*print-readably*"
            "*read-eval*" "*source-path*"
            "*use-context-classloader*" "*warn-on-reflection*")
          t)
         "\\>")
       0 font-lock-builtin-face)
      ;; Dynamic variables - *something* or @*something*
      ("\\<@?\\(\\*[a-z-]*\\*\\)\\>" 1 font-lock-variable-name-face)
      ;; Global constants - nil, true, false
      (,(concat
         "\\<"
         (regexp-opt
          '("true" "false" "nil") t)
         "\\>")
       0 font-lock-constant-face)
      ;; Character literals - \1, \a, \newline, \u0000
      ;; FIXME: handle properly some punctuation characters (like commas and semicolons)
      ("\\\\\\([[:punct:]]\\|[a-z0-9]+\\)\\>" 0 'clojure-character-face)
      ;; Constant values (keywords), including as metadata e.g. ^:static
      ("\\<^?\\(:\\(\\sw\\|\\s_\\)+\\(\\>\\|\\_>\\)\\)" 1 'clojure-keyword-face)
      ;; cljx annotations (#+clj and #+cljs)
      ("#\\+cljs?\\>" 0 font-lock-preprocessor-face)
      ;; Java interop highlighting
      ;; CONST SOME_CONST (optionally prefixed by /)
      ("\\(?:\\<\\|/\\)\\([A-Z]+\\|\\([A-Z]+_[A-Z1-9_]+\\)\\)\\>" 1 font-lock-constant-face)
      ;; .foo .barBaz .qux01 .-flibble .-flibbleWobble
      ("\\<\\.-?[a-z][a-zA-Z0-9]*\\>" 0 'clojure-interop-method-face)
      ;; Foo Bar$Baz Qux_ World_OpenUDP Foo. Babylon15.
      ("\\(?:\\<\\|\\.\\|/\\|#?^\\)\\([A-Z][a-zA-Z0-9_]*[a-zA-Z0-9$_]+\\.?\\>\\)" 1 font-lock-type-face)
      ;; foo.bar.baz
      ("\\<^?\\([a-z][a-z0-9_-]+\\.\\([a-z][a-z0-9_-]*\\.?\\)+\\)" 1 font-lock-type-face)
      ;; (ns namespace) - special handling for single segment namespaces
      (,(concat "(\\<ns\\>[ \r\n\t]*"
                ;; Possibly metadata
                "\\(?:\\^?{[^}]+}[ \r\n\t]*\\)*"
                ;; namespace
                "\\([a-z0-9-]+\\)")
       (1 font-lock-type-face nil t))
      ;; foo/ Foo/ @Foo/
      ("\\<@?\\([a-zA-Z][a-z0-9_-]*\\)/" 1 font-lock-type-face)
      ;; fooBar
      ("\\(?:\\<\\|/\\)\\([a-z]+[A-Z]+[a-zA-Z0-9$]*\\>\\)" 1 'clojure-interop-method-face)
      ;; Highlight grouping constructs in regular expressions
      (clojure-font-lock-regexp-groups
       (1 'font-lock-regexp-grouping-construct prepend))))
  "Default expressions to highlight in Clojure mode.")
548 |
(defun clojure-font-lock-syntactic-face-function (state)
  "Return the face to use for the string or comment described by STATE.
STATE is a parser state; this function reads its elements 1, 3
and 8.

When element 3 is nil, return `font-lock-comment-face'.
Otherwise the text is a string: return nil if it starts with a
`|' character, `font-lock-doc-face' if it sits in the docstring
position of its enclosing form, and `font-lock-string-face' in
every other case.  The docstring position of a form is looked up
on the form's first symbol under the property named by
`lisp-doc-string-elt-property'."
  (if (nth 3 state)
      ;; This might be a (doc)string or a |...| symbol.
      (let ((startpos (nth 8 state)))
        (if (eq (char-after startpos) ?|)
            ;; This is not a string, but a |...| symbol.
            nil
          (let* ((listbeg (nth 1 state))
                 ;; Name of the first symbol of the enclosing list, if any.
                 (firstsym (and listbeg
                                (save-excursion
                                  (goto-char listbeg)
                                  (and (looking-at "([ \t\n]*\\(\\(\\sw\\|\\s_\\)+\\)")
                                       (match-string 1)))))
                 ;; Docstring position registered for that symbol: a number
                 ;; or a function returning one.
                 (docelt (and firstsym
                              (function-get (intern-soft firstsym)
                                            lisp-doc-string-elt-property))))
            (if (and docelt
                     ;; It's a string in a form that can have a docstring.
                     ;; Check whether it's in docstring position.
                     (save-excursion
                       (when (functionp docelt)
                         (goto-char (match-end 1))
                         (setq docelt (funcall docelt)))
                       (goto-char listbeg)
                       (forward-char 1)
                       ;; Walk over the form's elements, counting DOCELT
                       ;; down to zero; a scan error just ends the walk.
                       (condition-case nil
                           (while (and (> docelt 0) (< (point) startpos)
                                       (progn (forward-sexp 1) t))
                             ;; ignore metadata and type hints
                             (unless (looking-at "[ \n\t]*\\(\\^[A-Z:].+\\|\\^?{.+\\)")
                               (setq docelt (1- docelt))))
                         (error nil))
                       ;; We are in docstring position only if the count
                       ;; reached zero and, after skipping comments, we
                       ;; are exactly at the start of this string.
                       (and (zerop docelt) (<= (point) startpos)
                            (progn (forward-comment (point-max)) t)
                            (= (point) (nth 8 state)))))
                font-lock-doc-face
              font-lock-string-face))))
    font-lock-comment-face))
587 |
(defun clojure-font-lock-setup ()
  "Configure font-lock for editing Clojure code.
Turn on multiline fontification in the current buffer, register
`clojure-font-lock-extend-region-def' as a region-extension
function, and set `font-lock-defaults'."
  (setq-local font-lock-multiline t)
  (add-to-list 'font-lock-extend-region-functions
               'clojure-font-lock-extend-region-def t)
  (let ((syntax-alist '(("+-*/.<>=!?$%_&~^:@" . "w")))
        (variables '((font-lock-mark-block-function . mark-defun)
                     (font-lock-syntactic-face-function
                      . clojure-font-lock-syntactic-face-function))))
    (setq font-lock-defaults
          (append (list 'clojure-font-lock-keywords ; keywords
                        nil nil
                        syntax-alist                ; syntax alist
                        nil)
                  variables))))
601 |
(defun clojure-font-lock-def-at-point (point)
  "Range between the top-most def* and the fourth element after POINT.
Return a cons cell (BEG . END), or nil when POINT is itself the
start of a top-level form or that form does not start with
\"(def\".  Point is moved as a side effect.

Note that this means that there is no guarantee of proper font
locking in def* forms that are not at top level."
  (goto-char point)
  (ignore-errors (beginning-of-defun))
  (let ((def-start (point)))
    (when (and (/= point def-start)
               (looking-at "(def"))
      ;; Move forward over as many of the first four elements as exist;
      ;; a scan error simply stops the walk where it is.
      (ignore-errors
        (forward-char)
        (dotimes (_ 4)
          (forward-sexp)))
      (cons def-start (point)))))
622 |
(defun clojure-font-lock-extend-region-def ()
  "Set region boundaries to include the first four elements of def* forms.
Adjust `font-lock-beg' and `font-lock-end' when either lies
strictly inside the range reported by
`clojure-font-lock-def-at-point'.  Return non-nil if a boundary
was changed."
  (let ((changed nil))
    ;; Pull the start of the region back to the start of the def form.
    (let* ((range (clojure-font-lock-def-at-point font-lock-beg))
           (range-beg (car range))
           (range-end (cdr range)))
      (when (and range
                 (< range-beg font-lock-beg)
                 (< font-lock-beg range-end))
        (setq font-lock-beg range-beg
              changed t)))
    ;; Push the end of the region forward to the end of the range.
    (let* ((range (clojure-font-lock-def-at-point font-lock-end))
           (range-beg (car range))
           (range-end (cdr range)))
      (when (and range
                 (< range-beg font-lock-end)
                 (< font-lock-end range-end))
        (setq font-lock-end range-end
              changed t)))
    changed))
641 |
(defun clojure-font-lock-regexp-groups (bound)
  "Highlight grouping constructs in regular expression.

Search forward from point, up to buffer position BOUND, for a
grouping construct located in text fontified as a string that
`clojure-string-start' recognises as a regex literal.  Return t
with the construct in match group 1, or nil when none is found
before BOUND."
  (catch 'found
    (while (re-search-forward (concat
                               ;; A group may start using several alternatives:
                               "\\(\\(?:"
                               ;; 1. (? special groups
                               "(\\?\\(?:"
                               ;; a) non-capturing group (?:X)
                               ;; b) independent non-capturing group (?>X)
                               ;; c) zero-width positive lookahead (?=X)
                               ;; d) zero-width negative lookahead (?!X)
                               "[:=!>]\\|"
                               ;; e) zero-width positive lookbehind (?<=X)
                               ;; f) zero-width negative lookbehind (?<!X)
                               "<[=!]\\|"
                               ;; g) named capturing group (?<name>X)
                               "<[[:alnum:]]+>"
                               "\\)\\|" ;; end of special groups
                               ;; 2. normal capturing groups (
                               ;; 3. we also highlight alternative
                               ;; separators |, and closing parens )
                               "[|()]"
                               "\\)\\)")
                              bound t)
      ;; Only accept the match if its last character is fontified as a
      ;; string and that string is a regex literal.
      (let ((face (get-text-property (1- (point)) 'face)))
        (when (and (or (and (listp face)
                            (memq 'font-lock-string-face face))
                       (eq 'font-lock-string-face face))
                   (clojure-string-start t))
          (throw 'found t))))))
676 |
;; Docstring positions: for each of these forms the docstring is the
;; element at index 2.
(dolist (form '(ns
                def
                defn
                defn-
                defmulti
                defmacro
                definline
                defprotocol
                ;; clj-thamil
                வரையறு
                வரையறு-செயல்கூறு))
  (put form 'clojure-doc-string-elt 2))
691 |
(defun clojure-indent-line ()
  "Indent current line as Clojure code.
Inside a docstring, replace the line's leading whitespace with
the docstring fill prefix; everywhere else defer to
`lisp-indent-line'."
  (cond
   ((clojure-in-docstring-p)
    (save-excursion
      (beginning-of-line)
      (when (looking-at "^\\s-*")
        (replace-match (clojure-docstring-fill-prefix)))))
   (t
    (lisp-indent-line))))
700 |
(defun clojure-indent-function (indent-point state)
  "This function is the normal value of the variable `lisp-indent-function'.
It is used when indenting a line within a function call, to see if the
called function says anything special about how to indent the line.

INDENT-POINT is the position where the user typed TAB, or equivalent.
Point is located at the point to indent under (for default indentation);
STATE is the `parse-partial-sexp' state for that position.

If the current line is in a call to a function whose symbol has a
non-nil property `clojure-indent-function', that specifies how to
do the indentation.  A namespace prefix ending in a slash is
ignored when the symbol is looked up.

The property value can be

- `defun', meaning indent `defun'-style;
- an integer N, meaning indent the first N arguments specially
  like ordinary function arguments and then indent any further
  arguments like a body;
- a function to call just as this function was called.
  If that function returns nil, that means it doesn't specify
  the indentation.

This function also returns nil meaning don't specify the indentation."
  (let ((normal-indent (current-column)))
    ;; Move to the first element of the innermost containing list.
    (goto-char (1+ (elt state 1)))
    (parse-partial-sexp (point) calculate-lisp-indent-last-sexp 0 t)
    (if (and (elt state 2)
             (not (looking-at "\\sw\\|\\s_")))
        ;; car of form doesn't seem to be a symbol
        (progn
          (if (not (> (save-excursion (forward-line 1) (point))
                      calculate-lisp-indent-last-sexp))
              (progn (goto-char calculate-lisp-indent-last-sexp)
                     (beginning-of-line)
                     (parse-partial-sexp (point)
                                         calculate-lisp-indent-last-sexp 0 t)))
          ;; Indent under the list or under the first sexp on the same
          ;; line as calculate-lisp-indent-last-sexp.  Note that first
          ;; thing on that line has to be complete sexp since we are
          ;; inside the innermost containing sexp.
          (backward-prefix-chars)
          (if (and (eq (char-after (point)) ?\[)
                   (eq (char-after (elt state 1)) ?\())
              (+ (current-column) 2) ;; this is probably inside a defn
            (current-column)))
      (let* ((function (buffer-substring (point)
                                         (progn (forward-sexp 1) (point))))
             (open-paren (elt state 1))
             (method nil)
             ;; The part of the name after the last slash.
             (function-tail (first
                             (last
                              (split-string (substring-no-properties function) "/")))))
        (setq method (get (intern-soft function-tail) 'clojure-indent-function))
        ;; The first clause that applies decides the indentation.
        (cond ((member (char-after open-paren) '(?\[ ?\{))
               ;; Inside a vector or map literal: one column past the
               ;; opening delimiter.
               (goto-char open-paren)
               (1+ (current-column)))
              ((or (eq method 'defun)
                   (and clojure-defun-style-default-indent
                        ;; largely to preserve useful alignment of :require, etc in ns
                        (not (string-match "^:" function))
                        (not method))
                   ;; No explicit rule, but the name looks like a def* or
                   ;; with-* form.
                   (and (null method)
                        (> (length function) 3)
                        (string-match "\\`\\(?:\\S +/\\)?\\(def\\|with-\\)"
                                      function)))
               (lisp-indent-defform state indent-point))
              ((integerp method)
               (lisp-indent-specform method state
                                     indent-point normal-indent))
              (method
               (funcall method indent-point state))
              (clojure-use-backtracking-indent
               (clojure-backtracking-indent
                indent-point state normal-indent)))))))
776 |
(defun clojure-backtracking-indent (indent-point state normal-indent)
  "Experimental backtracking support.

Walk upwards through the enclosing sexps, at most
`clojure-max-backtracking' levels, looking for a form whose first
symbol has a `clojure-backtracking-indent' property.  When the
property yields an offset for the path leading down to
INDENT-POINT, return that offset added to the column of the
innermost containing list; otherwise return nil.

STATE is the parse state used to find the innermost containing
list.  NORMAL-INDENT is accepted but not used."
  (let (indent (path) (depth 0))
    (goto-char (elt state 1))
    (while (and (not indent)
                (< depth clojure-max-backtracking))
      (let ((containing-sexp (point)))
        (parse-partial-sexp (1+ containing-sexp) indent-point 1 t)
        (when (looking-at "\\sw\\|\\s_")
          (let* ((start (point))
                 (fn (buffer-substring start (progn (forward-sexp 1) (point))))
                 (meth (get (intern-soft fn) 'clojure-backtracking-indent)))
            ;; Count the elements passed at this level on the way to
            ;; INDENT-POINT and record the count at the front of PATH.
            (let ((n 0))
              (when (< (point) indent-point)
                (condition-case ()
                    (progn
                      (forward-sexp 1)
                      (while (< (point) indent-point)
                        (parse-partial-sexp (point) indent-point 1 t)
                        (incf n)
                        (forward-sexp 1)))
                  (error nil)))
              (push n path))
            (when meth
              ;; Follow PATH through the nested spec.  An index past the
              ;; end of a list falls back to that list's last element; a
              ;; non-list reached before PATH is used up gives no rule.
              (let ((def meth))
                (dolist (p path)
                  (if (and (listp def)
                           (< p (length def)))
                      (setq def (nth p def))
                    (if (listp def)
                        (setq def (car (last def)))
                      (setq def nil))))
                (goto-char (elt state 1))
                (when def
                  (setq indent (+ (current-column) def)))))))
        (goto-char containing-sexp)
        ;; Go up one level; at top level stop the loop by exhausting the
        ;; depth budget.
        (condition-case ()
            (progn
              (backward-up-list 1)
              (incf depth))
          (error (setq depth clojure-max-backtracking)))))
    indent))
821 |
;; clojure backtracking indent is experimental and the format for these
;; entries are subject to change.  Each entry is (SYMBOL . SPEC).
(dolist (entry '((implement       4 (2))
                 (letfn           (2) 2)
                 (proxy           4 4 (2))
                 (reify           (2))
                 (deftype         4 4 (2))
                 (defrecord       4 4 (2))
                 (defprotocol     4 (2))
                 (extend-type     4 (2))
                 (extend-protocol 4 (2))
                 (specify         4 (2))
                 (specify!        4 (2))))
  (put (car entry) 'clojure-backtracking-indent (cdr entry)))
835 |
(defun put-clojure-indent (sym indent)
  "Store INDENT as the `clojure-indent-function' property of SYM.
`clojure-indent-function' reads this property to decide how forms
starting with SYM are indented."
  (put sym 'clojure-indent-function indent))
838 |
(defmacro define-clojure-indent (&rest kvs)
  "Register indentation rules for several symbols at once.
Each element of KVS is a list (SYMBOL INDENT).  SYMBOL is not
evaluated, INDENT is.  The expansion calls `put-clojure-indent'
once per element, in order."
  (let (forms)
    (dolist (kv kvs)
      (push `(put-clojure-indent ',(car kv) ,(cadr kv)) forms))
    `(progn ,@(nreverse forms))))
844 |
(defun add-custom-clojure-indents (name value)
  "Set the default value of NAME to VALUE and register its symbols.
VALUE is a list of symbols; each one is given `defun'-style
indentation through `put-clojure-indent'.  Return the list of
values returned by those calls."
  (custom-set-default name value)
  (let (results)
    (dolist (sym value (nreverse results))
      (push (put-clojure-indent sym 'defun) results))))
850 |
(defcustom clojure-defun-indents nil
  "List of additional symbols with defun-style indentation in Clojure.

You can use this to let Emacs indent your own macros the same way
that it indents built-in macros like `with-open'.  Setting the
option through Customize registers every listed symbol via
`add-custom-clojure-indents'.  To set an indentation rule
manually from Lisp code, use (put-clojure-indent 'some-symbol 'defun)."
  :type '(repeat symbol)
  :group 'clojure
  ;; The setter stores the value and registers each symbol.
  :set 'add-custom-clojure-indents)
860 |
;; Indentation rules.  Each entry is (SYMBOL INDENT); see
;; `clojure-indent-function' for the meaning of INDENT.  Every symbol is
;; listed once; the clj-thamil sections only add the Tamil names.
(define-clojure-indent
  ;; built-ins
  (ns 1)
  (fn 'defun)
  (def 'defun)
  (defn 'defun)
  (bound-fn 'defun)
  (if 1)
  (if-not 1)
  (case 1)
  (condp 2)
  (when 1)
  (while 1)
  (when-not 1)
  (when-first 1)
  (do 0)
  (future 0)
  (comment 0)
  (doto 1)
  (locking 1)
  (proxy 2)
  (with-open 1)
  (with-precision 1)
  (with-local-vars 1)

  ;; clj-thamil
  ;; built-ins
  (செயல்கூறு 'defun)
  (வரையறு 'defun)
  (வரையறு-செயல்கூறு 'defun)
  (எனில் 1)
  (இல்லெனில் 1)
  (என்னும்போது 1)
  ;; The unhyphenated spelling is the one the font-lock keyword list
  ;; uses; the hyphenated spelling is kept so that existing code using it
  ;; keeps its indentation.
  (இல்லென்னும்போது 1)
  (இல்லென்னும்-போது 1)
  (செய் 0)

  (reify 'defun)
  (deftype 2)
  (defrecord 2)
  (defprotocol 1)
  (extend 1)
  (extend-protocol 1)
  (extend-type 1)

  (try 0)
  (catch 2)
  (finally 0)

  ;; binding forms
  (let 1)
  (letfn 1)
  (binding 1)
  (loop 1)
  (for 1)
  (doseq 1)
  (dotimes 1)
  (when-let 1)
  (if-let 1)
  (when-some 1)
  (if-some 1)

  ;; clj-thamil
  ;; binding forms
  (வைத்துக்கொள் 1)
  (சுற்று 1)
  (ஒவ்வொன்றுக்கும் 1)
  (செய்வரிசை 1)

  ;; data structures
  (defstruct 1)
  (struct-map 1)
  (assoc 1)

  (defmethod 'defun)

  ;; clojure.test
  (testing 1)
  (deftest 'defun)
  (are 1)
  (use-fixtures 'defun)

  ;; core.logic
  (run 'defun)
  (run* 'defun)
  (fresh 'defun)

  ;; core.async
  (alt! 0)
  (alt!! 0)
  (go 0)
  (go-loop 1)
  (thread 0))
977 |
978 |
979 |
980 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
981 | ;;
982 | ;; Better docstring filling for clojure-mode
983 | ;;
984 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
985 |
(defun clojure-string-start (&optional regex)
  "Return the position of the \" that begins the string at point.
If REGEX is non-nil, return the position of the # that begins the
regex at point.  If point is not inside a string or regex, return
nil.

With REGEX nil, also return nil when the string found is a regex
literal; with REGEX non-nil, also return nil when it is a plain
string."
  (when (nth 3 (syntax-ppss)) ;; Are we really in a string?
    (save-excursion
      (save-match-data
        ;; Find a quote that appears immediately after whitespace,
        ;; beginning of line, hash, or an open paren, brace, or bracket.
        ;; NOERROR makes a failed search return nil instead of signalling.
        (when (re-search-backward
               "\\(\\s-\\|^\\|#\\|(\\|\\[\\|{\\)\\(\"\\)" nil t)
          (let* ((beg (match-beginning 2))
                 ;; `char-before' is nil when the quote is the first
                 ;; character of the buffer; `eq' copes with that, whereas
                 ;; `char-equal' would signal.
                 (after-hash (eq (char-before beg) ?#)))
            (if regex
                (and after-hash (1- beg))
              (unless after-hash beg))))))))
1003 |
(defun clojure-char-at-point ()
  "Return the character after point as a one-character string.
Return nil when point is at the end of the buffer."
  (let ((here (point)))
    (if (= here (point-max))
        nil
      (buffer-substring-no-properties here (1+ here)))))
1008 |
(defun clojure-char-before-point ()
  "Return the character before point as a one-character string.
Return nil when point is at the beginning of the buffer."
  (let ((here (point)))
    (if (= here (point-min))
        nil
      (buffer-substring-no-properties here (1- here)))))
1013 |
1014 | ;; TODO: Deal with the fact that when point is exactly at the
1015 | ;; beginning of a string, it thinks that is the end.
(defun clojure-string-end ()
  "Return the position of the \" that ends the string at point.

Note that point must be inside the string - if point is
positioned at the opening quote, incorrect results will be
returned.

Point and the match data are left unchanged.  An error is
signalled when no closing quote is found after point."
  (save-excursion
    (save-match-data
      ;; If we're at the end of the string, just return point.
      (if (and (string= (clojure-char-at-point) "\"")
               (not (string= (clojure-char-before-point) "\\")))
          (point)
        ;; We don't want to get screwed by starting out at the
        ;; backslash in an escaped quote.
        (when (string= (clojure-char-at-point) "\\")
          (backward-char))
        ;; Look for a quote not preceded by a backslash.  The search has
        ;; no NOERROR argument, so it signals when nothing is found.
        (re-search-forward "[^\\]\\\(\\\"\\)")
        (match-beginning 1)))))
1035 |
(defun clojure-mark-string ()
  "Mark the string at point.
Move point just after the opening quote and set the mark at the
closing quote.  Signal a `user-error' when
`clojure-string-start' finds no string at point."
  (interactive)
  (let ((start (clojure-string-start)))
    ;; `clojure-string-start' returns nil outside of a string; passing
    ;; that to `goto-char' would fail with an obscure type error.
    (unless start
      (user-error "Point is not inside a string"))
    (goto-char start)
    (forward-char)
    (set-mark (clojure-string-end))))
1042 |
(defun clojure-toggle-keyword-string ()
  "Convert the string or keyword at point to keyword or string.
Search backwards from point for the nearest double quote or colon
and rewrite the expression starting there: \"foo\" becomes :foo
and :foo becomes \"foo\".  Point is restored to its original
position afterwards.  Signal an error when the beginning of the
buffer is reached without finding either character."
  (interactive)
  (let ((original-point (point)))
    ;; `char-after' is nil at the end of the buffer, so starting there is
    ;; fine: it simply does not match and the scan moves backwards.
    (while (and (> (point) (point-min))
                (not (memq (char-after) '(?\" ?:))))
      (backward-char))
    ;; Test for the delimiter before giving up, so that a string or
    ;; keyword at the very start of the buffer can be toggled too.
    (cond
     ((eq (char-after) ?\")
      (insert ":" (substring (clojure-delete-and-extract-sexp) 1 -1)))
     ((eq (char-after) ?:)
      (insert "\"" (substring (clojure-delete-and-extract-sexp) 1) "\""))
     (t
      (error "Beginning of file reached, this was probably a mistake")))
    (goto-char original-point)))
1059 |
(defun clojure-delete-and-extract-sexp ()
  "Delete the sexp after point and return its text.
The returned string carries no text properties.  Point ends up
where the sexp used to start."
  (interactive)
  (let* ((start (point))
         (end (progn (forward-sexp) (point))))
    (prog1 (buffer-substring-no-properties start end)
      (delete-region start end))))
1068 |
1069 |
1070 |
(defconst clojure-namespace-name-regex
  (rx line-start
      (zero-or-more whitespace)
      "("
      ;; Group 1.  A literal string rather than a `regexp' form, so that
      ;; the dot only matches a dot.
      (zero-or-one (group "clojure.core/"))
      ;; Group 2.
      (zero-or-one (submatch "in-"))
      "ns"
      (zero-or-one "+")
      (one-or-more (any whitespace "\n"))
      ;; Optional metadata; the map form is group 3.
      (zero-or-more (or (submatch (zero-or-one "#")
                                  "^{"
                                  (zero-or-more (not (any "}")))
                                  "}")
                        (zero-or-more "^:"
                                      (one-or-more (not (any whitespace)))))
                    (one-or-more (any whitespace "\n")))
      ;; why is this here? oh (in-ns 'foo) or (ns+ :user)
      (zero-or-one (any ":'"))
      ;; Group 4: the namespace name.
      (group (one-or-more (not (any "()\"" whitespace))) word-end))
  "Regexp matching a namespace declaration at the start of a line.
It accepts `ns', `in-ns' and `ns+' forms, optionally qualified
with clojure.core/ and optionally carrying metadata.  Match
group 4 is the namespace name.")
1090 |
1091 | ;; for testing clojure-namespace-name-regex, you can evaluate this code and make
1092 | ;; sure foo (or whatever the namespace name is) shows up in results. some of
1093 | ;; these currently fail.
1094 | ;; (mapcar (lambda (s) (let ((n (string-match clojure-namespace-name-regex s)))
1095 | ;; (if n (match-string 4 s))))
1096 | ;; '("(ns foo)"
1097 | ;; "(ns
1098 | ;; foo)"
1099 | ;; "(ns foo.baz)"
1100 | ;; "(ns ^:bar foo)"
1101 | ;; "(ns ^:bar ^:baz foo)"
1102 | ;; "(ns ^{:bar true} foo)"
1103 | ;; "(ns #^{:bar true} foo)"
1104 | ;; "(ns #^{:fail {}} foo)"
1105 | ;; "(ns ^{:fail2 {}} foo.baz)"
1106 | ;; "(ns ^{} foo)"
1107 | ;; "(ns ^{:skip-wiki true}
1108 | ;; aleph.netty
1109 | ;; "
1110 | ;; "(ns
1111 | ;; foo)"
1112 | ;; "foo"))
1113 |
1114 |
1115 |
(defun clojure-expected-ns ()
  "Return the namespace name that the file should have.

The name is derived from the visited file's path relative to the
nearest directory at or above `default-directory' that contains a
\"project.clj\" file: the file extension and the first path
component are dropped, the remaining components are joined with
dots, and underscores become hyphens.  For example,
\"src/my_app/core.clj\" yields \"my-app.core\".

Signal an error if the buffer is not visiting a file or if no
\"project.clj\" can be found."
  (let ((file (buffer-file-name))
        (root (locate-dominating-file default-directory "project.clj")))
    (unless file
      (error "Buffer is not visiting a file"))
    (unless root
      (error "No project.clj found above %s" default-directory))
    (let* ((project-dir (file-truename root))
           ;; `file-name-sans-extension' also copes with files that
           ;; have no extension, where computing a negative end index
           ;; from a zero-length extension would be out of range.
           (relative (file-name-sans-extension
                      (substring (file-truename file)
                                 (length project-dir)))))
      (replace-regexp-in-string
       "_" "-" (mapconcat 'identity (cdr (split-string relative "/")) ".")))))
1126 |
(defun clojure-insert-ns-form-at-point ()
  "Insert an `ns' form at point.
The namespace name used is the one returned by `clojure-expected-ns'."
  (interactive)
  (let ((expected (clojure-expected-ns)))
    (insert (format "(ns %s)" expected))))
1131 |
(defun clojure-insert-ns-form ()
  "Insert a namespace form at the very start of the buffer.
Any narrowing in effect is removed first, then point is moved to the
beginning of the buffer and `clojure-insert-ns-form-at-point' does
the insertion."
  (interactive)
  (widen)
  (let ((buffer-start (point-min)))
    (goto-char buffer-start))
  (clojure-insert-ns-form-at-point))
1138 |
(defun clojure-update-ns ()
  "Update the namespace of the current buffer.
Useful if a file has been renamed.

Replace the name in the buffer's first namespace form with the
value returned by `clojure-expected-ns'.  Signal an error if no
namespace form is found."
  (interactive)
  (let ((nsname (clojure-expected-ns)))
    (when nsname
      (save-excursion
        (save-restriction
          ;; `clojure-find-ns' searches the widened buffer, so the
          ;; match may lie outside the current narrowing.
          (widen)
          (save-match-data
            ;; `clojure-find-ns' leaves the match data of its search
            ;; set; group 4 is the namespace name.
            (if (clojure-find-ns)
                ;; FIXEDCASE and LITERAL are both non-nil so NSNAME is
                ;; inserted verbatim: no case conversion modelled on
                ;; the old name, no special meaning for backslashes.
                (replace-match nsname t t nil 4)
              (error "Namespace not found"))))))))
1150 |
(defun clojure-find-ns ()
  "Return the namespace name declared in the current buffer, or nil.
The whole buffer is searched from its beginning, ignoring any
narrowing, for the first match of `clojure-namespace-name-regex'.
Point and narrowing are restored afterwards."
  (save-excursion
    (save-restriction
      (widen)
      (goto-char (point-min))
      ;; Group 4 of the regexp is the namespace name.
      (and (re-search-forward clojure-namespace-name-regex nil t)
           (match-string-no-properties 4)))))
1160 |
(defun clojure-find-def ()
  "Describe the defining form at or around point.
The result is a two-element list holding the defining macro and the
name being defined, e.g. (\"defn\" \"abc\") or (\"deftest\" \"some-test\").
Return nil when no such form is found."
  (let ((def-regexp
         (concat
          ;; Opening paren and an optional namespace qualifier.
          "(\\(?:\\(?:\\sw\\|\\s_\\)+/\\)?"
          ;; Group 1: the defining macro, anything starting with "def".
          "\\(def\\sw*\\)\\>"
          ;; Whitespace between the macro and what follows.
          "[ \r\n\t]*"
          ;; Optional type hints or metadata before the name.
          "\\(?:#?^\\(?:{[^}]*}\\|\\(?:\\sw\\|\\s_\\)+\\)[ \r\n\t]*\\)*"
          ;; Group 2: the symbol being defined.
          "\\(\\(?:\\sw\\|\\s_\\)+\\)")))
    (save-excursion
      ;; When point is not already on a defining form, start from the
      ;; beginning of the enclosing top-level form.
      (or (looking-at def-regexp)
          (beginning-of-defun))
      (and (re-search-forward def-regexp nil t)
           (list (match-string 1)
                 (match-string 2))))))
1179 |
;; Associate files ending in .clj, .cljs, .cljx, .dtm or .edn with
;; `clojure-mode'.
;;;###autoload
(add-to-list 'auto-mode-alist
             '("\\.\\(clj[sx]?\\|dtm\\|edn\\)\\'" . clojure-mode))

;; Announce the `clojure-mode' feature so that `require' can load it.
(provide 'clojure-mode)
1185 |
1186 | ;; Local Variables:
1187 | ;; coding: utf-8
1188 | ;; byte-compile-warnings: (not cl-functions)
1189 | ;; indent-tabs-mode: nil
1190 | ;; End:
1191 |
1192 | ;;; clojure-mode.el ends here
1193 |
--------------------------------------------------------------------------------