├── doc
    └── intro.md
├── examples
    ├── clj
    │   ├── clj-español
    │   │   ├── doc
    │   │   │   └── intro.md
    │   │   ├── .gitignore
    │   │   ├── test
    │   │   │   └── clj_español
    │   │   │   │   └── core_test.clj
    │   │   ├── README.md
    │   │   ├── project.clj
    │   │   ├── src
    │   │   │   └── clj_español
    │   │   │   │   └── core.clj
    │   │   └── LICENSE
    │   ├── clj-spanish
    │   │   ├── doc
    │   │   │   └── intro.md
    │   │   ├── .gitignore
    │   │   ├── test
    │   │   │   └── clj_spanish
    │   │   │   │   └── core_test.clj
    │   │   ├── README.md
    │   │   ├── project.clj
    │   │   ├── src
    │   │   │   └── clj_spanish
    │   │   │   │   └── core.clj
    │   │   └── LICENSE
    │   ├── ஆமை-தமிழ்
    │   │   ├── .gitignore
    │   │   ├── project.clj
    │   │   └── src
    │   │   │   └── ஆமை_தமிழ்
    │   │   │       └── கரு.clj
    │   └── turtle-thamil
    │   │   ├── project.clj
    │   │   └── src
    │   │       └── turtle_thamil
    │   │           └── core.clj
    ├── js
    │   ├── setup.sh
    │   ├── test01.html
    │   ├── test03.html
    │   └── test02.html
    └── java
    │   └── java-examples
    │       ├── README.md
    │       ├── src
    │           └── main
    │           │   └── java
    │           │       └── clj-thamil
    │           │           └── examples
    │           │               └── java
    │           │                   ├── WordSort01.java
    │           │                   └── WordSort02.java
    │       └── pom.xml
├── .gitignore
├── src
    └── clj_thamil
    │   ├── java
    │       └── api
    │       │   └── format.cljc
    │   ├── subprograms.cljc
    │   ├── js
    │       └── api
    │       │   └── convert.cljs
    │   ├── main.clj
    │   ├── format
    │       ├── analysis.cljc
    │       └── convert.cljc
    │   ├── core.cljc
    │   ├── மொழியியல்.cljc
    │   └── format.cljc
├── test
    └── clj_thamil
    │   ├── format
    │       ├── analysis_test.cljc
    │       └── convert_test.cljc
    │   ├── demo
    │       └── trans_demo_01.cljc
    │   ├── core_test.cljc
    │   ├── மொழியியல்_test.cljc
    │   └── format_test.cljc
├── CHANGELOG.md
├── project.clj
├── README.md
└── emacs
    └── clojure-mode.el


/doc/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction to clj-thamil
2 | 
3 | TODO: write [great documentation](http://jacobian.org/writing/great-documentation/what-to-write/)
4 | 


--------------------------------------------------------------------------------
/examples/clj/clj-español/doc/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction to clj-español
2 | 
3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)
4 | 


--------------------------------------------------------------------------------
/examples/clj/clj-spanish/doc/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction to clj-spanish
2 | 
3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)
4 | 


--------------------------------------------------------------------------------
/examples/clj/ஆமை-தமிழ்/.gitignore:
--------------------------------------------------------------------------------
 1 | /target
 2 | /classes
 3 | /checkouts
 4 | pom.xml
 5 | pom.xml.asc
 6 | *.jar
 7 | *.class
 8 | /.lein-*
 9 | /.nrepl-port
10 | .hgignore
11 | .hg/
12 | 


--------------------------------------------------------------------------------
/examples/clj/clj-español/.gitignore:
--------------------------------------------------------------------------------
 1 | /target
 2 | /classes
 3 | /checkouts
 4 | pom.xml
 5 | pom.xml.asc
 6 | *.jar
 7 | *.class
 8 | /.lein-*
 9 | /.nrepl-port
10 | .hgignore
11 | .hg/
12 | 


--------------------------------------------------------------------------------
/examples/clj/clj-spanish/.gitignore:
--------------------------------------------------------------------------------
 1 | /target
 2 | /classes
 3 | /checkouts
 4 | pom.xml
 5 | pom.xml.asc
 6 | *.jar
 7 | *.class
 8 | /.lein-*
 9 | /.nrepl-port
10 | .hgignore
11 | .hg/
12 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /target
 2 | /lib
 3 | /classes
 4 | /checkouts
 5 | pom.xml
 6 | pom.xml.asc
 7 | *.jar
 8 | *.class
 9 | .lein-deps-sum
10 | .lein-failures
11 | .lein-plugins
12 | .lein-repl-history
13 | /examples/js/js
14 | /resources/public/js
15 | 


--------------------------------------------------------------------------------
/src/clj_thamil/java/api/format.cljc:
--------------------------------------------------------------------------------
1 | (ns clj-thamil.java.api.format
2 |   (:require [clj-thamil.format :as fmt])
3 |   #?(:clj (:import java.util.Comparator)
4 |      :clj (:gen-class
5 |             :methods [#^{:static true} [word_comp [] java.util.Comparator]])))
6 | 
7 | (defn -word_comp [] fmt/word-comp)
8 | 


--------------------------------------------------------------------------------
/examples/clj/clj-español/test/clj_español/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns clj-español.core-test
2 |   (:require [clojure.test :refer :all]
3 |             [clj-español.core :refer :all]))
4 | 
5 | (deftest core-test
6 |   (let [numbers [2 3 5 7 11]]
7 |     (testing "Clojure en español"
8 |       (is (= 11 (último numbers))))))
9 | 


--------------------------------------------------------------------------------
/examples/clj/clj-spanish/test/clj_spanish/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns clj-spanish.core-test
2 |   (:require [clojure.test :refer :all]
3 |             [clj-spanish.core :refer :all]))
4 | 
5 | (deftest core-test
6 |   (let [numbers [2 3 5 7 11]]
7 |     (testing "Clojure en español"
8 |       (is (= 11 (último numbers))))))
9 | 


--------------------------------------------------------------------------------
/examples/js/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | mkdir -p js
4 | lein do clean, compile, cljsbuild once
5 | #cp -r ../../resources/public/js/clj-thamil.js ./js
6 | #cp -r ../../resources/public/js/out/* ./js
7 | rsync --recursive ../../resources/public/js/clj-thamil.js ./js
8 | rsync --recursive ../../resources/public/js/out/ ./js
9 | 


--------------------------------------------------------------------------------
/examples/clj/clj-español/README.md:
--------------------------------------------------------------------------------
 1 | # clj-español
 2 | 
 3 | A Clojure library designed to ... well, that part is up to you.
 4 | 
 5 | ## Usage
 6 | 
 7 | FIXME
 8 | 
 9 | ## License
10 | 
11 | Copyright © 2016 FIXME
12 | 
13 | Distributed under the Eclipse Public License either version 1.0 or (at
14 | your option) any later version.
15 | 


--------------------------------------------------------------------------------
/examples/clj/clj-spanish/README.md:
--------------------------------------------------------------------------------
 1 | # clj-spanish
 2 | 
 3 | A Clojure library designed to ... well, that part is up to you.
 4 | 
 5 | ## Usage
 6 | 
 7 | FIXME
 8 | 
 9 | ## License
10 | 
11 | Copyright © 2016 FIXME
12 | 
13 | Distributed under the Eclipse Public License either version 1.0 or (at
14 | your option) any later version.
15 | 


--------------------------------------------------------------------------------
/examples/clj/clj-español/project.clj:
--------------------------------------------------------------------------------
1 | (defproject clj-español "0.1.0-SNAPSHOT"
2 |   :description "FIXME: write description"
3 |   :url "http://example.com/FIXME"
4 |   :license {:name "Eclipse Public License"
5 |             :url "http://www.eclipse.org/legal/epl-v10.html"}
6 |   :dependencies [[org.clojure/clojure "1.7.0"]
7 |                  [clj-thamil "0.1.2"]])
8 | 


--------------------------------------------------------------------------------
/examples/clj/clj-spanish/project.clj:
--------------------------------------------------------------------------------
1 | (defproject clj-spanish "0.1.0-SNAPSHOT"
2 |   :description "FIXME: write description"
3 |   :url "http://example.com/FIXME"
4 |   :license {:name "Eclipse Public License"
5 |             :url "http://www.eclipse.org/legal/epl-v10.html"}
6 |   :dependencies [[org.clojure/clojure "1.7.0"]
7 |                  [clj-thamil "0.1.2"]])
8 | 


--------------------------------------------------------------------------------
/examples/clj/ஆமை-தமிழ்/project.clj:
--------------------------------------------------------------------------------
1 | (defproject ஆமை-தமிழ் "0.1.0-SNAPSHOT"
2 |   :description "FIXME: write description"
3 |   :url "http://example.com/FIXME"
4 |   :license {:name "Eclipse Public License"
5 |             :url "http://www.eclipse.org/legal/epl-v10.html"}
6 |   :dependencies [[org.clojure/clojure "1.7.0"]
7 |                  [clj-thamil "0.1.2"]
8 |                  [com.google/clojure-turtle "0.2.0"]])
9 | 


--------------------------------------------------------------------------------
/examples/clj/turtle-thamil/project.clj:
--------------------------------------------------------------------------------
1 | (defproject turtle-thamil "0.1.0-SNAPSHOT"
2 |   :description "FIXME: write description"
3 |   :url "http://example.com/FIXME"
4 |   :license {:name "Eclipse Public License"
5 |             :url "http://www.eclipse.org/legal/epl-v10.html"}
6 |   :dependencies [[org.clojure/clojure "1.7.0"] 
7 |                  [clj-thamil "0.1.2"]
8 |                  [com.google/clojure-turtle "0.2.0"]])
9 | 


--------------------------------------------------------------------------------
/src/clj_thamil/subprograms.cljc:
--------------------------------------------------------------------------------
 1 | (ns clj-thamil.subprograms
 2 |   (:require [clojure.java.io :as jio]
 3 |             [clj-thamil.format :as fmt]))
 4 | 
 5 | (defn print-as-phonemes
 6 |   "Reads lines from *in* and prints each one as the concatenation of the
 7 |   elements returned by fmt/str->phonemes. The arguments are ignored."
 8 |   [& args]
 9 |   (with-open [rdr (jio/reader *in*)]
10 |     ;; doall forces every line to be printed before the reader is closed.
11 |     (doall
12 |      (map (fn [text]
13 |             (println (apply str (fmt/str->phonemes text))))
14 |           (line-seq rdr)))))
15 | 


--------------------------------------------------------------------------------
/src/clj_thamil/js/api/convert.cljs:
--------------------------------------------------------------------------------
 1 | (ns clj-thamil.js.api.convert
 2 |   (:require [clj-thamil.format.convert :as cvt]))
 3 | ;; ASCII-named aliases for fns in clj-thamil.format.convert (examples/js/test01.html calls romanized_to_thamil from JavaScript).
 4 | (def romanized-to-thamil cvt/romanized->தமிழ்)
 5 | (def thamil-to-romanized cvt/தமிழ்->romanized)
 6 | ;; The same pair of directions follows for each of: tab, bamini, tscii, webulagam.
 7 | (def tab-to-thamil cvt/tab->தமிழ்)
 8 | (def thamil-to-tab cvt/தமிழ்->tab)
 9 | 
10 | (def bamini-to-thamil cvt/bamini->தமிழ்)
11 | (def thamil-to-bamini cvt/தமிழ்->bamini)
12 | 
13 | (def tscii-to-thamil cvt/tscii->தமிழ்)
14 | (def thamil-to-tscii cvt/தமிழ்->tscii)
15 | 
16 | (def webulagam-to-thamil cvt/webulagam->தமிழ்)
17 | (def thamil-to-webulagam cvt/தமிழ்->webulagam)
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/test/clj_thamil/format/analysis_test.cljc:
--------------------------------------------------------------------------------
 1 | (ns clj-thamil.format.analysis-test
 2 |   (:require [clj-thamil.format :as fmt])
 3 |   (:use clj-thamil.format.analysis 
 4 |         clojure.test))
 5 | 
 6 | (deftest letters-plus-grantha-test
 7 |   (let [letters-plus-grantha-trie (fmt/make-trie (flatten letters-plus-grantha))
 8 |         str->letters-plus-grantha (fn [s] (fmt/str->elems letters-plus-grantha-trie s))]
 9 |     (testing "string fns also working on grantha letters"
10 |       (is (= ["ஜி" "மி" "க்" "கி"] (str->letters-plus-grantha "ஜிமிக்கி")))
11 |       (is (= ["கு" "ஷி"] (str->letters-plus-grantha "குஷி"))))
12 |     (testing "trie-elem-freqs"
13 |       (let [s "ஜோடி"]
14 |         (is (= (trie-elem-freqs letters-plus-grantha-trie s)
15 |                (trie-elem-freqs letters-plus-grantha-trie (str s " abc 123 a3"))))))))
16 | 


--------------------------------------------------------------------------------
/examples/clj/turtle-thamil/src/turtle_thamil/core.clj:
--------------------------------------------------------------------------------
 1 | (ns turtle-thamil.core
 2 |   (:require [clojure-turtle.core :as turtle])
 3 |   (:use clj-thamil.core))
 4 | 
 5 | (def turtle-fns-map '{turtle/forward முன்னால்
 6 |                       turtle/back பின்னால்
 7 |                       turtle/right வலது
 8 |                       turtle/left இடது
 9 |                       turtle/translate இடம்பெயர்
10 |                       turtle/penup எழுதுகோலெடு
11 |                       turtle/pendown எழுதுகோல்வை
12 |                       turtle/clean துப்புரவு
13 |                       ;; setxy ???
14 |                       turtle/setheading திசைவை
15 |                       turtle/home வீடு})
16 | 
17 | (def turtle-forms-map '{turtle/repeat மீண்டும்
18 |                         turtle/all எல்லாம்
19 |                         turtle/new-window புது-சாளரம்})
20 | 
21 | (translate-fns turtle-fns-map)
22 | (translate-forms turtle-forms-map)
23 | 


--------------------------------------------------------------------------------
/examples/clj/ஆமை-தமிழ்/src/ஆமை_தமிழ்/கரு.clj:
--------------------------------------------------------------------------------
 1 | (ns ஆமை-தமிழ்.கரு
 2 |   (:require [clojure-turtle.core :as turtle])
 3 |   (:use clj-thamil.core))
 4 | 
 5 | (translate-forms '{translate-fns மொழிப்பெயர்-செயல்கூறுகள்
 6 |                    translate-forms மொழிப்பெயர்-வடிவங்கள்})
 7 | 
 8 | (வரையறு ஆமை-செயல்கூறுகள்
 9 |         '{turtle/forward முன்னால்
10 |           turtle/back பின்னால்
11 |           turtle/right வலது
12 |           turtle/left இடது
13 |           turtle/translate இடம்பெயர்
14 |           turtle/penup எழுதுகோலெடு
15 |           turtle/pendown எழுதுகோல்வை
16 |           turtle/clean துப்புரவு
17 |           ;; setxy ???
18 |           turtle/setheading திசைவை
19 |           turtle/home வீடு})
20 | 
21 | (வரையறு ஆமை-வடிவங்கள்
22 |         '{turtle/repeat மீண்டும்
23 |           turtle/all எல்லாம்
24 |           turtle/new-window புது-சாளரம்})
25 | 
26 | (மொழிப்பெயர்-செயல்கூறுகள் ஆமை-செயல்கூறுகள்)
27 | (மொழிப்பெயர்-வடிவங்கள் ஆமை-வடிவங்கள்)
28 | 


--------------------------------------------------------------------------------
/src/clj_thamil/main.clj:
--------------------------------------------------------------------------------
 1 | (ns clj-thamil.main
 2 |   "Command-line entry point that dispatches to a clj-thamil sub-program."
 3 |   ;; ns clauses are keyword-headed; the ns spec checked by Clojure 1.9+
 4 |   ;; rejects a clause that starts with the bare symbol `require`.
 5 |   (:require [clojure.string :as string]
 6 |             [clj-thamil.format [analysis :as analysis] [convert :as convert]]
 7 |             [clj-thamil.subprograms :as subprog])
 8 |   (:gen-class))
 9 | 
10 | (def ^{:doc "a map that specifies what sub-program to run based on the first arg passed in"}
11 |   main-fns
12 |   {"freqs" analysis/-main
13 |    "osxkeyb" convert/-main
14 |    "phonemes" subprog/print-as-phonemes})
15 | 
16 | (defn -main
17 |   "Looks up the first argument in main-fns and applies the matching function
18 |   to the remaining arguments. Asserts that at least one argument was given,
19 |   and throws an Exception listing the available names when the first argument
20 |   is not a key of main-fns."
21 |   [& args]
22 |   (assert (pos? (count args)) "Running clj-thamil as an executable requires arguments")
23 |   (let [subprog (first args)
24 |         ;; fallback used when the requested name is not in main-fns
25 |         default-fn (fn [& args] (throw (Exception. (str "The specified clj-thamil sub-program is misspelled or does not exist.  Available sub-programs: [" (string/join ", " (-> main-fns keys sort)) "]"))))
26 |         subprog-fn (get main-fns subprog default-fn)
27 |         new-args (rest args)]
28 |     (apply subprog-fn new-args)))
29 | 


--------------------------------------------------------------------------------
/test/clj_thamil/demo/trans_demo_01.cljc:
--------------------------------------------------------------------------------
 1 | (ns clj-thamil.demo.trans-demo-01
 2 |   (:use clj-thamil.core))
 3 | 
 4 | (வரையறு-செயல்கூறு தன்னால்-பெருக்கு
 5 |   [அ]
 6 |   (* அ அ))
 7 | 
 8 | (வரையறு எண்கள் [108 1008 18 63 6 12 247])
 9 | 
10 | ;; 1-ஆம் சுற்று
11 | 
12 | (வரையறு-செயல்கூறு சுற்று-01
13 |   [] 
14 |   (வரி-அச்சிடு "எண்களையும் அவற்றின் சதுர ஆக்கங்களையும்(?) அச்சிடுவது:") 
15 |   (செய்வரிசை [எண் எண்கள்]
16 |             (வரி-அச்சிடு (தொடை "[" எண் "] -> [" (தன்னால்-பெருக்கு எண்) "]"))))
17 | 
18 | (வரையறு சதுர-எண்கள் (விவரி தன்னால்-பெருக்கு எண்கள்))
19 | 
20 | ;; 2-ஆம் சுற்று
21 | 
22 | (வரையறு-செயல்கூறு சுற்று-02
23 |   [] 
24 |   (வரி-அச்சிடு "மீண்டும் எண்களையும் சதுர எண்களையும் அச்சிடுவது:") 
25 |   (செய்யோட்டம்
26 |    (விவரி (செயல்கூறு [எ சஎ] (வரி-அச்சிடு எ "*" எ "=" சஎ)) எண்கள் சதுர-எண்கள்)))
27 | 
28 | ;; எல்லாச் சுற்றும்
29 | 
30 | (வரையறு-செயல்கூறு எல்லாவற்றையும்-ஓட்டு
31 |   []
32 |   (வைத்துக்கொள் [செயல்கூறுகள் [சுற்று-01 சுற்று-02]
33 |               ஓட்டும்-செயல்கூறுகள் (இடைபொருத்து வரி-அச்சிடு செயல்கூறுகள்)]
34 |              (செய்வரிசை [செ ஓட்டும்-செயல்கூறுகள்]
35 |                (செ))))
36 | 


--------------------------------------------------------------------------------
/examples/java/java-examples/README.md:
--------------------------------------------------------------------------------
 1 | # Java Examples for clj-thamil
 2 | 
 3 | ## Requirements
 4 | 
 5 | The Java example code requires the clj-thamil artifact to be built and installed.  Refer to the [Building](../../../README.md) section on how to build and install the artifact.
 6 | 
 7 | ## Building
 8 | 
 9 | All of the Java examples can be built together by running the following from the clj-thamil project root:
10 | ```
11 | lein clean
12 | lein install
13 | cd examples/java/java-examples
14 | mvn clean package
15 | ```
16 | 
17 | Building the Java examples is separate from building the clj-thamil artifact that they depend on.
18 | 
19 | ## Usage
20 | 
21 | After following the build instructions above, a shaded jar/uberjar will be in the `target` subdirectory, but is not
22 | in itself executable since it contains multiple main methods.
23 | Instead, the uberjar should be provided in the classpath followed by
24 | the class name of the example being run:
25 | ```
26 | java -cp target/java-examples-1.0.jar clj_thamil.examples.java.WordSort01
27 | java -cp target/java-examples-1.0.jar clj_thamil.examples.java.WordSort02
28 | ```
29 | 
30 | ## Overview
31 | 
32 | * WordSort01 - sorts words based on Thamil alphabetical order
33 | * WordSort02 - sorts words based on Thamil alphabetical order
34 | 


--------------------------------------------------------------------------------
/examples/js/test01.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |   <head>
 3 |     <meta charset="utf-8">
 4 |   </head>
 5 |   <body>
 6 |     <script charset="utf-8" type="text/javascript" src="js/goog/base.js"></script>
 7 |     <script charset="utf-8" type="text/javascript" src="js/clj-thamil.js"></script>
 8 | 
 9 |     <script charset="utf-8" type="text/javascript" src="js/clojure/string.js"></script>
10 |     <script charset="utf-8" type="text/javascript" src="js/cljs/core.js"></script>
11 | 
12 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/core.js"></script>
13 |     <script charset="utf-8" type="text/javascript" src="js/clojure/set.js"></script>
14 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/format.js"></script>
15 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/format/convert.js"></script>
16 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/js/api/convert.js"></script>
17 | 
18 |     <script>
19 | 
20 | // alert(clojure.string.reverse("ClojureScript"));
21 | // alert(clojure.string.reverse(cljs.core.str("ClojureScript", ", ", "rocks")));
22 | 
23 | // alert("" + cljs.core.first([1,2,3]));
24 | 
25 | // alert(cljs.core.str("ClojureScript ", 2));
26 | 
27 | 
28 | alert(clj_thamil.js.api.convert.romanized_to_thamil("vaNakkam"));
29 | 
30 | </script>
31 |   </body>
32 | </html>
33 | 


--------------------------------------------------------------------------------
/test/clj_thamil/core_test.cljc:
--------------------------------------------------------------------------------
 1 | (ns clj-thamil.core-test
 2 |   (:use clojure.test
 3 |         clj-thamil.core))
 4 | 
 5 | (defn demo-print-1
 6 |   []
 7 |   (println "hello"))
 8 | 
 9 | (defn demo-print-2
10 |   []
11 |   (println "வணக்கம்"))
12 | 
13 | (defn demo-print-3
14 |   []
15 |   (வரி-அச்சிடு "வணக்கம்"))
16 | 
17 | (defn demo-add-1
18 |   []
19 |   (if (= 4 (+ 2 2))
20 |     (println "true")
21 |     (println "false")))
22 | 
23 | (defn demo-add-2
24 |   []
25 |   (if (= 4 (+ 2 2))
26 |     (println "true")
27 |     (println "false"))
28 |   (if (= 5 (+ 2 2))
29 |     (println "true")
30 |     (println "false")))
31 | 
32 | (defn demo-add-3
33 |   []
34 |   (எனில் (= 4 (+ 2 2))
35 |         (வரி-அச்சிடு "வாய்மை")
36 |         (வரி-அச்சிடு "பொய்மை"))
37 |   (எனில் (= 5 (+ 2 2))
38 |         (வரி-அச்சிடு "வாய்மை")
39 |         (வரி-அச்சிடு "பொய்மை")))
40 | 
41 | (வரையறு-செயல்கூறு demo-add-4
42 |   []
43 |   (எனில் (= 4 (+ 2 2))
44 |         (வரி-அச்சிடு "வாய்மை")
45 |         (வரி-அச்சிடு "பொய்மை"))
46 |   (எனில் (= 5 (+ 2 2))
47 |         (வரி-அச்சிடு "வாய்மை")
48 |         (வரி-அச்சிடு "பொய்மை")))
49 | 
50 | (வரையறு-செயல்கூறு மாதிரி-கூட்டு-5
51 |   []
52 |   (எனில் (= 4 (+ 2 2))
53 |         (வரி-அச்சிடு "வாய்மை")
54 |         (வரி-அச்சிடு "பொய்மை"))
55 |   (எனில் (= 5 (+ 2 2))
56 |         (வரி-அச்சிடு "வாய்மை")
57 |         (வரி-அச்சிடு "பொய்மை")))
58 | 
59 | (def demo-fns [demo-print-1 demo-print-2 demo-print-3
60 |                demo-add-1 demo-add-2 demo-add-3 demo-add-4 மாதிரி-கூட்டு-5])
61 | 
62 | (deftest a-test  ; exercises only clojure.core and clojure.test
63 |   (testing "Thamil text works in string literals and in local binding names"
64 |     (let [s "வணக்கம்"
65 |           hello (fn []
66 |                   (str "Hello, and " s))
67 |           ஒன்று 1
68 |           இரண்டு 2
69 |           v [ஒன்று இரண்டு "மூன்று"]]
70 |       (is (= v [1 2 "மூன்று"]))                ; Thamil-named locals resolve to 1 and 2
71 |       (is (= (hello) "Hello, and வணக்கம்")))))  ; str concatenates the Thamil literal
72 | 
73 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Change Log
 2 | All notable changes to this project will be documented in this file.
 3 | 
 4 | ## [Unreleased][unreleased]
 5 | 
 6 | ## [0.2.0] - 2016-07-30
 7 | ### Added
 8 | - Examples of Clojure in Spanish, Logo in Thamil
 9 | ### Changed
10 | - Using reader conditionals instead of cljx (for compiling common Clojure code to Java and JS targets)
11 | ### Fixed
12 | - JS examples
13 | 
14 | ## [0.1.2] - 2015-02-19
15 | ### Added
16 | - Generative testing for font conversion functions
17 | - Java examples using clj-thamil jar artifact
18 | - Doc for clj-thamil and Java examples
19 | 
20 | ### Changed
21 | - Font conversion information represented as map instead of seq
22 | - தமிழ்->romanized transliteration for certain consonant clusters
23 | 
24 | ### Fixed
25 | - Test file namespace name
26 | - Cljx configs for dirs for source and target for clj, cljs
27 | 
28 | ## [0.1.1] - 2015-02-04
29 | ### Added
30 | - 'Translations' of Clojure core library form names via a couple of maps and a handful of macros
31 | - Trie functions (create, get-in)
32 | - Function using a trie to extract/convert a string into its elements
33 |   - Fns to split a string into a sequence of Thamil letters/phonemes
34 |   - Fn to create a Thamil string from a sequence of phonemes
35 | - Sorting fns and comparators for single- and multi-letter Thamil strings 
36 | - Seq fns generalized from string functions (index-of, prefix)
37 | - Function to adjust the cursor position in Thamil text
38 | - Functions written in Thamil to perform Thamil grammatical operations
39 |   - Pluralize, add suffixes generally, add noun case suffixes
40 | - Letter frequency analysis and result output functions
41 | - Functions to convert between old Thamil font character sets and Unicode
42 | - Using cljx to be forward-compatible with compilation to JS via ClojureScript
43 | - Configs for deploying GPG-signed releases to Clojars
44 | 
45 | [unreleased]: https://github.com/echeran/clj-thamil/compare/0.2.0...master
46 | [0.2.0]: https://github.com/echeran/clj-thamil/tree/0.2.0
47 | [0.1.2]: https://github.com/echeran/clj-thamil/tree/0.1.2
48 | [0.1.1]: https://github.com/echeran/clj-thamil/tree/0.1.1
49 | 


--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
 1 | (defproject clj-thamil "0.2.0"
 2 |   :description "A project encompassing various Thamil language-specific computing ideas"
 3 |   :url "https://github.com/echeran/clj-thamil"
 4 |   :license {:name "Eclipse Public License"
 5 |             :url "http://www.eclipse.org/legal/epl-v10.html"}
 6 |   :scm {:name "git"
 7 |         :url "https://github.com/echeran/clj-thamil"}
 8 |   :repositories [["releases" {:url "https://clojars.org/repo/"}]]
 9 |   :deploy-repositories [["clojars" {:creds :gpg}]]
10 |   :pom-addition [:developers [:developer
11 |                               [:name "Elango Cheran"]
12 |                               [:url "http://www.elangocheran.com"]
13 |                               [:email "elango.cheran@gmail.com"]
14 |                               [:timezone "-8"]]] 
15 | 
16 |   :dependencies [[org.clojure/clojure "1.8.0"]
17 |                  [org.clojure/data.csv "0.1.2"]
18 |                  [org.clojure/algo.generic "0.1.2"]
19 |                  [org.clojure/test.check "0.9.0"]
20 |                  [org.clojure/clojurescript "1.9.89"]]
21 |   
22 |   :jar-exclusions [#"\.cljx|\.swp|\.swo|\.DS_Store"]
23 | 
24 |   :aot [clj-thamil.main
25 |         clj-thamil.java.api.format
26 |         clj-thamil.format.analysis
27 |         clj-thamil.format.convert]
28 | 
29 |   :main clj-thamil.main  
30 | 
31 |   :lein-release {:deploy-via :shell
32 |                  :shell ["lein" "deploy"]}
33 | 
34 |   :profiles {:provided {:dependencies []}
35 |              :dev {:plugins [[lein-cljsbuild "1.1.3"]] 
36 |                    ;; :cljsbuild {:test-commands {"node" ["node" :node-runner "target/testable.js"]}
37 |                    ;;             :builds [{:source-paths ["target/classes" "target/test-classes"]
38 |                    ;;                       :compiler {:output-to "target/testable.js"
39 |                    ;;                                  :optimizations :advanced
40 |                    ;;                                  :pretty-print true}}]}
41 |                    
42 |                    :cljsbuild {:builds {:app {:source-paths ["src"]
43 |                                               :compiler {:output-to     "resources/public/js/clj-thamil.js"
44 |                                                          :output-dir    "resources/public/js/out"
45 |                                                          :externs       []
46 |                                                          :optimizations :none
47 |                                                          :pretty-print  true}}}
48 |                                :test-commands {
49 |                                                ;; no cljs test configured yet
50 |                                                }}}})
51 | 


--------------------------------------------------------------------------------
/examples/js/test03.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |   <head>
 3 |     <meta charset="utf-8">
 4 |   </head>
 5 |   <body>
 6 |     <script charset="utf-8" type="text/javascript" src="js/goog/base.js"></script>
 7 |     <script charset="utf-8" type="text/javascript" src="js/clj-thamil.js"></script>
 8 | 
 9 |     <script charset="utf-8" type="text/javascript" src="js/clojure/string.js"></script>
10 |     <script charset="utf-8" type="text/javascript" src="js/cljs/core.js"></script>
11 | 
12 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/core.js"></script>
13 |     <script charset="utf-8" type="text/javascript" src="js/clojure/set.js"></script>
14 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/format.js"></script>
15 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/format/convert.js"></script>
16 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/js/api/convert.js"></script>
17 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/js/api/format.js"></script>
18 | 
19 |     <script type="text/javascript">
20 |     function insertText(name, text) { var elem = document.getElementById(name); elem.value += text; }
21 |     function setText(name, text) { var elem = document.getElementById(name); elem.value = text; }
22 |     function clearContents(element) { element.value = ''; }
23 |     </script>
24 | 
25 |     <table border="0">
26 |       <tr>
27 |         <th>Unsorted தமிழ் words (input)</th>
28 |         <th>Sorted தமிழ் words (output)</th>
29 |       </tr>
30 |       <tr>
31 |         <td>
32 |           <textarea rows="30" cols="50" id="unsorted">Type here... </textarea>
33 |         </td>
34 |         <td>
35 |           <textarea rows="30" cols="50" id="sorted" readonly>Output comes here automatically </textarea>
36 |         </td>
37 |       </tr>
38 |     </table>
39 | 
40 |     <script type="text/javascript">
41 |       // alert(clj_thamil.js.api.convert.romanized_to_thamil("vaNakkam"));
42 |     </script>
43 | 
44 |     <script type="text/javascript">
45 | 
46 |       var u_elem = document.getElementById("unsorted");
47 |       var s_elem = document.getElementById("sorted");
48 | 
49 |       var sort_thamil_words = function() {
50 |         var u_txt = u_elem.value;
51 |         var u_words = clojure.string.split_lines(u_txt);
52 |         var s_words = clj_thamil.js.api.format.thamil_sort(u_words);
53 |         var s_txt = clojure.string.join("\n", s_words);
54 |         s_elem.value = s_txt;
55 |       }
56 | 
57 |       var txt_onchange = function() {
58 |         sort_thamil_words();
59 |       }
60 |    
61 |       u_elem.onkeyup = txt_onchange;
62 |       u_elem.onblur = txt_onchange;
63 |       s_elem.onkeyup = txt_onchange;
64 |       s_elem.onblur = txt_onchange;
65 |     
66 |     </script>
67 | 
68 |   </body>
69 | </html>
70 | 


--------------------------------------------------------------------------------
/examples/js/test02.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |   <head>
 3 |     <meta charset="utf-8">
 4 |   </head>
 5 |   <body>
 6 |     <script charset="utf-8" type="text/javascript" src="js/goog/base.js"></script>
 7 |     <script charset="utf-8" type="text/javascript" src="js/clj-thamil.js"></script>
 8 | 
 9 |     <script charset="utf-8" type="text/javascript" src="js/clojure/string.js"></script>
10 |     <script charset="utf-8" type="text/javascript" src="js/cljs/core.js"></script>
11 | 
12 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/core.js"></script>
13 |     <script charset="utf-8" type="text/javascript" src="js/clojure/set.js"></script>
14 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/format.js"></script>
15 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/format/convert.js"></script>
16 |     <script charset="utf-8" type="text/javascript" src="js/clj_thamil/js/api/convert.js"></script>
17 | 
18 |     <script type="text/javascript">
19 |     function insertText(name, text) { document.getElementById(name).value += text; } // append text to the field whose id is name
20 |     function setText(name, text) { document.getElementById(name).value = text; } // overwrite the field whose id is name
21 |     function clearContents(element) { element.value = ''; } // empty the given field element
22 |     </script>
23 | 
24 |     <table border="0">
25 |       <tr>
26 |         <th>English transliterated input</th>
27 |         <th>தமிழ் output</th>
28 |       </tr>
29 |       <tr>
30 |         <td>
31 |           <textarea rows="10" cols="50" id="english">Type here... </textarea>
32 |         </td>
33 |         <td>
34 |           <textarea rows="10" cols="50" id="thamil" readonly>Output comes here automatically </textarea>
35 |         </td>
36 |       </tr>
37 |       <tr>
38 |         <td> 
39 |         </td>
40 |         <td>
41 |           <textarea rows="10" cols="50" id="letters" readonly>Output split by letters</textarea>
42 |         </td>
43 |       </tr>
44 |     </table>
45 | 
46 |     <script type="text/javascript">
47 |     </script>
48 | 
49 |     <script type="text/javascript">
50 |       var e_elem = document.getElementById("english");
51 |       var t_elem = document.getElementById("thamil");
52 |       var l_elem = document.getElementById("letters");
53 | 
54 |       // Show the transliteration of the current "english" input in the "thamil" box.
55 |       var transliterate = function() {
56 |         t_elem.value = clj_thamil.js.api.convert.romanized_to_thamil(e_elem.value);
57 |       };
58 | 
59 |       // Show the transliteration split into letters, space-separated, in the "letters" box.
60 |       var split_by_letter = function() {
61 |         var t_txt = clj_thamil.js.api.convert.romanized_to_thamil(e_elem.value);
62 |         var letters = clj_thamil.format.str__GT_elems(t_txt);
63 |         l_elem.value = clojure.string.join(" ", letters);
64 |       };
65 | 
66 |       // Refresh both output boxes from the current input.
67 |       var txt_onchange = function() {
68 |         transliterate();
69 |         split_by_letter();
70 |       };
71 | 
72 |       // keyup and blur alone miss edits made without the keyboard (mouse paste,
73 |       // cut, drag-and-drop); the input event fires on every change of the value.
74 |       e_elem.oninput = txt_onchange;
75 |       e_elem.onkeyup = txt_onchange;
76 |       e_elem.onblur = txt_onchange;
77 |       // The output box is read-only; its handlers only re-run the same refresh.
78 |       t_elem.onkeyup = txt_onchange;
79 |       t_elem.onblur = txt_onchange;
80 |     
81 |     </script>
82 | 
83 |   </body>
84 | </html>
85 | 


--------------------------------------------------------------------------------
/examples/java/java-examples/src/main/java/clj-thamil/examples/java/WordSort01.java:
--------------------------------------------------------------------------------
 1 | package clj_thamil.examples.java;
 2 | 
 3 | import clojure.java.api.Clojure;
 4 | import clojure.lang.IFn;
 5 | import java.io.File;
 6 | import java.io.FileNotFoundException;
 7 | import java.io.FileOutputStream;
 8 | import java.io.IOException;
 9 | import java.io.PrintWriter;
10 | import java.io.UnsupportedEncodingException;
11 | import java.nio.charset.Charset;
12 | import java.util.ArrayList;
13 | import java.util.Arrays;
14 | import java.util.Collections;
15 | import java.util.Comparator;
16 | import java.util.List;
17 | import org.apache.commons.lang3.StringUtils;
18 | 
19 | /** Example: sorts Thamil words with the clj-thamil.format/word-comp comparator, obtained through the Clojure Java API. */
20 | public class WordSort01 {
21 |     /** Encodes {@code s} to UTF-8 bytes and decodes those bytes back into a new String. */
22 |     public static String utf8String(String s) throws UnsupportedEncodingException {
23 |         byte[] array = s.getBytes("UTF-8");
24 |         return new String(array, Charset.forName("UTF-8"));
25 |     }
26 | 
27 |     /** Prints a fixed word list in its original order and then sorted by word-comp. */
28 |     @SuppressWarnings("unchecked") // deref hands the comparator back as a plain Object
29 |     public static void main(String[] args) throws UnsupportedEncodingException, 
30 |                                                   FileNotFoundException,
31 |                                                   IOException {
32 |         // (require 'clj-thamil.format)
33 |         IFn require = Clojure.var("clojure.core", "require");
34 |         require.invoke(Clojure.read("clj-thamil.format"));
35 |         // access clj-thamil.format/word-comp, a non-fn var
36 |         IFn wordCompVar = Clojure.var("clj-thamil.format", "word-comp");
37 |         IFn deref = Clojure.var("clojure.core", "deref");
38 |         Comparator<String> wordComp = (Comparator<String>) deref.invoke(wordCompVar);
39 | 
40 |         List<String> strs = Arrays.asList(
41 |                                           "மடம்",
42 |                                           "மட்டம்",
43 |                                           "மட்டும்",
44 |                                           "மடக்கு",
45 |                                           "முடக்கு",
46 |                                           "முடுக்கு",
47 |                                           "படம்",
48 |                                           "குடம்",
49 |                                           "தடம்",
50 |                                           "தடி",
51 |                                           "திட்டம்"
52 |                                           );
53 |         List<String> strs2 = new ArrayList<String>(strs); // sort a copy so strs keeps its original order
54 |         Collections.sort(strs2, wordComp);
55 |         String list1 = StringUtils.join(strs, "\n");
56 |         String list2 = StringUtils.join(strs2, "\n");
57 | 
58 |         // Write UTF-8 explicitly: the platform default charset may not be able to encode Thamil text.
59 |         java.io.PrintStream out = new java.io.PrintStream(System.out, true, "UTF-8");
60 |         out.println("Original list of strings:");
61 |         out.println(list1);
62 |         out.println("Sorted   list of strings:");
63 |         out.println(list2);
64 |         // String fileName = "out.txt";
65 |         // FileOutputStream fos = new FileOutputStream(new File(fileName));
66 |         // PrintWriter pw = new PrintWriter(fos);
67 |         // pw.println("Original list of strings:");
68 |         // pw.println(list1);
69 |         // pw.println("Sorted   list of strings:");
70 |         // pw.println(list2);
71 |         // pw.close();
72 |         // fos.close();
73 |     }
74 | }
75 | 


--------------------------------------------------------------------------------
/examples/java/java-examples/src/main/java/clj-thamil/examples/java/WordSort02.java:
--------------------------------------------------------------------------------
 1 | package clj_thamil.examples.java;
 2 | 
 3 | import clojure.java.api.Clojure;
 4 | import clojure.lang.IFn;
 5 | import java.io.File;
 6 | import java.io.FileNotFoundException;
 7 | import java.io.FileOutputStream;
 8 | import java.io.IOException;
 9 | import java.io.PrintWriter;
10 | import java.io.UnsupportedEncodingException;
11 | import java.nio.charset.Charset;
12 | import java.util.ArrayList;
13 | import java.util.Arrays;
14 | import java.util.Collections;
15 | import java.util.Comparator;
16 | import java.util.List;
17 | import org.apache.commons.lang3.StringUtils;
18 | 
19 | import clj_thamil.java.api.format;
20 | 
21 | /** Example: sorts Thamil words with the comparator returned by clj_thamil.java.api.format.word_comp(). */
22 | public class WordSort02 {
23 |     /** Encodes {@code s} to UTF-8 bytes and decodes those bytes back into a new String. */
24 |     public static String utf8String(String s) throws UnsupportedEncodingException {
25 |         byte[] array = s.getBytes("UTF-8");
26 |         return new String(array, Charset.forName("UTF-8"));
27 |     }
28 | 
29 |     /** Prints a fixed word list in its original order and then sorted by the word comparator. */
30 |     @SuppressWarnings("unchecked") // word_comp() is assigned here from a raw Comparator
31 |     public static void main(String[] args) throws UnsupportedEncodingException, 
32 |                                                   FileNotFoundException,
33 |                                                   IOException {
34 |         Comparator<String> wordComp = format.word_comp();
35 |         List<String> strs = Arrays.asList(
36 |                                           "மடம்",
37 |                                           "மட்டம்",
38 |                                           "மட்டும்",
39 |                                           "மடக்கு",
40 |                                           "முடக்கு",
41 |                                           "முடுக்கு",
42 |                                           "படம்",
43 |                                           "குடம்",
44 |                                           "தடம்",
45 |                                           "தடி",
46 |                                           "திட்டம்"
47 |                                           // alternative list written as Unicode escapes (disabled):
48 |                                           // "\u0bae\u0b9f\u0bae\u0bcd",
49 |                                           // "\u0bae\u0b9f\u0bcd\u0b9f\u0bae\u0bcd",
50 |                                           // "\u0bae\u0b9f\u0bcd\u0b9f\u0bc1\u0bae\u0bcd",
51 |                                           // "\u0bae\u0b9f\u0b95\u0bcd\u0b95\u0bc1",
52 |                                           // "\u0bae\u0bc1\u0b9f\u0b95\u0bcd\u0b95\u0bc1",
53 |                                           // "\u0bae\u0bc1\u0b9f\u0bc1\u0b95\u0bcd\u0b95\u0bc1",
54 |                                           // "\u0baa\u0b9f\u0bae\u0bcd",
55 |                                           // "\u0b95\u0bc1\u0b9f\u0bae\u0bcd",
56 |                                           // "\u0ba4\u0b9f\u0bae\u0bcd",
57 |                                           // "\u0ba4\u0b9f\u0bbf",
58 |                                           // "\u0ba4\u0bbf\u0b9f\u0bcd\u0b9f\u0bae\u0bcd"
59 |                                           );
60 |         List<String> strs2 = new ArrayList<String>(strs); // sort a copy so strs keeps its original order
61 |         Collections.sort(strs2, wordComp);
62 |         String list1 = StringUtils.join(strs, "\n");
63 |         String list2 = StringUtils.join(strs2, "\n");
64 | 
65 |         java.io.PrintStream out = new java.io.PrintStream(System.out, true, "UTF-8"); // explicit UTF-8: the default charset may not encode Thamil
66 |         out.println("Original list of strings:");
67 |         out.println(list1);
68 |         out.println("Sorted   list of strings:");
69 |         out.println(list2);
70 |     }
71 | }
72 | 


--------------------------------------------------------------------------------
/examples/java/java-examples/pom.xml:
--------------------------------------------------------------------------------
  1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  2 |   xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  3 |   <modelVersion>4.0.0</modelVersion>
  4 |   <groupId>clj-thamil</groupId>
  5 |   <artifactId>java-examples</artifactId>
  6 |   <packaging>jar</packaging>
  7 |   <version>1.0</version>
  8 |   <name>java-examples</name>
  9 |   <url>https://github.com/echeran/clj-thamil-examples</url>
 10 |   <repositories>
 11 |     <repository>
 12 |       <id>central</id>
 13 |       <url>https://repo1.maven.org/maven2/</url>
 14 |       <snapshots>
 15 |         <enabled>false</enabled>
 16 |       </snapshots>
 17 |       <releases>
 18 |         <enabled>true</enabled>
 19 |       </releases>
 20 |     </repository>
 21 |     <repository>
 22 |       <id>clojars</id>
 23 |       <url>https://clojars.org/repo/</url>
 24 |       <snapshots>
 25 |         <enabled>true</enabled>
 26 |       </snapshots>
 27 |       <releases>
 28 |         <enabled>true</enabled>
 29 |       </releases>
 30 |     </repository>
 31 |     <repository>
 32 |       <id>releases</id>
 33 |       <url>https://clojars.org/repo/</url>
 34 |       <snapshots>
 35 |         <enabled>true</enabled>
 36 |       </snapshots>
 37 |       <releases>
 38 |         <enabled>true</enabled>
 39 |       </releases>
 40 |     </repository>
 41 |   </repositories>
 42 |   <dependencies>
 43 |     <dependency>
 44 |       <groupId>junit</groupId>
 45 |       <artifactId>junit</artifactId>
 46 |       <version>3.8.1</version>
 47 |       <scope>test</scope>
 48 |     </dependency>
 49 |     <dependency>
 50 |       <groupId>clj-thamil</groupId>
 51 |       <artifactId>clj-thamil</artifactId>
 52 |       <version>0.2.0</version>
 53 |     </dependency>
 54 |     <dependency>
 55 |       <groupId>org.apache.commons</groupId>
 56 |       <artifactId>commons-lang3</artifactId>
 57 |       <version>3.3.2</version>
 58 |     </dependency>
 59 |     <!--
 60 |       maven-compiler-plugin is a build plugin, not a library the examples link
 61 |       against; it is configured in the build/plugins section of this POM.
 62 |       Listing it here as a dependency would put it on the compile classpath
 63 |       and let the shade plugin bundle it into the packaged jar.
 64 |     -->
 65 |   </dependencies>
 66 |   <build>
 67 |     <plugins>
 68 |       <plugin>
 69 |         <groupId>org.apache.maven.plugins</groupId>
 70 |         <artifactId>maven-compiler-plugin</artifactId>
 71 |         <version>3.2</version>
 72 |         <configuration>
 73 |           <source>1.6</source>
 74 |           <target>1.6</target>
 75 |           <encoding>${project.build.sourceEncoding}</encoding>
 76 |         </configuration>
 77 |       </plugin>
 78 |       <plugin>
 79 |         <groupId>org.apache.maven.plugins</groupId>
 80 |         <artifactId>maven-shade-plugin</artifactId>
 81 |         <version>2.3</version>
 82 |         <executions>
 83 |           <execution>
 84 |             <!-- <id>WordSort01</id> -->
 85 |             <phase>package</phase>
 86 |             <goals>
 87 |               <goal>shade</goal>
 88 |             </goals>
 89 |             <!-- <configuration> -->
 90 |             <!--   <transformers> -->
 91 |             <!--     <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> -->
 92 |             <!--       <mainClass>clj_thamil.examples.java.WordSort01</mainClass> -->
 93 |             <!--     </transformer> -->
 94 |             <!--   </transformers> -->
 95 |             <!-- </configuration> -->
 96 |           </execution>
 97 |         </executions>
 98 |       </plugin>
 99 |     </plugins>
100 |   </build>
101 |   <properties>
102 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
103 | </properties>
104 | </project>
105 | 


--------------------------------------------------------------------------------
/examples/clj/clj-español/src/clj_español/core.clj:
--------------------------------------------------------------------------------
  1 | (ns clj-español.core
  2 |   (:require [clj-thamil.core :refer [translate-fns translate-forms]]))
  3 | 
  4 | (def fns-map '{ ;; English Clojure symbol -> Spanish symbol; passed to translate-fns at the end of this file
  5 |                take toma
  6 |                drop baja
  7 |                inc carga ;; translated to "load" in English
  8 |                dec extracto ;; based solely on translation. need more info/context 
  9 |                             ;; to decide what connotations/denotations would be best 
 10 |                range gama
 11 |                take-while toma-mientras
 12 |                drop-while baja-mientras
 13 |                interleave பின்னு ;; NOTE(review): பின்னு is Tamil, not Spanish. The translator was not
 14 |                                  ;; sure what interleave does, so no Spanish name has been chosen yet.
 15 |                ;; reduce reduce
 16 |                ;; reducer reductor
 17 |                map mapa
 18 |                hash-map hachís-mapa 
 19 |                ;; vector vector 
 20 |                list enumera
 21 |                set pone 
 22 |                hash-set hachís-pone ;; could use fijo or colocar as "set"
 23 |                atom átomo
 24 |                agent agencia ;; or agente
 25 |                first primero
 26 |                second segundo
 27 |                last último
 28 |                butlast pero-último ;; not sure of what this should communicate.
 29 |                rest lodemás ;; should we separate the words into "lo demás" 
 30 |                             ;; or shorten it to "demás"? Could also use "el resto"
 31 |                next próximo ;; this is used in the present, but "siguiente" is used
 32 |                             ;; in the past. Not sure which makes more sense. 
 33 |                true cierto
 34 |                false falso
 35 |                print imprime 
 36 |                println imprimeln ;; ln means "line" in English, and 
 37 |                                  ;; line in Spanish is simply "linea" so 
 38 |                                  ;; I thought it appropriate to keep it.
 39 |                filter forma
 40 |                remove quita
 41 |                keep guardar
 42 |                slurp ventosa;; if this should be a verb, use "sorber"
 43 |                spit escupe ;; could be "saliva" if it's not an action
 44 |                seq sec ;; short for "secuenciar"
 45 |                dorun hazcorrer ;; could also just use "haz" meaning "do"
 46 |                doall haztodo ;; literally means do it all
 47 |                str crd ;; short for "cuerda" which translates to string
 48 |                interpose interpone 
 49 |                find encuentra 
 50 |                get consigue 
 51 |                apply aplica
 52 |                count cuenta
 53 |                every? cada?
 54 |                true? cierto?
 55 |                false? falso?
 56 |                concat social ;; NOTE(review): "social" looks unrelated to concatenation; confirm the intended word
 57 |                identity identidad
 58 |                reverse invierte
 59 |                some alguno
 60 |                flatten aplana
 61 | 
 62 |                boolean booleano ;; sounds like English but couldn't find
 63 |                                 ;; a more specific word
 64 |                })
 65 | 
 66 | (def forms-map '{ ;; English Clojure form symbol -> Spanish symbol; passed to translate-forms at the end of this file
 67 |                  if si
 68 |                  when cuando
 69 |                  if-not si-no
 70 |                  when-not cuando-no
 71 |                  ;; def def ;; short for "definir" = define
 72 |                  ;; fn fn ;; short for "función" 
 73 |                  ;; defn defn ;; again, it still makes sense to keep the 
 74 |                            ;; English equivalents because romance languages
 75 |                            ;; can sometimes have the same abbreviations
 76 |                  let deja
 77 |                  and y
 78 |                  or o
 79 |                  not no
 80 |                  else más ;; or "otro". NOTE(review): Clojure itself has no `else` form (cond uses :else); confirm this entry is needed
 81 |                  loop darvuelta ;; actually two words "dar vuelta"
 82 |                  doseq hazsec
 83 |                  for para ;; could be por, but I think para fits the function better
 84 |                  cond dependela ;; means "depending on the ..." --> dependiendo de la
 85 |                  do haz
 86 |                  
 87 |                  ;; clojure.test
 88 |                  deftest def-prueba ;; not sure if I should hyphenate all the
 89 |                                     ;; double words, or keep as a compound?
 90 |                  testing probando 
 91 |                  is es
 92 |                  are son
 93 |                 })
 94 | 
 95 | ;; do the actual "translation" for bindings, fns, and any other value
 96 | (translate-fns fns-map)
 97 | 
 98 | ;; do the actual "translation" for macros and special forms
 99 | (translate-forms forms-map)
100 | 


--------------------------------------------------------------------------------
/examples/clj/clj-spanish/src/clj_spanish/core.clj:
--------------------------------------------------------------------------------
  1 | (ns clj-spanish.core
  2 |   (:require [clj-thamil.core :refer [translate-fns translate-forms]]))
  3 | 
  4 | (def fns-map '{ ;; English Clojure symbol -> Spanish symbol; passed to translate-fns at the end of this file
  5 |                take toma
  6 |                drop baja
  7 |                inc carga ;; translated to "load" in English
  8 |                dec extracto ;; based solely on translation. need more info/context 
  9 |                             ;; to decide what connotations/denotations would be best 
 10 |                range gama
 11 |                take-while toma-mientras
 12 |                drop-while baja-mientras
 13 |                interleave பின்னு ;; NOTE(review): பின்னு is Tamil, not Spanish. The translator was not
 14 |                                  ;; sure what interleave does, so no Spanish name has been chosen yet.
 15 |                ;; reduce reduce
 16 |                ;; reducer reductor
 17 |                map mapa
 18 |                hash-map hachís-mapa 
 19 |                ;; vector vector 
 20 |                list enumera
 21 |                set pone 
 22 |                hash-set hachís-pone ;; could use fijo or colocar as "set"
 23 |                atom átomo
 24 |                agent agencia ;; or agente
 25 |                first primero
 26 |                second segundo
 27 |                last último
 28 |                butlast pero-último ;; not sure of what this should communicate.
 29 |                rest lodemás ;; should we separate the words into "lo demás" 
 30 |                             ;; or shorten it to "demás"? Could also use "el resto"
 31 |                next próximo ;; this is used in the present, but "siguiente" is used
 32 |                             ;; in the past. Not sure which makes more sense. 
 33 |                true cierto
 34 |                false falso
 35 |                print imprime 
 36 |                println imprimeln ;; ln means "line" in English, and 
 37 |                                  ;; line in Spanish is simply "linea" so 
 38 |                                  ;; I thought it appropriate to keep it.
 39 |                filter forma
 40 |                remove quita
 41 |                keep guardar
 42 |                slurp ventosa;; if this should be a verb, use "sorber"
 43 |                spit escupe ;; could be "saliva" if it's not an action
 44 |                seq sec ;; short for "secuenciar"
 45 |                dorun hazcorrer ;; could also just use "haz" meaning "do"
 46 |                doall haztodo ;; literally means do it all
 47 |                str crd ;; short for "cuerda" which translates to string
 48 |                interpose interpone 
 49 |                find encuentra 
 50 |                get consigue 
 51 |                apply aplica
 52 |                count cuenta
 53 |                every? cada?
 54 |                true? cierto?
 55 |                false? falso?
 56 |                concat social ;; NOTE(review): "social" looks unrelated to concatenation; confirm the intended word
 57 |                identity identidad
 58 |                reverse invierte
 59 |                some alguno
 60 |                flatten aplana
 61 | 
 62 |                boolean booleano ;; sounds like English but couldn't find
 63 |                                 ;; a more specific word
 64 |                })
 65 | 
 66 | (def forms-map '{ ;; English Clojure form symbol -> Spanish symbol; passed to translate-forms at the end of this file
 67 |                  if si
 68 |                  when cuando
 69 |                  if-not si-no
 70 |                  when-not cuando-no
 71 |                  ;; def def ;; short for "definir" = define
 72 |                  ;; fn fn ;; short for "función" 
 73 |                  ;; defn defn ;; again, it still makes sense to keep the 
 74 |                            ;; English equivalents because romance languages
 75 |                            ;; can sometimes have the same abbreviations
 76 |                  let deja
 77 |                  and y
 78 |                  or o
 79 |                  not no
 80 |                  else más ;; or "otro". NOTE(review): Clojure itself has no `else` form (cond uses :else); confirm this entry is needed
 81 |                  loop darvuelta ;; actually two words "dar vuelta"
 82 |                  doseq hazsec
 83 |                  for para ;; could be por, but I think para fits the function better
 84 |                  cond dependela ;; means "depending on the ..." --> dependiendo de la
 85 |                  do haz
 86 |                  
 87 |                  ;; clojure.test
 88 |                  deftest def-prueba ;; not sure if I should hyphenate all the
 89 |                                     ;; double words, or keep as a compound?
 90 |                  testing probando 
 91 |                  is es
 92 |                  are son
 93 |                 })
 94 | 
 95 | ;; do the actual "translation" for bindings, fns, and any other value
 96 | (translate-fns fns-map)
 97 | 
 98 | ;; do the actual "translation" for macros and special forms
 99 | (translate-forms forms-map)
100 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # clj-thamil
  2 | 
  3 | clj-thamil is a Clojure library designed to be a multi-purpose library for Thamil
  4 | computing.
  5 | 
  6 | It can be used for natural language processing, designing input
  7 | methods, the UI for text editing, etc.
  8 | 
  9 | It can also be used as a basis for programming in Clojure in the
 10 | Thamil language.
 11 | 
 12 | Currently, it can support the following:
 13 | * programming in Clojure (programming language) in Thamil (natural language)
 14 | * natural language processing for Thamil language text
 15 |   * split a string into Thamil letters (not characters) and phonemes
 16 |   * combine a sequence of Thamil phonemes back into a proper Thamil string
 17 |   * sort letters, words, etc. by Thamil alphabetical order
 18 |   * convert a string between Thamil (Unicode format) and:
 19 |     * English-transliterated formats
 20 |     * TAB format
 21 |     * TSCII format
 22 |     * Bamini format
 23 |     * Webulagam format
 24 |   * basic grammar functions - pluralize, add suffixes, and add noun
 25 |     case suffixes
 26 | * perform a letter frequency analysis on input Thamil text
 27 | 
 28 | For examples of programming in Thamil (natural language), see:
 29 | * `test/clj_thamil/demo/core_test.clj` - a gradual
 30 | replacement of English into Thamil
 31 | * `test/clj_thamil/demo/trans_demo_01.cljc` - just about entirely
 32 |   in Thamil.  Demonstrates squaring numbers differently.
 33 | * `src/clj_thamil/மொழியியல்.cljc` - just about
 34 | entirely in Thamil.  Contains functions for basic grammar in Thamil
 35 | (making plurals, adding suffixes, adding noun case suffixes)
 36 | 
 37 | ## Building
 38 | 
 39 | For both Clojure and ClojureScript, compiling, testing, and starting a
 40 | REPL can be done with the usual Leiningen commands `lein compile`,
 41 | `lein test`, and `lein repl`.
 42 | 
 43 | Compile the source into an executable JAR file (runnable on the JVM,
 44 | based on Clojure sources) using the command `lein uberjar`.  This is
 45 | all you need to do to run one of the standalone processes or
 46 | create a jar artifact.  `lein install` will install the artifact and
 47 | pom.xml into your local Maven cache.
 48 | 
 49 | ## Usage
 50 | 
 51 | ### Examples
 52 | 
 53 | #### Java Examples
 54 | 
 55 | The Java example code is in the Maven project in this repository at
 56 | [`examples/java/java-examples`](examples/java/java-examples/README.md).
 57 | See that page for instructions on building and running.
 58 | 
 59 | #### JavaScript Examples
 60 | 
 61 | The Clojure/-Script code in clj-thamil must be compiled using the
 62 | ClojureScript compiler into JavaScript before it can be used in JS
 63 | runtimes (ex: webpages).  In the JS examples directory
 64 | [`examples/js`](examples/js), first run `sh setup.sh`.  Then load the
 65 | HTML files located in the same directory from within a browser that
 66 | has JS enabled.
 67 | 
 68 | * test01.html - transliteration of English into Thamil on a JS popup box 
 69 | * test02.html - transliterates English into Thamil and splits Thamil into letters
 70 | * test03.html - sorting Thamil words by Thamil alphabetical order
 71 | 
 72 | ### Command-line processes
 73 | 
 74 | #### Letter frequencies
 75 | 
 76 | The frequency analysis program (`freqs`) can be
 77 | run by
 78 | ```
 79 | cat input | lein run freqs > output
 80 | ```
 81 | 
 82 | The frequency analysis program
 83 | takes input from the standard input stream and outputs to the standard
 84 | output stream.
 85 | 
 86 | The code behind the letter frequency analysis can be found in the namespace
 87 | `clj-thamil.format.analysis`.
 88 | 
 89 | #### Phonemes
 90 | 
 91 | The text to phoneme converter program (`phonemes`) can be run by
 92 | ```
 93 | cat input | lein run phonemes > output
 94 | ```
 95 | 
 96 | The phonemes program
 97 | takes input from the standard input stream and outputs to the standard
 98 | output stream.
 99 | 
100 | The code behind the phoneme conversion can be found in the namespaces `clj-thamil.subprograms` and `clj-thamil.format`.
101 | 
102 | ## Editing
103 | 
104 | For programming in Thamil, if using a computer running Mac OS X, use
105 | the Aquamacs program (a Mac OS X-friendly version of Emacs) to ensure that support for Thamil letters works
106 | correctly.  If installing the `clojure-mode` package for Clojure
107 | support in Aquamacs, find the `clojure-mode.el` file in your Aquamacs
108 | MELPA/ELPA repository, and replace it with the file
109 | `emacs/clojure-mode.el` in this repository.  Then load the newly-saved
110 | `clojure-mode.el` file in the Aquamacs package repository into
111 | Aquamacs, and run the command `M-x emacs-lisp-byte-compile-and-load`. 
112 | 
113 | ## Mailing List
114 | 
115 | Join the [clj-thamil mailing list](https://groups.google.com/forum/#!forum/clj-thamil) to post questions and receive announcements.
116 | 
117 | ## License
118 | 
119 | Distributed under the Eclipse Public License, the same as Clojure.
120 | 


--------------------------------------------------------------------------------
/src/clj_thamil/format/analysis.cljc:
--------------------------------------------------------------------------------
  1 | (ns clj-thamil.format.analysis
  2 |   (:require #?(:clj [clojure.java.io :as jio])
  3 |             [clojure.string :as string]
  4 |             [clj-thamil.format :as fmt])
  5 |   #?(:clj (:gen-class)))
  6 | 
  7 | (def letters-plus-grantha ;; letter grid: the rows of fmt/letters followed by the extra rows listed here
  8 |   (concat fmt/letters 
  9 |           [["ஜ்" "ஜ" "ஜா" "ஜி" "ஜீ" "ஜு" "ஜூ" "ஜெ" "ஜே" "ஜை" "ஜொ" "ஜோ" "ஜௌ"]
 10 |            ["ஷ்" "ஷ" "ஷா" "ஷி" "ஷீ" "ஷு" "ஷூ" "ஷெ" "ஷே" "ஷை" "ஷொ" "ஷோ" "ஷௌ"]
 11 |            ["ஸ்" "ஸ" "ஸா" "ஸி" "ஸீ" "ஸு" "ஸூ" "ஸெ" "ஸே" "ஸை" "ஸொ" "ஸோ" "ஸௌ"]
 12 |            ["ஹ்" "ஹ" "ஹா" "ஹி" "ஹீ" "ஹு" "ஹூ" "ஹெ" "ஹே" "ஹை" "ஹொ" "ஹோ" "ஹௌ"]
 13 |            ["க்ஷ்" "க்ஷ" "க்ஷா" "க்ஷி" "க்ஷீ" "க்ஷு" "க்ஷூ" "க்ஷெ" "க்ஷே" "க்ஷை" "க்ஷொ" "க்ஷோ" "க்ஷௌ"]
 14 |            ["ஶ்ரீ"]])) ;; last row holds a single entry, unlike the 13-entry rows above
 15 | 
 16 | (defn trie-elem-freqs
 17 |   "Split the string s into elements using trie, and return a map from each element that is itself in the trie to the number of times it occurs in s."
 18 |   [trie s]
 19 |   ;; split -> drop the pieces that are not trie entries -> count what is left
 20 |   (->> s
 21 |        (fmt/str->elems trie)
 22 |        ;; an element survives only when in-trie? accepts it
 23 |        (keep #(when (fmt/in-trie? trie %) %))
 24 |        frequencies))
 25 | 
 26 | (defn trie-elem-string-seq-freqs
 27 |   "Compute trie-elem-freqs for every string in strs and add the per-string counts together into one frequency map."
 28 |   [trie strs]
 29 |   (apply merge-with + (for [s strs] (trie-elem-freqs trie s))))
 30 | 
 31 | (defn freq-grid
 32 |   "Look up every letter of letter-seq (a flattened letter grid) in freq-map, using 0 for letters without a count, and return the counts cut into rows of at most 13."
 33 |   [letter-seq freq-map]
 34 |   (->> letter-seq
 35 |        (map (fn [letter] (or (get freq-map letter) 0)))
 36 |        (partition-all 13)))
 37 | 
 38 | ;;
 39 | ;; printing functions
 40 | ;;
 41 | 
 42 | #?(:clj
 43 |    (defn print-freq-grid
 44 |      "Print the frequency grid and then the letter grid to std out, one tab-separated row per line, each preceded by a heading."
 45 |      [freq-grid letter-grid]
 46 |      (letfn [(print-grid [grid] (run! #(println (string/join "\t" %)) grid))]
 47 |        (println "the letter grid's frequencies:")
 48 |        (println "")
 49 |        (print-grid freq-grid)
 50 |        (println "")
 51 |        (println "the letter grid used:")
 52 |        (print-grid letter-grid))))
 53 | #?(:clj
 54 |    (defn print-consonant-row-sums
 55 |      "Given a frequency grid shaped like the letter grid, and the letter grid itself, print the total of each row after the first, labelled with that row's first letter."
 56 |      [freq-grid letter-grid]
 57 |      (let [;; the first row of each grid is skipped (the vowel row)
 58 |            row-names (->> letter-grid (map first) rest)
 59 |            row-sums (->> freq-grid (map #(apply + %)) rest)]
 60 |        (doseq [[rn rs] (map vector row-names row-sums)]
 61 |          (println "For consonant:" rn ", there are" rs "instances of it in a C or C+V letter")))))
 62 | 
 63 | #?(:clj
 64 |    (defn print-vowel-col-sums
 65 |      "given a frequency grid in the shape of a letter grid, and the letter grid itself, print out the sums of each vowel's column specifically among consonant and consonant+vowel letters (exclude pure vowels)"
 66 |      [freq-grid letter-grid]
 67 |      (let [;; drop the first row (the pure vowels) and keep only complete
 68 |            ;; 13-column rows, so every remaining row lines up by column
 69 |            full-rows (->> freq-grid
 70 |                           rest
 71 |                           (filter #(= 13 (count %))))
 72 |            ;; the first letter row supplies the column labels
 73 |            col-names (first letter-grid)
 74 |            ;; with no rows, (apply map + ...) would yield a transducer instead
 75 |            ;; of a seq, so an empty grid is treated as having no columns
 76 |            col-sums (if (seq full-rows) (apply map + full-rows) ())]
 77 |        (doseq [[cn cs] (map vector col-names col-sums)]
 78 |          (println "For vowel/ஃ:" cn ", there are" cs "instances of it in a C or C+V letter")))))
 79 | 
 80 | ;;
 81 | ;; umbrella printing fn
 82 | ;;
 83 | 
 84 | #?(:clj
 85 |    (defn print-letter-grid-stats-on-strs
 86 |      "for a given letter grid and a sequence of strings, print out all of the stats: the vowel column sums, then the consonant row sums, then the frequency grid itself"
 87 |      [letter-grid strs]
 88 |      (let [letter-seq (flatten letter-grid)
 89 |            letter-trie (fmt/make-trie letter-seq)
 90 |            str->letters (fn [s] (fmt/str->elems letter-trie s)) ;; NOTE(review): bound but never referenced below
 91 |            fmap (trie-elem-string-seq-freqs letter-trie strs) ;; the frequency map handed to freq-grid
 92 |            fgrid (freq-grid letter-seq fmap)] 
 93 |        (print-vowel-col-sums fgrid letter-grid)
 94 |        (println "")
 95 |        (print-consonant-row-sums fgrid letter-grid)
 96 |        (println "")
 97 |        (print-freq-grid fgrid letter-grid))))
 98 | 
 99 | ;;
100 | ;; main fn
101 | ;;
102 | 
103 | #?(:clj
104 |    (defn -main ;; entry point: prints letter-grid stats for the lines read from *in*
105 |      [& args] ;; args are accepted but not used
106 |      (with-open [rdr (jio/reader *in*)] ;; with-open closes the reader once the body has finished
107 |        (let [lines (line-seq rdr)
108 |              letter-grid letters-plus-grantha]
109 |          (print-letter-grid-stats-on-strs letter-grid lines)))))
110 | 


--------------------------------------------------------------------------------
/test/clj_thamil/மொழியியல்_test.cljc:
--------------------------------------------------------------------------------
  1 | (ns clj-thamil.மொழியியல்-test
  2 |   (:use clojure.test
  3 |         clj-thamil.மொழியியல்
  4 |         clj-thamil.core))
  5 | 
  6 | (வரையறு-பரிசோதனை அசை-உயிரெழுத்து-பரிசோதனை
  7 |   (பரிசோதிக்கும் "குறில், நெடில் செயல்கூறுகள்"
  8 |     (பரிசோதிக்கும் "குறில் செயல்கூறுகள்"
  9 |       (ஆகும் (உண்மையா? (குறிலா? "அ")))
 10 |       (ஆகும் (உண்மையா? (குறிலா? "இ")))
 11 |       (ஆகும் (உண்மையா? (குறிலா? "உ")))
 12 |       (ஆகும் (உண்மையா? (குறிலா? "எ")))
 13 |       (ஆகும் (உண்மையா? (குறிலா? "ஒ")))
 14 |       (ஆகும் (பொய்மையா? (குறிலா? "ஆ")))
 15 |       (ஆகும் (பொய்மையா? (குறிலா? "ஈ")))
 16 |       (ஆகும் (பொய்மையா? (குறிலா? "ஊ")))
 17 |       (ஆகும் (பொய்மையா? (குறிலா? "ஏ")))
 18 |       (ஆகும் (பொய்மையா? (குறிலா? "ஓ")))
 19 |       (ஆகும் (பொய்மையா? (குறிலா? "ஐ")))
 20 |       (ஆகும் (பொய்மையா? (குறிலா? "ஔ")))
 21 |       (ஆகும் (பொய்மையா? (குறிலா? nil)))
 22 |       (ஆகும் (பொய்மையா? (குறிலா? ""))))
 23 |     (பரிசோதிக்கும் "நெடில் செயல்கூறுகள்"
 24 |       (ஆகும் (பொய்மையா? (நெடிலா? "அ")))
 25 |       (ஆகும் (பொய்மையா? (நெடிலா? "இ")))
 26 |       (ஆகும் (பொய்மையா? (நெடிலா? "உ")))
 27 |       (ஆகும் (பொய்மையா? (நெடிலா? "எ")))
 28 |       (ஆகும் (பொய்மையா? (நெடிலா? "ஒ"))) 
 29 |       (ஆகும் (உண்மையா? (நெடிலா? "ஆ")))
 30 |       (ஆகும் (உண்மையா? (நெடிலா? "ஈ")))
 31 |       (ஆகும் (உண்மையா? (நெடிலா? "ஊ")))
 32 |       (ஆகும் (உண்மையா? (நெடிலா? "ஏ")))
 33 |       (ஆகும் (உண்மையா? (நெடிலா? "ஓ")))
 34 |       (ஆகும் (பொய்மையா? (நெடிலா? "ஐ")))
 35 |       (ஆகும் (பொய்மையா? (நெடிலா? "ஔ")))
 36 |       (ஆகும் (பொய்மையா? (நெடிலா? nil)))
 37 |       (ஆகும் (பொய்மையா? (நெடிலா? ""))))))
 38 | 
 39 | (வரையறு-பரிசோதனை பன்மை-பரிசோதனை
 40 |   (பரிசோதிக்கும் "பன்மை"
 41 |     (ஆகும் (= "கற்கள்" (பன்மை "கல்")))
 42 |     (ஆகும் (= "முட்கள்" (பன்மை "முள்")))
 43 |     (ஆகும் (= "பற்கள்" (பன்மை "பல்")))
 44 |     (ஆகும் (= "தீக்கள்" (பன்மை "தீ")))
 45 |     (ஆகும் (= "பூக்கள்" (பன்மை "பூ")))
 46 |     (ஆகும் (= "பசுக்கள்" (பன்மை "பசு")))
 47 |     (ஆகும் (= "காடுகள்" (பன்மை "காடு")))
 48 |     (ஆகும் (= "மரங்கள்" (பன்மை "மரம்")))
 49 |     (ஆகும் (= "நாள்கள்" (பன்மை "நாள்")))))
 50 | 
 51 | (வரையறு-பரிசோதனை சந்தி-பொது-விதி-பரிசோதனை
 52 |   (பரிசோதிக்கும் "சந்தி பொது விதிகள்"
 53 |     (ஆகும் (= "தமிழ்நாடு" (சந்தி "தமிழ்" "நாடு")))
 54 |     (ஆகும் (= "தமிழீழம்" (சந்தி "தமிழ்" "ஈழம்")))
 55 |     (ஆகும் (= "செய்யளம்" (சந்தி "செய்" "அளம்")))
 56 |     (ஆகும் (= "கல்லாறு" (சந்தி "கல்" "ஆறு")))
 57 |     (ஆகும் (= "தேயிலை" (சந்தி "தே" "இலை")))
 58 |     (ஆகும் (= "மலையகம்" (சந்தி "மலை" "அகம்")))
 59 |     (ஆகும் (= "குடியுரிமை" (சந்தி "குடி" "உரிமை")))
 60 |     (ஆகும் (= "கையோடு" (சந்தி "கை" "ஓடு")))
 61 |     (ஆகும் (= "வேருடன்" (சந்தி "வேர்" "உடன்")))
 62 |     (ஆகும் (= "புழுவுக்கு" (சந்தி "புழு" "உக்கு")))
 63 |     (ஆகும் (= "புல்லை" (சந்தி "புல்" "ஐ")))
 64 |     (ஆகும் (= "பிடிக்கும்" (சந்தி "பிடி" "க்கும்")))
 65 |     (ஆகும் (= "பூவெல்லாம்" (சந்தி "பூ" "எல்லாம்")))
 66 |     (ஆகும் (= "சிலம்பாட்டம்" (சந்தி "சிலம்பு" "ஆட்டம்")))
 67 |     (ஆகும் (= "தூண்கள்" (சந்தி "தூண்" "கள்")))))
 68 | 
 69 | (வரையறு-பரிசோதனை வேற்றுமை-பரிசோதனை
 70 |   (பரிசோதிக்கும் "வேற்றுமை"
 71 |     (ஆகும் (= "மரத்தை" (வேற்றுமை "மரம்" "ஐ")))
 72 |     (ஆகும் (= "மரத்துக்கு" (வேற்றுமை "மரம்" "உக்கு")))
 73 |     (ஆகும் (= "மரத்தில்" (வேற்றுமை "மரம்" "இல்")))
 74 |     (ஆகும் (= "மரத்தால்" (வேற்றுமை "மரம்" "ஆல்"))) 
 75 |     (ஆகும் (= "கெடுவை" (வேற்றுமை "கெடு" "ஐ")))
 76 |     (ஆகும் (= "கெடுவுக்கு" (வேற்றுமை "கெடு" "உக்கு")))
 77 |     (ஆகும் (= "கெடுவில்" (வேற்றுமை "கெடு" "இல்")))
 78 |     (ஆகும் (= "கெடுவால்" (வேற்றுமை "கெடு" "ஆல்"))) 
 79 |     (ஆகும் (= "காட்டை" (வேற்றுமை "காடு" "ஐ")))
 80 |     (ஆகும் (= "காட்டுக்கு" (வேற்றுமை "காடு" "உக்கு")))
 81 |     (ஆகும் (= "காட்டில்" (வேற்றுமை "காடு" "இல்")))
 82 |     (ஆகும் (= "காட்டால்" (வேற்றுமை "காடு" "ஆல்"))) 
 83 |     (ஆகும் (= "பறுவை" (வேற்றுமை "பறு" "ஐ")))
 84 |     (ஆகும் (= "பறுவுக்கு" (வேற்றுமை "பறு" "உக்கு")))
 85 |     (ஆகும் (= "பறுவில்" (வேற்றுமை "பறு" "இல்")))
 86 |     (ஆகும் (= "பறுவால்" (வேற்றுமை "பறு" "ஆல்"))) 
 87 |     (ஆகும் (= "கயிற்றை" (வேற்றுமை "கயிறு" "ஐ")))
 88 |     (ஆகும் (= "கயிற்றுக்கு" (வேற்றுமை "கயிறு" "உக்கு")))
 89 |     (ஆகும் (= "கயிற்றில்" (வேற்றுமை "கயிறு" "இல்")))
 90 |     (ஆகும் (= "கயிற்றால்" (வேற்றுமை "கயிறு" "ஆல்"))) 
 91 |     (ஆகும் (= "எழுதுகோலை" (வேற்றுமை "எழுதுகோல்" "ஐ")))
 92 |     (ஆகும் (= "எழுதுகோலுக்கு" (வேற்றுமை "எழுதுகோல்" "உக்கு")))
 93 |     (ஆகும் (= "எழுதுகோலில்" (வேற்றுமை "எழுதுகோல்" "இல்")))
 94 |     (ஆகும் (= "எழுதுகோலால்" (வேற்றுமை "எழுதுகோல்" "ஆல்")))
 95 |     (பரிசோதிக்கும் "4-ஆம் வேற்றுமை - (உ)க்கு"
 96 |      (ஆகும் (= "தீயை" (வேற்றுமை "தீ" "ஐ")))
 97 |      (ஆகும் (= "தீக்கு" (வேற்றுமை "தீ" "உக்கு")))
 98 |      (ஆகும் (= "தீயில்" (வேற்றுமை "தீ" "இல்")))
 99 |      (ஆகும் (= "தீயால்" (வேற்றுமை "தீ" "ஆல்")))
100 |      (ஆகும் (= "காயை" (வேற்றுமை "காய்" "ஐ")))
101 |      (ஆகும் (= "காய்க்கு" (வேற்றுமை "காய்" "உக்கு"))) 
102 |      (ஆகும் (= "பொய்யை" (வேற்றுமை "பொய்" "ஐ")))
103 |      (ஆகும் (= "தொலைபேசியை" (வேற்றுமை "தொலைபேசி" "ஐ")))
104 |      (ஆகும் (= "தொலைபேசிக்கு" (வேற்றுமை "தொலைபேசி" "உக்கு"))) 
105 |      (ஆகும் (= "தேனீயை" (வேற்றுமை "தேனீ" "ஐ")))
106 |      (ஆகும் (= "தேனீக்கு" (வேற்றுமை "தேனீ" "உக்கு"))))))
107 | 


--------------------------------------------------------------------------------
/src/clj_thamil/core.cljc:
--------------------------------------------------------------------------------
  1 | (ns clj-thamil.core)
  2 | 
  3 | ;; Defines new-name as an alias for the value old-name currently has, e.g. (translate-fn inc my-inc) expands to (def my-inc inc).
  4 | (defmacro translate-fn
  5 |   [old-name new-name]
  6 |   `(def ~new-name ~old-name))
  7 | ;; Defines the symbol that new-name evaluates to as an alias for the symbol that old-name evaluates to; both arguments are eval'd at macro-expansion time, so callers pass quoted symbols.
  8 | (defmacro translate-fn-symbol
  9 |   [old-name new-name]
 10 |   `(def ~(eval new-name) ~(eval old-name)))
 11 | ;; Evals symb-map at macro-expansion time and expands to a `do` holding one translate-fn-symbol form per [old new] entry of that map.
 12 | (defmacro translate-fns
 13 |   [symb-map]
 14 |   `(do
 15 |      ~@
 16 |      (for [[old-form# new-form#] (eval symb-map)]
 17 |        `(translate-fn-symbol '~old-form# '~new-form#))))
 18 | 
 19 | ;; info on macro-writing macros based on info at
 20 | ;; http://amalloy.hubpages.com/hub/Clojure-macro-writing-macros
 21 | 
 22 | (defmacro translate-form
 23 |   "Does the effective translation of a special form or macro from its old name to its new name.  In other words, generalizes the 'manual' process of defining something like:
 24 | (defmacro எனில்
 25 |   [& body]
 26 |   `(if ~@body))"
 27 |   [old-name new-name]
 28 |   `(defmacro ~new-name
 29 |      [~'& body#]
 30 |      `(~'~old-name ~@body#)))
 31 | 
 32 | ;; not sure if/how to shorten செயல்கூறு, வரையறு-செயல்கூறு, வைத்துக்கொள்
 33 | 
 34 | (defmacro translate-form-symbol
 35 |   "Does the effective translation of a special form or macro from its old name to its new name, with the names given as symbols. Helper macro for translate-forms macro"
 36 |   [old-name new-name]
 37 |   `(defmacro ~(eval new-name)
 38 |      [~'& body#]
 39 |      `(~'~(eval old-name) ~@body#)))
 40 | 
 41 | (defmacro translate-forms
 42 |   "takes a map of symbols and creates macros that do the translation of the form of the old symbol (key) to the new symbol (val)"
 43 |   [symb-map] 
 44 |   `(do
 45 |      ~@
 46 |      (for [[old-form# new-form#] (eval symb-map)]
 47 |         `(translate-form-symbol '~old-form# '~new-form#))))
 48 | 
 49 | 
 50 | (def fns-map '{
 51 |                take எடு
 52 |                drop விடு
 53 |                ;; inc ஏறுமானம்
 54 |                inc ஏற்று
 55 |                ;; dec இறங்குமானம்
 56 |                dec இறக்கு
 57 |                range வீச்சு
 58 |                take-while எடு-என்னும்வரை
 59 |                drop-while விடு-என்னும்வரை
 60 |                interleave பின்னு
 61 |                reduce இறுக்கு
 62 |                ;; reducer இறுக்குவர் ;; ??
 63 |                map விவரி
 64 |                hash-map புலவெண்-விவரணையாக்கம் 
 65 |                ;; vector காவி ;; ??
 66 |                ;; vector நெறியம் ;; ??
 67 |                list பட்டியல்
 68 |                set அமைவு
 69 |                hash-set புலவெண்-அமைவு
 70 |                atom அணு
 71 |                agent முகவர்
 72 |                first முதல்
 73 |                second இரண்டாம்
 74 |                last கடைசி
 75 |                butlast கடைசியின்றி
 76 |                rest மீதி
 77 |                next அடுத்த
 78 |                true வாய்மை ;; should we use வாய்மை, மெய்மை, or உண்மை ?  i am
 79 |                ;; thinking of using வாய்மை or மெய்மை so as to leave உண்மை to continue to
 80 |                ;; be used in more casual / less formal situations
 81 |                false பொய்மை
 82 |                print அச்சிடு
 83 |                println வரி-அச்சிடு
 84 |                filter வடி
 85 |                remove அகற்று
 86 |                keep கொள்
 87 |                slurp உறிஞ்சு;; could be சப்பு
 88 |                spit ஊற்று ;; could be துப்பு
 89 |                seq வரிசை
 90 |                dorun செய்யோட்டம்
 91 |                doall செய்யெல்லாம்
 92 |                str தொடை
 93 |                interpose இடைபொருத்து
 94 |                find கண்டுபிடி
 95 |                get பெறு
 96 |                apply செயல்படுத்து
 97 |                count எண்ணு
 98 |                every? ஒவ்வொன்றுமா?
 99 |                true? உண்மையா?
100 |                false? பொய்மையா?
101 |                concat தொடு
102 |                identity அடையாளம்
103 |                reverse புரட்டு
104 |                some எதாவது
105 |                flatten தட்டையாக்கு
106 | 
107 |                boolean பூலியன்
108 |                })
109 | 
110 | (def forms-map '{
111 |                  if எனில்
112 |                  when என்னும்போது
113 |                  if-not இல்லெனில்
114 |                  when-not இல்லென்னும்-போது
115 |                  def வரையறு
116 |                  fn செயல்கூறு
117 |                  defn வரையறு-செயல்கூறு
118 |                  let வைத்துக்கொள் ;; maybe just கொள்
119 |                  and மற்றும்
120 |                  or அல்லது
121 |                  not அன்று
122 |                  ;; else அன்றி ?
123 |                  loop சுற்று
124 |                  doseq செய்வரிசை
125 |                  ;; for ஒவ்வொன்றுக்கும்
126 |                  for ஒன்றொன்றுக்கு
127 |                  cond பொறுத்து
128 |                  do செய்
129 |                  
130 |                  ;; clojure.test
131 |                  deftest வரையறு-பரிசோதனை
132 |                  testing பரிசோதிக்கும்
133 |                  is ஆகும்
134 |                  are பல-ஆகும்
135 |                 })
136 | 
137 | ;; do the actual "translation" for bindings, fns, and any other value
138 | (translate-fns fns-map)
139 | 
140 | ;; do the actual "translation" for macros and special forms
141 | (translate-forms forms-map)
142 | 


--------------------------------------------------------------------------------
/test/clj_thamil/format/convert_test.cljc:
--------------------------------------------------------------------------------
 1 | (ns clj-thamil.format.convert-test
 2 |   (:require [clojure.test.check :as sc]
 3 |             [clojure.test.check.generators :as gen]
 4 |             [clojure.test.check.properties :as prop :include-macros true]
 5 |             [clojure.string :as string]
 6 |             [clj-thamil.format :as fmt]
 7 |             [clj-thamil.format.convert :as cvt]
 8 |             [clj-thamil.மொழியியல் :as மொ])
 9 |   (:use clojure.test
10 |         clj-thamil.format.convert))
11 | 
12 | (def QCHK-SIZE 100)
13 | 
14 | (def A_LOT 100)
15 | 
16 | (deftest conversion-test
17 |   (testing "romanized -> தமிழ்"
18 |     (is (= "தமிழ்" (romanized->தமிழ் "thamiz")))
19 |     (is (= "தமிழ்" (romanized->தமிழ் "thamizh")))
20 |     (is (= "நீர்" (romanized->தமிழ் "n-iir")))
21 |     (is (= "பஃறுளி" (romanized->தமிழ் "paqRuLi")))
22 |     (is (= "சின்ன" (romanized->தமிழ் "chinna") (romanized->தமிழ் "sinna")))
23 |     (is (= "விகடன்" (romanized->தமிழ் "vikatan") (romanized->தமிழ் "vikadan")))
24 |     (is (= "சென்றேன் வென்றேன்" (romanized->தமிழ் "senreen venreen")))
25 |     (is (= "வந்தேன்" (romanized->தமிழ் "vantheen")))
26 |     (is (= "பாட்டு பாடு" (romanized->தமிழ் "paattu paadu"))))
27 |   (testing "தமிழ் -> romanized; translation map inversion"
28 |     (is (= "thamizh" (தமிழ்->romanized "தமிழ்")))
29 |     (is (not= "thamiz" (தமிழ்->romanized "தமிழ்")))
30 |     (is (= "niir" (தமிழ்->romanized "நீர்")))
31 |     (is (not= "neer" (தமிழ்->romanized "நீர்")))
32 |     (is (= "paambu" (தமிழ்->romanized "பாம்பு")))
33 |     (is (not= "paampu" (தமிழ்->romanized "பாம்பு")))
34 |     (is (not= "anpu" (தமிழ்->romanized "அன்பு")))
35 |     (is (= "anbu" (தமிழ்->romanized "அன்பு")))
36 |     (is (not= "panpu" (தமிழ்->romanized "பண்பு")))
37 |     (is (= "panbu" (தமிழ்->romanized "பண்பு")))))
38 | 
39 | (deftest double-check-test
40 |   (testing "from the test.check / double-check Readme"
41 |     (is (:result
42 |          (sc/quick-check QCHK-SIZE (prop/for-all [v (gen/vector gen/int)]
43 |                                            (= (sort v) (sort (sort v)))))))))
44 | ;; Property test: each font converter followed by its inverse should give back the generated தமிழ் input unchanged.
45 | (deftest convert-fn-invertible
46 |   (let [thamil-letters fmt/letter-seq
47 |         punct (map str [\. \space \newline])
48 |         all-letters (concat thamil-letters punct)
49 |         lett-gen (gen/such-that identity (gen/elements all-letters))
50 |         thamil-text-gen (gen/fmap string/join (gen/vector lett-gen))
51 |         old-font-no-ambig-combo (fn [s]
52 |                                   (let [phonemes (fmt/str->phonemes s)
53 |                                         phoneme-triples (partition 3 1 phonemes)
54 |                                         phoneme-doubles (partition 2 1 phonemes)
55 |                                         ambig1 (fn [[a b c]] (and (மொ/மெய்யெழுத்தா? a)
56 |                                                                  (= "எ" b)
57 |                                                                  (= "ள்" c)))
58 |                                         ambig2 (fn [[a b]] (and (= "ஒ" a)
59 |                                                                (= "ள்" b)))
60 |                                         ambig3 (fn [[a b c]] (and (மொ/மெய்யெழுத்தா? a)
61 |                                                                  (#{"எ" "ஏ" "ஆ"} b)
62 |                                                                  (= "ர்" c)))
63 |                                         no-ambig1 (every? false? (map ambig1 phoneme-triples))
64 |                                         no-ambig2 (every? false? (map ambig2 phoneme-doubles))
65 |                                         no-ambig3 (not-any? ambig3 phoneme-triples)] ;; ambig3 destructures [a b c], so it needs triples; not-any? also tolerates its nil results
66 |                                     (and no-ambig1 no-ambig2 no-ambig3)))
67 |         ;; old fonts can't distinguish certain character combinations,
68 |         ;; so prevent test cases that could cause that
69 |         non-romanized-thamil-text-gen (gen/such-that old-font-no-ambig-combo lett-gen (* QCHK-SIZE A_LOT)) ;; NOTE(review): filters lett-gen (single letters), not thamil-text-gen -- confirm this is intended
70 |         ;; applying converters for old fonts followed by their
71 |         ;; inverses should give back the original text
72 |         test-prop (fn [f inv] (prop/for-all [t non-romanized-thamil-text-gen]
73 |                                             (= t (-> t f inv))))
74 |         test-res (fn [f inv]
75 |                    (->> (test-prop f inv)
76 |                         (sc/quick-check QCHK-SIZE)
77 |                         :result))
78 |         ;; only after we've transliterated to romanized can we use
79 |         ;; the rule about applying converter + inverse = input, since
80 |         ;; the தமிழ்->romanized direction has certain overrides
81 |         romanized-test-prop (prop/for-all [t thamil-text-gen]
82 |                                           (let [converted-test-txt (-> t cvt/தமிழ்->romanized cvt/romanized->தமிழ்)]
83 |                                             (= converted-test-txt (-> converted-test-txt cvt/தமிழ்->romanized cvt/romanized->தமிழ்))))
84 |         romanized-res (->> romanized-test-prop
85 |                            (sc/quick-check QCHK-SIZE)
86 |                            :result)]
87 |     (testing "convert and inverse fns for all font formats (except romanized)"
88 |       (testing "romanized"
89 |         (is (true? romanized-res)))
90 |       (testing "tab"
91 |         (is (true? (test-res cvt/தமிழ்->tab cvt/tab->தமிழ்))))
92 |       (testing "bamini"
93 |         (is (true? (test-res cvt/தமிழ்->bamini cvt/bamini->தமிழ்))))
94 |       (testing "tscii"
95 |         (is (true? (test-res cvt/தமிழ்->tscii cvt/tscii->தமிழ்))))
96 |       (testing "webulagam"
97 |         (is (true? (test-res cvt/தமிழ்->webulagam cvt/webulagam->தமிழ்)))))))
98 | 


--------------------------------------------------------------------------------
/src/clj_thamil/மொழியியல்.cljc:
--------------------------------------------------------------------------------
  1 | (ns clj-thamil.மொழியியல்
  2 |   (:require [clj-thamil.format :as fmt])
  3 |   #?(:clj
  4 |      (:use clj-thamil.core)
  5 |      :cljs
  6 |      (:use-macros [clj-thamil.core :only [வரையறு விவரி மீதி வரையறு-செயல்கூறு பெறு எதாவது பூலியன் என்னும்போது
  7 |                                           வைத்துக்கொள் கடைசி பொறுத்து எண்ணு முதல் இரண்டாம் தொடை
  8 |                                           கடைசியின்றி அன்று மற்றும் அல்லது தொடு செயல்படுத்து செயல்கூறு]])))
  9 | 
 10 | 
 11 | (வரையறு மெய்-தொடக்கம்-எழுத்துகள் fmt/c-cv-letters)
 12 | 
 13 | (வரையறு உயிரெழுத்துகள் fmt/vowels)
 14 | 
 15 | (வரையறு மெய்யெழுத்துகள் fmt/consonants)
 16 | 
 17 | (வரையறு உயிர்மெய்யெழுத்துகள் (தட்டையாக்கு (விவரி மீதி மெய்-தொடக்கம்-எழுத்துகள்)))
 18 | 
 19 | (வரையறு தொடை->எழுத்துகள் fmt/str->letters)
 20 | 
 21 | (வரையறு தொடை->ஒலியன்கள் fmt/str->phonemes)
 22 | 
 23 | (வரையறு-செயல்கூறு ஒலியன்கள்->எழுத்து [ஒலியன்கள்] (பெறு fmt/inverse-phoneme-map ஒலியன்கள்))
 24 | 
 25 | ;;;;;;;;
 26 | ;; எழுத்து
 27 | ;; letters
 28 | ;;;;;;;;
 29 | 
 30 | (வரையறு-செயல்கூறு எழுத்தா? [ச] (fmt/in-trie? ச))
 31 | 
 32 | (வரையறு-செயல்கூறு மெய்யெழுத்தா? [எ] (பூலியன் (எதாவது #{எ} மெய்யெழுத்துகள்)))
 33 | 
 34 | (வரையறு-செயல்கூறு உயிரெழுத்தா? [எ] (பூலியன் (எதாவது #{எ} உயிரெழுத்துகள்)))
 35 | 
 36 | (வரையறு-செயல்கூறு உயிர்மெயெழுத்தா? [எ] (பூலியன் (எதாவது #{எ} உயிர்மெய்யெழுத்துகள்)))
 37 | 
 38 | ;;;;;;;;
 39 | ;; அசை
 40 | ;; syllables
 41 | ;;;;;;;;
 42 | 
 43 | (வரையறு குறில்-உயிரெழுத்துகள் #{"அ" "இ" "உ" "எ" "ஒ"})
 44 | 
 45 | (வரையறு நெடில்-உயிரெழுத்துகள் #{"ஆ" "ஈ" "ஊ" "ஏ" "ஓ"})
 46 | 
 47 | (வரையறு-செயல்கூறு நெடிலா?
 48 |   "எழுத்து நெடில் எழுத்தா என்பதைத் திருப்பிக் கொடுக்கும்
 49 |   returns whether the letter is நெடில் (has long vowel sound)"
 50 |   [எழுத்து]
 51 |   (பூலியன்
 52 |    (என்னும்போது (எழுத்தா? எழுத்து)
 53 |      ;; a letter is நெடில் when its last ஒலியன் (phoneme) is one of the long vowels
 54 |      (->> (தொடை->ஒலியன்கள் எழுத்து)
 55 |           கடைசி
 56 |           (பெறு நெடில்-உயிரெழுத்துகள்)))))
 57 | 
 58 | (வரையறு-செயல்கூறு குறிலா?
 59 |   "எழுத்து குறில் எழுத்தா என்பதைத் திருப்பிக் கொடுக்கும்
 60 |   returns whether the letter is குறில் (has short vowel sound)"
 61 |   [எழுத்து]
 62 |   (பூலியன்
 63 |    (என்னும்போது (எழுத்தா? எழுத்து)
 64 |      (->> (தொடை->ஒலியன்கள் எழுத்து)
 65 |           கடைசி
 66 |           (பெறு குறில்-உயிரெழுத்துகள்)))))
 67 | 
 68 | ;;;;;;;;
 69 | ;; ஒலியன்
 70 | ;; phonemes
 71 | ;;;;;;;;
 72 | 
 73 | (வரையறு முன்னொட்டா? fmt/prefix?)
 74 | 
 75 | (வரையறு பின்னொட்டா? fmt/suffix?)
 76 | 
 77 | ;;;;;;;;
 78 | ;; விகுதி
 79 | ;; suffixes
 80 | ;;;;;;;;
 81 | 
 82 | ;; பன்மை
 83 | ;; plurals
 84 | 
 85 | (வரையறு-செயல்கூறு பன்மை
 86 |   "ஒரு சொல்லை அதன் பன்மை வடிவத்தில் ஆக்குதல்
 87 |   takes a word and pluralizes it"
 88 |   [சொல்]
 89 |   (வைத்துக்கொள் [எழுத்துகள் (தொடை->எழுத்துகள் சொல்)]
 90 |     (பொறுத்து
 91 | 
 92 |      ;; (fmt/seq-prefix? (புரட்டு சொல்) (புரட்டு "கள்"))
 93 |      (பின்னொட்டா? சொல் "கள்")
 94 |      சொல்
 95 |      
 96 |      (= "ம்" (கடைசி எழுத்துகள்))
 97 |      (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்) ["ங்கள்"]))
 98 |      
 99 |      (மற்றும் (= 1 (எண்ணு எழுத்துகள்))
100 |             (நெடிலா? சொல்))
101 |      (தொடை சொல் "க்கள்")
102 | 
103 |      (மற்றும் (= 2 (எண்ணு எழுத்துகள்))
104 |             (ஒவ்வொன்றுமா? அடையாளம் (விவரி குறிலா? எழுத்துகள்)))
105 |      (தொடை சொல் "க்கள்")
106 | 
107 |      (மற்றும் (= 2 (எண்ணு எழுத்துகள்))
108 |             (குறிலா? (முதல் எழுத்துகள்))
109 |             (= "ல்" (இரண்டாம் எழுத்துகள்)))
110 |      (தொடை (முதல் எழுத்துகள்) "ற்கள்")
111 | 
112 |      (மற்றும் (= 2 (எண்ணு எழுத்துகள்))
113 |             (குறிலா? (முதல் எழுத்துகள்))
114 |             (= "ள்" (இரண்டாம் எழுத்துகள்)))
115 |      (தொடை (முதல் எழுத்துகள்) "ட்கள்")
116 |      
117 |      :அன்றி
118 |      (தொடை சொல் "கள்"))))
119 | 
120 | ;; சந்தி (விதிகள்)
121 | ;; (rules for) joining words/suffixes
122 | ;; Joins two words (or a word and a suffix) into a single string; the first rule below whose condition matches is applied.
123 | (வரையறு-செயல்கூறு சந்தி
124 |   [சொல்1 சொல்2]
125 |   (வைத்துக்கொள் [எழுத்துகள்1 (தொடை->எழுத்துகள் சொல்1)
126 |               எழுத்துகள்2 (தொடை->எழுத்துகள் சொல்2) ;; NOTE(review): bound but never referenced below
127 |               ஒலியன்கள்1 (தொடை->ஒலியன்கள் சொல்1)
128 |               ஒலியன்கள்2 (தொடை->ஒலியன்கள் சொல்2)
129 |               சொ1-கஒ (கடைசி ஒலியன்கள்1) ;; last phoneme of the first word
130 |               சொ2-முஒ (முதல் ஒலியன்கள்2)] ;; first phoneme of the second word
131 |     (பொறுத்து
132 |      ;; a vowel follows a final இ/ஈ/ஏ/ஐ: insert ய் before that vowel (தே + இலை -> தேயிலை)
133 |      (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
134 |             (பெறு #{"இ" "ஈ" "ஏ" "ஐ"} சொ1-கஒ))
135 |      (செயல்படுத்து தொடை சொல்1 (ஒலியன்கள்->எழுத்து ["ய்" சொ2-முஒ]) (மீதி சொல்2))
136 |      ;; a vowel follows a final அ/ஆ/ஊ/ஒ/ஓ/ஔ: insert வ் before that vowel (பூ + எல்லாம் -> பூவெல்லாம்)
137 |      (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
138 |             (பெறு #{"அ" "ஆ" "ஊ" "ஒ" "ஓ" "ஔ"} சொ1-கஒ))
139 |      (செயல்படுத்து தொடை சொல்1 (ஒலியன்கள்->எழுத்து ["வ்" சொ2-முஒ]) (மீதி சொல்2))
140 |      ;; a vowel follows a final உ in a word of two short letters: insert வ் (புழு + உக்கு -> புழுவுக்கு)
141 |      (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
142 |             (= "உ" சொ1-கஒ)
143 |             (= 2 (எண்ணு எழுத்துகள்1))
144 |             (ஒவ்வொன்றுமா? குறிலா? எழுத்துகள்1))
145 |      (செயல்படுத்து தொடை சொல்1 (ஒலியன்கள்->எழுத்து ["வ்" சொ2-முஒ]) (மீதி சொல்2))
146 |      ;; a vowel follows any other final உ: drop the last letter and join its second-to-last phoneme to the vowel (சிலம்பு + ஆட்டம் -> சிலம்பாட்டம்)
147 |      (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
148 |             (= "உ" சொ1-கஒ)
149 |             (அன்று (மற்றும் (= 2 (எண்ணு எழுத்துகள்1))
150 |                          (ஒவ்வொன்றுமா? குறிலா? எழுத்துகள்1))))
151 |      (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்1) (ஒலியன்கள்->எழுத்து [(கடைசி (கடைசியின்றி ஒலியன்கள்1)) சொ2-முஒ]) (மீதி சொல்2)))
152 | 
153 |      ;; a vowel follows a two-letter word made of a short letter plus a consonant: keep the word and add consonant+vowel, doubling the consonant (கல் + ஆறு -> கல்லாறு)
154 |      (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
155 |             (= 2 (எண்ணு எழுத்துகள்1))
156 |             (குறிலா? (முதல் எழுத்துகள்1))
157 |             (மெய்யெழுத்தா? (இரண்டாம் எழுத்துகள்1)))
158 |      (செயல்படுத்து தொடை (தொடு சொல்1 [(ஒலியன்கள்->எழுத்து [சொ1-கஒ சொ2-முஒ])] (மீதி சொல்2)))
159 |      ;; a vowel follows any other final consonant: replace the consonant with the consonant+vowel letter (தமிழ் + ஈழம் -> தமிழீழம்)
160 |      (மற்றும் (உயிரெழுத்தா? சொ2-முஒ)
161 |             (மெய்யெழுத்தா? சொ1-கஒ))
162 |      (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்1) [(ஒலியன்கள்->எழுத்து [சொ1-கஒ சொ2-முஒ])] (மீதி சொல்2)))
163 |      ;; no rule matched: plain concatenation (தமிழ் + நாடு -> தமிழ்நாடு)
164 |      :அன்றி
165 |      (தொடை சொல்1 சொல்2)
166 |      
167 |      )))
168 | 
169 | ;; வேற்றுமை
170 | ;; noun cases
171 | 
172 | (வரையறு-செயல்கூறு வேற்றுமை-முன்-மாற்றம்
173 |   "ஒரு பெயர்ச்சொல்லுக்கு வேற்றுமை விகுதி சேர்க்கும் முன் செய்யவேண்டிய மாற்றம்
174 |   change that is required before adding a case suffix to a noun"
175 |   [சொல்]
176 |   (வைத்துக்கொள் [எழுத்துகள் (தொடை->எழுத்துகள் சொல்)
177 |               ஒலியன்கள் (தொடை->ஒலியன்கள் சொல்)
178 |               கஎ (கடைசி எழுத்துகள்) ;; last letter of the word
179 |               கஒ (கடைசி ஒலியன்கள்)] ;; NOTE(review): bound but never referenced below
180 |     (பொறுத்து
181 |      ;; a final ம் is replaced by த்த் (மரம் -> மரத்த்)
182 |      (= "ம்" (கடைசி எழுத்துகள்))
183 |      (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்) ["த்த்"]))
184 |      ;; a final டு/று is left alone when the word is two short letters, or when the letter before it is a pure consonant
185 |      (மற்றும் (பெறு #{"டு" "று"} கஎ)
186 |             (அல்லது (மற்றும் (= 2 (எண்ணு எழுத்துகள்))
187 |                           (ஒவ்வொன்றுமா? குறிலா? எழுத்துகள்))
188 |                    (மெய்யெழுத்தா? (கடைசி (கடைசியின்றி எழுத்துகள்)))))
189 |      சொல்
190 |      ;; any other final டு is replaced by ட்ட் (காடு -> காட்ட்)
191 |      (= "டு" கஎ)
192 |      (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்) ["ட்ட்"]))
193 |      ;; any other final று is replaced by ற்ற் (கயிறு -> கயிற்ற்)
194 |      (= "று" கஎ)
195 |      (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்) ["ற்ற்"]))
196 |      ;; every other word is returned unchanged
197 |      :அன்றி
198 |      சொல்)))
199 | 
200 | (வரையறு-செயல்கூறு வேற்றுமை
201 |   "ஒரு பெயர்ச்சொல்லுக்கு ஒரு வேற்றுமை விகுதியைச் சேர்த்தல்
202 |   adds a case suffix to a noun"
203 |   [சொல் வே]
204 |   (வைத்துக்கொள் [எழுத்துகள் (தொடை->எழுத்துகள் சொல்) ;; NOTE(review): bound but never referenced below
205 |               ஒலியன்கள் (தொடை->ஒலியன்கள் சொல்)]
206 |     (எனில் (மற்றும் (= "உக்கு" வே) ;; special case: the suffix உக்கு after a final இ/ஈ/ஐ phoneme or an ஆய் ending
207 |                  (அல்லது (பெறு #{"இ" "ஈ" "ஐ"} (கடைசி ஒலியன்கள்))
208 |                         (எதாவது (செயல்கூறு [தொடை] (பின்னொட்டா? சொல் தொடை))
209 |                                 ["ஆய்"])))
210 |       (வேற்றுமை சொல் "க்கு") ;; retry with the shortened suffix க்கு (தீ -> தீக்கு, காய் -> காய்க்கு)
211 |       (-> சொல் ;; general case: adjust the noun's ending first, then join the suffix with சந்தி
212 |           வேற்றுமை-முன்-மாற்றம்
213 |           (சந்தி வே)))))
214 | 


--------------------------------------------------------------------------------
/examples/clj/clj-español/LICENSE:
--------------------------------------------------------------------------------
  1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
  2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
  3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
  4 | 
  5 | 1. DEFINITIONS
  6 | 
  7 | "Contribution" means:
  8 | 
  9 | a) in the case of the initial Contributor, the initial code and
 10 | documentation distributed under this Agreement, and
 11 | 
 12 | b) in the case of each subsequent Contributor:
 13 | 
 14 | i) changes to the Program, and
 15 | 
 16 | ii) additions to the Program;
 17 | 
 18 | where such changes and/or additions to the Program originate from and are
 19 | distributed by that particular Contributor. A Contribution 'originates' from
 20 | a Contributor if it was added to the Program by such Contributor itself or
 21 | anyone acting on such Contributor's behalf. Contributions do not include
 22 | additions to the Program which: (i) are separate modules of software
 23 | distributed in conjunction with the Program under their own license
 24 | agreement, and (ii) are not derivative works of the Program.
 25 | 
 26 | "Contributor" means any person or entity that distributes the Program.
 27 | 
 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
 29 | necessarily infringed by the use or sale of its Contribution alone or when
 30 | combined with the Program.
 31 | 
 32 | "Program" means the Contributions distributed in accordance with this
 33 | Agreement.
 34 | 
 35 | "Recipient" means anyone who receives the Program under this Agreement,
 36 | including all Contributors.
 37 | 
 38 | 2. GRANT OF RIGHTS
 39 | 
 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
 42 | reproduce, prepare derivative works of, publicly display, publicly perform,
 43 | distribute and sublicense the Contribution of such Contributor, if any, and
 44 | such derivative works, in source code and object code form.
 45 | 
 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
 49 | transfer the Contribution of such Contributor, if any, in source code and
 50 | object code form.  This patent license shall apply to the combination of the
 51 | Contribution and the Program if, at the time the Contribution is added by the
 52 | Contributor, such addition of the Contribution causes such combination to be
 53 | covered by the Licensed Patents. The patent license shall not apply to any
 54 | other combinations which include the Contribution. No hardware per se is
 55 | licensed hereunder.
 56 | 
 57 | c) Recipient understands that although each Contributor grants the licenses
 58 | to its Contributions set forth herein, no assurances are provided by any
 59 | Contributor that the Program does not infringe the patent or other
 60 | intellectual property rights of any other entity. Each Contributor disclaims
 61 | any liability to Recipient for claims brought by any other entity based on
 62 | infringement of intellectual property rights or otherwise. As a condition to
 63 | exercising the rights and licenses granted hereunder, each Recipient hereby
 64 | assumes sole responsibility to secure any other intellectual property rights
 65 | needed, if any. For example, if a third party patent license is required to
 66 | allow Recipient to distribute the Program, it is Recipient's responsibility
 67 | to acquire that license before distributing the Program.
 68 | 
 69 | d) Each Contributor represents that to its knowledge it has sufficient
 70 | copyright rights in its Contribution, if any, to grant the copyright license
 71 | set forth in this Agreement.
 72 | 
 73 | 3. REQUIREMENTS
 74 | 
 75 | A Contributor may choose to distribute the Program in object code form under
 76 | its own license agreement, provided that:
 77 | 
 78 | a) it complies with the terms and conditions of this Agreement; and
 79 | 
 80 | b) its license agreement:
 81 | 
 82 | i) effectively disclaims on behalf of all Contributors all warranties and
 83 | conditions, express and implied, including warranties or conditions of title
 84 | and non-infringement, and implied warranties or conditions of merchantability
 85 | and fitness for a particular purpose;
 86 | 
 87 | ii) effectively excludes on behalf of all Contributors all liability for
 88 | damages, including direct, indirect, special, incidental and consequential
 89 | damages, such as lost profits;
 90 | 
 91 | iii) states that any provisions which differ from this Agreement are offered
 92 | by that Contributor alone and not by any other party; and
 93 | 
 94 | iv) states that source code for the Program is available from such
 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
 96 | or through a medium customarily used for software exchange.
 97 | 
 98 | When the Program is made available in source code form:
 99 | 
100 | a) it must be made available under this Agreement; and
101 | 
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 | 
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 | 
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 | 
111 | 4. COMMERCIAL DISTRIBUTION
112 | 
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering.  The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor to control, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 | 
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 | 
144 | 5. NO WARRANTY
145 | 
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement , including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 | 
157 | 6. DISCLAIMER OF LIABILITY
158 | 
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 | 
168 | 7. GENERAL
169 | 
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 | 
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 | 
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 | 
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 | 
210 | This Agreement is governed by the laws of the State of New York and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 | 


--------------------------------------------------------------------------------
/examples/clj/clj-spanish/LICENSE:
--------------------------------------------------------------------------------
  1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
  2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
  3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
  4 | 
  5 | 1. DEFINITIONS
  6 | 
  7 | "Contribution" means:
  8 | 
  9 | a) in the case of the initial Contributor, the initial code and
 10 | documentation distributed under this Agreement, and
 11 | 
 12 | b) in the case of each subsequent Contributor:
 13 | 
 14 | i) changes to the Program, and
 15 | 
 16 | ii) additions to the Program;
 17 | 
 18 | where such changes and/or additions to the Program originate from and are
 19 | distributed by that particular Contributor. A Contribution 'originates' from
 20 | a Contributor if it was added to the Program by such Contributor itself or
 21 | anyone acting on such Contributor's behalf. Contributions do not include
 22 | additions to the Program which: (i) are separate modules of software
 23 | distributed in conjunction with the Program under their own license
 24 | agreement, and (ii) are not derivative works of the Program.
 25 | 
 26 | "Contributor" means any person or entity that distributes the Program.
 27 | 
 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
 29 | necessarily infringed by the use or sale of its Contribution alone or when
 30 | combined with the Program.
 31 | 
 32 | "Program" means the Contributions distributed in accordance with this
 33 | Agreement.
 34 | 
 35 | "Recipient" means anyone who receives the Program under this Agreement,
 36 | including all Contributors.
 37 | 
 38 | 2. GRANT OF RIGHTS
 39 | 
 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
 42 | reproduce, prepare derivative works of, publicly display, publicly perform,
 43 | distribute and sublicense the Contribution of such Contributor, if any, and
 44 | such derivative works, in source code and object code form.
 45 | 
 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
 49 | transfer the Contribution of such Contributor, if any, in source code and
 50 | object code form.  This patent license shall apply to the combination of the
 51 | Contribution and the Program if, at the time the Contribution is added by the
 52 | Contributor, such addition of the Contribution causes such combination to be
 53 | covered by the Licensed Patents. The patent license shall not apply to any
 54 | other combinations which include the Contribution. No hardware per se is
 55 | licensed hereunder.
 56 | 
 57 | c) Recipient understands that although each Contributor grants the licenses
 58 | to its Contributions set forth herein, no assurances are provided by any
 59 | Contributor that the Program does not infringe the patent or other
 60 | intellectual property rights of any other entity. Each Contributor disclaims
 61 | any liability to Recipient for claims brought by any other entity based on
 62 | infringement of intellectual property rights or otherwise. As a condition to
 63 | exercising the rights and licenses granted hereunder, each Recipient hereby
 64 | assumes sole responsibility to secure any other intellectual property rights
 65 | needed, if any. For example, if a third party patent license is required to
 66 | allow Recipient to distribute the Program, it is Recipient's responsibility
 67 | to acquire that license before distributing the Program.
 68 | 
 69 | d) Each Contributor represents that to its knowledge it has sufficient
 70 | copyright rights in its Contribution, if any, to grant the copyright license
 71 | set forth in this Agreement.
 72 | 
 73 | 3. REQUIREMENTS
 74 | 
 75 | A Contributor may choose to distribute the Program in object code form under
 76 | its own license agreement, provided that:
 77 | 
 78 | a) it complies with the terms and conditions of this Agreement; and
 79 | 
 80 | b) its license agreement:
 81 | 
 82 | i) effectively disclaims on behalf of all Contributors all warranties and
 83 | conditions, express and implied, including warranties or conditions of title
 84 | and non-infringement, and implied warranties or conditions of merchantability
 85 | and fitness for a particular purpose;
 86 | 
 87 | ii) effectively excludes on behalf of all Contributors all liability for
 88 | damages, including direct, indirect, special, incidental and consequential
 89 | damages, such as lost profits;
 90 | 
 91 | iii) states that any provisions which differ from this Agreement are offered
 92 | by that Contributor alone and not by any other party; and
 93 | 
 94 | iv) states that source code for the Program is available from such
 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
 96 | or through a medium customarily used for software exchange.
 97 | 
 98 | When the Program is made available in source code form:
 99 | 
100 | a) it must be made available under this Agreement; and
101 | 
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 | 
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 | 
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 | 
111 | 4. COMMERCIAL DISTRIBUTION
112 | 
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering.  The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor to control, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 | 
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 | 
144 | 5. NO WARRANTY
145 | 
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement , including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 | 
157 | 6. DISCLAIMER OF LIABILITY
158 | 
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 | 
168 | 7. GENERAL
169 | 
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 | 
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 | 
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 | 
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 | 
210 | This Agreement is governed by the laws of the State of New York and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 | 


--------------------------------------------------------------------------------
/test/clj_thamil/format_test.cljc:
--------------------------------------------------------------------------------
  1 | (ns clj-thamil.format-test
  2 |   (:use clojure.test
  3 |         clj-thamil.format
  4 |         clj-thamil.core))
  5 | 
  6 | (def words ["பந்து" "பந்தி" "பத்து" "பந்துகள்" "பந்தயம்" "பந்தாடு" "பந்தல்"])
  7 | 
  8 | (deftest trie-test ; covers make-trie, in-trie?, trie-prefix-subtree, str->phonemes and phonemes->str
  9 |   (let [first-word (first words)
 10 |         first-two-words (take 2 words)]
 11 |     (testing "creating a trie"
 12 |       (testing "creating a trie from a sequence of words (default val is attached to terminus)"
 13 |         (testing "boundary case"
 14 |           (is (= {} (make-trie []))))
 15 |         (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} (make-trie [first-word]))) ; expected shape: one nested map per char; a nil key marks the end of a word
 16 |         (is (= (make-trie [first-word]) (make-trie (take 1 words))))
 17 |         (testing "words that share some prefix"
 18 |           (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil} \u0BBF {nil nil}}}}}} (make-trie first-two-words)))) ; the two words branch only at their last char
 19 |         (testing "words that have no shared prefix"
 20 |           (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}} \வ {\u0BC6 {\ற {\u0BCD {\ற {\u0BBF {nil nil}}}}}}} (make-trie [first-word "வெற்றி"])))))
 21 |       (testing "creating a trie from a map of word->terminus-attached-val"
 22 |         (testing "boundary case"
 23 |           (is (= {} (make-trie {}))))
 24 |         (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 1}}}}}} (make-trie {first-word 1}))) ; the map's value is stored under the nil key
 25 |         (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 0
 26 |                                             \க {\ள {\u0BCD {nil 3}}}}
 27 |                                     \u0BBF {nil 1}
 28 |                                     \ய {\ம {\u0BCD {nil 4}}}
 29 |                                     \ல {\u0BCD {nil 6}}
 30 |                                     \u0BBE {\ட {\u0BC1 {nil 5}}}}}}
 31 |                     \த {\u0BCD {\த {\u0BC1 {nil 2}}}}}}
 32 |                (make-trie (zipmap words (range))))))) ; each terminus value is the index of the word in `words`
 33 |     (testing "trie lookup fns"
 34 |       (testing "nil as valued attached to terminus of input sequences"
 35 |         (is (= true (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {\க {\ள {\u0BCD {nil nil}}}}}}}}} "பந்துகள்")))
 36 |         (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {\க {\ள {\u0BCD {nil nil}}}}}}}}} "ப"))) ; a proper prefix of a stored word is not itself a member
 37 |         (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {\க {\ள {\u0BCD {nil nil}}}}}}}}} "பந்துக")))
 38 |         (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {\க {\ள {\u0BCD {nil nil}}}}}}}}} "பந்து")))
 39 |         (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "பந்துகள்"))) ; a word longer than the stored one is not a member either
 40 |         (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "ப")))
 41 |         (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "பந்துக")))
 42 |         (is (= true (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "பந்து")))
 43 |         (is (= false (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "பந்து")))) ; expected non-nil for a whole stored word ...
 44 |         (is (= false (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "ப")))) ; ... and for any prefix of one
 45 |         (is (= true (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "பந்துகள்")))) ; expected nil once the input runs past the stored word
 46 |         (is (= true (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "கோடு"))))) ; expected nil when the first char has no branch
 47 |       (testing "non-nil values attached to terminus of input sequences"
 48 |         (is (= true (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 1}}}}}} "பந்து"))) ; same membership results whatever the terminus value is
 49 |         (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 3.14159}}}}}} "ப")))
 50 |         (is (= false (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil \a}}}}}} "பந்து"))))
 51 |         (is (= false (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil true}}}}}} "ப"))))
 52 |         (testing "splitting words directly into phonemes using phoneme trie"
 53 |           (is (= ["வ்" "அ" "ண்" "அ" "க்" "க்" "அ" "ம்"] (str->phonemes "வணக்கம்")))
 54 |           (is (empty? (str->phonemes nil))) ; nil and "" are both expected to yield an empty result
 55 |           (is (empty? (str->phonemes "")))
 56 |           (is (= ["அ"] (str->phonemes "அ")))
 57 |           (is (= ["க்"] (str->phonemes "க்")))
 58 |           (is (= ["க்" "அ"] (str->phonemes "க"))) ; a bare consonant letter is expected to split into consonant + "அ"
 59 |           (is (= ["க்" "ஊ"] (str->phonemes "கூ")))
 60 |           (is (= ["வ்" "இ" "ட்" "உ" "த்" "அ" "ல்" "ஐ"] (str->phonemes "விடுதலை")))))
 61 |       (testing "inverting maps for creating tries for inverse conversion"
 62 |         (is (= "பக்கம்" (phonemes->str "ப்அக்க்அம்")))
 63 |         (is (= "தலைய்123ஈடு" (phonemes->str "த்அல்ஐய்123ஈட்உ"))) ; the digits "123" appear unchanged in the expected output
 64 |         (is (= "நடு" (phonemes->str "ந்அடு")))))))
 65 | 
 66 | (deftest word-letter-test ; covers str->letters, letter-before?, letter-comp and word-before?
 67 |   (testing "splitting strings of தமிழ் characters into constituent தமிழ் characters"
 68 |     (is (= [] (str->letters "")))
 69 |     (is (= [] (str->letters nil))) ; nil is expected to behave like the empty string
 70 |     (is (= ["த"] (str->letters "த")))
 71 |     (is (= [" " "த"] (str->letters " த"))) ; whitespace is expected to come back as its own element
 72 |     (is (= ["த" " "] (str->letters "த ")))
 73 |     (is (= ["த்"] (str->letters "த்"))) ; a consonant plus its following sign is expected as one letter
 74 |     (is (= ["த" "மி" "ழ்"] (str->letters "தமிழ்")))
 75 |     (is (= ["த" "மி" "ழ்" " "] (str->letters "தமிழ் ")))
 76 |     (is (= ["s" "o" "f" "t" "w" "a" "r" "e" "=" "மெ" "ன்" "பொ" "ரு" "ள்" "," "." "." "."] (str->letters "software=மென்பொருள்,..."))))
 77 |   (testing "letter ordering"
 78 |     (testing "boundary cases"
 79 |       (is (= true (letter-before? nil nil))) ; note: expected true here, while equal non-nil inputs below expect false
 80 |       (is (= true (letter-before? nil "")))
 81 |       (is (= false (letter-before? "" nil)))
 82 |       (is (= true (letter-before? nil "அ")))
 83 |       (is (= true (letter-before? nil "a")))
 84 |       (is (= true (letter-before? "a" "அ")))) ; an ASCII letter is expected to sort before a தமிழ் letter
 85 |     (testing "equal inputs"
 86 |       (is (= false (letter-before? "அ" "அ"))))
 87 |     (testing "தமிழ்"
 88 |       (is (= true (letter-before? "அ" "ஆ")))
 89 |       (is (= true (letter-before? "ஆ" "இ")))
 90 |       (is (= true (letter-before? "அ" "ஔ")))
 91 |       (is (= true (letter-before? "ஔ" "ஃ"))) ; expected order: vowels, then "ஃ", then consonants
 92 |       (is (= true (letter-before? "ஃ" "க்")))
 93 |       (is (= true (letter-before? "க்" "க"))) ; "க்" is expected before "க" and the other letters built on it
 94 |       (is (= true (letter-before? "க" "கா")))
 95 |       (is (= true (letter-before? "க்" "கௌ")))
 96 |       (is (= false (letter-before? "க்" "ஃ")))
 97 |       (is (= true (letter-before? "கௌ" "ங்")))
 98 |       (is (= false (letter-before? "ங்" "கௌ"))))
 99 |     (testing "ASCII"
100 |       (is (= true (letter-before? "a" "z")))
101 |       (is (= true (letter-before? "A" "Z")))
102 |       (is (= true (letter-before? "Z" "a"))) ; expected order within ASCII: digits, then uppercase, then lowercase
103 |       (is (= true (letter-before? "0" "9")))
104 |       (is (= true (letter-before? "9" "A"))))
105 |     (testing "comparator / sorting"
106 |       (is (= ["அ" "ஆ" "இ" "ஒ" "ஓ" "ஔ" "ஃ" "க்" "க" "ன்" "ன" "னா" "னு" "னௌ"] (sort-by identity letter-comp ["இ" "க" "ஃ" "ன" "னு" "னௌ" "னா" "ஆ" "க்" "அ" "ஔ" "ஓ" "ன்" "ஒ"]))))) ; letter-comp is passed to sort-by as the comparator
107 |   (testing "word ordering"
108 |     (testing "equal inputs"
109 |       (is (= false (word-before? "அ" "அ"))))
110 |     (testing "extra letters in one word"
111 |       (is (= false (word-before? "அது" "அ")))
112 |       (is (= true (word-before? "அ" "அது")))) ; a word is expected before any longer word that starts with it
113 |     (testing "Unicode 'consonant' vs. Unicode 'consonant+ligature' - ஒருங்குறியில் தமிழ் மெய்யெழுத்து+அகரம் மற்றும் அதே மெய்யெழுத்து {வெறுமன்; அதோடு வேறொரு உயிரெழுத்து}"
114 |       (is (= true (word-before? "படம்" "பாடம்")))
115 |       (is (= false (word-before? "பாடம்" "படம்")))
116 |       (is (= false (word-before? "படம்" "பட்டம்"))) ; the words first differ at "ட" vs. "ட்", and "ட்" is expected first
117 |       (is (= true (word-before? "பட்டம்" "படம்")))
118 |       (is (= false (word-before? "கடமை" "கட்டம்")))
119 |       (is (= true (word-before? "கட்டம்" "கடமை")))
120 |       (is (= true (word-before? "கட்டு" "கெட்டு")))
121 |       (is (= false (word-before? "கெட்டு" "கட்டு")))
122 |       (is (= false (word-before? "பைந்தமிழ்" "பந்தல்")))
123 |       (is (= true (word-before? "பந்தல்" "பைந்தமிழ்"))))
124 |     (testing "order of consonants"
125 |       (is (= true (word-before? "பாடம்" "பாதம்")))
126 |       (is (= false (word-before? "பாதம்" "பாடம்"))))
127 |     (testing "order of vowels"
128 |       (is (= true (word-before? "அப்பம்" "ஆப்பம்")))
129 |       (is (= false (word-before? "ஆப்பம்" "அப்பம்"))))
130 |     (testing "order of vowel vs. consonant, and order of two உயிர்மெய்யெழுத்துகள்"
131 |       (is (= false (word-before? "நுளம்பு" "கொசு")))
132 |       (is (= true (word-before? "கொசு" "நுளம்பு")))
133 |       (is (= true (word-before? "ஈ" "கொசு"))) ; a word starting with a vowel is expected before one starting with a consonant
134 |       (is (= false (word-before? "கொசு" "ஈ"))))))
135 | 
136 | (deftest util-fn-test ; covers seq-prefix, seq-prefix?, seq-index-of and prefix?
137 |   (let [s "abcqwertyuiop"]
138 |     (testing "seq-prefix" ; expected result: the leading elements shared by both arguments
139 |       (is (= [] (seq-prefix nil nil)))
140 |       (is (= [] (seq-prefix nil [])))
141 |       (is (= [] (seq-prefix [] nil)))
142 |       (is (= [] (seq-prefix nil [1 2])))
143 |       (is (= [\a \b \c] (seq-prefix "abcdefgh" s)))
144 |       (is (= [\a \b] (seq-prefix "abbb" s)))
145 |       (is (= [] (seq-prefix "zyx" s))))
146 |     (testing "seq-prefix?" ; argument order matters: first the full sequence, then the candidate prefix
147 |       (is (= false (seq-prefix? nil nil))) ; a nil first argument is expected to give false, even against nil
148 |       (is (= false (seq-prefix? nil [])))
149 |       (is (= false (seq-prefix? [] nil)))
150 |       (is (= false (seq-prefix? nil [1 2])))
151 |       (is (= false (seq-prefix? "abcdefgh" s)))
152 |       (is (= false (seq-prefix? "abbb" s)))
153 |       (is (= false (seq-prefix? "zyx" s))) 
154 |       (is (= false (seq-prefix? "abc" s))) ; false because `s` is longer than "abc"; the swapped call below is true
155 |       (is (= true (seq-prefix? s "abc")))
156 |       (is (= true (seq-prefix? s "a")))
157 |       (is (= true (seq-prefix? s ""))) ; "", [] and nil are all expected to count as a prefix of a non-empty sequence
158 |       (is (= true (seq-prefix? s [])))
159 |       (is (= true (seq-prefix? s nil))))
160 |     (testing "seq-index-of"
161 |       (let [check-seq-index-of (fn [s1 s2] (= (.indexOf s1 s2) ; the host string's .indexOf serves as the reference result
162 |                                          (seq-index-of s1 s2)))]
163 |         (is (= true (check-seq-index-of "abc" "a")))
164 |         (is (true? (check-seq-index-of "a" "abc")))
165 |         (is (true? (check-seq-index-of "" "abc")))))
166 |     (testing "prefix?" ; NOTE(review): expectations suggest matching finer than whole letters ("வ்" and "வ" both match) - confirm against prefix?
167 |       (is (true? (prefix? "வந்தான்" "")))
168 |       (is (true? (prefix? "வந்தான்" "வ்")))
169 |       (is (true? (prefix? "வந்தான்" "வ")))
170 |       (is (true? (prefix? "வந்தான்" "வந்")))
171 |       (is (false? (prefix? "வந்தான்" "வந"))) ; "ந" without the following sign is expected not to match "ந்"
172 |       (is (true? (prefix? "வந்தான்" "வந்த்")))
173 |       (is (false? (prefix? "வந்தான்" "வந்து")))
174 |       (is (true? (prefix? "வந்தான்" "வந்தா")))
175 |       (is (true? (prefix? "வந்தான்" "வந்தான்"))) ; a word is expected to be a prefix of itself
176 |       (is (false? (prefix? "வந்தான்" "வந்தானே")))
177 |       (is (true? (prefix? "வந்தானே" "வந்தான்"))))))
178 | 
179 | (deftest word-char-traits-test ; covers whitespace?, wordy-char? and wordy-seq
180 |   (testing "word and char traits"
181 |     (testing "char traits"
182 |       (let [ws-chars [\space \tab \newline]
183 |             wordy-chars [\a \Z \0 ]
184 |             punct-chars [\- \* \^ \$ \+ \. \_ \; ]
185 |             தமிழ்-எழுத்து-unicode-chars [\அ \ஆ \இ \ஔ \ஃ \க \ங \ன]
186 |             தமிழ்-எழுத்து-துணை-குறி-unicode-chars [\u0BCD \u0BBE \u0BBF \u0BC0 \u0BC1 \u0BC2 \u0BC6 \u0BC7 \u0BC8 \u0BCA \u0BCB \u0BCC]]
187 |         (is (= true (every? true? (map whitespace? ws-chars))))
188 |         (is (= true (every? true? (map wordy-char? wordy-chars)))) ; ASCII letters and a digit are expected to be wordy
189 |         (is (= true (every? true? (map wordy-char? தமிழ்-எழுத்து-unicode-chars))))
190 |         (is (= true (every? true? (map wordy-char? தமிழ்-எழுத்து-துணை-குறி-unicode-chars))))
191 |         (is (= true (every? true? (map wordy-char? தமிழ்-எழுத்து-துணை-குறி-unicode-chars)))) ; NOTE(review): exact duplicate of the previous assertion - confirm whether another char set was intended
192 |         (is (= true (every? false? (map wordy-char? punct-chars)))))) ; punctuation, including "_", is expected to be non-wordy
193 |     (testing "word boundaries"
194 |       (let [s1 "aldsk சிக்கல் sdfsdf234234lsdflksjdf Zürich"
195 |             s2 "  alsfjs"
196 |             s3 ""
197 |             s4 nil]
198 |         (is (= ["aldsk" "சிக்கல்" "sdfsdf234234lsdflksjdf" "Zürich"] (wordy-seq s1)))
199 |         (is (= ["alsfjs"] (wordy-seq s2))) ; leading whitespace is expected to produce no empty chunk
200 |         (is (= [] (wordy-seq s3)))
201 |         (is (= nil (wordy-seq s4))))))) ; nil input is expected to give nil, whereas "" gives an empty result
202 | 
203 | (deftest cursor-pos-test
204 |   ;; fixtures --
205 |   ;; s1: several wordy chunks in different scripts, separated by single spaces
206 |   ;; s2: leading whitespace before the only chunk
207 |   ;; s3: two one-letter chunks with a run of spaces between them
208 |   ;; s4: a single தமிழ் word, used for letter-boundary adjustment
209 |   (let [s1 "aldsk சிக்கல் sdfsdf234234lsdflksjdf Zürich"
210 |         s2 "  alsfjs"
211 |         s3 "a    b"
212 |         s4 "சிக்கல்"
213 |         last-word-start (- (count s1) (count "Zürich"))
214 |         ;; each row: [string, cursor index, expected chunk, expected cursor
215 |         ;; position inside that chunk]; a nil chunk means the cursor does
216 |         ;; not touch any wordy chunk
217 |         chunk-cases [[s1 0 "aldsk" 0]
218 |                      [s1 1 "aldsk" 1]
219 |                      [s1 5 "aldsk" 5]
220 |                      [s1 6 "சிக்கல்" 0]
221 |                      [s1 (count s1) "Zürich" 6]
222 |                      [s1 last-word-start "Zürich" 0]
223 |                      [s2 0 nil nil]
224 |                      [s2 1 nil nil]
225 |                      [s2 2 "alsfjs" 0]
226 |                      [s3 0 "a" 0]
227 |                      [s3 1 "a" 1]
228 |                      [s3 2 nil nil]]
229 |         ;; each row: [cursor index in s4, expected result for :to-first,
230 |         ;; for :to-last, and for a nil direction]
231 |         adjust-cases [[3 2 4 4]
232 |                       [2 2 2 2]
233 |                       [0 0 0 0]
234 |                       [7 7 7 7]]]
235 |     (testing "cursor position"
236 |       (testing "wordy chunk under cursor"
237 |         (doseq [[s idx chunk] chunk-cases]
238 |           (is (= chunk (wordy-chunk-under s idx)))))
239 |       (testing "cursor position within wordy chunk"
240 |         (doseq [[s idx chunk pos] chunk-cases]
241 |           (is (= (when chunk [chunk pos])
242 |                  (wordy-chunk-and-cursor-pos s idx)))))
243 |       (testing "cursor adjust"
244 |         (doseq [[idx to-first to-last unspecified] adjust-cases]
245 |           (is (= to-first (cursor-adjust s4 idx :to-first)))
246 |           (is (= to-last (cursor-adjust s4 idx :to-last)))
247 |           (is (= unspecified (cursor-adjust s4 idx nil))))))))
248 | 


--------------------------------------------------------------------------------
/src/clj_thamil/format.cljc:
--------------------------------------------------------------------------------
  1 | (ns clj-thamil.format
  2 |   (:require [clojure.set :as set])
  3 |   #?(:clj (:use clj-thamil.core)))
  4 | 
  5 | ;;;;;;;;;;
  6 | ;; letters
  7 | ;;;;;;;;;;
  8 | ;; `letters` is a grid of strings: the first row holds ஃ followed by the 12 vowels; every later row holds one consonant followed by its 12 consonant+vowel letters, in the same vowel order.
  9 | (def letters [["ஃ" "அ" "ஆ" "இ" "ஈ" "உ" "ஊ" "எ" "ஏ" "ஐ" "ஒ" "ஓ" "ஔ"]
 10 |               ["க்" "க" "கா" "கி" "கீ" "கு" "கூ" "கெ" "கே" "கை" "கொ" "கோ" "கௌ"]
 11 |               ["ங்" "ங" "ஙா" "ஙி" "ஙீ" "ஙு" "ஙூ" "ஙெ" "ஙே" "ஙை" "ஙொ" "ஙோ" "ஙௌ"]
 12 |               ["ச்" "ச" "சா" "சி" "சீ" "சு" "சூ" "செ" "சே" "சை" "சொ" "சோ" "சௌ"]
 13 |               ["ஞ்" "ஞ" "ஞா" "ஞி" "ஞீ" "ஞு" "ஞூ" "ஞெ" "ஞே" "ஞை" "ஞொ" "ஞோ" "ஞௌ"]
 14 |               ["ட்" "ட" "டா" "டி" "டீ" "டு" "டூ" "டெ" "டே" "டை" "டொ" "டோ" "டௌ"]
 15 |               ["ண்" "ண" "ணா" "ணி" "ணீ" "ணு" "ணூ" "ணெ" "ணே" "ணை" "ணொ" "ணோ" "ணௌ"]
 16 |               ["த்" "த" "தா" "தி" "தீ" "து" "தூ" "தெ" "தே" "தை" "தொ" "தோ" "தௌ"]
 17 |               ["ந்" "ந" "நா" "நி" "நீ" "நு" "நூ" "நெ" "நே" "நை" "நொ" "நோ" "நௌ"]
 18 |               ["ப்" "ப" "பா" "பி" "பீ" "பு" "பூ" "பெ" "பே" "பை" "பொ" "போ" "பௌ"]
 19 |               ["ம்" "ம" "மா" "மி" "மீ" "மு" "மூ" "மெ" "மே" "மை" "மொ" "மோ" "மௌ"]
 20 |               ["ய்" "ய" "யா" "யி" "யீ" "யு" "யூ" "யெ" "யே" "யை" "யொ" "யோ" "யௌ"]
 21 |               ["ர்" "ர" "ரா" "ரி" "ரீ" "ரு" "ரூ" "ரெ" "ரே" "ரை" "ரொ" "ரோ" "ரௌ"]
 22 |               ["ல்" "ல" "லா" "லி" "லீ" "லு" "லூ" "லெ" "லே" "லை" "லொ" "லோ" "லௌ"]
 23 |               ["வ்" "வ" "வா" "வி" "வீ" "வு" "வூ" "வெ" "வே" "வை" "வொ" "வோ" "வௌ"]
 24 |               ["ழ்" "ழ" "ழா" "ழி" "ழீ" "ழு" "ழூ" "ழெ" "ழே" "ழை" "ழொ" "ழோ" "ழௌ"]
 25 |               ["ள்" "ள" "ளா" "ளி" "ளீ" "ளு" "ளூ" "ளெ" "ளே" "ளை" "ளொ" "ளோ" "ளௌ"]
 26 |               ["ற்" "ற" "றா" "றி" "றீ" "று" "றூ" "றெ" "றே" "றை" "றொ" "றோ" "றௌ"]
 27 |               ["ன்" "ன" "னா" "னி" "னீ" "னு" "னூ" "னெ" "னே" "னை" "னொ" "னோ" "னௌ"]])
 28 | 
 29 | (def vowels
 30 |   ;; the first row of `letters` starts with ஃ; here it is moved to the end of the row
 31 |   (let [[aytham & pure-vowels] (first letters)] (concat pure-vowels [aytham])))
 32 | 
 33 | (def c-cv-letters (next letters)) ; every row of `letters` except the first
 34 | 
 35 | (def consonants (for [row c-cv-letters] (first row))) ; the leading letter of each of those rows
 36 | 
 37 | ;;;;;;;;;;;
 38 | ;; trie fns
 39 | ;;;;;;;;;;;
 40 | 
 41 | (defn- trie-add-seq
 42 |   "Add the sequence `s` to `trie-map` (a trie represented as a nested map) and return the updated trie. Each element of `s` becomes one level of nesting, and the end of the sequence is marked by a nil key whose value is `term-val` (nil when not supplied)."
 43 |   ([trie-map s]
 44 |      (trie-add-seq trie-map s nil))
 45 |   ([trie-map s term-val]
 46 |      ;; assoc-in already walks whatever part of the path exists and
 47 |      ;; creates the missing nested maps, so no manual search for the
 48 |      ;; longest existing prefix is needed.  appending nil to the path
 49 |      ;; puts the terminus marker directly under the node for `s`,
 50 |      ;; including for the empty sequence, whose terminus value then
 51 |      ;; sits at the root instead of being wrapped in an extra map.
 52 |      ;; entries already stored under any shared prefix are left
 53 |      ;; untouched.
 54 |      ;; `vec` turns strings, seqs and vectors alike into a path that
 55 |      ;; nil can be appended to.
 56 |      (assoc-in trie-map (conj (vec s) nil) term-val)))
 57 | 
 58 | (defn make-trie
 59 |   "Build a trie (a nested map) from `sequence`. When `sequence` is a single-level map, each key is inserted as a sequence and its val is attached at that key's terminus. Otherwise `sequence` is treated as a (possibly nested) collection of input sequences: it is flattened and each resulting sequence is inserted with a nil terminus value."
 60 |   [sequence]
 61 |   (let [add-entry (fn [trie [k term-val]] (trie-add-seq trie k term-val))]
 62 |     (if-not (map? sequence)
 63 |       (reduce trie-add-seq {} (flatten sequence))
 64 |       (reduce add-entry {} sequence))))
 65 | 
 66 | (def ^:private
 67 |   ^{:doc "trie built from `letters`: one path per string that represents an individual தமிழ் letter, with nil attached at every terminus"}
 68 |   letter-trie (make-trie letters))
 69 | 
 70 | (defn trie-prefix-subtree
 71 |   "Walk `trie` along the elements of `sq`, one nested lookup per element, and return the subtree found at the end, or nil when that path is not in the trie. An empty `sq` returns `trie` itself."
 72 |   [trie sq]
 73 |   (reduce get trie sq))
 74 | 
 75 | (defn in-trie?
 76 |   "Return true when `sq` is a complete entry of `trie` (the trie of தமிழ் letters when only `sq` is given), i.e. the node reached by following `sq` carries the nil terminus key; false otherwise, including when `sq` is only a proper prefix of some entry."
 77 |   ([sq]
 78 |      (in-trie? letter-trie sq))
 79 |   ([trie sq]
 80 |      (let [subtree (trie-prefix-subtree trie sq)
 81 |            terminus (find subtree nil)]
 82 |        (some? terminus))))
 83 | 
 84 | (defn get-in-trie
 85 |   "Return the value attached to the terminus of `sq` in `trie` when `sq` is an entry of the trie and that value is non-nil; in every other case return the elements of `sq` concatenated into a string."
 86 |   [trie sq]
 87 |   (let [term-val (when (in-trie? trie sq)
 88 |                    (-> (trie-prefix-subtree trie sq)
 89 |                        (get nil)))]
 90 |     (if (some? term-val)
 91 |       term-val
 92 |       (apply str sq))))
 93 | 
 94 | (defn- backfill-new-chars
 95 |   "a helper fn for str->elems. `new-chars` is a run of chars that could not be extended any further as a path in `trie`; this fn splits that run into output elements. working from the front, it looks for the longest leading sub-run that is an entry of the trie, shrinking the candidate length by one on each miss, emits the result of get-in-trie for it, and repeats on the remainder. a candidate of length 1 is always emitted, whether or not it is an entry of the trie.
 96 |   the repeated shrinking makes this quadratic in the length of `new-chars`, on the assumption that such runs are short. it is what distinguishes, for example, a 3-elem chunk from 2 smaller chunks (ex: \"ksh\" vs \"k\" + \"sh\").
 97 |   options (an optional trailing map): despite its name, a truthy :flat-output returns the collected elements as-is (a vector), while the default flattens them, so that terminus values that are themselves sequences get spliced into the output."
 98 |   [trie new-chars & [{:keys [flat-output] :as opts}]]
 99 |   (loop [pending new-chars
100 |          found []
101 |          len (count new-chars)]
102 |     (cond
103 |       (zero? len) (if flat-output found (flatten found))
104 |       ;; a single char is always emitted; longer candidates must be an entry of the trie
105 |       (or (= 1 len) (in-trie? trie (take len pending)))
106 |       (let [remaining (drop len pending)]
107 |         (recur remaining (conj found (get-in-trie trie (take len pending))) (count remaining)))
108 |       :else (recur pending found (dec len)))))
109 | 
110 | (defn str->elems
111 |   "Split the string `s` into chunks according to `trie` (the trie of தமிழ் letters when only `s` is given). The input is scanned left to right, collecting chars for as long as they still form a path in the trie; each collected run is then handed to backfill-new-chars, which emits, per chunk, the value attached to the chunk's terminus if there is one (ex: useful for transliteration / format conversion) or else the chunk itself as a string. Returns [] when there is nothing to emit, otherwise a seq of the emitted elements. The optional trailing options map is accepted but currently has no effect."
112 |   ([s]
113 |      (str->elems letter-trie s))
114 |   ([trie s & [{:keys [transform] :as opts}]]
115 |      ;; finished elements are accumulated in a vector with `into`.
116 |      ;; appending with `concat` on every iteration would nest one lazy
117 |      ;; seq per emitted chunk, and realizing such a deeply nested seq
118 |      ;; overflows the stack on long inputs.
119 |      (let [len (count s)]
120 |        ;; idx       - position of the next char to read, 0 <= idx <= len
121 |        ;; new-chars - chars read since the last emitted chunk
122 |        ;; elems     - output elements finished so far
123 |        (loop [idx 0
124 |               new-chars []
125 |               elems []]
126 |          (if (= idx len)
127 |            ;; input consumed: flush whatever is still pending.  an
128 |            ;; empty result stays a vector and anything else is handed
129 |            ;; back as a seq, the same shapes callers received before.
130 |            (or (seq (into elems (backfill-new-chars trie new-chars))) [])
131 |            (let [next-char (.charAt s idx)
132 |                  candidate (conj new-chars next-char)]
133 |              ;; if the pending chars stop being a path in the trie
134 |              ;; once next-char is appended, they form a maximally long
135 |              ;; prefix: emit them and start over from next-char.  with
136 |              ;; nothing pending there is nothing to emit yet.
137 |              (if (and (seq new-chars) (nil? (trie-prefix-subtree trie (apply str candidate))))
138 |                (recur (inc idx) [next-char] (into elems (backfill-new-chars trie new-chars)))
139 |                (recur (inc idx) candidate elems))))))))
140 | 
141 | ;;;;;;;;;;;
142 | ;; letters & phonemes
143 | ;;;;;;;;;;;
144 | 
145 | (defn str->letters
146 |   "take a string and split it into its constituent தமிழ் + non-complex letters (non-complex = all left-to-right, 1-to-1 codepoint-to-glyph encodings -- this includes all Western languages)"
147 |   [s]
148 |   (str->elems s))
149 | 
150 | (def ^{:doc "a map whose keys are தமிழ் letters and whose values are sequences of the constituent phonemes (represented as strings) of those letters. letters are from the set {உயிர்-, மெய்-, உயிர்மெய்-}எழுத்துகள், phonemes are from the set {உயிர்-,மெய்-}எழுத்துகள்"}
151 |   phoneme-map ;; ஃ, the vowels and the bare consonants map to a 1-element vector of themselves; each consonant+vowel letter maps to [consonant vowel]
152 |   {"ஃ" ["ஃ"],
153 |    "அ" ["அ"],
154 |    "ஆ" ["ஆ"],
155 |    "இ" ["இ"],
156 |    "ஈ" ["ஈ"],
157 |    "உ" ["உ"],
158 |    "ஊ" ["ஊ"],
159 |    "எ" ["எ"],
160 |    "ஏ" ["ஏ"],
161 |    "ஐ" ["ஐ"],
162 |    "ஒ" ["ஒ"],
163 |    "ஓ" ["ஓ"],
164 |    "ஔ" ["ஔ"],
165 |    "க்" ["க்"],
166 |    "க" ["க்" "அ"],
167 |    "கா" ["க்" "ஆ"],
168 |    "கி" ["க்" "இ"],
169 |    "கீ" ["க்" "ஈ"],
170 |    "கு" ["க்" "உ"],
171 |    "கூ" ["க்" "ஊ"],
172 |    "கெ" ["க்" "எ"],
173 |    "கே" ["க்" "ஏ"],
174 |    "கை" ["க்" "ஐ"],
175 |    "கொ" ["க்" "ஒ"],
176 |    "கோ" ["க்" "ஓ"],
177 |    "கௌ" ["க்" "ஔ"],
178 |    "ங்" ["ங்"],
179 |    "ங" ["ங்" "அ"],
180 |    "ஙா" ["ங்" "ஆ"],
181 |    "ஙி" ["ங்" "இ"],
182 |    "ஙீ" ["ங்" "ஈ"],
183 |    "ஙு" ["ங்" "உ"],
184 |    "ஙூ" ["ங்" "ஊ"],
185 |    "ஙெ" ["ங்" "எ"],
186 |    "ஙே" ["ங்" "ஏ"],
187 |    "ஙை" ["ங்" "ஐ"],
188 |    "ஙொ" ["ங்" "ஒ"],
189 |    "ஙோ" ["ங்" "ஓ"],
190 |    "ஙௌ" ["ங்" "ஔ"],
191 |    "ச்" ["ச்"],
192 |    "ச" ["ச்" "அ"],
193 |    "சா" ["ச்" "ஆ"],
194 |    "சி" ["ச்" "இ"],
195 |    "சீ" ["ச்" "ஈ"],
196 |    "சு" ["ச்" "உ"],
197 |    "சூ" ["ச்" "ஊ"],
198 |    "செ" ["ச்" "எ"],
199 |    "சே" ["ச்" "ஏ"],
200 |    "சை" ["ச்" "ஐ"],
201 |    "சொ" ["ச்" "ஒ"],
202 |    "சோ" ["ச்" "ஓ"],
203 |    "சௌ" ["ச்" "ஔ"],
204 |    "ஞ்" ["ஞ்"],
205 |    "ஞ" ["ஞ்" "அ"],
206 |    "ஞா" ["ஞ்" "ஆ"],
207 |    "ஞி" ["ஞ்" "இ"],
208 |    "ஞீ" ["ஞ்" "ஈ"],
209 |    "ஞு" ["ஞ்" "உ"],
210 |    "ஞூ" ["ஞ்" "ஊ"],
211 |    "ஞெ" ["ஞ்" "எ"],
212 |    "ஞே" ["ஞ்" "ஏ"],
213 |    "ஞை" ["ஞ்" "ஐ"],
214 |    "ஞொ" ["ஞ்" "ஒ"],
215 |    "ஞோ" ["ஞ்" "ஓ"],
216 |    "ஞௌ" ["ஞ்" "ஔ"],
217 |    "ட்" ["ட்"],
218 |    "ட" ["ட்" "அ"],
219 |    "டா" ["ட்" "ஆ"],
220 |    "டி" ["ட்" "இ"],
221 |    "டீ" ["ட்" "ஈ"],
222 |    "டு" ["ட்" "உ"],
223 |    "டூ" ["ட்" "ஊ"],
224 |    "டெ" ["ட்" "எ"],
225 |    "டே" ["ட்" "ஏ"],
226 |    "டை" ["ட்" "ஐ"],
227 |    "டொ" ["ட்" "ஒ"],
228 |    "டோ" ["ட்" "ஓ"],
229 |    "டௌ" ["ட்" "ஔ"],
230 |    "ண்" ["ண்"],
231 |    "ண" ["ண்" "அ"],
232 |    "ணா" ["ண்" "ஆ"],
233 |    "ணி" ["ண்" "இ"],
234 |    "ணீ" ["ண்" "ஈ"],
235 |    "ணு" ["ண்" "உ"],
236 |    "ணூ" ["ண்" "ஊ"],
237 |    "ணெ" ["ண்" "எ"],
238 |    "ணே" ["ண்" "ஏ"],
239 |    "ணை" ["ண்" "ஐ"],
240 |    "ணொ" ["ண்" "ஒ"],
241 |    "ணோ" ["ண்" "ஓ"],
242 |    "ணௌ" ["ண்" "ஔ"],
243 |    "த்" ["த்"],
244 |    "த" ["த்" "அ"],
245 |    "தா" ["த்" "ஆ"],
246 |    "தி" ["த்" "இ"],
247 |    "தீ" ["த்" "ஈ"],
248 |    "து" ["த்" "உ"],
249 |    "தூ" ["த்" "ஊ"],
250 |    "தெ" ["த்" "எ"],
251 |    "தே" ["த்" "ஏ"],
252 |    "தை" ["த்" "ஐ"],
253 |    "தொ" ["த்" "ஒ"],
254 |    "தோ" ["த்" "ஓ"],
255 |    "தௌ" ["த்" "ஔ"],
256 |    "ந்" ["ந்"],
257 |    "ந" ["ந்" "அ"],
258 |    "நா" ["ந்" "ஆ"],
259 |    "நி" ["ந்" "இ"],
260 |    "நீ" ["ந்" "ஈ"],
261 |    "நு" ["ந்" "உ"],
262 |    "நூ" ["ந்" "ஊ"],
263 |    "நெ" ["ந்" "எ"],
264 |    "நே" ["ந்" "ஏ"],
265 |    "நை" ["ந்" "ஐ"],
266 |    "நொ" ["ந்" "ஒ"],
267 |    "நோ" ["ந்" "ஓ"],
268 |    "நௌ" ["ந்" "ஔ"],
269 |    "ப்" ["ப்"],
270 |    "ப" ["ப்" "அ"],
271 |    "பா" ["ப்" "ஆ"],
272 |    "பி" ["ப்" "இ"],
273 |    "பீ" ["ப்" "ஈ"],
274 |    "பு" ["ப்" "உ"],
275 |    "பூ" ["ப்" "ஊ"],
276 |    "பெ" ["ப்" "எ"],
277 |    "பே" ["ப்" "ஏ"],
278 |    "பை" ["ப்" "ஐ"],
279 |    "பொ" ["ப்" "ஒ"],
280 |    "போ" ["ப்" "ஓ"],
281 |    "பௌ" ["ப்" "ஔ"],
282 |    "ம்" ["ம்"],
283 |    "ம" ["ம்" "அ"],
284 |    "மா" ["ம்" "ஆ"],
285 |    "மி" ["ம்" "இ"],
286 |    "மீ" ["ம்" "ஈ"],
287 |    "மு" ["ம்" "உ"],
288 |    "மூ" ["ம்" "ஊ"],
289 |    "மெ" ["ம்" "எ"],
290 |    "மே" ["ம்" "ஏ"],
291 |    "மை" ["ம்" "ஐ"],
292 |    "மொ" ["ம்" "ஒ"],
293 |    "மோ" ["ம்" "ஓ"],
294 |    "மௌ" ["ம்" "ஔ"],
295 |    "ய்" ["ய்"],
296 |    "ய" ["ய்" "அ"],
297 |    "யா" ["ய்" "ஆ"],
298 |    "யி" ["ய்" "இ"],
299 |    "யீ" ["ய்" "ஈ"],
300 |    "யு" ["ய்" "உ"],
301 |    "யூ" ["ய்" "ஊ"],
302 |    "யெ" ["ய்" "எ"],
303 |    "யே" ["ய்" "ஏ"],
304 |    "யை" ["ய்" "ஐ"],
305 |    "யொ" ["ய்" "ஒ"],
306 |    "யோ" ["ய்" "ஓ"],
307 |    "யௌ" ["ய்" "ஔ"],
308 |    "ர்" ["ர்"],
309 |    "ர" ["ர்" "அ"],
310 |    "ரா" ["ர்" "ஆ"],
311 |    "ரி" ["ர்" "இ"],
312 |    "ரீ" ["ர்" "ஈ"],
313 |    "ரு" ["ர்" "உ"],
314 |    "ரூ" ["ர்" "ஊ"],
315 |    "ரெ" ["ர்" "எ"],
316 |    "ரே" ["ர்" "ஏ"],
317 |    "ரை" ["ர்" "ஐ"],
318 |    "ரொ" ["ர்" "ஒ"],
319 |    "ரோ" ["ர்" "ஓ"],
320 |    "ரௌ" ["ர்" "ஔ"],
321 |    "ல்" ["ல்"],
322 |    "ல" ["ல்" "அ"],
323 |    "லா" ["ல்" "ஆ"],
324 |    "லி" ["ல்" "இ"],
325 |    "லீ" ["ல்" "ஈ"],
326 |    "லு" ["ல்" "உ"],
327 |    "லூ" ["ல்" "ஊ"],
328 |    "லெ" ["ல்" "எ"],
329 |    "லே" ["ல்" "ஏ"],
330 |    "லை" ["ல்" "ஐ"],
331 |    "லொ" ["ல்" "ஒ"],
332 |    "லோ" ["ல்" "ஓ"],
333 |    "லௌ" ["ல்" "ஔ"],
334 |    "வ்" ["வ்"],
335 |    "வ" ["வ்" "அ"],
336 |    "வா" ["வ்" "ஆ"],
337 |    "வி" ["வ்" "இ"],
338 |    "வீ" ["வ்" "ஈ"],
339 |    "வு" ["வ்" "உ"],
340 |    "வூ" ["வ்" "ஊ"],
341 |    "வெ" ["வ்" "எ"],
342 |    "வே" ["வ்" "ஏ"],
343 |    "வை" ["வ்" "ஐ"],
344 |    "வொ" ["வ்" "ஒ"],
345 |    "வோ" ["வ்" "ஓ"],
346 |    "வௌ" ["வ்" "ஔ"],
347 |    "ழ்" ["ழ்"],
348 |    "ழ" ["ழ்" "அ"],
349 |    "ழா" ["ழ்" "ஆ"],
350 |    "ழி" ["ழ்" "இ"],
351 |    "ழீ" ["ழ்" "ஈ"],
352 |    "ழு" ["ழ்" "உ"],
353 |    "ழூ" ["ழ்" "ஊ"],
354 |    "ழெ" ["ழ்" "எ"],
355 |    "ழே" ["ழ்" "ஏ"],
356 |    "ழை" ["ழ்" "ஐ"],
357 |    "ழொ" ["ழ்" "ஒ"],
358 |    "ழோ" ["ழ்" "ஓ"],
359 |    "ழௌ" ["ழ்" "ஔ"],
360 |    "ள்" ["ள்"],
361 |    "ள" ["ள்" "அ"],
362 |    "ளா" ["ள்" "ஆ"],
363 |    "ளி" ["ள்" "இ"],
364 |    "ளீ" ["ள்" "ஈ"],
365 |    "ளு" ["ள்" "உ"],
366 |    "ளூ" ["ள்" "ஊ"],
367 |    "ளெ" ["ள்" "எ"],
368 |    "ளே" ["ள்" "ஏ"],
369 |    "ளை" ["ள்" "ஐ"],
370 |    "ளொ" ["ள்" "ஒ"],
371 |    "ளோ" ["ள்" "ஓ"],
372 |    "ளௌ" ["ள்" "ஔ"],
373 |    "ற்" ["ற்"],
374 |    "ற" ["ற்" "அ"],
375 |    "றா" ["ற்" "ஆ"],
376 |    "றி" ["ற்" "இ"],
377 |    "றீ" ["ற்" "ஈ"],
378 |    "று" ["ற்" "உ"],
379 |    "றூ" ["ற்" "ஊ"],
380 |    "றெ" ["ற்" "எ"],
381 |    "றே" ["ற்" "ஏ"],
382 |    "றை" ["ற்" "ஐ"],
383 |    "றொ" ["ற்" "ஒ"],
384 |    "றோ" ["ற்" "ஓ"],
385 |    "றௌ" ["ற்" "ஔ"],
386 |    "ன்" ["ன்"],
387 |    "ன" ["ன்" "அ"],
388 |    "னா" ["ன்" "ஆ"],
389 |    "னி" ["ன்" "இ"],
390 |    "னீ" ["ன்" "ஈ"],
391 |    "னு" ["ன்" "உ"],
392 |    "னூ" ["ன்" "ஊ"],
393 |    "னெ" ["ன்" "எ"],
394 |    "னே" ["ன்" "ஏ"],
395 |    "னை" ["ன்" "ஐ"],
396 |    "னொ" ["ன்" "ஒ"],
397 |    "னோ" ["ன்" "ஓ"],
398 |    "னௌ" ["ன்" "ஔ"]})
399 | 
400 | (def ^{:doc "a trie over the letters in phoneme-map; the value attached to each letter's terminus is that letter's sequence of phonemes, so passing this trie to str->elems splits a word directly into its phonemes"}
401 |   phoneme-trie (make-trie phoneme-map))
402 | 
403 | (def ^{:doc "phoneme-map with keys and vals swapped: phoneme sequence -> letter"} inverse-phoneme-map (zipmap (vals phoneme-map) (keys phoneme-map)))
404 | 
405 | (defn str->phonemes
406 |   "take a string and split it into its constituent தமிழ் phonemes, by running str->elems with phoneme-trie"
407 |   [s]
408 |   (->> s (str->elems phoneme-trie)))
409 | 
410 | ;; TODO: create a make-inverse-trie fn
411 | ;; TODO: turn str->elem into seq->elem, use that to refactor phonemes->str
412 | 
413 | (def ^{:private true
414 |        :doc "a trie whose paths are the concatenated phoneme strings of every letter in phoneme-map (ex: \"க்அ\") and whose terminus-attached values are the corresponding letters (ex: \"க\"). built once here rather than on every call to phonemes->str"}
415 |   concat-phoneme-letter-trie
416 |   (make-trie (into {} (for [[phonemes letter] inverse-phoneme-map] [(apply str phonemes) letter]))))
417 | 
418 | (defn phonemes->str
419 |   "given a seq of phonemes (strings), return a string in which the phonemes are combined into their proper letters. the phonemes are concatenated and the result is re-split with str->elems, so any text that does not combine into a letter is copied through unchanged"
420 |   [phoneme-seq]
421 |   (apply str (str->elems concat-phoneme-letter-trie (apply str phoneme-seq))))
422 | 
423 | ;;;;;;;;;;;;;;
424 | ;; sorting fns
425 | ;;;;;;;;;;;;;; 
426 | 
427 | (def ^{:private false
428 |        :doc "a flattened seq of all தமிழ் letters in lexicographical (alphabetical) order -- put another way, in the order of அகர முதல் னகர இறுவாய் as the 2500 yr old grammatical compendium தொல்காப்பியம் states in its outset"}
429 |   letter-seq (concat vowels (flatten c-cv-letters)))
430 | 
431 | (def ^{:doc "a map where the key is a தமிழ் letter, and the value is its 0-based position in letter-seq, i.e. its relative position in sort order"}
432 |   sort-map (into {} (map-indexed (fn [pos letter] [letter pos]) letter-seq)))
433 | 
434 | (defn letter-before?
435 |   "a 2-arg predicate telling whether the first string sorts before the second, assuming each string represents one individual letter. letters found in sort-map are ordered by their position there; a string not in sort-map sorts before any letter that is; two strings that are both absent are ordered with `compare`. note that two nil arguments yield true"
436 |   [s1 s2]
437 |   (let [[rank1 rank2] (map sort-map [s1 s2])]
438 |     (cond (and (nil? s1) (nil? s2)) true
439 |           (and (nil? rank1) (nil? rank2)) (neg? (compare s1 s2))
440 |           (and rank1 rank2) (< rank1 rank2)
441 |           :else (nil? rank1))))
442 | 
443 | (def ^{:doc "a comparator, built from letter-before?, for strings that each represent a single letter; it respects தமிழ் alphabetical order"}
444 |   letter-comp (comparator letter-before?))
445 | 
446 | (defn word-before?
447 |   "a 2-arg predicate telling whether the first string comes before the second lexicographically. both strings are split into letters with str->elems and compared letter by letter with letter-before?, so தமிழ் letters are handled in addition to 1-to-1 codepoint-to-letter encodings. a proper prefix sorts before the longer string; equal strings yield false"
448 |   [str1 str2]
449 |   (loop [xs (seq (str->elems str1))
450 |          ys (seq (str->elems str2))]
451 |     (cond (nil? xs) (some? ys)
452 |           (nil? ys) false
453 |           (= (first xs) (first ys)) (recur (next xs) (next ys))
454 |           :else (letter-before? (first xs) (first ys)))))
455 | 
456 | (def ^{:doc "a comparator, built from word-before?, for lexicographical comparisons of arbitrary strings (consisting of தமிழ் letters and letters from 1-to-1 encodings)"}
457 |   word-comp (comparator word-before?))
458 | 
459 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
460 | ;; word & character traits fns
461 | ;; position fns
462 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
463 | 
464 | (defn whitespace?
465 |   "returns whether a character is whitespace: on the JVM according to Character/isWhitespace, in ClojureScript according to the \\s regex class. returns nil when given nil or false"
466 |   [ch]
467 |   (if-not ch
468 |     nil
469 |     #?(:clj (Character/isWhitespace ch) :cljs (boolean (re-seq #"\s" (str ch))))))
470 | 
471 | (defn wordy-char?
472 |   "take a character and return whether it might go into a word or identifier -- a Unicode-aware counterpart of what \\w matches in regular expressions for ASCII. `$` and `_` are always rejected. otherwise the decision is Character/isJavaIdentifierPart on the JVM, and in ClojureScript every non-whitespace character is accepted. returns nil when given nil or false"
473 |   [ch]
474 |   (when ch
475 |     (let [excluded? (contains? #{\$ \_} ch)]
476 |       (if excluded?
477 |         false
478 |         #?(:clj (Character/isJavaIdentifierPart ch) :cljs (not (whitespace? (str ch))))))))
479 | 
480 | ;; TODO: DRY on seq-prefix & seq-prefix? -- is there a Clojure implementation?
481 | 
482 | (defn seq-prefix
483 |   "return, as a vector, the longest run of leading elements that the 2 input sequences have in common (empty when they differ at the first element or when either input is empty)"
484 |   [seq1 seq2]
485 |   ;; walk both inputs in lockstep:
486 |   ;;  - `map` over two collections stops as soon as either one runs
487 |   ;;    out, which covers the case of one input being shorter
488 |   ;;  - `take-while` keeps the aligned pairs up to the first mismatch
489 |   ;;  - both members of a kept pair are equal, so either one can be
490 |   ;;    taken as the shared element
491 |   (->> seq2
492 |        (map vector seq1)
493 |        (take-while (fn [[a b]] (= a b)))
494 |        (mapv first)))
495 | 
496 | (defn seq-prefix?
497 |   "return whether the query seq is a prefix of the target seq. an empty target has no prefixes (always false); an empty query is a prefix of any non-empty target"
498 |   [tgt qry]
499 |   (let [shared (seq-prefix tgt qry)]
500 |     (cond
501 |       (empty? tgt) false
502 |       (empty? qry) (empty? shared)
503 |       :else (= (seq qry) shared))))
504 | 
505 | (defn prefix?
506 |   "return whether the 2nd word is a prefix of the 1st word, comparing the தமிழ் phonemes of the two words rather than their raw characters"
507 |   [str1 str2]
508 |   (->> [str1 str2]
509 |        (map str->phonemes)
510 |        (apply seq-prefix?)))
511 | 
512 | (defn suffix?
513 |   "return whether the 2nd word is a suffix of the 1st word, comparing the தமிழ் phonemes of the two words: the reversed phonemes of the 2nd must be a prefix of the reversed phonemes of the 1st"
514 |   [str1 str2]
515 |   (->> [str1 str2]
516 |        (map (comp reverse str->phonemes))
517 |        (apply seq-prefix?)))
518 | 
519 | ;; TODO: DRY on seq-index-of -- is there already a Clojure implementation?
520 | 
521 | (defn seq-index-of
522 |   "given a target seq and a query seq, return the 0-based index of the first position in the target at which the query seq occurs, or -1 when it does not occur (or the target is empty).
523 |   the search calls seq-prefix? at each successive position and gives up as soon as fewer elements remain in the target than the query has; the query seq is counted up front"
524 |   [tgt qry]
525 |   (let [qlen (count qry)
526 |         too-short? (fn [xs] (< (count (take qlen xs)) qlen))]
527 |     (loop [remaining tgt
528 |            pos 0]
529 |       (cond
530 |         (empty? remaining) -1
531 |         (too-short? remaining) -1
532 |         (seq-prefix? remaining qry) pos
533 |         :else (recur (rest remaining) (inc pos))))))
534 | 
535 | (def ^{:doc "gives the index of the first occurrence of the query inside the target, or -1 when there is none: a wrapper around the native .indexOf call on the JVM, and seq-index-of in ClojureScript"}
536 |   index-of
537 |   #?(:clj (fn [tgt qry]
538 |             (.indexOf tgt qry))
539 |      :cljs seq-index-of))
540 | 
541 | (defn wordy-seq
542 |   "take a string and produce a seq of its wordy chunks, i.e. the maximal runs of characters accepted by wordy-char? -- a Unicode-aware version of matching the \\w+ regex pattern. returns nil for a nil input.  Originally, I called this fn word-seq, but that is not true for all languages and/or throughout time where there was no separation between words (ex: Thai, Chinese, Japanese, Latin manuscripts, ancient Thamil stone inscriptions, etc.)"
543 |   [s]
544 |   (when s
545 |     (->> s
546 |          (partition-by wordy-char?)
547 |          (filter #(wordy-char? (first %)))
548 |          (map #(apply str %)))))
549 | 
550 | (defn wordy-chunk-and-cursor-pos
551 |   "given a string `s` and a cursor index `idx` (0 <= idx <= (count s); the cursor sits just before the char at `idx`, and idx == (count s) means the end of the string), return a vector [chunk pos] where `chunk` is the wordy chunk that the cursor is inside of or directly adjacent to, and `pos` is the cursor position relative to the start of that chunk. returns nil when the chars on both sides of the cursor are non-wordy (or absent)."
552 |   [s idx]
553 |   (assert (<= 0 idx) (str "cursor postiion out of range [idx =" idx "]"))
554 |   (assert (<= idx (count s)) (str "cursor postiion out of range [idx =" idx "], [str len =" (count s) "]"))
555 |   ;; split the string at the cursor.  the run of same-kind chars that
556 |   ;; ends at the cursor and the run that starts at it are, by
557 |   ;; construction, flush with the cursor, so their positions follow
558 |   ;; from their lengths.  (looking the previous chunk up by its text
559 |   ;; finds the first occurrence, which is the wrong one whenever the
560 |   ;; same text also appears earlier in the string, ex: "ab ab" with
561 |   ;; the cursor at the end.)
562 |   (let [wordy-run (fn [chars]
563 |                     (when (wordy-char? (first chars))
564 |                       (apply str chars)))
565 |         prev-chunk (wordy-run (last (partition-by wordy-char? (subs s 0 idx))))
566 |         next-chunk (wordy-run (first (partition-by wordy-char? (subs s idx))))
567 |         ;; the cursor sits right after prev-chunk (0 when there is none)
568 |         pos (count prev-chunk)]
569 |     (cond
570 |       (and prev-chunk next-chunk) [(str prev-chunk next-chunk) pos]
571 |       prev-chunk [prev-chunk pos]
572 |       next-chunk [next-chunk 0]
573 |       :else nil)))
574 | 
575 | (def wordy-chunk-under (comp first wordy-chunk-and-cursor-pos)) ; same args as wordy-chunk-and-cursor-pos; returns only the chunk string, or nil when that fn returns nil
576 | 
577 | (defn cursor-adjust
578 |   "given a string `s`, a cursor position `idx` in that string, and a direction, return the cursor position (again as an index into `s`) moved onto a boundary between actual letters. a cursor already on a letter boundary, or one that touches no wordy chunk, is returned unchanged. otherwise the direction :to-first (or :முதல்-நோக்கி) picks the boundary before the cursor, and :to-last (or :பின்-நோக்கி), as well as any other direction including nil, picks the boundary after it"
579 |   [s idx direction]
580 |   (if-let [[wordy-chunk rel-idx] (wordy-chunk-and-cursor-pos s idx)]
581 |     (let [chunk-start (- idx rel-idx)
582 |           letters (str->letters wordy-chunk)
583 |           ;; start offset of every letter within the chunk, plus the chunk length
584 |           boundaries (reductions #(+ %1 (count %2)) 0 letters)
585 |           ;; boundaries are measured from the chunk start, so compare them with rel-idx, not idx
586 |           before-idx (last (take-while #(<= % rel-idx) boundaries))
587 |           after-idx (first (drop-while #(< % rel-idx) boundaries))]
588 |       (if (= before-idx after-idx)
589 |         idx ; already on a letter boundary
590 |         ;; translate the chosen boundary back into a position in s
591 |         (+ chunk-start
592 |            (case direction
593 |              (:to-first :முதல்-நோக்கி) before-idx
594 |              (:to-last :பின்-நோக்கி) after-idx
595 |              after-idx))))
596 |     idx)) ; the cursor touches no wordy chunk: nothing to adjust
597 | 


--------------------------------------------------------------------------------
/src/clj_thamil/format/convert.cljc:
--------------------------------------------------------------------------------
   1 | (ns clj-thamil.format.convert
   2 |   (:require ;; [clojure.algo.generic.functor :as ftor]
   3 |             [clojure.set :as set] 
   4 |             [clj-thamil.format :as fmt])
   5 |   #?(:clj (:gen-class)))
   6 | 
   7 | ;; A general note about the conversion and transliteration schemes
   8 | ;; defined by the maps in this namespace:
   9 | ;;
  10 | ;; There may be multiple English letter sequences mapping to the same
  11 | ;; தமிழ் letter.  Also note that we get the mapping for the reverse conversion
  12 | ;; by inverting the map (keys become values, and values become keys).
  13 | ;; When multiple keys map to the same value and you invert the map,
  14 | ;; the inverse will have the old value pointing to only one of the
  15 | ;; old keys, and which one is not predictable.  Therefore, the inverse
  16 | ;; map may need to be "manually adjusted" in that case to select a
  17 | ;; default mapping in the inverse map.
  18 | 
  19 | 
  20 | ;;;;;;;;
  21 | ;; தமிழ் <-> Romanized
  22 | ;;;;;;;;
  23 | 
  24 | (def ^{:doc "a map of English strings to their தமிழ் phonemes (and consonant clusters)."}
  25 |   romanized-தமிழ்-phoneme-map
  26 |   {"g" "க்" ; the first six entries are extra spellings of consonants that are listed again further down
  27 |    "s" "ச்"
  28 |    "d" "ட்"
  29 |    "w" "ந்"
  30 |    "b" "ப்"
  31 |    "z" "ழ்"
  32 |    "mb" "ம்ப்" ; the next four entries map to two-consonant clusters
  33 |    "nth" "ந்த்"
  34 |    "nr" "ன்ற்"
  35 |    "nd" "ண்ட்"
  36 |    ;; vowels (long vowels can be typed doubled or as a capital), then ஃ, then the consonants
  37 |    "a" "அ"
  38 |    "aa" "ஆ"
  39 |    "A" "ஆ"
  40 |    "i" "இ"
  41 |    "ii" "ஈ"
  42 |    "I" "ஈ"
  43 |    "u" "உ"
  44 |    "uu" "ஊ"
  45 |    "U" "ஊ"
  46 |    "e" "எ"
  47 |    "ee" "ஏ"
  48 |    "E" "ஏ"
  49 |    "ai" "ஐ"
  50 |    "o" "ஒ"
  51 |    "oo" "ஓ"
  52 |    "O" "ஓ"
  53 |    "au" "ஔ"
  54 |    "q" "ஃ"
  55 |    "k" "க்"
  56 |    "ng" "ங்"
  57 |    "ch" "ச்"
  58 |    "nj" "ஞ்"
  59 |    "t" "ட்"
  60 |    "N" "ண்"
  61 |    "th" "த்"
  62 |    "n-" "ந்"
  63 |    "p" "ப்"
  64 |    "m" "ம்"
  65 |    "y" "ய்"
  66 |    "r" "ர்"
  67 |    "l" "ல்"
  68 |    "v" "வ்"
  69 |    "zh" "ழ்"
  70 |    "L" "ள்"
  71 |    "R" "ற்"
  72 |    "n" "ன்"})
  73 | 
  74 | (def ^{:doc "designates specific transliterations of phonemes / phoneme clusters in the தமிழ்->English direction (ex: resolving situations where multiple English sequences map to a single தமிழ் phoneme)"}
  75 |   தமிழ்-romanized-phoneme-overrides
  76 |   {"ஓ" "O" 
  77 |    "ஏ" "E"
  78 |    "க்" "k"
  79 |    "ச்" "ch"
  80 |    "ட்" "t"
  81 |    "ந்" "n" ; NOTE(review): "n" is also the only input spelling of ன் in romanized-தமிழ்-phoneme-map, so ந் and ன் both romanize to "n" -- confirm this is intended
  82 |    "ப்" "p"
  83 |    "ழ்" "zh"
  84 |    "ங்க்" "ng" ; a cluster key that is not a value of romanized-தமிழ்-phoneme-map, so it exists only in this direction
  85 |    "ஆ" "aa"
  86 |    "ஈ" "ii"
  87 |    "ஊ" "uu"
  88 |    "ன்ப்" "nb" ; the last two cluster keys likewise exist only in this direction, and share one output
  89 |    "ண்ப்" "nb"})
  90 | 
  91 | (def ^{:doc "the inverse of romanized-தமிழ்-phoneme-map, with the entries of தமிழ்-romanized-phoneme-overrides laid on top: manual mappings for தமிழ் letters that can be input in multiple ways (or whose transliteration into English should be different than how it is input via English)"}
  92 |   தமிழ்-romanized-phoneme-map
  93 |   (into (set/map-invert romanized-தமிழ்-phoneme-map)
  94 |         தமிழ்-romanized-phoneme-overrides))
  95 | 
  96 | (def romanized-தமிழ்-phoneme-trie (fmt/make-trie romanized-தமிழ்-phoneme-map)) ; English chunks -> தமிழ் phonemes
  97 | 
  98 | (def தமிழ்-romanized-phoneme-trie (fmt/make-trie தமிழ்-romanized-phoneme-map)) ; தமிழ் phonemes -> English chunks
  99 | 
 100 | (defn romanized->தமிழ்
 101 |   "transliterates a string of English (transliterated தமிழ்) into the தமிழ் that it represents: the input is split into தமிழ் phonemes via romanized-தமிழ்-phoneme-trie, and the phonemes are then combined into letters"
 102 |   [s]
 103 |   (->> s (fmt/str->elems romanized-தமிழ்-phoneme-trie) fmt/phonemes->str))
 104 | 
 105 | (defn தமிழ்->romanized
 106 |   "transliterates a தமிழ் string into English (transliterated தமிழ்): the input is split into phonemes, and the concatenated phonemes are then re-split and replaced via தமிழ்-romanized-phoneme-trie"
 107 |   [s]
 108 |   (let [phoneme-str (apply str (fmt/str->phonemes s))
 109 |         romanized-chunks (fmt/str->elems தமிழ்-romanized-phoneme-trie phoneme-str)]
 110 |     ;; join the per-phoneme (or per-cluster) English chunks into one string
 111 |     (apply str romanized-chunks)))
 112 | 
 113 | ;;;;;;;;
 114 | ;; தமிழ் <-> TAB
 115 | ;;;;;;;;
 116 | 
 117 | (def tab-map ; Unicode தமிழ் letter -> the string used for that letter in the TAB encoding
 118 |   {"அ" "Ü" ; vowels and ஃ come first, then 13 entries per consonant (் form, bare consonant, ா ி ீ ு ூ ெ ே ை ொ ோ ௌ)
 119 |    "ஆ" "Ý"
 120 |    "இ" "Þ"
 121 |    "ஈ" "ß"
 122 |    "உ" "à"
 123 |    "ஊ" "á"
 124 |    "எ" "â"
 125 |    "ஏ" "ã"
 126 |    "ஐ" "ä"
 127 |    "ஒ" "å"
 128 |    "ஓ" "æ"
 129 |    "ஔ" "å÷"
 130 |    "ஃ" "ç"
 131 |    "க்" "è¢"
 132 |    "க" "è"
 133 |    "கா" "è£"
 134 |    "கி" "è¤"
 135 |    "கீ" "è¦"
 136 |    "கு" "°"
 137 |    "கூ" "Ã"
 138 |    "கெ" "ªè"
 139 |    "கே" "«è"
 140 |    "கை" "¬è"
 141 |    "கொ" "ªè£"
 142 |    "கோ" "«è£"
 143 |    "கௌ" "ªè÷"
 144 |    "ங்" "é¢"
 145 |    "ங" "é"
 146 |    "ஙா" "é£"
 147 |    "ஙி" "é¤"
 148 |    "ஙீ" "é¦"
 149 |    "ஙு" "±"
 150 |    "ஙூ" "Ä"
 151 |    "ஙெ" "ªé"
 152 |    "ஙே" "«é"
 153 |    "ஙை" "¬é"
 154 |    "ஙொ" "ªé£"
 155 |    "ஙோ" "«é£"
 156 |    "ஙௌ" "ªé÷"
 157 |    "ச்" "ê¢"
 158 |    "ச" "ê"
 159 |    "சா" "ê£"
 160 |    "சி" "ê¤"
 161 |    "சீ" "ê¦"
 162 |    "சு" "²"
 163 |    "சூ" "Å"
 164 |    "செ" "ªê"
 165 |    "சே" "«ê"
 166 |    "சை" "¬ê"
 167 |    "சொ" "ªê£"
 168 |    "சோ" "«ê£"
 169 |    "சௌ" "ªê÷"
 170 |    "ஞ்" "ë¢"
 171 |    "ஞ" "ë"
 172 |    "ஞா" "ë£"
 173 |    "ஞி" "ë¤"
 174 |    "ஞீ" "ë¦"
 175 |    "ஞு" "³"
 176 |    "ஞூ" "Æ"
 177 |    "ஞெ" "ªë"
 178 |    "ஞே" "«ë"
 179 |    "ஞை" "¬ë"
 180 |    "ஞொ" "ªë£"
 181 |    "ஞோ" "«ë£"
 182 |    "ஞௌ" "ªë÷"
 183 |    "ட்" "ì¢"
 184 |    "ட" "ì"
 185 |    "டா" "ì£"
 186 |    "டி" "®"
 187 |    "டீ" "ì¦"
 188 |    "டு" "´"
 189 |    "டூ" "Ç"
 190 |    "டெ" "ªì"
 191 |    "டே" "«ì"
 192 |    "டை" "¬ì"
 193 |    "டொ" "ªì£"
 194 |    "டோ" "«ì£"
 195 |    "டௌ" "ªì÷"
 196 |    "ண்" "í¢"
 197 |    "ண" "í"
 198 |    "ணா" "í£"
 199 |    "ணி" "í¤"
 200 |    "ணீ" "í¦"
 201 |    "ணு" "µ"
 202 |    "ணூ" "È"
 203 |    "ணெ" "ªí"
 204 |    "ணே" "«í"
 205 |    "ணை" "¬í"
 206 |    "ணொ" "ªí£"
 207 |    "ணோ" "«í£"
 208 |    "ணௌ" "ªí÷"
 209 |    "த்" "î¢"
 210 |    "த" "î"
 211 |    "தா" "î£"
 212 |    "தி" "î¤"
 213 |    "தீ" "î¦"
 214 |    "து" "¶"
 215 |    "தூ" "É"
 216 |    "தெ" "ªî"
 217 |    "தே" "«î"
 218 |    "தை" "¬î"
 219 |    "தொ" "ªî£"
 220 |    "தோ" "«î£"
 221 |    "தௌ" "ªî÷"
 222 |    "ந்" "ï¢"
 223 |    "ந" "ï"
 224 |    "நா" "ï£"
 225 |    "நி" "ï¤"
 226 |    "நீ" "ï¦"
 227 |    "நு" "¸"
 228 |    "நூ" "Ë"
 229 |    "நெ" "ªï"
 230 |    "நே" "«ï"
 231 |    "நை" "¬ï"
 232 |    "நொ" "ªï£"
 233 |    "நோ" "«ï£"
 234 |    "நௌ" "ªï÷"
 235 |    "ப்" "ð¢"
 236 |    "ப" "ð"
 237 |    "பா" "ð£"
 238 |    "பி" "ð¤"
 239 |    "பீ" "ð¦"
 240 |    "பு" "¹"
 241 |    "பூ" "Ì"
 242 |    "பெ" "ªð"
 243 |    "பே" "«ð"
 244 |    "பை" "¬ð"
 245 |    "பொ" "ªð£"
 246 |    "போ" "«ð£"
 247 |    "பௌ" "ªð÷"
 248 |    "ம்" "ñ¢"
 249 |    "ம" "ñ"
 250 |    "மா" "ñ£"
 251 |    "மி" "ñ¤"
 252 |    "மீ" "ñ¦"
 253 |    "மு" "º"
 254 |    "மூ" "Í"
 255 |    "மெ" "ªñ"
 256 |    "மே" "«ñ"
 257 |    "மை" "¬ñ"
 258 |    "மொ" "ªñ£"
 259 |    "மோ" "«ñ£"
 260 |    "மௌ" "ªñ÷"
 261 |    "ய்" "ò¢"
 262 |    "ய" "ò"
 263 |    "யா" "ò£"
 264 |    "யி" "ò¤"
 265 |    "யீ" "ò¦"
 266 |    "யு" "»"
 267 |    "யூ" "Î"
 268 |    "யெ" "ªò"
 269 |    "யே" "«ò"
 270 |    "யை" "¬ò"
 271 |    "யொ" "ªò£"
 272 |    "யோ" "«ò£"
 273 |    "யௌ" "ªò÷"
 274 |    "ர்" "ó¢"
 275 |    "ர" "ó"
 276 |    "ரா" "ó£"
 277 |    "ரி" "ó¤"
 278 |    "ரீ" "ó¦"
 279 |    "ரு" "¼"
 280 |    "ரூ" "Ï"
 281 |    "ரெ" "ªó"
 282 |    "ரே" "«ó"
 283 |    "ரை" "¬ó"
 284 |    "ரொ" "ªó£"
 285 |    "ரோ" "«ó£"
 286 |    "ரௌ" "ªó÷"
 287 |    "ல்" "ô¢"
 288 |    "ல" "ô"
 289 |    "லா" "ô£"
 290 |    "லி" "ô¤"
 291 |    "லீ" "ô¦"
 292 |    "லு" "½"
 293 |    "லூ" "Ö"
 294 |    "லெ" "ªô"
 295 |    "லே" "«ô"
 296 |    "லை" "¬ô"
 297 |    "லொ" "ªô£"
 298 |    "லோ" "«ô£"
 299 |    "லௌ" "ªô÷"
 300 |    "வ்" "õ¢"
 301 |    "வ" "õ"
 302 |    "வா" "õ£"
 303 |    "வி" "õ¤"
 304 |    "வீ" "õ¦"
 305 |    "வு" "¾"
 306 |    "வூ" "×"
 307 |    "வெ" "ªõ"
 308 |    "வே" "«õ"
 309 |    "வை" "¬õ"
 310 |    "வொ" "ªõ£"
 311 |    "வோ" "«õ£"
 312 |    "வௌ" "ªõ÷"
 313 |    "ழ்" "ö¢"
 314 |    "ழ" "ö"
 315 |    "ழா" "ö£"
 316 |    "ழி" "ö¤"
 317 |    "ழீ" "ö¦"
 318 |    "ழு" "¿"
 319 |    "ழூ" "Ø"
 320 |    "ழெ" "ªö"
 321 |    "ழே" "«ö"
 322 |    "ழை" "¬ö"
 323 |    "ழொ" "ªö£"
 324 |    "ழோ" "«ö£"
 325 |    "ழௌ" "ªö÷"
 326 |    "ள்" "÷¢"
 327 |    "ள" "÷"
 328 |    "ளா" "÷£"
 329 |    "ளி" "÷¤"
 330 |    "ளீ" "÷¦"
 331 |    "ளு" "À"
 332 |    "ளூ" "Ù"
 333 |    "ளெ" "ª÷"
 334 |    "ளே" "«÷"
 335 |    "ளை" "¬÷"
 336 |    "ளொ" "ª÷£"
 337 |    "ளோ" "«÷£"
 338 |    "ளௌ" "ª÷÷"
 339 |    "ற்" "ø¢"
 340 |    "ற" "ø"
 341 |    "றா" "ø£"
 342 |    "றி" "ø¤"
 343 |    "றீ" "ø¦"
 344 |    "று" "Á"
 345 |    "றூ" "Ú"
 346 |    "றெ" "ªø"
 347 |    "றே" "«ø"
 348 |    "றை" "¬ø"
 349 |    "றொ" "ªø£"
 350 |    "றோ" "«ø£"
 351 |    "றௌ" "ªø÷"
 352 |    "ன்" "ù¢"
 353 |    "ன" "ù"
 354 |    "னா" "ù£"
 355 |    "னி" "ù¤"
 356 |    "னீ" "ù¦"
 357 |    "னு" "Â"
 358 |    "னூ" "Û"
 359 |    "னெ" "ªù"
 360 |    "னே" "«ù"
 361 |    "னை" "¬ù"
 362 |    "னொ" "ªù£"
 363 |    "னோ" "«ù£"
 364 |    "னௌ" "ªù÷"})
 365 | 
 366 | ;;;;;;;;
 367 | ;; தமிழ் <-> Bamini
 368 | ;;;;;;;;
 369 | 
 370 | (def bamini-map ; Unicode தமிழ் letter -> the key sequence used for that letter in the Bamini encoding
 371 |   {"அ" "m" ; vowels and ஃ first, then one run per consonant, then the ஜ ஷ ஸ ஹ runs, க்ஷ் and ஶ்ரீ
 372 |    "ஆ" "M"
 373 |    "இ" ","
 374 |    "ஈ" "<"
 375 |    "உ" "c"
 376 |    "ஊ" "C"
 377 |    "எ" "v"
 378 |    "ஏ" "V"
 379 |    "ஐ" "I"
 380 |    "ஒ" "x"
 381 |    "ஓ" "X"
 382 |    "ஔ" "xs"
 383 |    "ஃ" "/"
 384 |    "க்" "f;"
 385 |    "க" "f"
 386 |    "கா" "fh"
 387 |    "கி" "fp"
 388 |    "கீ" "fP"
 389 |    "கு" "F"
 390 |    "கூ" "$"
 391 |    "கெ" "nf"
 392 |    "கே" "Nf"
 393 |    "கை" "if"
 394 |    "கொ" "nfh"
 395 |    "கோ" "Nfh"
 396 |    "கௌ" "nfs"
 397 |    "ங்" "q;"
 398 |    "ங" "q"
 399 |    "ஙா" "qh"
 400 |    "ஙி" "qp"
 401 |    "ஙீ" "qP"
 402 |    ;; "ஙு" nil
 403 |    ;; "ஙூ" nil
 404 |    "ஙெ" "nq"
 405 |    "ஙே" "Nq"
 406 |    "ஙை" "iq"
 407 |    "ஙொ" "nqh"
 408 |    "ஙோ" "Nqh"
 409 |    "ஙௌ" "nqs"
 410 |    "ச்" "r;"
 411 |    "ச" "r"
 412 |    "சா" "rh"
 413 |    "சி" "rp"
 414 |    "சீ" "rP"
 415 |    "சு" "R"
 416 |    "சூ" "#"
 417 |    "செ" "nr"
 418 |    "சே" "Nr"
 419 |    "சை" "ir"
 420 |    "சொ" "nrh"
 421 |    "சோ" "Nrh"
 422 |    "சௌ" "nrs"
 423 |    "ஞ்" "Q;"
 424 |    "ஞ" "Q"
 425 |    "ஞா" "Qh"
 426 |    "ஞி" "Qp"
 427 |    "ஞீ" "QP"
 428 |    ;; "ஞு" nil
 429 |    ;; "ஞூ" nil
 430 |    "ஞெ" "nQ"
 431 |    "ஞே" "NQ"
 432 |    "ஞை" "iQ"
 433 |    "ஞொ" "nQh"
 434 |    "ஞோ" "NQh"
 435 |    "ஞௌ" "nQs"
 436 |    "ட்" "l;"
 437 |    "ட" "l"
 438 |    "டா" "lh"
 439 |    "டி" "b"
 440 |    "டீ" "B"
 441 |    "டு" "L"
 442 |    "டூ" "^"
 443 |    "டெ" "nl"
 444 |    "டே" "Nl"
 445 |    "டை" "il"
 446 |    "டொ" "nlh"
 447 |    "டோ" "Nlh"
 448 |    "டௌ" "nls"
 449 |    "ண்" "z;"
 450 |    "ண" "z"
 451 |    "ணா" "zh"
 452 |    "ணி" "zp"
 453 |    "ணீ" "zP"
 454 |    "ணு" "Z"
 455 |    "ணூ" "Z}"
 456 |    "ணெ" "nz"
 457 |    "ணே" "Nz"
 458 |    "ணை" "iz"
 459 |    "ணொ" "nzh"
 460 |    "ணோ" "Nzh"
 461 |    "ணௌ" "nzs"
 462 |    "த்" "j;"
 463 |    "த" "j"
 464 |    "தா" "jh"
 465 |    "தி" "jp"
 466 |    "தீ" "jP"
 467 |    "து" "J"
 468 |    "தூ" "J}"
 469 |    "தெ" "nj"
 470 |    "தே" "Nj"
 471 |    "தை" "ij"
 472 |    "தொ" "njh"
 473 |    "தோ" "Njh"
 474 |    "தௌ" "njs"
 475 |    "ந்" "e;"
 476 |    "ந" "e"
 477 |    "நா" "eh"
 478 |    "நி" "ep"
 479 |    "நீ" "eP"
 480 |    "நு" "E"
 481 |    "நூ" "E}"
 482 |    "நெ" "ne"
 483 |    "நே" "Ne"
 484 |    "நை" "ie"
 485 |    "நொ" "neh"
 486 |    "நோ" "Neh"
 487 |    "நௌ" "nes"
 488 |    "ப்" "g;"
 489 |    "ப" "g"
 490 |    "பா" "gh"
 491 |    "பி" "gp"
 492 |    "பீ" "gP"
 493 |    "பு" "G"
 494 |    "பூ" "G+"
 495 |    "பெ" "ng"
 496 |    "பே" "Ng"
 497 |    "பை" "ig"
 498 |    "பொ" "ngh"
 499 |    "போ" "Ngh"
 500 |    "பௌ" "ngs"
 501 |    "ம்" "k;"
 502 |    "ம" "k"
 503 |    "மா" "kh"
 504 |    "மி" "kp"
 505 |    "மீ" "kP"
 506 |    "மு" "K"
 507 |    "மூ" "%"
 508 |    "மெ" "nk"
 509 |    "மே" "Nk"
 510 |    "மை" "ik"
 511 |    "மொ" "nkh"
 512 |    "மோ" "Nkh"
 513 |    "மௌ" "nks"
 514 |    "ய்" "a;"
 515 |    "ய" "a"
 516 |    "யா" "ah"
 517 |    "யி" "ap"
 518 |    "யீ" "aP"
 519 |    "யு" "A"
 520 |    "யூ" "A+"
 521 |    "யெ" "na"
 522 |    "யே" "Na"
 523 |    "யை" "ia"
 524 |    "யொ" "nah"
 525 |    "யோ" "Nah"
 526 |    "யௌ" "nas"
 527 |    "ர்" "u;"
 528 |    "ர" "u"
 529 |    "ரா" "uh"
 530 |    "ரி" "up"
 531 |    "ரீ" "uP"
 532 |    "ரு" "U"
 533 |    "ரூ" "&"
 534 |    "ரெ" "nu"
 535 |    "ரே" "Nu"
 536 |    "ரை" "iu"
 537 |    "ரொ" "nuh"
 538 |    "ரோ" "Nuh"
 539 |    "ரௌ" "nus"
 540 |    "ல்" "y;"
 541 |    "ல" "y"
 542 |    "லா" "yh"
 543 |    "லி" "yp"
 544 |    "லீ" "yP"
 545 |    "லு" "Y"
 546 |    "லூ" "Y}"
 547 |    "லெ" "ny"
 548 |    "லே" "Ny"
 549 |    "லை" "iy"
 550 |    "லொ" "nyh"
 551 |    "லோ" "Nyh"
 552 |    "லௌ" "nys"
 553 |    "வ்" "t;"
 554 |    "வ" "t"
 555 |    "வா" "th"
 556 |    "வி" "tp"
 557 |    "வீ" "tP"
 558 |    "வு" "T"
 559 |    "வூ" "T+"
 560 |    "வெ" "nt"
 561 |    "வே" "Nt"
 562 |    "வை" "it"
 563 |    "வொ" "nth"
 564 |    "வோ" "Nth"
 565 |    "வௌ" "ntt" ; NOTE(review): every other ௌ row is n + consonant + s, so "nts" looks intended; this table is also inverted for Bamini->Unicode, so confirm the effect on decoding before changing it
 566 |    "ழ்" "o;"
 567 |    "ழ" "o"
 568 |    "ழா" "oh"
 569 |    "ழி" "op"
 570 |    "ழீ" "oP"
 571 |    "ழு" "O"
 572 |    "ழூ" "*"
 573 |    "ழெ" "no"
 574 |    "ழே" "No"
 575 |    "ழை" "io"
 576 |    "ழொ" "noh"
 577 |    "ழோ" "Noh"
 578 |    "ழௌ" "noo" ; NOTE(review): breaks the n + consonant + s pattern of the other ௌ rows ("nos" expected?) — confirm
 579 |    "ள்" "s;"
 580 |    "ள" "s"
 581 |    "ளா" "sh"
 582 |    "ளி" "sp"
 583 |    "ளீ" "sP"
 584 |    "ளு" "S"
 585 |    "ளூ" "Sh" ; NOTE(review): the only ூ row spelled as the ு code plus "h" — verify against the Bamini font table
 586 |    "ளெ" "ns"
 587 |    "ளே" "Ns"
 588 |    "ளை" "is"
 589 |    "ளொ" "nsh"
 590 |    "ளோ" "Nsh"
 591 |    "ளௌ" "nss"
 592 |    "ற்" "w;"
 593 |    "ற" "w"
 594 |    "றா" "wh"
 595 |    "றி" "wp"
 596 |    "றீ" "wP"
 597 |    "று" "W"
 598 |    "றூ" "W}"
 599 |    "றெ" "nw"
 600 |    "றே" "Nw"
 601 |    "றை" "iw"
 602 |    "றொ" "nwh"
 603 |    "றோ" "Nwh"
 604 |    "றௌ" "nws"
 605 |    "ன்" "d;"
 606 |    "ன" "d"
 607 |    "னா" "dh"
 608 |    "னி" "dp"
 609 |    "னீ" "dP"
 610 |    "னு" "D"
 611 |    "னூ" "D}"
 612 |    "னெ" "nd"
 613 |    "னே" "Nd"
 614 |    "னை" "id"
 615 |    "னொ" "ndh"
 616 |    "னோ" "Ndh"
 617 |    "னௌ" "nds"
 618 | 
 619 |    "ஜ்" "[;"
 620 |    "ஜ" "["
 621 |    "ஜா" "[h"
 622 |    "ஜி" "[p"
 623 |    "ஜீ" "[P"
 624 |    "ஜு" "[{"
 625 |    "ஜூ" "[\""
 626 |    "ஜெ" "n["
 627 |    "ஜே" "N["
 628 |    "ஜை" "i["
 629 |    "ஜொ" "n[h"
 630 |    "ஜோ" "N[h"
 631 |    "ஜௌ" "n[s"
 632 | 
 633 |    "ஷ்" "\\;"
 634 |    "ஷ" "\\"
 635 |    "ஷா" "\\h"
 636 |    "ஷி" "\\p"
 637 |    "ஷீ" "\\P"
 638 |    "ஷு" "\\{"
 639 |    "ஷூ" "\\\""
 640 |    "ஷெ" "n\\"
 641 |    "ஷே" "N\\"
 642 |    "ஷை" "i\\"
 643 |    "ஷொ" "n\\h"
 644 |    "ஷோ" "N\\h"
 645 |    "ஷௌ" "n\\s"
 646 | 
 647 |    "ஸ்" "];"
 648 |    "ஸ" "]"
 649 |    "ஸா" "]h"
 650 |    "ஸி" "]p"
 651 |    "ஸீ" "]P"
 652 |    "ஸு" "]{"
 653 |    "ஸூ" "]\""
 654 |    "ஸெ" "n]"
 655 |    "ஸே" "N]"
 656 |    "ஸை" "i]"
 657 |    "ஸொ" "n]h"
 658 |    "ஸோ" "N]h"
 659 |    "ஸௌ" "n]s"
 660 | 
 661 |    "ஹ்" "`;"
 662 |    "ஹ" "`"
 663 |    "ஹா" "`h"
 664 |    "ஹி" "`p"
 665 |    "ஹீ" "`P"
 666 |    "ஹு" "`{"
 667 |    "ஹூ" "`\""
 668 |    "ஹெ" "n`"
 669 |    "ஹே" "N`"
 670 |    "ஹை" "i`"
 671 |    "ஹொ" "n`h"
 672 |    "ஹோ" "N`h"
 673 |    "ஹௌ" "n`s"
 674 | 
 675 |    "க்ஷ்" "~;"
 676 | 
 677 |    "ஶ்ரீ" "="
 678 |    
 679 |    }) 
 680 | 
 681 | ;;;;;;;;
 682 | ;; தமிழ் <-> TSCII
 683 | ;;;;;;;;
 684 | 
 685 | (def tscii-map ; Unicode தமிழ் letter -> the string used for that letter in the TSCII encoding
 686 |   {"அ" "«" ; vowels and ஃ come first, then 13 entries per consonant (் form, bare consonant, ா ி ீ ு ூ ெ ே ை ொ ோ ௌ)
 687 |    "ஆ" "¬"
 688 |    "இ" "­" ; NOTE(review): the value is an invisible character (looks like a soft hyphen) — keep it intact when editing
 689 |    "ஈ" "®"
 690 |    "உ" "¯"
 691 |    "ஊ" "°"
 692 |    "எ" "±"
 693 |    "ஏ" "²"
 694 |    "ஐ" "³"
 695 |    "ஒ" "´"
 696 |    "ஓ" "µ"
 697 |    "ஔ" "¶"
 698 |    "ஃ" "∙"
 699 |    "க்" "ì"
 700 |    "க" "¸"
 701 |    "கா" "¸¡"
 702 |    "கி" "¸¢"
 703 |    "கீ" "¸£"
 704 |    "கு" "Ì"
 705 |    "கூ" "Ü"
 706 |    "கெ" "¦¸"
 707 |    "கே" "§¸"
 708 |    "கை" "¨¸"
 709 |    "கொ" "¦¸¡"
 710 |    "கோ" "§¸¡"
 711 |    "கௌ" "¦¸ª"
 712 |    "ங்" "í"
 713 |    "ங" "¹"
 714 |    "ஙா" "¹¡"
 715 |    "ஙி" "¹¢"
 716 |    "ஙீ" "¹£"
 717 |    "ஙு" "™"
 718 |    "ஙூ" "›"
 719 |    "ஙெ" "¦¹"
 720 |    "ஙே" "§¹"
 721 |    "ஙை" "¨¹"
 722 |    "ஙொ" "¦¹¡"
 723 |    "ஙோ" "§¹¡"
 724 |    "ஙௌ" "¦¹ª"
 725 |    "ச்" "î"
 726 |    "ச" "º"
 727 |    "சா" "º¡"
 728 |    "சி" "º¢"
 729 |    "சீ" "º£"
 730 |    "சு" "Í"
 731 |    "சூ" "Ý"
 732 |    "செ" "¦º"
 733 |    "சே" "§º"
 734 |    "சை" "¨º"
 735 |    "சொ" "¦º¡"
 736 |    "சோ" "§º¡"
 737 |    "சௌ" "¦ºª"
 738 |    "ஞ்" "ï"
 739 |    "ஞ" "»"
 740 |    "ஞா" "»¡"
 741 |    "ஞி" "»¢"
 742 |    "ஞீ" "»£"
 743 |    "ஞு" "" ; NOTE(review): the value shows up empty here — confirm the TSCII character for ஞு was not lost
 744 |    "ஞூ" "œ"
 745 |    "ஞெ" "¦»"
 746 |    "ஞே" "§»"
 747 |    "ஞை" "¨»"
 748 |    "ஞொ" "¦»¡"
 749 |    "ஞோ" "§»¡"
 750 |    "ஞௌ" "¦»ª"
 751 |    "ட்" "ð"
 752 |    "ட" "¼"
 753 |    "டா" "¼¡"
 754 |    "டி" "Ê"
 755 |    "டீ" "Ë"
 756 |    "டு" "Î"
 757 |    "டூ" "Þ"
 758 |    "டெ" "¦¼"
 759 |    "டே" "§¼"
 760 |    "டை" "¨¼"
 761 |    "டொ" "¦¼¡"
 762 |    "டோ" "§¼¡"
 763 |    "டௌ" "¦¼ª"
 764 |    "ண்" "ñ"
 765 |    "ண" "½"
 766 |    "ணா" "½¡"
 767 |    "ணி" "½¢"
 768 |    "ணீ" "½£"
 769 |    "ணு" "Ï"
 770 |    "ணூ" "ß"
 771 |    "ணெ" "¦½"
 772 |    "ணே" "§½"
 773 |    "ணை" "¨½"
 774 |    "ணொ" "¦½¡"
 775 |    "ணோ" "§½¡"
 776 |    "ணௌ" "¦½ª"
 777 |    "த்" "ò"
 778 |    "த" "¾"
 779 |    "தா" "¾¡"
 780 |    "தி" "¾¢"
 781 |    "தீ" "¾£"
 782 |    "து" "Ð"
 783 |    "தூ" "à"
 784 |    "தெ" "¦¾"
 785 |    "தே" "§¾"
 786 |    "தை" "¨¾"
 787 |    "தொ" "¦¾¡"
 788 |    "தோ" "§¾¡"
 789 |    "தௌ" "¦¾ª"
 790 |    "ந்" "ó"
 791 |    "ந" "¿"
 792 |    "நா" "¿¡"
 793 |    "நி" "¿¢"
 794 |    "நீ" "¿£"
 795 |    "நு" "Ñ"
 796 |    "நூ" "á"
 797 |    "நெ" "¦¿"
 798 |    "நே" "§¿"
 799 |    "நை" "¨¿"
 800 |    "நொ" "¦¿¡"
 801 |    "நோ" "§¿¡"
 802 |    "நௌ" "¦¿ª"
 803 |    "ப்" "ô"
 804 |    "ப" "À"
 805 |    "பா" "À¡"
 806 |    "பி" "À¢"
 807 |    "பீ" "À£"
 808 |    "பு" "Ò"
 809 |    "பூ" "â"
 810 |    "பெ" "¦À"
 811 |    "பே" "§À"
 812 |    "பை" "¨À"
 813 |    "பொ" "¦À¡"
 814 |    "போ" "§À¡"
 815 |    "பௌ" "¦Àª"
 816 |    "ம்" "õ"
 817 |    "ம" "Á"
 818 |    "மா" "Á¡"
 819 |    "மி" "Á¢"
 820 |    "மீ" "Á£"
 821 |    "மு" "Ó"
 822 |    "மூ" "ã"
 823 |    "மெ" "¦Á"
 824 |    "மே" "§Á"
 825 |    "மை" "¨Á"
 826 |    "மொ" "¦Á¡"
 827 |    "மோ" "§Á¡"
 828 |    "மௌ" "¦Áª"
 829 |    "ய்" "ö"
 830 |    "ய" "Â"
 831 |    "யா" "Â¡"
 832 |    "யி" "Â¢"
 833 |    "யீ" "Â£"
 834 |    "யு" "Ô"
 835 |    "யூ" "ä"
 836 |    "யெ" "¦Â"
 837 |    "யே" "§Â"
 838 |    "யை" "¨Â"
 839 |    "யொ" "¦Â¡"
 840 |    "யோ" "§Â¡"
 841 |    "யௌ" "¦Âª"
 842 |    "ர்" "÷"
 843 |    "ர" "Ã"
 844 |    "ரா" "Ã¡"
 845 |    "ரி" "Ã¢"
 846 |    "ரீ" "Ã£"
 847 |    "ரு" "Õ"
 848 |    "ரூ" "å"
 849 |    "ரெ" "¦Ã"
 850 |    "ரே" "§Ã"
 851 |    "ரை" "¨Ã"
 852 |    "ரொ" "¦Ã¡"
 853 |    "ரோ" "§Ã¡"
 854 |    "ரௌ" "¦Ãª"
 855 |    "ல்" "ø"
 856 |    "ல" "Ä"
 857 |    "லா" "Ä¡"
 858 |    "லி" "Ä¢"
 859 |    "லீ" "Ä£"
 860 |    "லு" "Ö"
 861 |    "லூ" "æ"
 862 |    "லெ" "¦Ä"
 863 |    "லே" "§Ä"
 864 |    "லை" "¨Ä"
 865 |    "லொ" "¦Ä¡"
 866 |    "லோ" "§Ä¡"
 867 |    "லௌ" "¦Äª"
 868 |    "வ்" "ù"
 869 |    "வ" "Å"
 870 |    "வா" "Å¡"
 871 |    "வி" "Å¢"
 872 |    "வீ" "Å£"
 873 |    "வு" "×"
 874 |    "வூ" "ç"
 875 |    "வெ" "¦Å"
 876 |    "வே" "§Å"
 877 |    "வை" "¨Å"
 878 |    "வொ" "¦Å¡"
 879 |    "வோ" "§Å¡"
 880 |    "வௌ" "¦Åª"
 881 |    "ழ்" "ú"
 882 |    "ழ" "Æ"
 883 |    "ழா" "Æ¡"
 884 |    "ழி" "Æ¢"
 885 |    "ழீ" "Æ£"
 886 |    "ழு" "Ø"
 887 |    "ழூ" "è"
 888 |    "ழெ" "¦Æ"
 889 |    "ழே" "§Æ"
 890 |    "ழை" "¨Æ"
 891 |    "ழொ" "¦Æ¡"
 892 |    "ழோ" "§Æ¡"
 893 |    "ழௌ" "¦Æª"
 894 |    "ள்" "û"
 895 |    "ள" "Ç"
 896 |    "ளா" "Ç¡"
 897 |    "ளி" "Ç¢"
 898 |    "ளீ" "Ç£"
 899 |    "ளு" "Ù"
 900 |    "ளூ" "é"
 901 |    "ளெ" "¦Ç"
 902 |    "ளே" "§Ç"
 903 |    "ளை" "¨Ç"
 904 |    "ளொ" "¦Ç¡"
 905 |    "ளோ" "§Ç¡"
 906 |    "ளௌ" "¦Çª"
 907 |    "ற்" "ü"
 908 |    "ற" "È"
 909 |    "றா" "È¡"
 910 |    "றி" "È¢"
 911 |    "றீ" "È£"
 912 |    "று" "Ú"
 913 |    "றூ" "ê"
 914 |    "றெ" "¦È"
 915 |    "றே" "§È"
 916 |    "றை" "¨È"
 917 |    "றொ" "¦È¡"
 918 |    "றோ" "§È¡"
 919 |    "றௌ" "¦Èª"
 920 |    "ன்" "ý"
 921 |    "ன" "É"
 922 |    "னா" "É¡"
 923 |    "னி" "É¢"
 924 |    "னீ" "É£"
 925 |    "னு" "Û"
 926 |    "னூ" "ë"
 927 |    "னெ" "¦É"
 928 |    "னே" "§É"
 929 |    "னை" "¨É"
 930 |    "னொ" "¦É¡"
 931 |    "னோ" "§É¡"
 932 |    "னௌ" "¦Éª"})
 933 | 
 934 | ;;;;;;;;
 935 | ;; தமிழ் <-> Webulagam
 936 | ;;;;;;;;
 937 | 
 938 | (def webulagam-map ; Unicode தமிழ் letter -> the string used for that letter in the Webulagam encoding
 939 |   {"அ" "m" ; vowels and ஃ come first, then 13 entries per consonant (் form, bare consonant, ா ி ீ ு ூ ெ ே ை ொ ோ ௌ)
 940 |    "ஆ" "M"
 941 |    "இ" "ï"
 942 |    "ஈ" "<"
 943 |    "உ" "c"
 944 |    "ஊ" "C"
 945 |    "எ" "v"
 946 |    "ஏ" "V"
 947 |    "ஐ" "I"
 948 |    "ஒ" "x"
 949 |    "ஓ" "X"
 950 |    "ஔ" "xs"
 951 |    "ஃ" "~"
 952 |    "க்" "¡"
 953 |    "க" "f"
 954 |    "கா" "fh"
 955 |    "கி" "»"
 956 |    "கீ" "Ñ"
 957 |    "கு" "F"
 958 |    "கூ" "T"
 959 |    "கெ" "bf"
 960 |    "கே" "nf"
 961 |    "கை" "if"
 962 |    "கொ" "bfh"
 963 |    "கோ" "nfh"
 964 |    "கௌ" "bfs"
 965 |    "ங்" "§"
 966 |    "ங" "‡"
 967 |    "ஙா" "‡h"
 968 |    "ஙி" "À"
 969 |    "ஙீ" "†"
 970 |    "ஙு" "¼"
 971 |    "ஙூ" "½"
 972 |    "ஙெ" "b‡"
 973 |    "ஙே" "n‡"
 974 |    "ஙை" "i‡"
 975 |    "ஙொ" "b‡h"
 976 |    "ஙோ" "n‡h"
 977 |    "ஙௌ" "b‡s"
 978 |    "ச்" "¢"
 979 |    "ச" "r"
 980 |    "சா" "rh"
 981 |    "சி" "á"
 982 |    "சீ" "Ó"
 983 |    "சு" "R"
 984 |    "சூ" "N"
 985 |    "செ" "br"
 986 |    "சே" "nr"
 987 |    "சை" "ir"
 988 |    "சொ" "brh"
 989 |    "சோ" "nrh"
 990 |    "சௌ" "brs"
 991 |    "ஞ்" "Š"
 992 |    "ஞ" "P"
 993 |    "ஞா" "Ph"
 994 |    "ஞி" "Á"
 995 |    "ஞீ" "Ø"
 996 |    "ஞு" "|"
 997 |    "ஞூ" "ú"
 998 |    "ஞெ" "bP"
 999 |    "ஞே" "nP"
1000 |    "ஞை" "iP"
1001 |    "ஞொ" "bPh"
1002 |    "ஞோ" "nPh"
1003 |    "ஞௌ" "bPs"
1004 |    "ட்" "£"
1005 |    "ட" "l"
1006 |    "டா" "lh"
1007 |    "டி" "o"
1008 |    "டீ" "O"
1009 |    "டு" "L"
1010 |    "டூ" "^"
1011 |    "டெ" "bl"
1012 |    "டே" "nl"
1013 |    "டை" "il"
1014 |    "டொ" "blh"
1015 |    "டோ" "nlh"
1016 |    "டௌ" "bls"
1017 |    "ண்" "©"
1018 |    "ண" "z"
1019 |    "ணா" "zh"
1020 |    "ணி" "Â"
1021 |    "ணீ" "Ù"
1022 |    "ணு" "Q"
1023 |    "ணூ" "û"
1024 |    "ணெ" "bz"
1025 |    "ணே" "nz"
1026 |    "ணை" "iz"
1027 |    "ணொ" "bzh"
1028 |    "ணோ" "nzh"
1029 |    "ணௌ" "bzs"
1030 |    "த்" "¤"
1031 |    "த" "j"
1032 |    "தா" "jh"
1033 |    "தி" "â"
1034 |    "தீ" "Ô"
1035 |    "து" "J"
1036 |    "தூ" "ö"
1037 |    "தெ" "bj"
1038 |    "தே" "nj"
1039 |    "தை" "ij"
1040 |    "தொ" "bjh"
1041 |    "தோ" "njh"
1042 |    "தௌ" "bjs"
1043 |    "ந்" "ª"
1044 |    "ந" "e"
1045 |    "நா" "eh"
1046 |    "நி" "Ã"
1047 |    "நீ" "Ú"
1048 |    "நு" "E"
1049 |    "நூ" "ü"
1050 |    "நெ" "be"
1051 |    "நே" "ne"
1052 |    "நை" "ie"
1053 |    "நொ" "beh"
1054 |    "நோ" "neh"
1055 |    "நௌ" "bes"
1056 |    "ப்" "¥"
1057 |    "ப" "g"
1058 |    "பா" "gh"
1059 |    "பி" "ã"
1060 |    "பீ" "Õ"
1061 |    "பு" "ò"
1062 |    "பூ" "ó"
1063 |    "பெ" "bg"
1064 |    "பே" "ng"
1065 |    "பை" "ig"
1066 |    "பொ" "bgh"
1067 |    "போ" "ngh"
1068 |    "பௌ" "bgs"
1069 |    "ம்" "«"
1070 |    "ம" "k"
1071 |    "மா" "kh"
1072 |    "மி" "Ä"
1073 |    "மீ" "Û"
1074 |    "மு" "K"
1075 |    "மூ" "_"
1076 |    "மெ" "bk"
1077 |    "மே" "nk"
1078 |    "மை" "ik"
1079 |    "மொ" "bkh"
1080 |    "மோ" "nkh"
1081 |    "மௌ" "bks"
1082 |    "ய்" "Œ"
1083 |    "ய" "a"
1084 |    "யா" "ah"
1085 |    "யி" "Æ"
1086 |    "யீ" "p"
1087 |    "யு" "í"
1088 |    "யூ" "ô"
1089 |    "யெ" "ba"
1090 |    "யே" "na"
1091 |    "யை" "ia"
1092 |    "யொ" "bah"
1093 |    "யோ" "nah"
1094 |    "யௌ" "bas"
1095 |    "ர்" "®"
1096 |    "ர" "u"
1097 |    "ரா" "uh"
1098 |    "ரி" "Ç"
1099 |    "ரீ" "ß"
1100 |    "ரு" "U"
1101 |    "ரூ" "%"
1102 |    "ரெ" "bu"
1103 |    "ரே" "nu"
1104 |    "ரை" "iu"
1105 |    "ரொ" "buh"
1106 |    "ரோ" "nuh"
1107 |    "ரௌ" "bus"
1108 |    "ல்" "š"
1109 |    "ல" "y"
1110 |    "லா" "yh"
1111 |    "லி" "È"
1112 |    "லீ" "ä"
1113 |    "லு" "Y"
1114 |    "லூ" "ÿ"
1115 |    "லெ" "by"
1116 |    "லே" "ny"
1117 |    "லை" "iy"
1118 |    "லொ" "byh"
1119 |    "லோ" "nyh"
1120 |    "லௌ" "bys"
1121 |    "வ்" "›"
1122 |    "வ" "t"
1123 |    "வா" "th"
1124 |    "வி" "É"
1125 |    "வீ" "å"
1126 |    "வு" "î"
1127 |    "வூ" "ñ"
1128 |    "வெ" "bt"
1129 |    "வே" "nt"
1130 |    "வை" "it"
1131 |    "வொ" "bth"
1132 |    "வோ" "nth"
1133 |    "வௌ" "bts"
1134 |    "ழ்" "œ"
1135 |    "ழ" "H"
1136 |    "ழா" "Hh"
1137 |    "ழி" "Ê"
1138 |    "ழீ" "æ"
1139 |    "ழு" "G"
1140 |    "ழூ" ">"
1141 |    "ழெ" "bH"
1142 |    "ழே" "nH"
1143 |    "ழை" "iH"
1144 |    "ழொ" "bHh"
1145 |    "ழோ" "nHh"
1146 |    "ழௌ" "bHs"
1147 |    "ள்" "Ÿ"
1148 |    "ள" "s"
1149 |    "ளா" "sh"
1150 |    "ளி" "Ë"
1151 |    "ளீ" "ç"
1152 |    "ளு" "S"
1153 |    "ளூ" "q"
1154 |    "ளெ" "bs"
1155 |    "ளே" "ns"
1156 |    "ளை" "is"
1157 |    "ளொ" "bsh"
1158 |    "ளோ" "nsh"
1159 |    "ளௌ" "bss"
1160 |    "ற்" "‰"
1161 |    "ற" "w"
1162 |    "றா" "wh"
1163 |    "றி" "¿"
1164 |    "றீ" "Ö"
1165 |    "று" "W"
1166 |    "றூ" "ù"
1167 |    "றெ" "bw"
1168 |    "றே" "nw"
1169 |    "றை" "iw"
1170 |    "றொ" "bwh"
1171 |    "றோ" "nwh"
1172 |    "றௌ" "bws"
1173 |    "ன்" "‹"
1174 |    "ன" "d"
1175 |    "னா" "dh"
1176 |    "னி" "Å"
1177 |    "னீ" "Ü"
1178 |    "னு" "D"
1179 |    "னூ" "}"
1180 |    "னெ" "bd"
1181 |    "னே" "nd"
1182 |    "னை" "id"
1183 |    "னொ" "bdh"
1184 |    "னோ" "ndh"
1185 |    "னௌ" "bds"})
1186 | 
1187 | 
1188 | ;;;;;;;;
1189 | ;; all character sets together
1190 | ;;;;;;;;
1191 | 
1192 | (defn fill-in-bamini-to-unic-map
1193 |   "Returns `to-unic-map` (Bamini -> Unicode) extended with one entry per
1194 |   letter of `fmt/letters` and per ர் / ரி / ரீ: the key is the letter's
1195 |   Bamini code followed by \"h;\" / \"hp\" / \"hP\" (the way ர் ரி ரீ normally
1196 |   get written by hand), the value is the letter followed by ர் / ரி / ரீ.
1197 |   Letters without an entry in `bamini-map` are skipped: (str nil \"h;\") is
1198 |   just \"h;\", so they would otherwise register a bare suffix as a key."
1199 |   [to-unic-map]
1200 |   (let [r-forms [["ர்" "h;"] ["ரி" "hp"] ["ரீ" "hP"]]
1201 |         entries (for [letter (flatten fmt/letters)
1202 |                       :let [letter-code (get bamini-map letter)]
1203 |                       :when letter-code
1204 |                       [r-letter r-code] r-forms]
1205 |                   [(str letter-code r-code) (str letter r-letter)])]
1206 |     ;; the generated entries win over existing keys, as before
1207 |     (merge to-unic-map (into {} entries))))
1208 | 
1209 | (defn fill-charset-map
1210 |   "Given a charset spec holding :from-unic-map and :to-unic-map (string ->
1211 |   string tables), builds a trie from each table and returns
1212 |   {:to-unicode f, :from-unicode g}, where each fn splits its input string
1213 |   with fmt/str->elems on the matching trie and joins the pieces with str."
1214 |   [{:keys [from-unic-map to-unic-map]}]
1215 |   (letfn [(converter [table]
1216 |             (let [trie (fmt/make-trie table)]
1217 |               (fn [s]
1218 |                 (apply str (fmt/str->elems trie s)))))]
1219 |     {:from-unicode (converter from-unic-map)
1220 |      :to-unicode (converter to-unic-map)}))
1221 | 
1222 | (def ^{:doc "charset keyword -> {:from-unic-map table, :to-unic-map inverted table}; the Bamini inverse additionally gets the handwritten ர் ரி ரீ forms and four extra keys"}
1223 |   init-charsets
1224 |   (let [plain (fn [table] {:from-unic-map table
1225 |                            :to-unic-map (set/map-invert table)})
1226 |         ;; Bamini -> Unicode keys added on top of the inverted table
1227 |         bamini-extras {">" ",", "xsp" "ஒளி", "R+" "சூ", "@" ";"}]
1228 |     {:tab (plain tab-map)
1229 |      :bamini {:from-unic-map bamini-map
1230 |               :to-unic-map (merge (fill-in-bamini-to-unic-map
1231 |                                    (set/map-invert bamini-map))
1232 |                                   bamini-extras)}
1233 |      :tscii (plain tscii-map)
1234 |      :webulagam (plain webulagam-map)}))
1235 | 
1236 | (defn mmap-vals
1237 |   "Returns a new map with the same keys as `m`, where every value v has
1238 |   been replaced by (f v)."
1239 |   [f m]
1240 |   ;; rebuilt from scratch into {} so the result is always a plain map
1241 |   (into {} (map (fn [[k v]] [k (f v)]) m)))
1242 | 
1243 | (def ^{:doc "charset keyword -> {:to-unicode fn, :from-unicode fn}; every fn takes a string and returns the converted string"} charsets
1244 |   ;; :romanized now has the same :to-unicode / :from-unicode keys as the generated charsets; the older :to-unic / :from-unic keys are kept for existing callers
1245 |   (-> (mmap-vals fill-charset-map init-charsets)
1246 |       (assoc :romanized {:to-unicode romanized->தமிழ், :from-unicode தமிழ்->romanized,
1247 |                          :to-unic romanized->தமிழ், :from-unic தமிழ்->romanized})))
1248 | 
1249 | ;;;;;;;;
1250 | ;; named fns for convert fns
1251 | ;;;;;;;;
1252 | 
1253 | ;; TAB
1254 | 
1255 | (def ^{:doc "converts Unicode தமிழ் text into the TAB format"}
1256 |   தமிழ்->tab (-> charsets :tab :from-unicode))
1257 | 
1258 | (def ^{:doc "converts TAB-format தமிழ் text into Unicode"}
1259 |   tab->தமிழ் (-> charsets :tab :to-unicode))
1260 | 
1261 | ;; Bamini
1262 | 
1263 | (def ^{:doc "converts Unicode தமிழ் text into the Bamini format"}
1264 |   தமிழ்->bamini (-> charsets :bamini :from-unicode))
1265 | 
1266 | (def ^{:doc "converts Bamini-format தமிழ் text into Unicode"}
1267 |   bamini->தமிழ் (-> charsets :bamini :to-unicode))
1268 | 
1269 | ;; TSCII
1270 | 
1271 | (def ^{:doc "converts Unicode தமிழ் text into the TSCII format"}
1272 |   தமிழ்->tscii (-> charsets :tscii :from-unicode))
1273 | 
1274 | (def ^{:doc "converts TSCII-format தமிழ் text into Unicode"}
1275 |   tscii->தமிழ் (-> charsets :tscii :to-unicode))
1276 | 
1277 | ;; Webulagam
1278 | 
1279 | (def ^{:doc "converts Unicode தமிழ் text into the Webulagam format"}
1280 |   தமிழ்->webulagam (-> charsets :webulagam :from-unicode))
1281 | 
1282 | (def ^{:doc "converts Webulagam-format தமிழ் text into Unicode"}
1283 |   webulagam->தமிழ் (-> charsets :webulagam :to-unicode))
1284 | 
1285 | ;;;;;;;;
1286 | ;; main
1287 | ;;;;;;;;
1288 | 
1289 | (def ^{:doc "version string of the Mac OS X input method (keyboard) plugin; -main prints it on the VERSION line"}
1290 |   OSX-INPUT-METHOD-VER "1.0")
1291 | 
1292 | (defn -main
1293 |   "Prints the table source for a Mac OS X 10.x input method (keyboard)
1294 |   plugin, one line per println: a fixed header, then one
1295 |   \"<romanized input> <தமிழ் letter>\" line per entry, then a blank line
1296 |   and ENDCHARACTER.  The command-line `args` are accepted but never read."
1297 |   [& args]
1298 |   (let [;; all vowels except ஃ, which is given its own key (q) below
1299 |         vowel-set (set (remove #(= % "ஃ") fmt/vowels))
1300 |         vowel-entry? (fn [[_ tha]] (contains? vowel-set tha))
1301 |         ;; romanized->தமிழ் pairs, split into vowels (true) and the rest (false)
1302 |         by-vowel? (group-by vowel-entry? romanized-தமிழ்-phoneme-map)
1303 |         ஃ-map {"q" "ஃ"}
1304 |         vowel-map (into {} (get by-vowel? true))
1305 |         cons-map (into {} (get by-vowel? false))
1306 |         ;; every consonant x vowel pairing, keyed by the joined romanizations
1307 |         cv-map (into {} (for [[eng-c tha-c] cons-map
1308 |                               [eng-v tha-v] vowel-map]
1309 |                           [(str eng-c eng-v) (fmt/phonemes->str [tha-c tha-v])]))
1310 |         letters-map (merge ஃ-map vowel-map cons-map cv-map)
1311 |         input-codes (keys letters-map)
1312 |         letters-lines (for [[eng tha] letters-map]
1313 |                         (str eng " " tha))
1314 |         ;; each distinct character that appears in some input code
1315 |         input-chars-str (apply str (distinct (mapcat seq input-codes)))
1316 |         ;; length of the longest input code
1317 |         max-input-code (apply max (map count input-codes))
1318 |         header-lines ["METHOD: TABLE"
1319 |                       "ENCODE: Unicode"
1320 |                       "PROMPT: கலை"
1321 |                       "DELIMITER ,"
1322 |                       (str "VERSION " OSX-INPUT-METHOD-VER)
1323 |                       (str "MAXINPUTCODE " max-input-code)
1324 |                       (str "VALIDINPUTKEY " input-chars-str)
1325 |                       "BEGINCHARACTER"
1326 |                       ""]
1327 |         footer-lines [""
1328 |                       "ENDCHARACTER"]]
1329 |     ;; print header, letter rows and footer in that order
1330 |     (doseq [line (concat header-lines
1331 |                          letters-lines footer-lines)]
1332 |       (println line))))
1333 | 


--------------------------------------------------------------------------------
/emacs/clojure-mode.el:
--------------------------------------------------------------------------------
   1 | ;;; clojure-mode.el --- Major mode for Clojure code -*- lexical-binding: t; -*-
   2 | 
   3 | ;; Copyright © 2007-2014 Jeffrey Chu, Lennart Staflin, Phil Hagelberg
   4 | ;; Copyright © 2013-2014 Bozhidar Batsov
   5 | ;;
   6 | ;; Authors: Jeffrey Chu <jochu0@gmail.com>
   7 | ;;       Lennart Staflin <lenst@lysator.liu.se>
   8 | ;;       Phil Hagelberg <technomancy@gmail.com>
   9 | ;;       Bozhidar Batsov <bozhidar@batsov.com>
  10 | ;; URL: http://github.com/clojure-emacs/clojure-mode
  11 | ;; Keywords: languages clojure clojurescript lisp
  12 | ;; Version: 3.0.0
  13 | ;; X-Original-Version: 3.0.0
  14 | ;; Package-Requires: ((emacs "24.1"))
  15 | 
  16 | ;; This file is not part of GNU Emacs.
  17 | 
  18 | ;;; Commentary:
  19 | 
  20 | ;; Provides font-lock, indentation, and navigation for the Clojure
  21 | ;; programming language (http://clojure.org).
  22 | 
  23 | ;; Using clojure-mode with paredit is highly recommended.  Use paredit
  24 | ;; as you would with any other minor mode; for instance:
  25 | ;;
  26 | ;;   ;; require or autoload paredit-mode
  27 | ;;   (add-hook 'clojure-mode-hook 'paredit-mode)
  28 | 
  29 | ;; See CIDER (http://github.com/clojure-emacs/cider) for
  30 | ;; better interaction with subprocesses via nREPL.
  31 | 
  32 | ;;; License:
  33 | 
  34 | ;; This program is free software; you can redistribute it and/or
  35 | ;; modify it under the terms of the GNU General Public License
  36 | ;; as published by the Free Software Foundation; either version 3
  37 | ;; of the License, or (at your option) any later version.
  38 | ;;
  39 | ;; This program is distributed in the hope that it will be useful,
  40 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  41 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  42 | ;; GNU General Public License for more details.
  43 | ;;
  44 | ;; You should have received a copy of the GNU General Public License
  45 | ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  46 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  47 | ;; Boston, MA 02110-1301, USA.
  48 | 
  49 | ;;; Code:
  50 | 
  51 | 
  52 | ;;; Compatibility
  53 | (eval-and-compile
  54 |   ;; Fallback definition of `setq-local' for Emacs 24.2 and below.
  55 |   (unless (fboundp 'setq-local)
  56 |     (defmacro setq-local (var val)
  57 |       "Make VAR buffer-local in the current buffer and set it to VAL."
  58 |       (list 'set (list 'make-local-variable (list 'quote var)) val))))
  59 | 
  60 | (eval-when-compile
  61 |   ;; Valueless `defvar's, evaluated at compile time only.  The names are
  62 |   ;; presumably owned by lisp-mode, font-lock and paredit -- TODO confirm.
  63 |   (defvar calculate-lisp-indent-last-sexp)
  64 |   (defvar font-lock-beg) (defvar font-lock-end)
  65 |   (defvar paredit-mode) (defvar paredit-version)
  66 |   (defvar paredit-space-for-delimiter-predicates))
  67 | 
  68 | (require 'cl)
  69 | (require 'inf-lisp)
  70 | (require 'imenu)
  71 | 
  72 | (declare-function lisp-fill-paragraph  "lisp-mode" (&optional justify))
  73 | 
  74 | (defgroup clojure nil
  75 |   "Options for the major mode used to edit Clojure code."
  76 |   :group 'languages
  77 |   :prefix "clojure-"
  78 |   :link '(url-link :tag "Github" "https://github.com/clojure-emacs/clojure-mode")
  79 |   :link '(emacs-commentary-link :tag "Commentary" "clojure-mode"))
  80 | 
  81 | (defface clojure-keyword-face
  82 |   '((t (:inherit font-lock-constant-face)))
  83 |   "Face for font-locking Clojure keywords (:something)."
  84 |   :package-version '(clojure-mode . "3.0.0")
  85 |   :group 'clojure)
  86 | 
  87 | (defface clojure-character-face
  88 |   '((t (:inherit font-lock-string-face)))
  89 |   "Face for font-locking Clojure character literals."
  90 |   :package-version '(clojure-mode . "3.0.0")
  91 |   :group 'clojure)
  92 | 
  93 | (defface clojure-interop-method-face
  94 |   '((t (:inherit font-lock-preprocessor-face)))
  95 |   "Face for font-locking interop method names (camelCase)."
  96 |   :package-version '(clojure-mode . "3.0.0")
  97 |   :group 'clojure)
  98 | 
  99 | (defcustom clojure-load-command  "(clojure.core/load-file \"%s\")\n"
 100 |   "Format string used to build the Clojure expression that loads a file.
 101 | The string should contain `%s', which is replaced by the file name,
 102 | and the result should be a Clojure expression that tells the
 103 | inferior Clojure to load that file."
 104 |   :group 'clojure
 105 |   :type 'string
 106 |   :safe 'stringp)
 107 | 
 108 | (defcustom clojure-inf-lisp-command "lein repl"
 109 |   "Command to be used by `inferior-lisp-program'."
 110 |   :group 'clojure
 111 |   :type 'string
 112 |   :safe 'stringp)
 113 | 
 114 | (defcustom clojure-defun-style-default-indent nil
 115 |   "When non-nil, use default indenting for functions and macros.
 116 | Otherwise check `define-clojure-indent' and `put-clojure-indent'."
 117 |   :type 'boolean
 118 |   :group 'clojure
 119 |   :safe 'booleanp)
 120 | 
 121 | (defcustom clojure-use-backtracking-indent t
 122 |   "When non-nil, enable context sensitive indentation."
 123 |   :type 'boolean
 124 |   :group 'clojure
 125 |   :safe 'booleanp)
 126 | 
 127 | (defcustom clojure-max-backtracking 3
 128 |   "Maximum amount to backtrack up a list to check for context."
 129 |   :type 'integer
 130 |   :group 'clojure
 131 |   :safe 'integerp)
 132 | 
 133 | (defcustom clojure-docstring-fill-column fill-column
 134 |   "Value of `fill-column' to use when filling a docstring."
 135 |   :type 'integer
 136 |   :group 'clojure
 137 |   :safe 'integerp)
 138 | 
 139 | (defcustom clojure-docstring-fill-prefix-width 2
 140 |   "Width of `fill-prefix' when filling a docstring.
 141 | The default value conforms with the de facto convention for
 142 | Clojure docstrings, aligning the second line with the opening
 143 | double quotes on the third column."
 144 |   :type 'integer
 145 |   :group 'clojure
 146 |   :safe 'integerp)
 147 | 
 148 | (defcustom clojure-omit-space-between-tag-and-delimiters '(?\[ ?\{)
 149 |   "Allowed opening delimiter characters after a reader literal tag.
 150 | For example, \[ is allowed in :db/id[:db.part/user]."
 151 |   :type '(set (const :tag "[" ?\[)
 152 |               (const :tag "{" ?\{)
 153 |               (const :tag "(" ?\()
 154 |               (const :tag "\"" ?\"))
 155 |   :group 'clojure
 156 |   :safe (lambda (value)
 157 |           (and (listp value)
 158 |                (every 'characterp value))))
 159 | 
 160 | (defvar clojure-mode-map
 161 |   (let ((map (make-sparse-keymap)))
 162 |     (set-keymap-parent map lisp-mode-shared-map)
 163 |     (define-key map (kbd "C-M-x")   'lisp-eval-defun)
 164 |     (define-key map (kbd "C-x C-e") 'lisp-eval-last-sexp)
 165 |     (define-key map (kbd "C-c C-e") 'lisp-eval-last-sexp)
 166 |     (define-key map (kbd "C-c C-l") 'clojure-load-file)
 167 |     (define-key map (kbd "C-c C-r") 'lisp-eval-region)
 168 |     (define-key map (kbd "C-c C-z") 'clojure-display-inferior-lisp-buffer)
 169 |     (define-key map (kbd "C-:") 'clojure-toggle-keyword-string)
 170 |     (easy-menu-define clojure-mode-menu map "Clojure Mode Menu"
 171 |       '("Clojure"
 172 |         ["Eval Top-Level Expression" lisp-eval-defun]
 173 |         ["Eval Last Expression" lisp-eval-last-sexp]
 174 |         ["Eval Region" lisp-eval-region]
 175 |         "--"
 176 |         ["Run Inferior Lisp" clojure-display-inferior-lisp-buffer]
 177 |         ["Display Inferior Lisp Buffer" clojure-display-inferior-lisp-buffer]
 178 |         ["Load File" clojure-load-file]
 179 |         "--"
 180 |         ["Toggle between string & keyword" clojure-toggle-keyword-string]
 181 |         ["Mark string" clojure-mark-string]
 182 |         ["Insert ns form at point" clojure-insert-ns-form-at-point]
 183 |         ["Insert ns form at beginning" clojure-insert-ns-form]
 184 |         ["Update ns form" clojure-update-ns]
 185 |         "--"
 186 |         ["Version" clojure-mode-display-version]))
 187 |     map)
 188 |   "Keymap for Clojure mode.  Inherits from `lisp-mode-shared-map'.")
 189 | 
 190 | (defvar clojure-mode-syntax-table
 191 |   (let ((table (copy-syntax-table emacs-lisp-mode-syntax-table)))
 192 |     (modify-syntax-entry ?~ "'   " table)
 193 |     (modify-syntax-entry ?\{ "(}" table)
 194 |     (modify-syntax-entry ?\} "){" table)
 195 |     (modify-syntax-entry ?\[ "(]" table)
 196 |     (modify-syntax-entry ?\] ")[" table)
 197 |     (modify-syntax-entry ?^ "'" table)
 198 |     ;; Make hash a usual word character
 199 |     (modify-syntax-entry ?# "_ p" table)
 200 |     table))
 201 | 
;; Updated by `clojure-load-file', which also passes the previous value to
;; `comint-get-source' when prompting for the next file.
;; NOTE(review): the docstring mentions `clojure-compile-file'; confirm
;; that command exists elsewhere in this file.
(defvar clojure-prev-l/c-dir/file nil
  "Record last directory and file used in loading or compiling.
This holds a cons cell of the form `(DIRECTORY . FILE)'
describing the last `clojure-load-file' or `clojure-compile-file' command.")
 206 | 
;; Version string reported by `clojure-mode-display-version'.
(defconst clojure-mode-version "3.0.0"
  "The current version of `clojure-mode'.")
 209 | 
;; Each element is (STRING . CHARACTER): STRING is shown as CHARACTER.
;; `clojure-mode' installs this list as the buffer-local value of
;; `prettify-symbols-alist'.
(defconst clojure--prettify-symbols-alist
  '(("fn"  . ?λ)))
 212 | 
 213 | (defun clojure-mode-display-version ()
 214 |   "Display the current `clojure-mode-version' in the minibuffer."
 215 |   (interactive)
 216 |   (message "clojure-mode (version %s)" clojure-mode-version))
 217 | 
 218 | (defun clojure-space-for-delimiter-p (endp delim)
 219 |   "Prevent paredit from inserting useless spaces.
 220 | See `paredit-space-for-delimiter-predicates' for the meaning of
 221 | ENDP and DELIM."
 222 |   (if (derived-mode-p 'clojure-mode)
 223 |       (save-excursion
 224 |         (backward-char)
 225 |         (if (and (or (char-equal delim ?\()
 226 |                      (char-equal delim ?\")
 227 |                      (char-equal delim ?{))
 228 |                  (not endp))
 229 |             (if (char-equal (char-after) ?#)
 230 |                 (and (not (bobp))
 231 |                      (or (char-equal ?w (char-syntax (char-before)))
 232 |                          (char-equal ?_ (char-syntax (char-before)))))
 233 |               t)
 234 |           t))
 235 |     t))
 236 | 
 237 | (defun clojure-no-space-after-tag (endp delimiter)
 238 |   "Prevent inserting a space after a reader-literal tag?
 239 | 
 240 | When a reader-literal tag is followed be an opening delimiter
 241 | listed in `clojure-omit-space-between-tag-and-delimiters', this
 242 | function returns t.
 243 | 
 244 | This allows you to write things like #db/id[:db.part/user]
 245 | without inserting a space between the tag and the opening
 246 | bracket.
 247 | 
 248 | See `paredit-space-for-delimiter-predicates' for the meaning of
 249 | ENDP and DELIMITER."
 250 |   (if endp
 251 |       t
 252 |     (or (not (member delimiter clojure-omit-space-between-tag-and-delimiters))
 253 |         (save-excursion
 254 |           (let ((orig-point (point)))
 255 |             (not (and (re-search-backward
 256 |                        "#\\([a-zA-Z0-9._-]+/\\)?[a-zA-Z0-9._-]+"
 257 |                        (line-beginning-position)
 258 |                        t)
 259 |                       (= orig-point (match-end 0)))))))))
 260 | 
 261 | (defun clojure-paredit-setup ()
 262 |   "A bit code to make `paredit-mode' play nice with `clojure-mode'."
 263 |   (when (>= paredit-version 21)
 264 |     (define-key clojure-mode-map "{" 'paredit-open-curly)
 265 |     (define-key clojure-mode-map "}" 'paredit-close-curly)
 266 |     (add-to-list 'paredit-space-for-delimiter-predicates
 267 |                  'clojure-space-for-delimiter-p)
 268 |     (add-to-list 'paredit-space-for-delimiter-predicates
 269 |                  'clojure-no-space-after-tag)))
 270 | 
 271 | ;;;###autoload
 272 | (define-derived-mode clojure-mode prog-mode "Clojure"
 273 |   "Major mode for editing Clojure code.
 274 | 
 275 | \\{clojure-mode-map}"
 276 |   (setq-local imenu-create-index-function
 277 |               (lambda ()
 278 |                 (imenu--generic-function '((nil clojure-match-next-def 0)))))
 279 |   (setq-local indent-tabs-mode nil)
 280 |   (lisp-mode-variables nil)
 281 |   (setq fill-paragraph-function 'clojure-fill-paragraph)
 282 |   (setq adaptive-fill-function 'clojure-adaptive-fill-function)
 283 |   (setq-local normal-auto-fill-function 'clojure-auto-fill-function)
 284 |   (setq-local comment-start-skip
 285 |               "\\(\\(^\\|[^\\\\\n]\\)\\(\\\\\\\\\\)*\\)\\(;+\\|#|\\) *")
 286 |   (setq-local indent-line-function 'clojure-indent-line)
 287 |   (setq-local lisp-indent-function 'clojure-indent-function)
 288 |   (setq-local lisp-doc-string-elt-property 'clojure-doc-string-elt)
 289 |   (setq-local inferior-lisp-program clojure-inf-lisp-command)
 290 |   (setq-local parse-sexp-ignore-comments t)
 291 |   (setq-local prettify-symbols-alist clojure--prettify-symbols-alist)
 292 |   (clojure-font-lock-setup)
 293 |   (setq-local open-paren-in-column-0-is-defun-start nil)
 294 |   (add-hook 'paredit-mode-hook 'clojure-paredit-setup))
 295 | 
 296 | (defsubst clojure-in-docstring-p ()
 297 |   "Check whether point is in a docstring."
 298 |   (eq (get-text-property (1- (point-at-eol)) 'face)
 299 |       'font-lock-doc-face))
 300 | 
 301 | (defsubst clojure-docstring-fill-prefix ()
 302 |   "The prefix string used by `clojure-fill-paragraph'.
 303 | 
 304 | It is simply `clojure-docstring-fill-prefix-width' number of spaces."
 305 |   (make-string clojure-docstring-fill-prefix-width ? ))
 306 | 
 307 | (defun clojure-adaptive-fill-function ()
 308 |   "Clojure adaptive fill function.
 309 | This only takes care of filling docstring correctly."
 310 |   (when (clojure-in-docstring-p)
 311 |     (clojure-docstring-fill-prefix)))
 312 | 
 313 | (defun clojure-fill-paragraph (&optional justify)
 314 |   "Like `fill-paragraph' but handle Clojure docstrings."
 315 |   (if (clojure-in-docstring-p)
 316 |       (let ((paragraph-start
 317 |              (concat paragraph-start
 318 |                      "\\|\\s-*\\([(;:\"[]\\|~@\\|`(\\|#'(\\)"))
 319 |             (paragraph-separate
 320 |              (concat paragraph-separate "\\|\\s-*\".*[,\\.]$"))
 321 |             (fill-column (or clojure-docstring-fill-column fill-column))
 322 |             (fill-prefix (clojure-docstring-fill-prefix)))
 323 |         (fill-paragraph justify))
 324 |     (let ((paragraph-start (concat paragraph-start
 325 |                                    "\\|\\s-*\\([(;:\"[]\\|`(\\|#'(\\)"))
 326 |           (paragraph-separate
 327 |            (concat paragraph-separate "\\|\\s-*\".*[,\\.[]$")))
 328 |       (or (fill-comment-paragraph justify)
 329 |           (fill-paragraph justify))
 330 |       ;; Always return `t'
 331 |       t)))
 332 | 
 333 | (defun clojure-auto-fill-function ()
 334 |   "Clojure auto-fill function."
 335 |   ;; Check if auto-filling is meaningful.
 336 |   (let ((fc (current-fill-column)))
 337 |     (when (and fc (> (current-column) fc))
 338 |       (let ((fill-column (if (clojure-in-docstring-p)
 339 |                              clojure-docstring-fill-column
 340 |                            fill-column))
 341 |             (fill-prefix (clojure-adaptive-fill-function)))
 342 |         (do-auto-fill)))))
 343 | 
 344 | (defun clojure-display-inferior-lisp-buffer ()
 345 |   "Display a buffer bound to `inferior-lisp-buffer'."
 346 |   (interactive)
 347 |   (if (and inferior-lisp-buffer (get-buffer inferior-lisp-buffer))
 348 |       (pop-to-buffer inferior-lisp-buffer t)
 349 |     (run-lisp inferior-lisp-program)))
 350 | 
 351 | (defun clojure-load-file (file-name)
 352 |   "Load a Clojure file FILE-NAME into the inferior Clojure process."
 353 |   (interactive (comint-get-source "Load Clojure file: "
 354 |                                   clojure-prev-l/c-dir/file
 355 |                                   '(clojure-mode) t))
 356 |   (comint-check-source file-name) ; Check to see if buffer needs saved.
 357 |   (setq clojure-prev-l/c-dir/file (cons (file-name-directory file-name)
 358 |                                         (file-name-nondirectory file-name)))
 359 |   (comint-send-string (inferior-lisp-proc)
 360 |                       (format clojure-load-command file-name))
 361 |   (switch-to-lisp t))
 362 | 
 363 | 
 364 | 
 365 | (defun clojure-match-next-def ()
 366 |   "Scans the buffer backwards for the next top-level definition.
 367 | Called by `imenu--generic-function'."
 368 |   (when (re-search-backward "^(def\\sw*" nil t)
 369 |     (save-excursion
 370 |       (let (found?
 371 |             (start (point)))
 372 |         (down-list)
 373 |         (forward-sexp)
 374 |         (while (not found?)
 375 |           (forward-sexp)
 376 |           (or (if (char-equal ?[ (char-after (point)))
 377 |                               (backward-sexp))
 378 |                   (if (char-equal ?) (char-after (point)))
 379 |                 (backward-sexp)))
 380 |           (destructuring-bind (def-beg . def-end) (bounds-of-thing-at-point 'sexp)
 381 |             (if (char-equal ?^ (char-after def-beg))
 382 |                 (progn (forward-sexp) (backward-sexp))
 383 |               (setq found? t)
 384 |               (set-match-data (list def-beg def-end)))))
 385 |         (goto-char start)))))
 386 | 
 387 | (defconst clojure-font-lock-keywords
 388 |   (eval-when-compile
 389 |     `(;; Top-level variable definition
 390 |       (,(concat "(\\(?:clojure.core/\\)?\\("
 391 |                 (regexp-opt '("def" "defonce"))
 392 |                 ;; variable declarations
 393 |                 "\\)\\>"
 394 |                 ;; Any whitespace
 395 |                 "[ \r\n\t]*"
 396 |                 ;; Possibly type or metadata
 397 |                 "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*"
 398 |                 "\\(\\sw+\\)?")
 399 |        (1 font-lock-keyword-face)
 400 |        (2 font-lock-variable-name-face nil t))
 401 |       ;; Type definition
 402 |       (,(concat "(\\(?:clojure.core/\\)?\\("
 403 |                 (regexp-opt '("defstruct" "deftype" "defprotocol"
 404 |                               "defrecord"))
 405 |                 ;; type declarations
 406 |                 "\\)\\>"
 407 |                 ;; Any whitespace
 408 |                 "[ \r\n\t]*"
 409 |                 ;; Possibly type or metadata
 410 |                 "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*"
 411 |                 "\\(\\sw+\\)?")
 412 |        (1 font-lock-keyword-face)
 413 |        (2 font-lock-type-face nil t))
 414 | 
 415 |       ;; clj-thamil
 416 |       ;; Function definition (anything that starts with வரையறு and is not
 417 |       ;; listed above)
 418 |       (,(concat "(\\(?:[a-z\.-]+/\\)?\\(வரையறு\[a-z\-\]*-?\\)"
 419 |                 ;; Function declarations
 420 |                 "\\>"
 421 |                 ;; Any whitespace
 422 |                 "[ \r\n\t]*"
 423 |                 ;; Possibly type or metadata
 424 |                 "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*"
 425 |                 "\\(\\sw+\\)?")
 426 |        (1 font-lock-keyword-face)
 427 |        (2 font-lock-function-name-face nil t))
 428 |       
 429 |       ;; Function definition (anything that starts with def and is not
 430 |       ;; listed above)
 431 |       (,(concat "(\\(?:[a-z\.-]+/\\)?\\(def\[a-z\-\]*-?\\)"
 432 |                 ;; Function declarations
 433 |                 "\\>"
 434 |                 ;; Any whitespace
 435 |                 "[ \r\n\t]*"
 436 |                 ;; Possibly type or metadata
 437 |                 "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*"
 438 |                 "\\(\\sw+\\)?")
 439 |        (1 font-lock-keyword-face)
 440 |        (2 font-lock-function-name-face nil t))
 441 |       ;; (fn name? args ...)
 442 |       (,(concat "(\\(?:clojure.core/\\)?\\(fn\\)[ \t]+"
 443 |                 ;; Possibly type
 444 |                 "\\(?:#?^\\sw+[ \t]*\\)?"
 445 |                 ;; Possibly name
 446 |                 "\\(t\\sw+\\)?" )
 447 |        (1 font-lock-keyword-face)
 448 |        (2 font-lock-function-name-face nil t))
 449 |       ;; lambda arguments - %, %1, %2, etc
 450 |       ("\\<%[1-9]?" (0 font-lock-variable-name-face))
 451 |       ;; Special forms & control structures
 452 |       (,(concat
 453 |          "(\\(?:clojure.core/\\)?"
 454 |          (regexp-opt
 455 |           '("let" "letfn" "do"
 456 |             "case" "cond" "cond->" "cond->>" "condp"
 457 |             "for" "loop" "recur"
 458 |             "when" "when-not" "when-let" "when-first" "when-some"
 459 |             "if" "if-let" "if-not" "if-some"
 460 |             "." ".." "->" "->>" "doto"
 461 |             "and" "or"
 462 |             "dosync" "doseq" "dotimes" "dorun" "doall"
 463 |             "load" "import" "unimport" "ns" "in-ns" "refer"
 464 |             "try" "catch" "finally" "throw"
 465 |             "with-open" "with-local-vars" "binding"
 466 |             "gen-class" "gen-and-load-class" "gen-and-save-class"
 467 |             "handler-case" "handle" "var" "declare") t)
 468 |          "\\>")
 469 |        1 font-lock-keyword-face)
 470 | 
 471 |       ;; clj-thamil
 472 |       ;; Special forms & control structures
 473 |       (,(concat
 474 |          "(\\(?:clojure.core/\\)?"
 475 |          (regexp-opt
 476 |           '("வைத்துக்கொள்" "letfn" "செய்"
 477 |             "case" "பொறுத்து" "cond->" "cond->>" "condp"
 478 |             "ஒவ்வொன்றுக்கும்" "சுற்று" "recur"
 479 |             "என்னும்போது" "இல்லென்னும்போது" "when-let" "when-first" "when-some"
 480 |             "எனில்" "if-let" "இல்லெனில்" "if-some"
 481 |             "." ".." "->" "->>" "doto"
 482 |             "மற்றும்" "அல்லது"
 483 |             "dosync" "செய்வரிசை" "dotimes" "dorun" "செய்யெல்லாம்q"
 484 |             "load" "import" "unimport" "ns" "in-ns" "refer"
 485 |             "try" "catch" "finally" "throw"
 486 |             "with-open" "with-local-vars" "binding"
 487 |             "gen-class" "gen-and-load-class" "gen-and-save-class"
 488 |             "handler-case" "handle" "var" "declare") t)
 489 |          "\\>")
 490 |        1 font-lock-keyword-face)
 491 |       
 492 |       
 493 |       (,(concat
 494 |          "\\<"
 495 |          (regexp-opt
 496 |           '("*1" "*2" "*3" "*agent*"
 497 |             "*allow-unresolved-vars*" "*assert*" "*clojure-version*"
 498 |             "*command-line-args*" "*compile-files*"
 499 |             "*compile-path*" "*e" "*err*" "*file*" "*flush-on-newline*"
 500 |             "*in*" "*macro-meta*" "*math-context*" "*ns*" "*out*"
 501 |             "*print-dup*" "*print-length*" "*print-level*"
 502 |             "*print-meta*" "*print-readably*"
 503 |             "*read-eval*" "*source-path*"
 504 |             "*use-context-classloader*" "*warn-on-reflection*")
 505 |           t)
 506 |          "\\>")
 507 |        0 font-lock-builtin-face)
 508 |       ;; Dynamic variables - *something* or @*something*
 509 |       ("\\<@?\\(\\*[a-z-]*\\*\\)\\>" 1 font-lock-variable-name-face)
 510 |       ;; Global constants - nil, true, false
 511 |       (,(concat
 512 |          "\\<"
 513 |          (regexp-opt
 514 |           '("true" "false" "nil") t)
 515 |          "\\>")
 516 |        0 font-lock-constant-face)
 517 |       ;; Character literals - \1, \a, \newline, \u0000
 518 |       ;; FIXME: handle properly some punctuation characters (like commas and semicolumns)
 519 |       ("\\\\\\([[:punct:]]\\|[a-z0-9]+\\)\\>" 0 'clojure-character-face)
 520 |       ;; Constant values (keywords), including as metadata e.g. ^:static
 521 |       ("\\<^?\\(:\\(\\sw\\|\\s_\\)+\\(\\>\\|\\_>\\)\\)" 1 'clojure-keyword-face)
 522 |       ;; cljx annotations (#+clj and #+cljs)
 523 |       ("#\\+cljs?\\>" 0 font-lock-preprocessor-face)
 524 |       ;; Java interop highlighting
 525 |       ;; CONST SOME_CONST (optionally prefixed by /)
 526 |       ("\\(?:\\<\\|/\\)\\([A-Z]+\\|\\([A-Z]+_[A-Z1-9_]+\\)\\)\\>" 1 font-lock-constant-face)
 527 |       ;; .foo .barBaz .qux01 .-flibble .-flibbleWobble
 528 |       ("\\<\\.-?[a-z][a-zA-Z0-9]*\\>" 0 'clojure-interop-method-face)
 529 |       ;; Foo Bar$Baz Qux_ World_OpenUDP Foo. Babylon15.
 530 |       ("\\(?:\\<\\|\\.\\|/\\|#?^\\)\\([A-Z][a-zA-Z0-9_]*[a-zA-Z0-9$_]+\\.?\\>\\)" 1 font-lock-type-face)
 531 |       ;; foo.bar.baz
 532 |       ("\\<^?\\([a-z][a-z0-9_-]+\\.\\([a-z][a-z0-9_-]*\\.?\\)+\\)" 1 font-lock-type-face)
 533 |       ;; (ns namespace) - special handling for single segment namespaces
 534 |       (,(concat "\\<ns\\>[ \r\n\t]*"
 535 |                 ;; Possibly metadata
 536 |                 "\\(?:\\^?{[^}]+}[ \r\n\t]*\\)*"
 537 |                 ;; namespace
 538 |                 "\\([a-z0-9-]+\\)")
 539 |        (1 font-lock-type-face nil t))
 540 |       ;; foo/ Foo/ @Foo/
 541 |       ("\\<@?\\([a-zA-Z][a-z0-9_-]*\\)/" 1 font-lock-type-face)
 542 |       ;; fooBar
 543 |       ("\\(?:\\<\\|/\\)\\([a-z]+[A-Z]+[a-zA-Z0-9$]*\\>\\)" 1 'clojure-interop-method-face)
 544 |       ;; Highlight grouping constructs in regular expressions
 545 |       (clojure-font-lock-regexp-groups
 546 |        (1 'font-lock-regexp-grouping-construct prepend))))
 547 |   "Default expressions to highlight in Clojure mode.")
 548 | 
(defun clojure-font-lock-syntactic-face-function (state)
  "Return the face for the string or comment described by STATE.
STATE is a parser state as returned by `parse-partial-sexp'.

When STATE is inside a string, return nil if the string starts
with a vertical bar, `font-lock-doc-face' if it sits in docstring
position of a form whose head symbol has the property named by
`lisp-doc-string-elt-property', and `font-lock-string-face'
otherwise.  When STATE is not inside a string, return
`font-lock-comment-face'."
  (if (nth 3 state)
      ;; This might be a (doc)string or a |...| symbol.
      (let ((startpos (nth 8 state)))
        (if (eq (char-after startpos) ?|)
            ;; This is not a string, but a |...| symbol.
            nil
          ;; LISTBEG is the start of the innermost list containing the
          ;; string, FIRSTSYM the name of that list's head symbol and
          ;; DOCELT the docstring position registered for that symbol.
          (let* ((listbeg (nth 1 state))
                 (firstsym (and listbeg
                                (save-excursion
                                  (goto-char listbeg)
                                  (and (looking-at "([ \t\n]*\\(\\(\\sw\\|\\s_\\)+\\)")
                                       (match-string 1)))))
                 (docelt (and firstsym
                              (function-get (intern-soft firstsym)
                                            lisp-doc-string-elt-property))))
            (if (and docelt
                     ;; It's a string in a form that can have a docstring.
                     ;; Check whether it's in docstring position.
                     (save-excursion
                       ;; The property may hold a function that computes
                       ;; the position; it is called with point just
                       ;; after the head symbol.
                       (when (functionp docelt)
                         (goto-char (match-end 1))
                         (setq docelt (funcall docelt)))
                       (goto-char listbeg)
                       (forward-char 1)
                       ;; Walk over the leading elements, counting DOCELT
                       ;; down to zero.  Scan errors just end the walk.
                       (condition-case nil
                           (while (and (> docelt 0) (< (point) startpos)
                                       (progn (forward-sexp 1) t))
                             ;; ignore metadata and type hints
                             (unless (looking-at "[ \n\t]*\\(\\^[A-Z:].+\\|\\^?{.+\\)")
                               (setq docelt (1- docelt))))
                         (error nil))
                       ;; The string is the docstring only if, after
                       ;; skipping comments and whitespace, point is
                       ;; exactly where the string starts.
                       (and (zerop docelt) (<= (point) startpos)
                            (progn (forward-comment (point-max)) t)
                            (= (point) (nth 8 state)))))
                font-lock-doc-face
              font-lock-string-face))))
    font-lock-comment-face))
 587 | 
 588 | (defun clojure-font-lock-setup ()
 589 |   "Configures font-lock for editing Clojure code."
 590 |   (setq-local font-lock-multiline t)
 591 |   (add-to-list 'font-lock-extend-region-functions
 592 |                'clojure-font-lock-extend-region-def t)
 593 |   (setq font-lock-defaults
 594 |         '(clojure-font-lock-keywords    ; keywords
 595 |           nil nil
 596 |           (("+-*/.<>=!?$%_&~^:@" . "w")) ; syntax alist
 597 |           nil
 598 |           (font-lock-mark-block-function . mark-defun)
 599 |           (font-lock-syntactic-face-function
 600 |            . clojure-font-lock-syntactic-face-function))))
 601 | 
 602 | (defun clojure-font-lock-def-at-point (point)
 603 |   "Range between the top-most def* and the fourth element after POINT.
 604 | Note that this means that there is no guarantee of proper font
 605 | locking in def* forms that are not at top level."
 606 |   (goto-char point)
 607 |   (condition-case nil
 608 |       (beginning-of-defun)
 609 |     (error nil))
 610 | 
 611 |   (let ((beg-def (point)))
 612 |     (when (and (not (= point beg-def))
 613 |                (looking-at "(def"))
 614 |       (condition-case nil
 615 |           (progn
 616 |             ;; move forward as much as possible until failure (or success)
 617 |             (forward-char)
 618 |             (dotimes (_ 4)
 619 |               (forward-sexp)))
 620 |         (error nil))
 621 |       (cons beg-def (point)))))
 622 | 
 623 | (defun clojure-font-lock-extend-region-def ()
 624 |   "Set region boundaries to include the first four elements of def* forms."
 625 |   (let ((changed nil))
 626 |     (let ((def (clojure-font-lock-def-at-point font-lock-beg)))
 627 |       (when def
 628 |         (destructuring-bind (def-beg . def-end) def
 629 |           (when (and (< def-beg font-lock-beg)
 630 |                      (< font-lock-beg def-end))
 631 |             (setq font-lock-beg def-beg
 632 |                   changed t)))))
 633 |     (let ((def (clojure-font-lock-def-at-point font-lock-end)))
 634 |       (when def
 635 |         (destructuring-bind (def-beg . def-end) def
 636 |           (when (and (< def-beg font-lock-end)
 637 |                      (< font-lock-end def-end))
 638 |             (setq font-lock-end def-end
 639 |                   changed t)))))
 640 |     changed))
 641 | 
 642 | (defun clojure-font-lock-regexp-groups (bound)
 643 |   "Highlight grouping constructs in regular expression.
 644 | 
 645 | BOUND denotes the maximum number of characters (relative to the
 646 | point) to check."
 647 |   (catch 'found
 648 |     (while (re-search-forward (concat
 649 |                                ;; A group may start using several alternatives:
 650 |                                "\\(\\(?:"
 651 |                                ;; 1. (? special groups
 652 |                                "(\\?\\(?:"
 653 |                                ;; a) non-capturing group (?:X)
 654 |                                ;; b) independent non-capturing group (?>X)
 655 |                                ;; c) zero-width positive lookahead (?=X)
 656 |                                ;; d) zero-width negative lookahead (?!X)
 657 |                                "[:=!>]\\|"
 658 |                                ;; e) zero-width positive lookbehind (?<=X)
 659 |                                ;; f) zero-width negative lookbehind (?<!X)
 660 |                                "<[=!]\\|"
 661 |                                ;; g) named capturing group (?<name>X)
 662 |                                "<[[:alnum:]]+>"
 663 |                                "\\)\\|" ;; end of special groups
 664 |                                ;; 2. normal capturing groups (
 665 |                                ;; 3. we also highlight alternative
 666 |                                ;; separarators |, and closing parens )
 667 |                                "[|()]"
 668 |                                "\\)\\)")
 669 |                               bound t)
 670 |       (let ((face (get-text-property (1- (point)) 'face)))
 671 |         (when (and (or (and (listp face)
 672 |                             (memq 'font-lock-string-face face))
 673 |                        (eq 'font-lock-string-face face))
 674 |                    (clojure-string-start t))
 675 |           (throw 'found t))))))
 676 | 
 677 | ;; Docstring positions
 678 | (put 'ns 'clojure-doc-string-elt 2)
 679 | (put 'def 'clojure-doc-string-elt 2)
 680 | (put 'defn 'clojure-doc-string-elt 2)
 681 | (put 'defn- 'clojure-doc-string-elt 2)
 682 | (put 'defmulti 'clojure-doc-string-elt 2)
 683 | (put 'defmacro 'clojure-doc-string-elt 2)
 684 | (put 'definline 'clojure-doc-string-elt 2)
 685 | (put 'defprotocol 'clojure-doc-string-elt 2)
 686 | 
 687 | ;; clj-thamil
 688 | ;; Docstring positions
 689 | (put 'வரையறு 'clojure-doc-string-elt 2)
 690 | (put 'வரையறு-செயல்கூறு 'clojure-doc-string-elt 2)
 691 | 
 692 | (defun clojure-indent-line ()
 693 |   "Indent current line as Clojure code."
 694 |   (if (clojure-in-docstring-p)
 695 |       (save-excursion
 696 |         (beginning-of-line)
 697 |         (when (looking-at "^\\s-*")
 698 |           (replace-match (clojure-docstring-fill-prefix))))
 699 |     (lisp-indent-line)))
 700 | 
(defun clojure-indent-function (indent-point state)
  "Compute the indentation of a line inside a Clojure form.
`clojure-mode' installs this function as the buffer-local value
of the variable `lisp-indent-function'.  It is used when
indenting a line within a function call, to see if the called
function says anything special about how to indent the line.

INDENT-POINT is the position where the user typed TAB, or equivalent.
Point is located at the point to indent under (for default indentation);
STATE is the `parse-partial-sexp' state for that position.

If the current line is in a call to a function whose name, with
any namespace prefix ending in a slash removed, has a non-nil
property `clojure-indent-function', that property specifies how
to do the indentation.

The property value can be

- `defun', meaning indent `defun'-style;
- an integer N, meaning indent the first N arguments specially
  like ordinary function arguments and then indent any further
  arguments like a body;
- a function to call just as this function was called.
  If that function returns nil, that means it doesn't specify
  the indentation.

Forms without the property are indented `defun'-style when
`clojure-defun-style-default-indent' is non-nil and the name does
not start with a colon, or when the name is longer than three
characters and starts with \"def\" or \"with-\".  Otherwise
`clojure-backtracking-indent' is consulted when
`clojure-use-backtracking-indent' is non-nil.

This function also returns nil meaning don't specify the indentation."
  (let ((normal-indent (current-column)))
    ;; Move to the first element of the innermost containing list.
    (goto-char (1+ (elt state 1)))
    (parse-partial-sexp (point) calculate-lisp-indent-last-sexp 0 t)
    (if (and (elt state 2)
             (not (looking-at "\\sw\\|\\s_")))
        ;; car of form doesn't seem to be a symbol
        (progn
          (if (not (> (save-excursion (forward-line 1) (point))
                      calculate-lisp-indent-last-sexp))
              (progn (goto-char calculate-lisp-indent-last-sexp)
                     (beginning-of-line)
                     (parse-partial-sexp (point)
                                         calculate-lisp-indent-last-sexp 0 t)))
          ;; Indent under the list or under the first sexp on the same
          ;; line as calculate-lisp-indent-last-sexp.  Note that first
          ;; thing on that line has to be complete sexp since we are
          ;; inside the innermost containing sexp.
          (backward-prefix-chars)
          (if (and (eq (char-after (point)) ?\[)
                   (eq (char-after (elt state 1)) ?\())
              (+ (current-column) 2) ;; this is probably inside a defn
            (current-column)))
      ;; The head of the form is a symbol.  FUNCTION is its text and
      ;; FUNCTION-TAIL the part after the last slash, which is the name
      ;; used to look up the indentation property.
      (let* ((function (buffer-substring (point)
                                         (progn (forward-sexp 1) (point))))
             (open-paren (elt state 1))
             (method nil)
             (function-tail (first
                             (last
                              (split-string (substring-no-properties function) "/")))))
        (setq method (get (intern-soft function-tail) 'clojure-indent-function))
        (cond ((member (char-after open-paren) '(?\[ ?\{))
               ;; Inside a vector or map literal: align elements one
               ;; column after the opening delimiter.
               (goto-char open-paren)
               (1+ (current-column)))
              ((or (eq method 'defun)
                   (and clojure-defun-style-default-indent
                        ;; largely to preserve useful alignment of :require, etc in ns
                        (not (string-match "^:" function))
                        (not method))
                   (and (null method)
                        (> (length function) 3)
                        (string-match "\\`\\(?:\\S +/\\)?\\(def\\|with-\\)"
                                      function)))
               (lisp-indent-defform state indent-point))
              ((integerp method)
               (lisp-indent-specform method state
                                     indent-point normal-indent))
              (method
               (funcall method indent-point state))
              (clojure-use-backtracking-indent
               (clojure-backtracking-indent
                indent-point state normal-indent)))))))
 776 | 
(defun clojure-backtracking-indent (indent-point state normal-indent)
  "Experimental backtracking support.

Walk upwards through the enclosing sexps to check for contextual
indenting.

INDENT-POINT is the position being indented.  STATE is a parse
state whose element 1 is the start of the innermost containing
list.  NORMAL-INDENT is accepted but not used here.

Return the computed column, or nil when no enclosing form within
`clojure-max-backtracking' levels carries a usable
`clojure-backtracking-indent' property."
  (let (indent (path) (depth 0))
    ;; Start at the open paren of the innermost containing list.
    (goto-char (elt state 1))
    ;; Climb outwards until an indent is found or the depth limit is hit.
    (while (and (not indent)
                (< depth clojure-max-backtracking))
      (let ((containing-sexp (point)))
        ;; Move to where the first element of this list begins.
        (parse-partial-sexp (1+ containing-sexp) indent-point 1 t)
        ;; Only lists headed by a symbol can carry an indent spec.
        (when (looking-at "\\sw\\|\\s_")
          (let* ((start (point))
                 (fn (buffer-substring start (progn (forward-sexp 1) (point))))
                 (meth (get (intern-soft fn) 'clojure-backtracking-indent)))
            ;; N measures how far along this form's argument list
            ;; INDENT-POINT lies; it is incremented once per further
            ;; argument stepped over before reaching INDENT-POINT.
            (let ((n 0))
              (when (< (point) indent-point)
                (condition-case ()
                    (progn
                      (forward-sexp 1)
                      (while (< (point) indent-point)
                        (parse-partial-sexp (point) indent-point 1 t)
                        (incf n)
                        (forward-sexp 1)))
                  ;; Running off the end of the list just stops the count.
                  (error nil)))
              ;; PATH is built innermost-first, so after the push its
              ;; head belongs to the current (outermost so far) form.
              (push n path))
            (when meth
              ;; Descend through the spec along PATH: take the P-th
              ;; element when it exists, fall back to the last element
              ;; of a shorter list, and give up on a non-list.
              (let ((def meth))
                (dolist (p path)
                  (if (and (listp def)
                           (< p (length def)))
                      (setq def (nth p def))
                    (if (listp def)
                        (setq def (car (last def)))
                      (setq def nil))))
                ;; The offset is applied relative to the column of the
                ;; innermost containing open paren.
                (goto-char (elt state 1))
                (when def
                  (setq indent (+ (current-column) def)))))))
        (goto-char containing-sexp)
        (condition-case ()
            (progn
              (backward-up-list 1)
              (incf depth))
          ;; No enclosing list left: force the loop to terminate.
          (error (setq depth clojure-max-backtracking)))))
    indent))
 821 | 
 822 | ;; clojure backtracking indent is experimental and the format for these
 823 | ;; entries are subject to change
 824 | (put 'implement 'clojure-backtracking-indent '(4 (2)))
 825 | (put 'letfn 'clojure-backtracking-indent '((2) 2))
 826 | (put 'proxy 'clojure-backtracking-indent '(4 4 (2)))
 827 | (put 'reify 'clojure-backtracking-indent '((2)))
 828 | (put 'deftype 'clojure-backtracking-indent '(4 4 (2)))
 829 | (put 'defrecord 'clojure-backtracking-indent '(4 4 (2)))
 830 | (put 'defprotocol 'clojure-backtracking-indent '(4 (2)))
 831 | (put 'extend-type 'clojure-backtracking-indent '(4 (2)))
 832 | (put 'extend-protocol 'clojure-backtracking-indent '(4 (2)))
 833 | (put 'specify 'clojure-backtracking-indent '(4 (2)))
 834 | (put 'specify! 'clojure-backtracking-indent '(4 (2)))
 835 | 
(defun put-clojure-indent (sym indent)
  "Set the `clojure-indent-function' property of symbol SYM to INDENT.
The indentation code in this file looks this property up and
distinguishes the symbol `defun', an integer, and any other
non-nil value, which it calls as a function."
  (put sym 'clojure-indent-function indent))
 838 | 
 839 | (defmacro define-clojure-indent (&rest kvs)
 840 |   `(progn
 841 |      ,@(mapcar (lambda (x) `(put-clojure-indent
 842 |                              (quote ,(first x)) ,(second x)))
 843 |                kvs)))
 844 | 
 845 | (defun add-custom-clojure-indents (name value)
 846 |   (custom-set-default name value)
 847 |   (mapcar (lambda (x)
 848 |             (put-clojure-indent x 'defun))
 849 |           value))
 850 | 
(defcustom clojure-defun-indents nil
  "List of additional symbols with defun-style indentation in Clojure.

You can use this to let Emacs indent your own macros the same way
that it indents built-in macros like with-open.  To manually set
it from Lisp code, use (put-clojure-indent 'some-symbol 'defun)."
  :type '(repeat symbol)
  :group 'clojure
  ;; The setter stores the value and also calls `put-clojure-indent'
  ;; with `defun' for every symbol in the list.
  :set 'add-custom-clojure-indents)
 860 | 
 861 | (define-clojure-indent
 862 |   ;; built-ins
 863 |   (ns 1)
 864 |   (fn 'defun)
 865 |   (def 'defun)
 866 |   (defn 'defun)
 867 |   (bound-fn 'defun)
 868 |   (if 1)
 869 |   (if-not 1)
 870 |   (case 1)
 871 |   (condp 2)
 872 |   (when 1)
 873 |   (while 1)
 874 |   (when-not 1)
 875 |   (when-first 1)
 876 |   (do 0)
 877 |   (future 0)
 878 |   (comment 0)
 879 |   (doto 1)
 880 |   (locking 1)
 881 |   (proxy 2)
 882 |   (with-open 1)
 883 |   (with-precision 1)
 884 |   (with-local-vars 1)
 885 | 
 886 | 
 887 |   ;; clj-thamil
 888 |   ;; built-ins
 889 |   (ns 1)
 890 |   (செயல்கூறு 'defun)
 891 |   (வரையறு 'defun)
 892 |   (வரையறு-செயல்கூறு 'defun)
 893 |   (bound-fn 'defun)
 894 |   (எனில் 1)
 895 |   (இல்லெனில் 1)
 896 |   (case 1)
 897 |   (condp 2)
 898 |   (என்னும்போது 1)
 899 |   (while 1)
 900 |   (இல்லென்னும்-போது 1)
 901 |   (when-first 1)
 902 |   (செய் 0)
 903 |   (future 0)
 904 |   (comment 0)
 905 |   (doto 1)
 906 |   (locking 1)
 907 |   (proxy 2)
 908 |   (with-open 1)
 909 |   (with-precision 1)
 910 |   (with-local-vars 1)
 911 | 
 912 | 
 913 |   
 914 |   (reify 'defun)
 915 |   (deftype 2)
 916 |   (defrecord 2)
 917 |   (defprotocol 1)
 918 |   (extend 1)
 919 |   (extend-protocol 1)
 920 |   (extend-type 1)
 921 | 
 922 |   (try 0)
 923 |   (catch 2)
 924 |   (finally 0)
 925 | 
 926 |   ;; binding forms
 927 |   (let 1)
 928 |   (letfn 1)
 929 |   (binding 1)
 930 |   (loop 1)
 931 |   (for 1)
 932 |   (doseq 1)
 933 |   (dotimes 1)
 934 |   (when-let 1)
 935 |   (if-let 1)
 936 |   (when-some 1)
 937 |   (if-some 1)
 938 | 
 939 |   ;; clj-thamil
 940 |   ;; binding forms
 941 |   (வைத்துக்கொள் 1)
 942 |   (letfn 1)
 943 |   (binding 1)
 944 |   (சுற்று 1)
 945 |   (ஒவ்வொன்றுக்கும் 1)
 946 |   (செய்வரிசை 1)
 947 |   (dotimes 1)
 948 |   (when-let 1)
 949 |   (if-let 1)
 950 |   (when-some 1)
 951 |   (if-some 1)
 952 | 
 953 |   ;; data structures
 954 |   (defstruct 1)
 955 |   (struct-map 1)
 956 |   (assoc 1)
 957 | 
 958 |   (defmethod 'defun)
 959 | 
 960 |   ;; clojure.test
 961 |   (testing 1)
 962 |   (deftest 'defun)
 963 |   (are 1)
 964 |   (use-fixtures 'defun)
 965 | 
 966 |   ;; core.logic
 967 |   (run 'defun)
 968 |   (run* 'defun)
 969 |   (fresh 'defun)
 970 | 
 971 |   ;; core.async
 972 |   (alt! 0)
 973 |   (alt!! 0)
 974 |   (go 0)
 975 |   (go-loop 1)
 976 |   (thread 0))
 977 | 
 978 | 
 979 | 
 980 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 981 | ;;
 982 | ;; Better docstring filling for clojure-mode
 983 | ;;
 984 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 985 | 
 986 | (defun clojure-string-start (&optional regex)
 987 |   "Return the position of the \" that begins the string at point.
 988 | If REGEX is non-nil, return the position of the # that begins the
 989 | regex at point.  If point is not inside a string or regex, return
 990 | nil."
 991 |   (when (nth 3 (syntax-ppss)) ;; Are we really in a string?
 992 |     (save-excursion
 993 |       (save-match-data
 994 |         ;; Find a quote that appears immediately after whitespace,
 995 |         ;; beginning of line, hash, or an open paren, brace, or bracket
 996 |         (re-search-backward "\\(\\s-\\|^\\|#\\|(\\|\\[\\|{\\)\\(\"\\)")
 997 |         (let ((beg (match-beginning 2)))
 998 |           (when beg
 999 |             (if regex
1000 |                 (and (char-before beg) (char-equal ?# (char-before beg)) (1- beg))
1001 |               (when (not (char-equal ?# (char-before beg)))
1002 |                 beg))))))))
1003 | 
(defun clojure-char-at-point ()
  "Return the character after point as a one-character string.
Return nil when point is at the end of the accessible buffer."
  (unless (eobp)
    (let ((here (point)))
      (buffer-substring-no-properties here (1+ here)))))
1008 | 
(defun clojure-char-before-point ()
  "Return the character before point as a one-character string.
Return nil when point is at the beginning of the accessible buffer."
  (unless (bobp)
    (let ((here (point)))
      (buffer-substring-no-properties here (1- here)))))
1013 | 
1014 | ;; TODO: Deal with the fact that when point is exactly at the
1015 | ;; beginning of a string, it thinks that is the end.
(defun clojure-string-end ()
  "Return the position of the \" that ends the string at point.

Note that point must be inside the string - if point is
positioned at the opening quote, incorrect results will be
returned.

A quote counts as the end of the string only when it is preceded
by an even number of backslashes, so both an escaped quote (\\\")
and an escaped backslash right before the closing quote are
handled.  Signal `search-failed' if no closing quote follows
point.  Point and match data are preserved."
  (save-excursion
    (save-match-data
      (let ((unescaped-at
             ;; Non-nil when the character at POS is not backslash-escaped,
             ;; i.e. the run of backslashes just before it has even length.
             (lambda (pos)
               (save-excursion
                 (goto-char pos)
                 (zerop (% (skip-chars-backward "\\\\") 2))))))
        ;; If we're at the end of the string, just return point.
        (if (and (eq (char-after) ?\")
                 (funcall unescaped-at (point)))
            (point)
          ;; Otherwise scan forward for the first unescaped quote.
          (let ((end nil))
            (while (not end)
              ;; No NOERROR: a missing closing quote is reported.
              (search-forward "\"")
              (when (funcall unescaped-at (1- (point)))
                (setq end (1- (point)))))
            end))))))
1035 | 
(defun clojure-mark-string ()
  "Mark the string at point.
Point is moved just after the opening quote and the mark is set
at the closing quote.  Signal an error when
`clojure-string-start' finds no string at point."
  (interactive)
  (let ((start (clojure-string-start)))
    ;; Without this check `goto-char' would be handed nil and fail
    ;; with an unhelpful wrong-type-argument error.
    (unless start
      (error "No string at point"))
    (goto-char start)
    (forward-char)
    (set-mark (clojure-string-end))))
1042 | 
(defun clojure-toggle-keyword-string ()
  "Convert the string or keyword at point to keyword or string.
Search backwards from point for the nearest \" or : character.
A string found there is replaced by a keyword with the same
text, and a keyword is replaced by a string.  Point is restored
to its original position afterwards.  Signal an error, leaving
point where it was, when neither character is found before the
beginning of the accessible buffer."
  (interactive)
  (let ((original-point (point)))
    ;; `char-after' returns nil at the end of the buffer instead of
    ;; signaling, and `bobp' respects narrowing, unlike a literal 1.
    (while (and (not (bobp))
                (not (memq (char-after) '(?\" ?:))))
      (backward-char))
    ;; The delimiter is tested before giving up, so a string or keyword
    ;; that starts at the very beginning of the buffer is still handled.
    (let ((delimiter (char-after)))
      (cond
       ((eq delimiter ?\")
        ;; Drop both quotes and prefix a colon.
        (insert ":" (substring (clojure-delete-and-extract-sexp) 1 -1)))
       ((eq delimiter ?:)
        ;; Drop the colon and wrap the name in quotes.
        (insert "\"" (substring (clojure-delete-and-extract-sexp) 1) "\""))
       (t
        (goto-char original-point)
        (error "Beginning of file reached, this was probably a mistake"))))
    (goto-char original-point)))
1059 | 
(defun clojure-delete-and-extract-sexp ()
  "Delete the sexp following point and return its text.
The text is returned without text properties."
  (interactive)
  (let* ((start (point))
         (end (progn (forward-sexp) (point))))
    (prog1 (buffer-substring-no-properties start end)
      (delete-region start end))))
1068 | 
1069 | 
1070 | 
(defconst clojure-namespace-name-regex
  (rx line-start
      (zero-or-more whitespace)
      "("
      ;; Group 1: optional qualifier.  A plain string is quoted by `rx',
      ;; so the dot matches only a literal "." and not any character.
      (zero-or-one (group "clojure.core/"))
      ;; Group 2: optional "in-" prefix, as in in-ns.
      (zero-or-one (submatch "in-"))
      "ns"
      (zero-or-one "+")
      (one-or-more (any whitespace "\n"))
      ;; Optional metadata before the name; group 3 is a ^{...} or
      ;; #^{...} map, the alternative is a run of ^:keyword markers.
      (zero-or-more (or (submatch (zero-or-one "#")
                                  "^{"
                                  (zero-or-more (not (any "}")))
                                  "}")
                        (zero-or-more "^:"
                                      (one-or-more (not (any whitespace)))))
                    (one-or-more (any whitespace "\n")))
      ;; Optional quote or colon before the name, as in (in-ns 'foo)
      ;; or (ns+ :user).
      (zero-or-one (any ":'"))
      ;; Group 4: the namespace name itself.
      (group (one-or-more (not (any "()\"" whitespace))) word-end))
  "Regexp matching a namespace declaration form.
Group 4 holds the namespace name.")
1090 | 
1091 | ;; for testing clojure-namespace-name-regex, you can evaluate this code and make
1092 | ;; sure foo (or whatever the namespace name is) shows up in results. some of
1093 | ;; these currently fail.
1094 | ;; (mapcar (lambda (s) (let ((n (string-match clojure-namespace-name-regex s)))
1095 | ;;                       (if n (match-string 4 s))))
1096 | ;;         '("(ns foo)"
1097 | ;;           "(ns
1098 | ;; foo)"
1099 | ;;           "(ns foo.baz)"
1100 | ;;           "(ns ^:bar foo)"
1101 | ;;           "(ns ^:bar ^:baz foo)"
1102 | ;;           "(ns ^{:bar true} foo)"
1103 | ;;           "(ns #^{:bar true} foo)"
1104 | ;;           "(ns #^{:fail {}} foo)"
1105 | ;;           "(ns ^{:fail2 {}} foo.baz)"
1106 | ;;           "(ns ^{} foo)"
1107 | ;;           "(ns ^{:skip-wiki true}
1108 | ;;   aleph.netty
1109 | ;; "
1110 | ;;           "(ns
1111 | ;;  foo)"
1112 | ;;     "foo"))
1113 | 
1114 | 
1115 | 
(defun clojure-expected-ns ()
  "Return the namespace name that the file should have.
The name is derived from the path of the visited file relative to
the nearest directory above `default-directory' that contains a
project.clj file.  The first path component is dropped, the file
extension (if any) is removed, directory separators become dots,
and underscores become dashes.

Signal an error if the buffer is not visiting a file or if no
project.clj can be found."
  (let ((file (buffer-file-name))
        (root (locate-dominating-file default-directory "project.clj")))
    ;; Fail with a readable message rather than a wrong-type-argument
    ;; error from `file-truename' being handed nil.
    (unless file
      (error "Buffer is not visiting a file"))
    (unless root
      (error "No project.clj found above %s" default-directory))
    (let* ((project-dir (file-truename root))
           ;; `file-name-sans-extension' also copes with files that have
           ;; no extension, where a negative-length `substring' end index
           ;; of zero would signal args-out-of-range.
           (relative (file-name-sans-extension
                      (substring (file-truename file)
                                 (length project-dir)))))
      (replace-regexp-in-string
       "_" "-" (mapconcat 'identity (cdr (split-string relative "/")) ".")))))
1126 | 
(defun clojure-insert-ns-form-at-point ()
  "Insert an ns form for the expected namespace at point.
The namespace name comes from `clojure-expected-ns'."
  (interactive)
  (let ((expected (clojure-expected-ns)))
    (insert (format "(ns %s)" expected))))
1131 | 
(defun clojure-insert-ns-form ()
  "Insert a namespace form at the beginning of the buffer.
Any narrowing is removed first and is not restored.  Point is
moved to the start of the buffer before the form is inserted
there by `clojure-insert-ns-form-at-point'."
  (interactive)
  ;; Widen so that `point-min' really is the start of the buffer.
  (widen)
  (goto-char (point-min))
  (clojure-insert-ns-form-at-point))
1138 | 
(defun clojure-update-ns ()
  "Update the namespace of the current buffer.
Useful if a file has been renamed.

The first match of `clojure-namespace-name-regex' in the whole
buffer (ignoring narrowing) has its namespace name replaced by
the result of `clojure-expected-ns'.  Point, narrowing and match
data are preserved.  Signal an error if no namespace form is
found."
  (interactive)
  (let ((nsname (clojure-expected-ns)))
    (when nsname
      (save-excursion
        (save-restriction
          (widen)
          (goto-char (point-min))
          (save-match-data
            ;; Search here rather than relying on match data left behind
            ;; by another function.
            (if (re-search-forward clojure-namespace-name-regex nil t)
                ;; FIXEDCASE and LITERAL are both set: the new name must be
                ;; inserted verbatim, without case conversion based on the
                ;; old name and without interpreting backslashes.
                (replace-match nsname t t nil 4)
              (error "Namespace not found"))))))))
1150 | 
(defun clojure-find-ns ()
  "Return the namespace name declared in the current Clojure buffer.
The whole buffer is searched from its beginning, ignoring
narrowing, for `clojure-namespace-name-regex'.  Return nil when
nothing matches.  Point and narrowing are restored; the match
data of a successful search is left in place."
  (save-excursion
    (save-restriction
      (widen)
      (goto-char (point-min))
      (and (re-search-forward clojure-namespace-name-regex nil t)
           (match-string-no-properties 4)))))
1160 | 
(defun clojure-find-def ()
  "Find the var declaration macro and symbol name of the current form.
Returns a list pair, e.g. (\"defn\" \"abc\") or (\"deftest\" \"some-test\").
Returns nil when no declaration is found.  Point is not moved."
  (let ((def-regexp
         (concat
          ;; Open paren and optional namespace qualifier
          "(\\(?:\\(?:\\sw\\|\\s_\\)+/\\)?"
          ;; Group 1: the declaring macro, anything starting with def
          "\\(def\\sw*\\)\\>"
          ;; Whitespace between macro and name
          "[ \r\n\t]*"
          ;; Optional type hints or metadata
          "\\(?:#?^\\(?:{[^}]*}\\|\\(?:\\sw\\|\\s_\\)+\\)[ \r\n\t]*\\)*"
          ;; Group 2: the symbol being defined
          "\\(\\(?:\\sw\\|\\s_\\)+\\)")))
    (save-excursion
      ;; Unless point already sits on a declaration, start from the
      ;; beginning of the enclosing top-level form.
      (if (not (looking-at def-regexp))
          (beginning-of-defun))
      (if (re-search-forward def-regexp nil t)
          (mapcar #'match-string '(1 2))))))
1179 | 
;; Open files whose names end in .clj, .cljs, .cljx, .dtm or .edn
;; in `clojure-mode'.
;;;###autoload
(add-to-list 'auto-mode-alist
             '("\\.\\(clj[sx]?\\|dtm\\|edn\\)\\'" . clojure-mode))
1183 | 
1184 | (provide 'clojure-mode)
1185 | 
1186 | ;; Local Variables:
1187 | ;; coding: utf-8
1188 | ;; byte-compile-warnings: (not cl-functions)
1189 | ;; indent-tabs-mode: nil
1190 | ;; End:
1191 | 
1192 | ;;; clojure-mode.el ends here
1193 | 


--------------------------------------------------------------------------------