├── doc └── intro.md ├── examples ├── clj │ ├── clj-español │ │ ├── doc │ │ │ └── intro.md │ │ ├── .gitignore │ │ ├── test │ │ │ └── clj_español │ │ │ │ └── core_test.clj │ │ ├── README.md │ │ ├── project.clj │ │ ├── src │ │ │ └── clj_español │ │ │ │ └── core.clj │ │ └── LICENSE │ ├── clj-spanish │ │ ├── doc │ │ │ └── intro.md │ │ ├── .gitignore │ │ ├── test │ │ │ └── clj_spanish │ │ │ │ └── core_test.clj │ │ ├── README.md │ │ ├── project.clj │ │ ├── src │ │ │ └── clj_spanish │ │ │ │ └── core.clj │ │ └── LICENSE │ ├── ஆமை-தமிழ் │ │ ├── .gitignore │ │ ├── project.clj │ │ └── src │ │ │ └── ஆமை_தமிழ் │ │ │ └── கரு.clj │ └── turtle-thamil │ │ ├── project.clj │ │ └── src │ │ └── turtle_thamil │ │ └── core.clj ├── js │ ├── setup.sh │ ├── test01.html │ ├── test03.html │ └── test02.html └── java │ └── java-examples │ ├── README.md │ ├── src │ └── main │ │ └── java │ │ └── clj-thamil │ │ └── examples │ │ └── java │ │ ├── WordSort01.java │ │ └── WordSort02.java │ └── pom.xml ├── .gitignore ├── src └── clj_thamil │ ├── java │ └── api │ │ └── format.cljc │ ├── subprograms.cljc │ ├── js │ └── api │ │ └── convert.cljs │ ├── main.clj │ ├── format │ ├── analysis.cljc │ └── convert.cljc │ ├── core.cljc │ ├── மொழியியல்.cljc │ └── format.cljc ├── test └── clj_thamil │ ├── format │ ├── analysis_test.cljc │ └── convert_test.cljc │ ├── demo │ └── trans_demo_01.cljc │ ├── core_test.cljc │ ├── மொழியியல்_test.cljc │ └── format_test.cljc ├── CHANGELOG.md ├── project.clj ├── README.md └── emacs └── clojure-mode.el /doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to clj-thamil-test 2 | 3 | TODO: write [great documentation](http://jacobian.org/writing/great-documentation/what-to-write/) 4 | -------------------------------------------------------------------------------- /examples/clj/clj-español/doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to clj-español 2 | 3 
| TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) 4 | -------------------------------------------------------------------------------- /examples/clj/clj-spanish/doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to clj-spanish 2 | 3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) 4 | -------------------------------------------------------------------------------- /examples/clj/ஆமை-தமிழ்/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | -------------------------------------------------------------------------------- /examples/clj/clj-español/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | -------------------------------------------------------------------------------- /examples/clj/clj-spanish/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /lib 3 | /classes 4 | /checkouts 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | .lein-deps-sum 10 | .lein-failures 11 | .lein-plugins 12 | .lein-repl-history 13 | /examples/js/js 14 | /resources/public/js 15 | -------------------------------------------------------------------------------- /src/clj_thamil/java/api/format.cljc: 
-------------------------------------------------------------------------------- 1 | (ns clj-thamil.java.api.format 2 | (:require [clj-thamil.format :as fmt]) 3 | ;; Splicing reader conditional: a plain #?(:clj a :clj b) keeps only the first 4 | ;; matching branch, which silently dropped the :gen-class clause. 5 | #?@(:clj [(:import java.util.Comparator) 6 | (:gen-class 7 | :methods [#^{:static true} [word_comp [] java.util.Comparator]])])) 8 | 9 | ;; Implementation of the generated static method word_comp; returns fmt/word-comp. 10 | (defn -word_comp [] fmt/word-comp) 11 | -------------------------------------------------------------------------------- /examples/clj/clj-español/test/clj_español/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-español.core-test 2 | (:require [clojure.test :refer :all] 3 | [clj-español.core :refer :all])) 4 | 5 | (deftest core-test 6 | (let [numbers [2 3 5 7 11]] 7 | (testing "Clojure en español" 8 | (is (= 11 (último numbers)))))) 9 | -------------------------------------------------------------------------------- /examples/clj/clj-spanish/test/clj_spanish/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-spanish.core-test 2 | (:require [clojure.test :refer :all] 3 | [clj-spanish.core :refer :all])) 4 | 5 | (deftest core-test 6 | (let [numbers [2 3 5 7 11]] 7 | (testing "Clojure en español" 8 | (is (= 11 (último numbers)))))) 9 | -------------------------------------------------------------------------------- /examples/js/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | mkdir -p js 4 | lein do clean, compile, cljsbuild once 5 | #cp -r ../../resources/public/js/clj-thamil.js ./js 6 | #cp -r ../../resources/public/js/out/* ./js 7 | rsync --recursive ../../resources/public/js/clj-thamil.js ./js 8 | rsync --recursive ../../resources/public/js/out/ ./js 9 | -------------------------------------------------------------------------------- /examples/clj/clj-español/README.md: -------------------------------------------------------------------------------- 1 | # clj-español 2 | 3 | A Clojure
library designed to ... well, that part is up to you. 4 | 5 | ## Usage 6 | 7 | FIXME 8 | 9 | ## License 10 | 11 | Copyright © 2016 FIXME 12 | 13 | Distributed under the Eclipse Public License either version 1.0 or (at 14 | your option) any later version. 15 | -------------------------------------------------------------------------------- /examples/clj/clj-spanish/README.md: -------------------------------------------------------------------------------- 1 | # clj-spanish 2 | 3 | A Clojure library designed to ... well, that part is up to you. 4 | 5 | ## Usage 6 | 7 | FIXME 8 | 9 | ## License 10 | 11 | Copyright © 2016 FIXME 12 | 13 | Distributed under the Eclipse Public License either version 1.0 or (at 14 | your option) any later version. 15 | -------------------------------------------------------------------------------- /examples/clj/clj-español/project.clj: -------------------------------------------------------------------------------- 1 | (defproject clj-español "0.1.0-SNAPSHOT" 2 | :description "FIXME: write description" 3 | :url "http://example.com/FIXME" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.7.0"] 7 | [clj-thamil "0.1.2"]]) 8 | -------------------------------------------------------------------------------- /examples/clj/clj-spanish/project.clj: -------------------------------------------------------------------------------- 1 | (defproject clj-spanish "0.1.0-SNAPSHOT" 2 | :description "FIXME: write description" 3 | :url "http://example.com/FIXME" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.7.0"] 7 | [clj-thamil "0.1.2"]]) 8 | -------------------------------------------------------------------------------- /examples/clj/ஆமை-தமிழ்/project.clj: -------------------------------------------------------------------------------- 1 | (defproject ஆமை-தமிழ் 
"0.1.0-SNAPSHOT" 2 | :description "FIXME: write description" 3 | :url "http://example.com/FIXME" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.7.0"] 7 | [clj-thamil "0.1.2"] 8 | [com.google/clojure-turtle "0.2.0"]]) 9 | -------------------------------------------------------------------------------- /examples/clj/turtle-thamil/project.clj: -------------------------------------------------------------------------------- 1 | (defproject turtle-thamil "0.1.0-SNAPSHOT" 2 | :description "FIXME: write description" 3 | :url "http://example.com/FIXME" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.7.0"] 7 | [clj-thamil "0.1.2"] 8 | [com.google/clojure-turtle "0.2.0"]]) 9 | -------------------------------------------------------------------------------- /src/clj_thamil/subprograms.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.subprograms 2 | (:require [clojure.java.io :as jio] 3 | [clj-thamil.format :as fmt])) 4 | 5 | (defn print-as-phonemes 6 | "Reads every line from *in* and prints it as the concatenation of the elements returned by fmt/str->phonemes. Arguments are accepted but ignored; returns nil." 7 | [& args] 8 | (with-open [rdr (jio/reader *in*)] 9 | ;; doseq rather than (doall (for ...)): printing is a side effect, so there is 10 | ;; no reason to build and retain a sequence of nils for the whole input. 11 | (doseq [line (line-seq rdr)] 12 | (println (apply str (fmt/str->phonemes line)))))) 13 | -------------------------------------------------------------------------------- /src/clj_thamil/js/api/convert.cljs: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.js.api.convert 2 | (:require [clj-thamil.format.convert :as cvt])) 3 | 4 | (def romanized-to-thamil cvt/romanized->தமிழ்) 5 | (def thamil-to-romanized cvt/தமிழ்->romanized) 6 | 7 | (def tab-to-thamil cvt/tab->தமிழ்) 8 | (def thamil-to-tab cvt/தமிழ்->tab) 9 | 10 | (def bamini-to-thamil cvt/bamini->தமிழ்) 11 | (def thamil-to-bamini cvt/தமிழ்->bamini) 12 | 13 | (def
tscii-to-thamil cvt/tscii->தமிழ்) 14 | (def thamil-to-tscii cvt/தமிழ்->tscii) 15 | 16 | (def webulagam-to-thamil cvt/webulagam->தமிழ்) 17 | (def thamil-to-webulagam cvt/தமிழ்->webulagam) 18 | 19 | 20 | -------------------------------------------------------------------------------- /test/clj_thamil/format/analysis_test.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.format.analysis-test 2 | (:require [clj-thamil.format :as fmt]) 3 | (:use clj-thamil.format.analysis 4 | clojure.test)) 5 | 6 | (deftest letters-plus-grantha-test 7 | (let [letters-plus-grantha-trie (fmt/make-trie (flatten letters-plus-grantha)) 8 | str->letters-plus-grantha (fn [s] (fmt/str->elems letters-plus-grantha-trie s))] 9 | (testing "string fns also working on grantha letters" 10 | (is (= ["ஜி" "மி" "க்" "கி"] (str->letters-plus-grantha "ஜிமிக்கி"))) 11 | (is (= ["கு" "ஷி"] (str->letters-plus-grantha "குஷி")))) 12 | (testing "trie-elem-freqs" 13 | (let [s "ஜோடி"] 14 | (is (= (trie-elem-freqs letters-plus-grantha-trie s) 15 | (trie-elem-freqs letters-plus-grantha-trie (str s " abc 123 a3")))))))) 16 | -------------------------------------------------------------------------------- /examples/clj/turtle-thamil/src/turtle_thamil/core.clj: -------------------------------------------------------------------------------- 1 | (ns turtle-thamil.core 2 | (:require [clojure-turtle.core :as turtle]) 3 | (:use clj-thamil.core)) 4 | 5 | (def turtle-fns-map '{turtle/forward முன்னால் 6 | turtle/back பின்னால் 7 | turtle/right வலது 8 | turtle/left இடது 9 | turtle/translate இடம்பெயர் 10 | turtle/penup எழுதுகோலெடு 11 | turtle/pendown எழுதுகோல்வை 12 | turtle/clean துப்புரவு 13 | ;; setxy ??? 
14 | turtle/setheading திசைவை 15 | turtle/home வீடு}) 16 | 17 | (def turtle-forms-map '{turtle/repeat மீண்டும் 18 | turtle/all எல்லாம் 19 | turtle/new-window புது-சாளரம்}) 20 | 21 | (translate-fns turtle-fns-map) 22 | (translate-forms turtle-forms-map) 23 | -------------------------------------------------------------------------------- /examples/clj/ஆமை-தமிழ்/src/ஆமை_தமிழ்/கரு.clj: -------------------------------------------------------------------------------- 1 | (ns ஆமை-தமிழ்.கரு 2 | (:require [clojure-turtle.core :as turtle]) 3 | (:use clj-thamil.core)) 4 | 5 | (translate-forms '{translate-fns மொழிப்பெயர்-செயல்கூறுகள் 6 | translate-forms மொழிப்பெயர்-வடிவங்கள்}) 7 | 8 | (வரையறு ஆமை-செயல்கூறுகள் 9 | '{turtle/forward முன்னால் 10 | turtle/back பின்னால் 11 | turtle/right வலது 12 | turtle/left இடது 13 | turtle/translate இடம்பெயர் 14 | turtle/penup எழுதுகோலெடு 15 | turtle/pendown எழுதுகோல்வை 16 | turtle/clean துப்புரவு 17 | ;; setxy ??? 18 | turtle/setheading திசைவை 19 | turtle/home வீடு}) 20 | 21 | (வரையறு ஆமை-வடிவங்கள் 22 | '{turtle/repeat மீண்டும் 23 | turtle/all எல்லாம் 24 | turtle/new-window புது-சாளரம்}) 25 | 26 | (மொழிப்பெயர்-செயல்கூறுகள் ஆமை-செயல்கூறுகள்) 27 | (மொழிப்பெயர்-வடிவங்கள் ஆமை-வடிவங்கள்) 28 | -------------------------------------------------------------------------------- /src/clj_thamil/main.clj: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.main 2 | (:require [clojure.string :as string] ;; keyword clause: a bare `require` symbol fails ns validation on Clojure 1.9+ 3 | [clj-thamil.format [analysis :as analysis] [convert :as convert]] 4 | [clj-thamil.subprograms :as subprog]) 5 | (:gen-class)) 6 | 7 | (def ^{:doc "a map that specifies what sub-program to run based on the first arg passed in"} 8 | main-fns 9 | {"freqs" analysis/-main 10 | "osxkeyb" convert/-main 11 | "phonemes" subprog/print-as-phonemes}) 12 | 13 | (defn -main [& args] 14 | (assert (pos?
(count args)) "Running clj-thamil as an executable requires arguments") 15 | (let [subprog (first args) 16 | default-fn (fn [& args] (throw (Exception. (str "The specified clj-thamil sub-program is misspelled or does not exist. Available sub-programs: [" (string/join ", " (-> main-fns keys sort)) "]")))) 17 | subprog-fn (get main-fns subprog default-fn) 18 | new-args (rest args)] 19 | (apply subprog-fn new-args))) 20 | -------------------------------------------------------------------------------- /test/clj_thamil/demo/trans_demo_01.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.demo.trans-demo-01 2 | (:use clj-thamil.core)) 3 | 4 | (வரையறு-செயல்கூறு தன்னால்-பெருக்கு 5 | [அ] 6 | (* அ அ)) 7 | 8 | (வரையறு எண்கள் [108 1008 18 63 6 12 247]) 9 | 10 | ;; 1-ஆம் சுற்று 11 | 12 | (வரையறு-செயல்கூறு சுற்று-01 13 | [] 14 | (வரி-அச்சிடு "எண்களையும் அவற்றின் சதுர ஆக்கங்களையும்(?) அச்சிடுவது:") 15 | (செய்வரிசை [எண் எண்கள்] 16 | (வரி-அச்சிடு (தொடை "[" எண் "] -> [" (தன்னால்-பெருக்கு எண்) "]")))) 17 | 18 | (வரையறு சதுர-எண்கள் (விவரி தன்னால்-பெருக்கு எண்கள்)) 19 | 20 | ;; 2-ஆம் சுற்று 21 | 22 | (வரையறு-செயல்கூறு சுற்று-02 23 | [] 24 | (வரி-அச்சிடு "மீண்டும் எண்களையும் சதுர எண்களையும் அச்சிடுவது:") 25 | (செய்யோட்டம் 26 | (விவரி (செயல்கூறு [எ சஎ] (வரி-அச்சிடு எ "*" எ "=" சஎ)) எண்கள் சதுர-எண்கள்))) 27 | 28 | ;; எல்லாச் சுற்றும் 29 | 30 | (வரையறு-செயல்கூறு எல்லாவற்றையும்-ஓட்டு 31 | [] 32 | (வைத்துக்கொள் [செயல்கூறுகள் [சுற்று-01 சுற்று-02] 33 | ஓட்டும்-செயல்கூறுகள் (இடைபொருத்து வரி-அச்சிடு செயல்கூறுகள்)] 34 | (செய்வரிசை [செ ஓட்டும்-செயல்கூறுகள்] 35 | (செ)))) 36 | -------------------------------------------------------------------------------- /examples/java/java-examples/README.md: -------------------------------------------------------------------------------- 1 | # Java Examples for clj-thamil 2 | 3 | ## Requirements 4 | 5 | The Java example code requires the clj-thamil artifact to be built and installed. 
Refer to the [Building](../../../README.md) section on how to build and install the artifact. 6 | 7 | ## Building 8 | 9 | All of the Java examples can be built together by running the following from the clj-thamil project root: 10 | ``` 11 | lein clean 12 | lein install 13 | cd examples/java/java-examples 14 | mvn clean package 15 | ``` 16 | 17 | Building the Java examples is a separate step from building the clj-thamil artifact that they depend on. 18 | 19 | ## Usage 20 | 21 | After following the build instructions above, a shaded jar/uberjar will be in the `target` subdirectory, but it is not 22 | in itself executable since it contains multiple main methods. 23 | Instead, the uberjar should be provided in the classpath followed by 24 | the class name of the example being run: 25 | ``` 26 | java -cp target/java-examples-1.0.jar clj_thamil.examples.java.WordSort01 27 | java -cp target/java-examples-1.0.jar clj_thamil.examples.java.WordSort02 28 | ``` 29 | 30 | ## Overview 31 | 32 | * WordSort01 - sorts words based on Thamil alphabetical order, obtaining the comparator through the Clojure Java API (`Clojure.var` and `deref` on `clj-thamil.format/word-comp`) 33 | * WordSort02 - sorts the same words based on Thamil alphabetical order, obtaining the comparator from the static method `clj_thamil.java.api.format.word_comp()` 34 | -------------------------------------------------------------------------------- /examples/js/test01.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /test/clj_thamil/core_test.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.core-test 2 | (:use clojure.test 3 | clj-thamil.core)) 4 | 5 | (defn demo-print-1 6 | [] 7 | (println "hello")) 8 | 9 | (defn demo-print-2 10 | [] 11 | (println "வணக்கம்")) 12 | 13 | (defn demo-print-3 14 | [] 15 | (வரி-அச்சிடு "வணக்கம்")) 16 | 17 | (defn demo-add-1 18 | [] 19 | (if (= 4 (+ 2 2)) 20 | (println "true") 21 | (println "false"))) 22 | 23 | (defn demo-add-2 24 | [] 25 | (if (= 4 (+ 2 2)) 26 | (println "true") 27 | (println
"false")) 28 | (if (= 5 (+ 2 2)) 29 | (println "true") 30 | (println "false"))) 31 | 32 | (defn demo-add-3 33 | [] 34 | (எனில் (= 4 (+ 2 2)) 35 | (வரி-அச்சிடு "வாய்மை") 36 | (வரி-அச்சிடு "பொய்மை")) 37 | (எனில் (= 5 (+ 2 2)) 38 | (வரி-அச்சிடு "வாய்மை") 39 | (வரி-அச்சிடு "பொய்மை"))) 40 | 41 | (வரையறு-செயல்கூறு demo-add-4 42 | [] 43 | (எனில் (= 4 (+ 2 2)) 44 | (வரி-அச்சிடு "வாய்மை") 45 | (வரி-அச்சிடு "பொய்மை")) 46 | (எனில் (= 5 (+ 2 2)) 47 | (வரி-அச்சிடு "வாய்மை") 48 | (வரி-அச்சிடு "பொய்மை"))) 49 | 50 | (வரையறு-செயல்கூறு மாதிரி-கூட்டு-5 51 | [] 52 | (எனில் (= 4 (+ 2 2)) 53 | (வரி-அச்சிடு "வாய்மை") 54 | (வரி-அச்சிடு "பொய்மை")) 55 | (எனில் (= 5 (+ 2 2)) 56 | (வரி-அச்சிடு "வாய்மை") 57 | (வரி-அச்சிடு "பொய்மை"))) 58 | 59 | (def demo-fns [demo-print-1 demo-print-2 demo-print-3 60 | demo-add-1 demo-add-2 demo-add-3 demo-add-4 மாதிரி-கூட்டு-5]) 61 | 62 | (deftest a-test 63 | (testing "FIXME, I fail." 64 | (let [s "வணக்கம்" 65 | hello (fn [] 66 | (str "Hello, and " s)) 67 | ஒன்று 1 68 | இரண்டு 2 69 | v [ஒன்று இரண்டு "மூன்று"]] 70 | (is (= v [1 2 "மூன்று"])) 71 | (is (= (hello) "Hello, and வணக்கம்"))))) 72 | 73 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. 
3 | 4 | ## [Unreleased][unreleased] 5 | 6 | ## [0.2.0] - 2016-07-30 7 | ### Added 8 | - Examples of Clojure in Spanish, Logo in Thamil 9 | ### Changed 10 | - Using reader conditionals instead of cljx (for compiling common Clojure code to Java and JS targets) 11 | ### Fixed 12 | - JS examples 13 | 14 | ## [0.1.2] - 2015-02-19 15 | ### Added 16 | - Generative testing for font conversion functions 17 | - Java examples using clj-thamil jar artifact 18 | - Doc for clj-thamil and Java examples 19 | 20 | ### Changed 21 | - Font conversion information represented as map instead of seq 22 | - தமிழ்->romanized transliteration for certain consonant clusters 23 | 24 | ### Fixed 25 | - Test file namespace name 26 | - Cljx configs for dirs for source and target for clj, cljs 27 | 28 | ## [0.1.1] - 2015-02-04 29 | ### Added 30 | - 'Translations' of Clojure core library form names via a couple of maps and handful of macros 31 | - Trie functions (create, get-in) 32 | - Function using a trie to extract/convert a string into its elements 33 | - Fns to split a string into a sequence of Thamil letters/phonemes 34 | - Fn to create a Thamil string from a sequence of phonemes 35 | - Sorting fns and comparators for single- and multi-letter Thamil strings 36 | - Seq fns generalized from string functions (index-of, prefix) 37 | - Function to adjust the cursor position in Thamil text 38 | - Functions written in Thamil to perform Thamil grammatical operations 39 | - Pluralize, add suffixes generally, add noun case suffixes 40 | - Letter frequency analysis and result output functions 41 | - Functions to convert between old Thamil font character sets and Unicode 42 | - Using cljx to be forward-compatible with compilation to JS via ClojureScript 43 | - Configs for deploying GPG-signed releases to Clojars 44 | 45 | [unreleased]: https://github.com/echeran/clj-thamil/compare/0.2.0...master 46 | [0.2.0]: https://github.com/echeran/clj-thamil/tree/0.2.0 47 | [0.1.2]: 
https://github.com/echeran/clj-thamil/tree/0.1.2 48 | [0.1.1]: https://github.com/echeran/clj-thamil/tree/0.1.1 49 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject clj-thamil "0.2.0" 2 | :description "A project encompassing various Thamil language-specific computing ideas" 3 | :url "https://github.com/echeran/clj-thamil" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :scm {:name "git" 7 | :url "https://github.com/echeran/clj-thamil"} 8 | :repositories [["releases" {:url "https://clojars.org/repo/"}]] 9 | :deploy-repositories [["clojars" {:creds :gpg}]] 10 | :pom-addition [:developers [:developer 11 | [:name "Elango Cheran"] 12 | [:url "http://www.elangocheran.com"] 13 | [:email "elango.cheran@gmail.com"] 14 | [:timezone "-8"]]] 15 | 16 | :dependencies [[org.clojure/clojure "1.8.0"] 17 | [org.clojure/data.csv "0.1.2"] 18 | [org.clojure/algo.generic "0.1.2"] 19 | [org.clojure/test.check "0.9.0"] 20 | [org.clojure/clojurescript "1.9.89"]] 21 | 22 | :jar-exclusions [#"\.cljx|\.swp|\.swo|\.DS_Store"] 23 | 24 | :aot [clj-thamil.main 25 | clj-thamil.java.api.format 26 | clj-thamil.format.analysis 27 | clj-thamil.format.convert] 28 | 29 | :main clj-thamil.main 30 | 31 | :lein-release {:deploy-via :shell 32 | :shell ["lein" "deploy"]} 33 | 34 | :profiles {:provided {:dependencies []} 35 | :dev {:plugins [[lein-cljsbuild "1.1.3"]] 36 | ;; :cljsbuild {:test-commands {"node" ["node" :node-runner "target/testable.js"]} 37 | ;; :builds [{:source-paths ["target/classes" "target/test-classes"] 38 | ;; :compiler {:output-to "target/testable.js" 39 | ;; :optimizations :advanced 40 | ;; :pretty-print true}}]} 41 | 42 | :cljsbuild {:builds {:app {:source-paths ["src"] 43 | :compiler {:output-to "resources/public/js/clj-thamil.js" 44 | :output-dir "resources/public/js/out" 45 | :externs 
[] 46 | :optimizations :none 47 | :pretty-print true}}} 48 | :test-commands { 49 | ;; no cljs test configured yet 50 | }}}}) 51 | -------------------------------------------------------------------------------- /examples/js/test03.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 34 | 37 | 38 |
Unsorted தமிழ் words (input)Sorted தமிழ் words (output)
32 | 33 | 35 | 36 |
39 | 40 | 43 | 44 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /examples/js/test02.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 33 | 36 | 37 | 38 | 40 | 43 | 44 |
English transliterated inputதமிழ் output
31 | 32 | 34 | 35 |
39 | 41 | 42 |
45 | 46 | 48 | 49 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /examples/java/java-examples/src/main/java/clj-thamil/examples/java/WordSort01.java: -------------------------------------------------------------------------------- 1 | package clj_thamil.examples.java; 2 | 3 | import clojure.java.api.Clojure; 4 | import clojure.lang.IFn; 5 | import java.io.File; 6 | import java.io.FileNotFoundException; 7 | import java.io.FileOutputStream; 8 | import java.io.IOException; 9 | import java.io.PrintWriter; 10 | import java.io.UnsupportedEncodingException; 11 | import java.nio.charset.Charset; 12 | import java.util.ArrayList; 13 | import java.util.Arrays; 14 | import java.util.Collections; 15 | import java.util.Comparator; 16 | import java.util.List; 17 | import org.apache.commons.lang3.StringUtils; 18 | 19 | public class WordSort01 { 20 | 21 | public static String utf8String(String s) throws UnsupportedEncodingException { 22 | byte[] array = s.getBytes("UTF-8"); 23 | return new String(array, Charset.forName("UTF-8")); 24 | } 25 | 26 | public static void main(String[] args) throws UnsupportedEncodingException, 27 | FileNotFoundException, 28 | IOException { 29 | // (require 'clj-thamil.format) 30 | IFn require = Clojure.var("clojure.core", "require"); 31 | require.invoke(Clojure.read("clj-thamil.format")); 32 | // access clj-thamil.format/word-comp, a non-fn var 33 | IFn wordCompVar = Clojure.var("clj-thamil.format", "word-comp"); 34 | IFn deref = Clojure.var("clojure.core", "deref"); 35 | Comparator wordComp = (Comparator) (deref.invoke(wordCompVar)); 36 | 37 | List strs = Arrays.asList( 38 | 39 | "மடம்", 40 | "மட்டம்", 41 | "மட்டும்", 42 | "மடக்கு", 43 | "முடக்கு", 44 | "முடுக்கு", 45 | "படம்", 46 | "குடம்", 47 | "தடம்", 48 | "தடி", 49 | "திட்டம்" 50 | 51 | ); 52 | List strs2 = new ArrayList(); 53 | strs2.addAll(strs); 54 | Collections.sort(strs2, wordComp); 55 | String list1 = StringUtils.join(strs, "\n"); 56 | 
String list2 = StringUtils.join(strs2, "\n"); 57 | 58 | System.out.println("Original list of strings:"); 59 | System.out.println(list1); 60 | System.out.println("Sorted list of strings:"); 61 | System.out.println(list2); 62 | 63 | // String fileName = "out.txt"; 64 | // FileOutputStream fos = new FileOutputStream(new File(fileName)); 65 | // PrintWriter pw = new PrintWriter(fos); 66 | // pw.println("Original list of strings:"); 67 | // pw.println(list1); 68 | // pw.println("Sorted list of strings:"); 69 | // pw.println(list2); 70 | // pw.close(); 71 | // fos.close(); 72 | 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /examples/java/java-examples/src/main/java/clj-thamil/examples/java/WordSort02.java: -------------------------------------------------------------------------------- 1 | package clj_thamil.examples.java; 2 | 3 | import clojure.java.api.Clojure; 4 | import clojure.lang.IFn; 5 | import java.io.File; 6 | import java.io.FileNotFoundException; 7 | import java.io.FileOutputStream; 8 | import java.io.IOException; 9 | import java.io.PrintWriter; 10 | import java.io.UnsupportedEncodingException; 11 | import java.nio.charset.Charset; 12 | import java.util.ArrayList; 13 | import java.util.Arrays; 14 | import java.util.Collections; 15 | import java.util.Comparator; 16 | import java.util.List; 17 | import org.apache.commons.lang3.StringUtils; 18 | 19 | import clj_thamil.java.api.format; 20 | 21 | public class WordSort02 { 22 | 23 | public static String utf8String(String s) throws UnsupportedEncodingException { 24 | byte[] array = s.getBytes("UTF-8"); 25 | return new String(array, Charset.forName("UTF-8")); 26 | } 27 | 28 | public static void main(String[] args) throws UnsupportedEncodingException, 29 | FileNotFoundException, 30 | IOException { 31 | Comparator wordComp = format.word_comp(); 32 | 33 | List strs = Arrays.asList( 34 | 35 | "மடம்", 36 | "மட்டம்", 37 | "மட்டும்", 38 | "மடக்கு", 39 | "முடக்கு", 40 | 
"முடுக்கு", 41 | "படம்", 42 | "குடம்", 43 | "தடம்", 44 | "தடி", 45 | "திட்டம்" 46 | 47 | // "\u0bae\u0b9f\u0bae\u0bcd", 48 | // "\u0bae\u0b9f\u0bcd\u0b9f\u0bae\u0bcd", 49 | // "\u0bae\u0b9f\u0bcd\u0b9f\u0bc1\u0bae\u0bcd", 50 | // "\u0bae\u0b9f\u0b95\u0bcd\u0b95\u0bc1", 51 | // "\u0bae\u0bc1\u0b9f\u0b95\u0bcd\u0b95\u0bc1", 52 | // "\u0bae\u0bc1\u0b9f\u0bc1\u0b95\u0bcd\u0b95\u0bc1", 53 | // "\u0baa\u0b9f\u0bae\u0bcd", 54 | // "\u0b95\u0bc1\u0b9f\u0bae\u0bcd", 55 | // "\u0ba4\u0b9f\u0bae\u0bcd", 56 | // "\u0ba4\u0b9f\u0bbf", 57 | // "\u0ba4\u0bbf\u0b9f\u0bcd\u0b9f\u0bae\u0bcd" 58 | 59 | ); 60 | List strs2 = new ArrayList(); 61 | strs2.addAll(strs); 62 | Collections.sort(strs2, wordComp); 63 | String list1 = StringUtils.join(strs, "\n"); 64 | String list2 = StringUtils.join(strs2, "\n"); 65 | 66 | System.out.println("Original list of strings:"); 67 | System.out.println(list1); 68 | System.out.println("Sorted list of strings:"); 69 | System.out.println(list2); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /examples/java/java-examples/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | clj-thamil 5 | java-examples 6 | jar 7 | 1.0 8 | java-examples 9 | https://github.com/echeran/clj-thamil-examples 10 | 11 | 12 | central 13 | https://repo1.maven.org/maven2/ 14 | 15 | false 16 | 17 | 18 | true 19 | 20 | 21 | 22 | clojars 23 | https://clojars.org/repo/ 24 | 25 | true 26 | 27 | 28 | true 29 | 30 | 31 | 32 | releases 33 | https://clojars.org/repo/ 34 | 35 | true 36 | 37 | 38 | true 39 | 40 | 41 | 42 | 43 | 44 | junit 45 | junit 46 | 3.8.1 47 | test 48 | 49 | 50 | clj-thamil 51 | clj-thamil 52 | 0.2.0 53 | 54 | 55 | org.apache.commons 56 | commons-lang3 57 | 3.3.2 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-compiler-plugin 62 | 3.2 63 | maven-plugin 64 | 65 | 66 | 67 | 68 | 69 | org.apache.maven.plugins 70 | maven-compiler-plugin 71 | 3.2 72 | 73 | 1.6 74 | 1.6 
75 | ${project.build.sourceEncoding} 76 | 77 | 78 | 79 | org.apache.maven.plugins 80 | maven-shade-plugin 81 | 2.3 82 | 83 | 84 | 85 | package 86 | 87 | shade 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | UTF-8 103 | 104 | 105 | -------------------------------------------------------------------------------- /examples/clj/clj-español/src/clj_español/core.clj: -------------------------------------------------------------------------------- 1 | (ns clj-español.core 2 | (:require [clj-thamil.core :refer [translate-fns translate-forms]])) 3 | 4 | (def fns-map '{ 5 | take toma 6 | drop baja 7 | inc carga ;; translated to "load" in english 8 | dec extracto ;; based soley on translation. need more info/context 9 | ;; to decide what connotations/denotations would be best 10 | range gama 11 | take-while toma-mientras 12 | drop-while baja-mientras 13 | interleave பின்னு ;; not sure what this means or what function 14 | ;; this macro serves, and thus can't decide how to label 15 | ;; reduce reduce 16 | ;; reducer reductor 17 | map mapa 18 | hash-map hachís-mapa 19 | ;; vector vector 20 | list enumera 21 | set pone 22 | hash-set hachís-pone ;; could use fijo or colocar as "set" 23 | atom átomo 24 | agent agencia ;; or agente 25 | first primero 26 | second segundo 27 | last último 28 | butlast pero-último ;; not sure of what this should communicate. 29 | rest lodemás ;; should we separate the words into "lo demás" 30 | ;; or shorten it to "demás"? Could also use "el resto" 31 | next próximo ;; this is used in the present, but "siguiente" is used 32 | ;; in the past. Not sure which makes more sense. 33 | true cierto 34 | false falso 35 | print imprime 36 | println imprimeln ;; ln means "line" in English, and 37 | ;; line in spanish is simply "linea" so 38 | ;; I thought it appropriate to keep it. 
39 | filter forma 40 | remove quita 41 | keep guardar 42 | slurp ventosa;; if this should be a verb, use "sorber" 43 | spit escupe ;; could be "saliva" if it's not an action 44 | seq sec ;; short for "secuenciar" 45 | dorun hazcorrer ;; could also just use "haz" meaning "do" 46 | doall haztodo ;; literally means do it all 47 | str crd ;; short for "cuerda" which translates to string 48 | interpose interpone 49 | find encuentra 50 | get consigue 51 | apply aplica 52 | count cuenta 53 | every? cada? 54 | true? cierto? 55 | false? falso? 56 | concat social 57 | identity identidad 58 | reverse invierte 59 | some alguno 60 | flatten aplana 61 | 62 | boolean booleano ;; sounds like English but couldn't find 63 | ;; a more specific word 64 | }) 65 | 66 | (def forms-map '{ 67 | if si 68 | when cuando 69 | if-not si-no 70 | when-not cuando-no 71 | ;; def def ;; short for "definir" = define 72 | ;; fn fn ;; short for "función" 73 | ;; defn defn ;; again, it still makes sense to keep the 74 | ;; English equivalents because romance languages 75 | ;; can sometimes have the same abbreviations 76 | let deja 77 | and y 78 | or o 79 | not no 80 | else más ;; or "otro" 81 | loop darvuelta ;; actually two words "dar vuelta" 82 | doseq hazsec 83 | for para ;; could be por, but I think para fits the function better 84 | cond dependela ;; means "depending on the ..." --> dependiendo de la 85 | do haz 86 | 87 | ;; clojure.test 88 | deftest def-prueba ;; not sure if I should hyphenate all the 89 | ;; double words, or keep as a compound? 
90 | testing probando 91 | is es 92 | are son 93 | }) 94 | 95 | ;; do the actual "translation" for bindings, fns, and any other value 96 | (translate-fns fns-map) 97 | 98 | ;; do the actual "translation" for macros and special forms 99 | (translate-forms forms-map) 100 | -------------------------------------------------------------------------------- /examples/clj/clj-spanish/src/clj_spanish/core.clj: -------------------------------------------------------------------------------- 1 | (ns clj-spanish.core 2 | (:require [clj-thamil.core :refer [translate-fns translate-forms]])) 3 | 4 | (def fns-map '{ 5 | take toma 6 | drop baja 7 | inc carga ;; translated to "load" in english 8 | dec extracto ;; based soley on translation. need more info/context 9 | ;; to decide what connotations/denotations would be best 10 | range gama 11 | take-while toma-mientras 12 | drop-while baja-mientras 13 | interleave பின்னு ;; not sure what this means or what function 14 | ;; this macro serves, and thus can't decide how to label 15 | ;; reduce reduce 16 | ;; reducer reductor 17 | map mapa 18 | hash-map hachís-mapa 19 | ;; vector vector 20 | list enumera 21 | set pone 22 | hash-set hachís-pone ;; could use fijo or colocar as "set" 23 | atom átomo 24 | agent agencia ;; or agente 25 | first primero 26 | second segundo 27 | last último 28 | butlast pero-último ;; not sure of what this should communicate. 29 | rest lodemás ;; should we separate the words into "lo demás" 30 | ;; or shorten it to "demás"? Could also use "el resto" 31 | next próximo ;; this is used in the present, but "siguiente" is used 32 | ;; in the past. Not sure which makes more sense. 33 | true cierto 34 | false falso 35 | print imprime 36 | println imprimeln ;; ln means "line" in English, and 37 | ;; line in spanish is simply "linea" so 38 | ;; I thought it appropriate to keep it. 
39 | filter forma 40 | remove quita 41 | keep guardar 42 | slurp ventosa;; if this should be a verb, use "sorber" 43 | spit escupe ;; could be "saliva" if it's not an action 44 | seq sec ;; short for "secuenciar" 45 | dorun hazcorrer ;; could also just use "haz" meaning "do" 46 | doall haztodo ;; literally means do it all 47 | str crd ;; short for "cuerda" which translates to string 48 | interpose interpone 49 | find encuentra 50 | get consigue 51 | apply aplica 52 | count cuenta 53 | every? cada? 54 | true? cierto? 55 | false? falso? 56 | concat social 57 | identity identidad 58 | reverse invierte 59 | some alguno 60 | flatten aplana 61 | 62 | boolean booleano ;; sounds like English but couldn't find 63 | ;; a more specific word 64 | }) 65 | 66 | (def forms-map '{ 67 | if si 68 | when cuando 69 | if-not si-no 70 | when-not cuando-no 71 | ;; def def ;; short for "definir" = define 72 | ;; fn fn ;; short for "función" 73 | ;; defn defn ;; again, it still makes sense to keep the 74 | ;; English equivalents because romance languages 75 | ;; can sometimes have the same abbreviations 76 | let deja 77 | and y 78 | or o 79 | not no 80 | else más ;; or "otro" 81 | loop darvuelta ;; actually two words "dar vuelta" 82 | doseq hazsec 83 | for para ;; could be por, but I think para fits the function better 84 | cond dependela ;; means "depending on the ..." --> dependiendo de la 85 | do haz 86 | 87 | ;; clojure.test 88 | deftest def-prueba ;; not sure if I should hyphenate all the 89 | ;; double words, or keep as a compound? 
90 | testing probando 91 | is es 92 | are son 93 | }) 94 | 95 | ;; do the actual "translation" for bindings, fns, and any other value 96 | (translate-fns fns-map) 97 | 98 | ;; do the actual "translation" for macros and special forms 99 | (translate-forms forms-map) 100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # clj-thamil 2 | 3 | clj-thamil is a Clojure library designed to be a multi-purpose library for Thamil 4 | computing. 5 | 6 | It can be used for natural language processing, designing input 7 | methods, the UI for text editing, etc. 8 | 9 | It can also be used as a basis for programming in Clojure in the 10 | Thamil language. 11 | 12 | Currently, it can support the following: 13 | * programming in Clojure (programming language) in Thamil (natural language) 14 | * natural language processing for Thamil language text 15 | * split a string into Thamil letters (not characters) and phonemes 16 | * combine a sequence of Thamil phonemes back into a proper Thamil string 17 | * sort letters, words, etc. by Thamil alphabetical order 18 | * convert a string between Thamil (Unicode format) and: 19 | * English-transliterated formats 20 | * TAB format 21 | * TSCII format 22 | * Bamini format 23 | * Webulagam format 24 | * basic grammar functions - pluralize, add suffixes, and add noun 25 | case suffixes 26 | * perform a letter frequency analysis on input Thamil text 27 | 28 | For examples of programming in Thamil (natural language), see: 29 | * `test/clj_thamil/demo/core_test.clj` - a gradual 30 | replacement of English into Thamil 31 | * `test/clj_thamil/demo/trans_demo_01.cljc` - just about entirely 32 | in Thamil. Demonstrates squaring numbers differently. 33 | * `src/clj_thamil/மொழியியல்.cljc`- just about 34 | entirely in Thamil. 
Contains functions for basic grammar in Thamil 35 | (making plurals, adding suffixes, adding noun case suffixes) 36 | 37 | ## Building 38 | 39 | For both Clojure and ClojureScript, compiling, testing, and starting a 40 | REPL can be done with the usual Leiningen commands `lein compile`, 41 | `lein test`, and `lein repl`. 42 | 43 | Compile the source into an executable JAR file (runnable on the JVM, 44 | based on Clojure sources) using the command `lein uberjar`. This is 45 | all you need to do to run one of the standalone processes or 46 | create a jar artifact. `lein install` will install the artifact and 47 | pom.xml into your local Maven cache. 48 | 49 | ## Usage 50 | 51 | ### Examples 52 | 53 | #### Java Examples 54 | 55 | The Java example code is in the Maven project in this repository at 56 | [`examples/java/java-examples`](examples/java/java-examples/README.md). 57 | See that page for instructions on building and running. 58 | 59 | #### JavaScript Examples 60 | 61 | The Clojure/-Script code in clj-thamil must be compiled using the 62 | ClojureScript compiler into JavaScript before it can be used in JS 63 | runtimes (ex: webpages). In the JS examples directory 64 | [`examples/js`](examples/js), first run `sh setup.sh`. Then load the 65 | HTML files located in the same directory from within a browser that 66 | has JS enabled. 67 | 68 | * test01.html - transliteration of English into Thamil on a JS popup box 69 | * test02.html - transliterates English into Thamil and splits Thamil into letters 70 | * test03.html - sorting Thamil words by Thamil alphabetical order 71 | 72 | ### Command-line processes 73 | 74 | #### Letter frequencies 75 | 76 | The frequency analysis program (`freqs`) can be 77 | run by 78 | ``` 79 | cat input | lein run freqs > output 80 | ``` 81 | 82 | The frequency analysis program 83 | takes input from the standard input stream and outputs to the standard 84 | output stream. 
85 | 86 | The code behind the letter frequency analysis can be found in the namespace 87 | `clj-thamil.format.analysis`. 88 | 89 | #### Phonemes 90 | 91 | The text to phoneme converter program (`phonemes`) can be run by 92 | ``` 93 | cat input | lein run phonemes > output 94 | ``` 95 | 96 | The phonemes program 97 | takes input from the standard input stream and outputs to the standard 98 | output stream. 99 | 100 | The code behind the phoneme conversion can be found in the namespaces `clj-thamil.subprograms` and `clj-thamil.format`. 101 | 102 | ## Editing 103 | 104 | For programming in Thamil, if using a computer running Mac OS X, use 105 | the Aquamacs program (a Mac OS X-friendly version of Emacs) to ensure that support for Thamil letters works 106 | correctly. If installing the `clojure-mode` package for Clojure 107 | support in Aquamacs, find the `clojure-mode.el` file in your Aquamacs 108 | MELPA/ELPA repository, and replace it with the file 109 | `emacs/clojure-mode.el` in this repository. Then load the newly-saved 110 | `clojure-mode.el` file in the Aquamacs package repository into 111 | Aquamacs, and run the command `M-x emacs-lisp-byte-compile-and-load`. 112 | 113 | ## Mailing List 114 | 115 | Join the [clj-thamil mailing list](https://groups.google.com/forum/#!forum/clj-thamil) to post questions and receive announcements. 116 | 117 | ## License 118 | 119 | Distributed under the Eclipse Public License, the same as Clojure. 
120 | -------------------------------------------------------------------------------- /src/clj_thamil/format/analysis.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.format.analysis 2 | (:require #?(:clj [clojure.java.io :as jio]) 3 | [clojure.string :as string] 4 | [clj-thamil.format :as fmt]) 5 | #?(:clj (:gen-class))) 6 | 7 | (def letters-plus-grantha 8 | (concat fmt/letters 9 | [["ஜ்" "ஜ" "ஜா" "ஜி" "ஜீ" "ஜு" "ஜூ" "ஜெ" "ஜே" "ஜை" "ஜொ" "ஜோ" "ஜௌ"] 10 | ["ஷ்" "ஷ" "ஷா" "ஷி" "ஷீ" "ஷு" "ஷூ" "ஷெ" "ஷே" "ஷை" "ஷொ" "ஷோ" "ஷௌ"] 11 | ["ஸ்" "ஸ" "ஸா" "ஸி" "ஸீ" "ஸு" "ஸூ" "ஸெ" "ஸே" "ஸை" "ஸொ" "ஸோ" "ஸௌ"] 12 | ["ஹ்" "ஹ" "ஹா" "ஹி" "ஹீ" "ஹு" "ஹூ" "ஹெ" "ஹே" "ஹை" "ஹொ" "ஹோ" "ஹௌ"] 13 | ["க்ஷ்" "க்ஷ" "க்ஷா" "க்ஷி" "க்ஷீ" "க்ஷு" "க்ஷூ" "க்ஷெ" "க்ஷே" "க்ஷை" "க்ஷொ" "க்ஷோ" "க்ஷௌ"] 14 | ["ஶ்ரீ"]])) 15 | 16 | (defn trie-elem-freqs 17 | "given a trie of strings (char seqs) and an input string, return a frequency map for every letter in the trie appearing in the input string" 18 | [trie s] 19 | (let [keep-fn (fn [x] 20 | (when (fmt/in-trie? 
trie x) 21 | x)) 22 | letters (fmt/str->elems trie s) 23 | letters-in-trie (keep keep-fn letters)] 24 | (frequencies letters-in-trie))) 25 | 26 | (defn trie-elem-string-seq-freqs 27 | "given a trie of strings (char seqs) and a sequence of input strings, return a final frequency map (the per-string frequency maps merged by addition) for every letter appearing across all strings" 28 | [trie strs] 29 | (apply merge-with + (map (partial trie-elem-freqs trie) strs))) 30 | 31 | (defn freq-grid 32 | "given a sequence of தமிழ் letters (flattened from a letter grid) and a map of those letters' frequencies, return the frequencies in the shape of the grid: rows of 13 entries (the last row may be shorter), with 0 for any letter missing from the map" 33 | [letter-seq freq-map] 34 | (let [freq-seq (map #(or (get freq-map %) 0) letter-seq) 35 | freq-grid (partition-all 13 freq-seq)] 36 | freq-grid)) 37 | 38 | ;; 39 | ;; printing functions 40 | ;; 41 | 42 | #?(:clj 43 | (defn print-freq-grid 44 | "given a number grid and the corresponding letter grid, print them to standard output, one tab-separated row per line" 45 | [freq-grid letter-grid] 46 | (let [print-grid (fn [grid] (doseq [row grid] (println (string/join "\t" row))))] 47 | (println "the letter grid's frequencies:") 48 | (println "") 49 | (print-grid freq-grid) 50 | (println "") 51 | (println "the letter grid used:") 52 | (print-grid letter-grid)))) 53 | #?(:clj 54 | (defn print-consonant-row-sums 55 | "given a frequency grid in the shape of a letter grid, and the letter grid itself, print out the sums of each consonant's row" 56 | [freq-grid letter-grid] 57 | (let [;; use rest in order to drop the first row = vowel row 58 | row-names (rest (map first letter-grid)) 59 | row-sums (rest (map (partial apply +) freq-grid))] 60 | (dorun 61 | (map (fn [rn rs] (println "For consonant:" rn ", there are" rs "instances of it in a C or C+V letter")) row-names row-sums))))) 62 | 63 | #?(:clj 64 | (defn print-vowel-col-sums 65 | "given a frequency grid in the shape of a letter grid, and the letter grid itself, print out the sums of each vowel's column specifically among consonant and consonant+vowel letters (exclude 
pure vowels)" 66 | [freq-grid letter-grid] 67 | (let [full-row-freq-grid (->> freq-grid 68 | rest 69 | (filter #(= 13 (count %)))) 70 | full-row-letter-grid (->> letter-grid 71 | rest 72 | (filter #(= 13 (count %)))) 73 | vowels (first letter-grid) 74 | col-names vowels 75 | freq-cols (apply map list full-row-freq-grid) 76 | col-sums (map (partial apply +) freq-cols)] 77 | (dorun 78 | (map (fn [cn cs] (println "For vowel/ஃ:" cn ", there are" cs "instances of it in a C or C+V letter")) col-names col-sums))))) 79 | 80 | ;; 81 | ;; umbrella printing fn 82 | ;; 83 | 84 | #?(:clj 85 | (defn print-letter-grid-stats-on-strs 86 | "for a given letter grid and a sequence of strings, print out all of the stats" 87 | [letter-grid strs] 88 | (let [letter-seq (flatten letter-grid) 89 | letter-trie (fmt/make-trie letter-seq) 90 | str->letters (fn [s] (fmt/str->elems letter-trie s)) 91 | fmap (trie-elem-string-seq-freqs letter-trie strs) 92 | fgrid (freq-grid letter-seq fmap)] 93 | (print-vowel-col-sums fgrid letter-grid) 94 | (println "") 95 | (print-consonant-row-sums fgrid letter-grid) 96 | (println "") 97 | (print-freq-grid fgrid letter-grid)))) 98 | 99 | ;; 100 | ;; main fn 101 | ;; 102 | 103 | #?(:clj 104 | (defn -main 105 | [& args] 106 | (with-open [rdr (jio/reader *in*)] 107 | (let [lines (line-seq rdr) 108 | letter-grid letters-plus-grantha] 109 | (print-letter-grid-stats-on-strs letter-grid lines))))) 110 | -------------------------------------------------------------------------------- /test/clj_thamil/மொழியியல்_test.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.மொழியியல்-test 2 | (:use clojure.test 3 | clj-thamil.மொழியியல் 4 | clj-thamil.core)) 5 | 6 | (வரையறு-பரிசோதனை அசை-உயிரெழுத்து-பரிசோதனை 7 | (பரிசோதிக்கும் "குறில், நெடில் செயல்கூறுகள்" 8 | (பரிசோதிக்கும் "குறில் செயல்கூறுகள்" 9 | (ஆகும் (உண்மையா? (குறிலா? "அ"))) 10 | (ஆகும் (உண்மையா? (குறிலா? "இ"))) 11 | (ஆகும் (உண்மையா? (குறிலா? 
"உ"))) 12 | (ஆகும் (உண்மையா? (குறிலா? "எ"))) 13 | (ஆகும் (உண்மையா? (குறிலா? "ஒ"))) 14 | (ஆகும் (பொய்மையா? (குறிலா? "ஆ"))) 15 | (ஆகும் (பொய்மையா? (குறிலா? "ஈ"))) 16 | (ஆகும் (பொய்மையா? (குறிலா? "ஊ"))) 17 | (ஆகும் (பொய்மையா? (குறிலா? "ஏ"))) 18 | (ஆகும் (பொய்மையா? (குறிலா? "ஓ"))) 19 | (ஆகும் (பொய்மையா? (குறிலா? "ஐ"))) 20 | (ஆகும் (பொய்மையா? (குறிலா? "ஔ"))) 21 | (ஆகும் (பொய்மையா? (குறிலா? nil))) 22 | (ஆகும் (பொய்மையா? (குறிலா? "")))) 23 | (பரிசோதிக்கும் "நெடில் செயல்கூறுகள்" 24 | (ஆகும் (பொய்மையா? (நெடிலா? "அ"))) 25 | (ஆகும் (பொய்மையா? (நெடிலா? "இ"))) 26 | (ஆகும் (பொய்மையா? (நெடிலா? "உ"))) 27 | (ஆகும் (பொய்மையா? (நெடிலா? "எ"))) 28 | (ஆகும் (பொய்மையா? (நெடிலா? "ஒ"))) 29 | (ஆகும் (உண்மையா? (நெடிலா? "ஆ"))) 30 | (ஆகும் (உண்மையா? (நெடிலா? "ஈ"))) 31 | (ஆகும் (உண்மையா? (நெடிலா? "ஊ"))) 32 | (ஆகும் (உண்மையா? (நெடிலா? "ஏ"))) 33 | (ஆகும் (உண்மையா? (நெடிலா? "ஓ"))) 34 | (ஆகும் (பொய்மையா? (நெடிலா? "ஐ"))) 35 | (ஆகும் (பொய்மையா? (நெடிலா? "ஔ"))) 36 | (ஆகும் (பொய்மையா? (நெடிலா? nil))) 37 | (ஆகும் (பொய்மையா? (நெடிலா? 
"")))))) 38 | 39 | (வரையறு-பரிசோதனை பன்மை-பரிசோதனை 40 | (பரிசோதிக்கும் "பன்மை" 41 | (ஆகும் (= "கற்கள்" (பன்மை "கல்"))) 42 | (ஆகும் (= "முட்கள்" (பன்மை "முள்"))) 43 | (ஆகும் (= "பற்கள்" (பன்மை "பல்"))) 44 | (ஆகும் (= "தீக்கள்" (பன்மை "தீ"))) 45 | (ஆகும் (= "பூக்கள்" (பன்மை "பூ"))) 46 | (ஆகும் (= "பசுக்கள்" (பன்மை "பசு"))) 47 | (ஆகும் (= "காடுகள்" (பன்மை "காடு"))) 48 | (ஆகும் (= "மரங்கள்" (பன்மை "மரம்"))) 49 | (ஆகும் (= "நாள்கள்" (பன்மை "நாள்"))))) 50 | 51 | (வரையறு-பரிசோதனை சந்தி-பொது-விதி-பரிசோதனை 52 | (பரிசோதிக்கும் "சந்தி பொது விதிகள்" 53 | (ஆகும் (= "தமிழ்நாடு" (சந்தி "தமிழ்" "நாடு"))) 54 | (ஆகும் (= "தமிழீழம்" (சந்தி "தமிழ்" "ஈழம்"))) 55 | (ஆகும் (= "செய்யளம்" (சந்தி "செய்" "அளம்"))) 56 | (ஆகும் (= "கல்லாறு" (சந்தி "கல்" "ஆறு"))) 57 | (ஆகும் (= "தேயிலை" (சந்தி "தே" "இலை"))) 58 | (ஆகும் (= "மலையகம்" (சந்தி "மலை" "அகம்"))) 59 | (ஆகும் (= "குடியுரிமை" (சந்தி "குடி" "உரிமை"))) 60 | (ஆகும் (= "கையோடு" (சந்தி "கை" "ஓடு"))) 61 | (ஆகும் (= "வேருடன்" (சந்தி "வேர்" "உடன்"))) 62 | (ஆகும் (= "புழுவுக்கு" (சந்தி "புழு" "உக்கு"))) 63 | (ஆகும் (= "புல்லை" (சந்தி "புல்" "ஐ"))) 64 | (ஆகும் (= "பிடிக்கும்" (சந்தி "பிடி" "க்கும்"))) 65 | (ஆகும் (= "பூவெல்லாம்" (சந்தி "பூ" "எல்லாம்"))) 66 | (ஆகும் (= "சிலம்பாட்டம்" (சந்தி "சிலம்பு" "ஆட்டம்"))) 67 | (ஆகும் (= "தூண்கள்" (சந்தி "தூண்" "கள்"))))) 68 | 69 | (வரையறு-பரிசோதனை வேற்றுமை-பரிசோதனை 70 | (பரிசோதிக்கும் "வேற்றுமை" 71 | (ஆகும் (= "மரத்தை" (வேற்றுமை "மரம்" "ஐ"))) 72 | (ஆகும் (= "மரத்துக்கு" (வேற்றுமை "மரம்" "உக்கு"))) 73 | (ஆகும் (= "மரத்தில்" (வேற்றுமை "மரம்" "இல்"))) 74 | (ஆகும் (= "மரத்தால்" (வேற்றுமை "மரம்" "ஆல்"))) 75 | (ஆகும் (= "கெடுவை" (வேற்றுமை "கெடு" "ஐ"))) 76 | (ஆகும் (= "கெடுவுக்கு" (வேற்றுமை "கெடு" "உக்கு"))) 77 | (ஆகும் (= "கெடுவில்" (வேற்றுமை "கெடு" "இல்"))) 78 | (ஆகும் (= "கெடுவால்" (வேற்றுமை "கெடு" "ஆல்"))) 79 | (ஆகும் (= "காட்டை" (வேற்றுமை "காடு" "ஐ"))) 80 | (ஆகும் (= "காட்டுக்கு" (வேற்றுமை "காடு" "உக்கு"))) 81 | (ஆகும் (= "காட்டில்" (வேற்றுமை "காடு" "இல்"))) 82 | (ஆகும் (= "காட்டால்" (வேற்றுமை "காடு" "ஆல்"))) 
83 | (ஆகும் (= "பறுவை" (வேற்றுமை "பறு" "ஐ"))) 84 | (ஆகும் (= "பறுவுக்கு" (வேற்றுமை "பறு" "உக்கு"))) 85 | (ஆகும் (= "பறுவில்" (வேற்றுமை "பறு" "இல்"))) 86 | (ஆகும் (= "பறுவால்" (வேற்றுமை "பறு" "ஆல்"))) 87 | (ஆகும் (= "கயிற்றை" (வேற்றுமை "கயிறு" "ஐ"))) 88 | (ஆகும் (= "கயிற்றுக்கு" (வேற்றுமை "கயிறு" "உக்கு"))) 89 | (ஆகும் (= "கயிற்றில்" (வேற்றுமை "கயிறு" "இல்"))) 90 | (ஆகும் (= "கயிற்றால்" (வேற்றுமை "கயிறு" "ஆல்"))) 91 | (ஆகும் (= "எழுதுகோலை" (வேற்றுமை "எழுதுகோல்" "ஐ"))) 92 | (ஆகும் (= "எழுதுகோலுக்கு" (வேற்றுமை "எழுதுகோல்" "உக்கு"))) 93 | (ஆகும் (= "எழுதுகோலில்" (வேற்றுமை "எழுதுகோல்" "இல்"))) 94 | (ஆகும் (= "எழுதுகோலால்" (வேற்றுமை "எழுதுகோல்" "ஆல்"))) 95 | (பரிசோதிக்கும் "4-ஆம் வேற்றுமை - (உ)க்கு" 96 | (ஆகும் (= "தீயை" (வேற்றுமை "தீ" "ஐ"))) 97 | (ஆகும் (= "தீக்கு" (வேற்றுமை "தீ" "உக்கு"))) 98 | (ஆகும் (= "தீயில்" (வேற்றுமை "தீ" "இல்"))) 99 | (ஆகும் (= "தீயால்" (வேற்றுமை "தீ" "ஆல்"))) 100 | (ஆகும் (= "காயை" (வேற்றுமை "காய்" "ஐ"))) 101 | (ஆகும் (= "காய்க்கு" (வேற்றுமை "காய்" "உக்கு"))) 102 | (ஆகும் (= "பொய்யை" (வேற்றுமை "பொய்" "ஐ"))) 103 | (ஆகும் (= "தொலைபேசியை" (வேற்றுமை "தொலைபேசி" "ஐ"))) 104 | (ஆகும் (= "தொலைபேசிக்கு" (வேற்றுமை "தொலைபேசி" "உக்கு"))) 105 | (ஆகும் (= "தேனீயை" (வேற்றுமை "தேனீ" "ஐ"))) 106 | (ஆகும் (= "தேனீக்கு" (வேற்றுமை "தேனீ" "உக்கு")))))) 107 | -------------------------------------------------------------------------------- /src/clj_thamil/core.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.core) 2 | 3 | 4 | (defmacro translate-fn "Defines new-name as an alias for the value bound to old-name; both are given as unquoted symbols. Example: (translate-fn inc my-inc) expands to (def my-inc inc). Mirrors translate-form, which likewise defines the new name in terms of the old one." 5 | [old-name new-name] 6 | `(def ~new-name ~old-name)) 7 | 8 | (defmacro translate-fn-symbol "Like translate-fn, but old-name and new-name are forms (e.g. quoted symbols) that are eval'ed at macroexpansion time to obtain the symbols. Helper macro for translate-fns." 9 | [old-name new-name] 10 | `(def ~(eval new-name) ~(eval old-name))) 11 | 12 | (defmacro translate-fns "Takes a form that evals to a map of symbols and defines each new symbol (val) as an alias for the old symbol (key)." 13 | [symb-map] 14 | `(do 15 | ~@ 16 | (for [[old-form# new-form#] (eval symb-map)] 17 | `(translate-fn-symbol '~old-form# '~new-form#)))) 18 | 19 | ;; info on macro-writing macros based on info at 20 | ;; http://amalloy.hubpages.com/hub/Clojure-macro-writing-macros 21 | 22 
| (defmacro translate-form 23 | "Does the effective translation of a special form or macro from its old name to its new name. In other words, generalizes the 'manual' process of defining something like: 24 | (defmacro எனில் 25 | [& body] 26 | `(if ~@body))" 27 | [old-name new-name] 28 | `(defmacro ~new-name 29 | [~'& body#] 30 | `(~'~old-name ~@body#))) 31 | 32 | ;; not sure if/how to shorten செயல்கூறு, வரையறு-செயல்கூறு, வைத்துக்கொள் 33 | 34 | (defmacro translate-form-symbol 35 | "Does the effective translation of a special form or macro from its old name to its new name, with the names given as symbols. Helper macro for translate-forms macro" 36 | [old-name new-name] 37 | `(defmacro ~(eval new-name) 38 | [~'& body#] 39 | `(~'~(eval old-name) ~@body#))) 40 | 41 | (defmacro translate-forms 42 | "takes a map of symbols and creates macros that do the translation of the form of the old symbol (key) to the new symbol (val)" 43 | [symb-map] 44 | `(do 45 | ~@ 46 | (for [[old-form# new-form#] (eval symb-map)] 47 | `(translate-form-symbol '~old-form# '~new-form#)))) 48 | 49 | 50 | (def fns-map '{ 51 | take எடு 52 | drop விடு 53 | ;; inc ஏறுமானம் 54 | inc ஏற்று 55 | ;; dec இறங்குமானம் 56 | dec இறக்கு 57 | range வீச்சு 58 | take-while எடு-என்னும்வரை 59 | drop-while விடு-என்னும்வரை 60 | interleave பின்னு 61 | reduce இறுக்கு 62 | ;; reducer இறுக்குவர் ;; ?? 63 | map விவரி 64 | hash-map புலவெண்-விவரணையாக்கம் 65 | ;; vector காவி ;; ?? 66 | ;; vector நெறியம் ;; ?? 67 | list பட்டியல் 68 | set அமைவு 69 | hash-set புலவெண்-அமைவு 70 | atom அணு 71 | agent முகவர் 72 | first முதல் 73 | second இரண்டாம் 74 | last கடைசி 75 | butlast கடைசியின்றி 76 | rest மீதி 77 | next அடுத்த 78 | true வாய்மை ;; should we use வாய்மை, மெய்மை, or உண்மை ? 
i am 79 | ;; thinking of using வாய்மை or மெய்மை so as to leave உண்மை to continue to 80 | ;; be used in more casual / less formal situations 81 | false பொய்மை 82 | print அச்சிடு 83 | println வரி-அச்சிடு 84 | filter வடி 85 | remove அகற்று 86 | keep கொள் 87 | slurp உறிஞ்சு;; could be சப்பு 88 | spit ஊற்று ;; could be துப்பு 89 | seq வரிசை 90 | dorun செய்யோட்டம் 91 | doall செய்யெல்லாம் 92 | str தொடை 93 | interpose இடைபொருத்து 94 | find கண்டுபிடி 95 | get பெறு 96 | apply செயல்படுத்து 97 | count எண்ணு 98 | every? ஒவ்வொன்றுமா? 99 | true? உண்மையா? 100 | false? பொய்மையா? 101 | concat தொடு 102 | identity அடையாளம் 103 | reverse புரட்டு 104 | some எதாவது 105 | flatten தட்டையாக்கு 106 | 107 | boolean பூலியன் 108 | }) 109 | 110 | (def forms-map '{ 111 | if எனில் 112 | when என்னும்போது 113 | if-not இல்லெனில் 114 | when-not இல்லென்னும்-போது 115 | def வரையறு 116 | fn செயல்கூறு 117 | defn வரையறு-செயல்கூறு 118 | let வைத்துக்கொள் ;; maybe just கொள் 119 | and மற்றும் 120 | or அல்லது 121 | not அன்று 122 | ;; else அன்றி ? 
123 | loop சுற்று 124 | doseq செய்வரிசை 125 | ;; for ஒவ்வொன்றுக்கும் 126 | for ஒன்றொன்றுக்கு 127 | cond பொறுத்து 128 | do செய் 129 | 130 | ;; clojure.test 131 | deftest வரையறு-பரிசோதனை 132 | testing பரிசோதிக்கும் 133 | is ஆகும் 134 | are பல-ஆகும் 135 | }) 136 | 137 | ;; do the actual "translation" for bindings, fns, and any other value 138 | (translate-fns fns-map) 139 | 140 | ;; do the actual "translation" for macros and special forms 141 | (translate-forms forms-map) 142 | -------------------------------------------------------------------------------- /test/clj_thamil/format/convert_test.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.format.convert-test 2 | (:require [clojure.test.check :as sc] 3 | [clojure.test.check.generators :as gen] 4 | [clojure.test.check.properties :as prop :include-macros true] 5 | [clojure.string :as string] 6 | [clj-thamil.format :as fmt] 7 | [clj-thamil.format.convert :as cvt] 8 | [clj-thamil.மொழியியல் :as மொ]) 9 | (:use clojure.test 10 | clj-thamil.format.convert)) 11 | 12 | (def QCHK-SIZE 100) 13 | 14 | (def A_LOT 100) 15 | 16 | (deftest conversion-test 17 | (testing "romanized -> தமிழ்" 18 | (is (= "தமிழ்" (romanized->தமிழ் "thamiz"))) 19 | (is (= "தமிழ்" (romanized->தமிழ் "thamizh"))) 20 | (is (= "நீர்" (romanized->தமிழ் "n-iir"))) 21 | (is (= "பஃறுளி" (romanized->தமிழ் "paqRuLi"))) 22 | (is (= "சின்ன" (romanized->தமிழ் "chinna") (romanized->தமிழ் "sinna"))) 23 | (is (= "விகடன்" (romanized->தமிழ் "vikatan") (romanized->தமிழ் "vikadan"))) 24 | (is (= "சென்றேன் வென்றேன்" (romanized->தமிழ் "senreen venreen"))) 25 | (is (= "வந்தேன்" (romanized->தமிழ் "vantheen"))) 26 | (is (= "பாட்டு பாடு" (romanized->தமிழ் "paattu paadu")))) 27 | (testing "தமிழ் -> romanized; translation map inversion" 28 | (is (= "thamizh" (தமிழ்->romanized "தமிழ்"))) 29 | (is (not= "thamiz" (தமிழ்->romanized "தமிழ்"))) 30 | (is (= "niir" (தமிழ்->romanized "நீர்"))) 31 | (is (not= "neer" (தமிழ்->romanized 
"நீர்"))) 32 | (is (= "paambu" (தமிழ்->romanized "பாம்பு"))) 33 | (is (not= "paampu" (தமிழ்->romanized "பாம்பு"))) 34 | (is (not= "anpu" (தமிழ்->romanized "அன்பு"))) 35 | (is (= "anbu" (தமிழ்->romanized "அன்பு"))) 36 | (is (not= "panpu" (தமிழ்->romanized "பண்பு"))) 37 | (is (= "panbu" (தமிழ்->romanized "பண்பு"))))) 38 | 39 | (deftest double-check-test 40 | (testing "from the test.check / double-check Readme" 41 | (is (:result 42 | (sc/quick-check QCHK-SIZE (prop/for-all [v (gen/vector gen/int)] 43 | (= (sort v) (sort (sort v))))))))) 44 | 45 | (deftest convert-fn-invertible 46 | (let [thamil-letters fmt/letter-seq 47 | punct (map str [\. \space \newline]) 48 | all-letters (concat thamil-letters punct) 49 | lett-gen (gen/such-that identity (gen/elements all-letters)) 50 | thamil-text-gen (gen/fmap string/join (gen/vector lett-gen)) 51 | old-font-no-ambig-combo (fn [s] 52 | (let [phonemes (fmt/str->phonemes s) 53 | phoneme-triples (partition 3 1 phonemes) 54 | phoneme-doubles (partition 2 1 phonemes) 55 | ambig1 (fn [[a b c]] (and (மொ/மெய்யெழுத்தா? a) 56 | (= "எ" b) 57 | (= "ள்" c))) 58 | ambig2 (fn [[a b]] (and (= "ஒ" a) 59 | (= "ள்" b))) 60 | ambig3 (fn [[a b c]] (and (மொ/மெய்யெழுத்தா? a) 61 | (#{"எ" "ஏ" "ஆ"} b) 62 | (= "ர்" c))) ;; the set lookup can make this return nil, so test truthiness with not-any? below rather than (every? false? ...) 63 | no-ambig1 (not-any? ambig1 phoneme-triples) 64 | no-ambig2 (not-any? ambig2 phoneme-doubles) 65 | no-ambig3 (not-any? ambig3 
phoneme-triples)] 66 | (and no-ambig1 no-ambig2 no-ambig3))) 67 | ;; old fonts can't distinguish certain character combinations, 68 | ;; so prevent test cases that could cause that. NOTE(review): the filter below wraps lett-gen (single letters), not thamil-text-gen -- confirm that is intended 69 | non-romanized-thamil-text-gen (gen/such-that old-font-no-ambig-combo lett-gen (* QCHK-SIZE A_LOT)) 70 | ;; applying converters for old fonts followed by their 71 | ;; inverses should give back the original text 72 | test-prop (fn [f inv] (prop/for-all [t non-romanized-thamil-text-gen] 73 | (= t (-> t f inv)))) 74 | test-res (fn [f inv] 75 | (->> (test-prop f inv) 76 | (sc/quick-check QCHK-SIZE) 77 | :result)) 78 | ;; only after we've transliterated to romanized can we use 79 | ;; the rule about applying converter + inverse = input, since 80 | ;; the தமிழ்->romanized direction has certain overrides 81 | romanized-test-prop (prop/for-all [t thamil-text-gen] 82 | (let [converted-test-txt (-> t cvt/தமிழ்->romanized cvt/romanized->தமிழ்)] 83 | (= converted-test-txt (-> converted-test-txt cvt/தமிழ்->romanized cvt/romanized->தமிழ்)))) 84 | romanized-res (->> romanized-test-prop 85 | (sc/quick-check QCHK-SIZE) 86 | :result)] 87 | (testing "convert and inverse fns for all font formats (except romanized)" 88 | (testing "romanized" 89 | (is (true? romanized-res))) 90 | (testing "tab" 91 | (is (true? (test-res cvt/தமிழ்->tab cvt/tab->தமிழ்)))) 92 | (testing "bamini" 93 | (is (true? (test-res cvt/தமிழ்->bamini cvt/bamini->தமிழ்)))) 94 | (testing "tscii" 95 | (is (true? (test-res cvt/தமிழ்->tscii cvt/tscii->தமிழ்)))) 96 | (testing "webulagam" 97 | (is (true? 
(test-res cvt/தமிழ்->webulagam cvt/webulagam->தமிழ்))))))) 98 | -------------------------------------------------------------------------------- /src/clj_thamil/மொழியியல்.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.மொழியியல் 2 | (:require [clj-thamil.format :as fmt]) 3 | #?(:clj 4 | (:use clj-thamil.core) 5 | :cljs 6 | (:use-macros [clj-thamil.core :only [வரையறு விவரி மீதி வரையறு-செயல்கூறு பெறு எதாவது பூலியன் என்னும்போது 7 | வைத்துக்கொள் கடைசி பொறுத்து எண்ணு முதல் இரண்டாம் தொடை 8 | கடைசியின்றி அன்று மற்றும் அல்லது தொடு செயல்படுத்து செயல்கூறு]]))) 9 | 10 | 11 | (வரையறு மெய்-தொடக்கம்-எழுத்துகள் fmt/c-cv-letters) 12 | 13 | (வரையறு உயிரெழுத்துகள் fmt/vowels) 14 | 15 | (வரையறு மெய்யெழுத்துகள் fmt/consonants) 16 | 17 | (வரையறு உயிர்மெய்யெழுத்துகள் (தட்டையாக்கு (விவரி மீதி மெய்-தொடக்கம்-எழுத்துகள்))) 18 | 19 | (வரையறு தொடை->எழுத்துகள் fmt/str->letters) 20 | 21 | (வரையறு தொடை->ஒலியன்கள் fmt/str->phonemes) 22 | 23 | (வரையறு-செயல்கூறு ஒலியன்கள்->எழுத்து [ஒலியன்கள்] (பெறு fmt/inverse-phoneme-map ஒலியன்கள்)) 24 | 25 | ;;;;;;;; 26 | ;; எழுத்து 27 | ;; letters 28 | ;;;;;;;; 29 | 30 | (வரையறு-செயல்கூறு எழுத்தா? [ச] (fmt/in-trie? ச)) 31 | 32 | (வரையறு-செயல்கூறு மெய்யெழுத்தா? [எ] (பூலியன் (எதாவது #{எ} மெய்யெழுத்துகள்))) 33 | 34 | (வரையறு-செயல்கூறு உயிரெழுத்தா? [எ] (பூலியன் (எதாவது #{எ} உயிரெழுத்துகள்))) 35 | 36 | (வரையறு-செயல்கூறு உயிர்மெயெழுத்தா? [எ] (பூலியன் (எதாவது #{எ} உயிர்மெய்யெழுத்துகள்))) 37 | 38 | ;;;;;;;; 39 | ;; அசை 40 | ;; syllables 41 | ;;;;;;;; 42 | 43 | (வரையறு குறில்-உயிரெழுத்துகள் #{"அ" "இ" "உ" "எ" "ஒ"}) 44 | 45 | (வரையறு நெடில்-உயிரெழுத்துகள் #{"ஆ" "ஈ" "ஊ" "ஏ" "ஓ"}) 46 | 47 | (வரையறு-செயல்கூறு நெடிலா? 48 | "எழுத்து நெடில் எழுத்தா என்பதைத் திருப்பிக் கொடுக்கும் 49 | returns whether the letter is நெடில் (has long vowel sound)" 50 | [எழுத்து] 51 | (பூலியன் 52 | (என்னும்போது (எழுத்தா? 
எழுத்து) 53 | ;; ஒலியன் = phoneme 54 | (வைத்துக்கொள் [ஒலியன்கள் (தொடை->ஒலியன்கள் எழுத்து) 55 | கடைசி-ஒலியன் (கடைசி ஒலியன்கள்)] 56 | (பெறு நெடில்-உயிரெழுத்துகள் கடைசி-ஒலியன்))))) 57 | 58 | (வரையறு-செயல்கூறு குறிலா? 59 | "எழுத்து குறில் எழுத்தா என்பதைத் திருப்பிக் கொடுக்கும் 60 | returns whether the letter is குறில் (has short vowel sound)" 61 | [எழுத்து] 62 | (பூலியன் 63 | (என்னும்போது (எழுத்தா? எழுத்து) 64 | (->> (தொடை->ஒலியன்கள் எழுத்து) 65 | கடைசி 66 | (பெறு குறில்-உயிரெழுத்துகள்))))) 67 | 68 | ;;;;;;;; 69 | ;; ஒலியன் 70 | ;; phonemes 71 | ;;;;;;;; 72 | 73 | (வரையறு முன்னொட்டா? fmt/prefix?) 74 | 75 | (வரையறு பின்னொட்டா? fmt/suffix?) 76 | 77 | ;;;;;;;; 78 | ;; விகுதி 79 | ;; suffixes 80 | ;;;;;;;; 81 | 82 | ;; பன்மை 83 | ;; plurals 84 | 85 | (வரையறு-செயல்கூறு பன்மை 86 | "ஒரு சொல்லை அதன் பன்மை வடிவத்தில் ஆக்குதல் 87 | takes a word and pluralizes it" 88 | [சொல்] 89 | (வைத்துக்கொள் [எழுத்துகள் (தொடை->எழுத்துகள் சொல்)] 90 | (பொறுத்து 91 | 92 | ;; (fmt/seq-prefix? (புரட்டு சொல்) (புரட்டு "கள்")) 93 | (பின்னொட்டா? சொல் "கள்") 94 | சொல் 95 | 96 | (= "ம்" (கடைசி எழுத்துகள்)) 97 | (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்) ["ங்கள்"])) 98 | 99 | (மற்றும் (= 1 (எண்ணு எழுத்துகள்)) 100 | (நெடிலா? சொல்)) 101 | (தொடை சொல் "க்கள்") 102 | 103 | (மற்றும் (= 2 (எண்ணு எழுத்துகள்)) 104 | (ஒவ்வொன்றுமா? அடையாளம் (விவரி குறிலா? எழுத்துகள்))) 105 | (தொடை சொல் "க்கள்") 106 | 107 | (மற்றும் (= 2 (எண்ணு எழுத்துகள்)) 108 | (குறிலா? (முதல் எழுத்துகள்)) 109 | (= "ல்" (இரண்டாம் எழுத்துகள்))) 110 | (தொடை (முதல் எழுத்துகள்) "ற்கள்") 111 | 112 | (மற்றும் (= 2 (எண்ணு எழுத்துகள்)) 113 | (குறிலா? 
(முதல் எழுத்துகள்)) 114 | (= "ள்" (இரண்டாம் எழுத்துகள்))) 115 | (தொடை (முதல் எழுத்துகள்) "ட்கள்") 116 | 117 | :அன்றி 118 | (தொடை சொல் "கள்")))) 119 | 120 | ;; சந்தி (விதிகள்) 121 | ;; (rules for) joining words/suffixes 122 | 123 | (வரையறு-செயல்கூறு சந்தி 124 | [சொல்1 சொல்2] 125 | (வைத்துக்கொள் [எழுத்துகள்1 (தொடை->எழுத்துகள் சொல்1) 126 | எழுத்துகள்2 (தொடை->எழுத்துகள் சொல்2) 127 | ஒலியன்கள்1 (தொடை->ஒலியன்கள் சொல்1) 128 | ஒலியன்கள்2 (தொடை->ஒலியன்கள் சொல்2) 129 | சொ1-கஒ (கடைசி ஒலியன்கள்1) 130 | சொ2-முஒ (முதல் ஒலியன்கள்2)] 131 | (பொறுத்து 132 | 133 | (மற்றும் (உயிரெழுத்தா? சொ2-முஒ) 134 | (பெறு #{"இ" "ஈ" "ஏ" "ஐ"} சொ1-கஒ)) 135 | (செயல்படுத்து தொடை சொல்1 (ஒலியன்கள்->எழுத்து ["ய்" சொ2-முஒ]) (மீதி சொல்2)) 136 | 137 | (மற்றும் (உயிரெழுத்தா? சொ2-முஒ) 138 | (பெறு #{"அ" "ஆ" "ஊ" "ஒ" "ஓ" "ஔ"} சொ1-கஒ)) 139 | (செயல்படுத்து தொடை சொல்1 (ஒலியன்கள்->எழுத்து ["வ்" சொ2-முஒ]) (மீதி சொல்2)) 140 | 141 | (மற்றும் (உயிரெழுத்தா? சொ2-முஒ) 142 | (= "உ" சொ1-கஒ) 143 | (= 2 (எண்ணு எழுத்துகள்1)) 144 | (ஒவ்வொன்றுமா? குறிலா? எழுத்துகள்1)) 145 | (செயல்படுத்து தொடை சொல்1 (ஒலியன்கள்->எழுத்து ["வ்" சொ2-முஒ]) (மீதி சொல்2)) 146 | 147 | (மற்றும் (உயிரெழுத்தா? சொ2-முஒ) 148 | (= "உ" சொ1-கஒ) 149 | (அன்று (மற்றும் (= 2 (எண்ணு எழுத்துகள்1)) 150 | (ஒவ்வொன்றுமா? குறிலா? எழுத்துகள்1)))) 151 | (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்1) (ஒலியன்கள்->எழுத்து [(கடைசி (கடைசியின்றி ஒலியன்கள்1)) சொ2-முஒ]) (மீதி சொல்2))) 152 | 153 | 154 | (மற்றும் (உயிரெழுத்தா? சொ2-முஒ) 155 | (= 2 (எண்ணு எழுத்துகள்1)) 156 | (குறிலா? (முதல் எழுத்துகள்1)) 157 | (மெய்யெழுத்தா? (இரண்டாம் எழுத்துகள்1))) 158 | (செயல்படுத்து தொடை (தொடு சொல்1 [(ஒலியன்கள்->எழுத்து [சொ1-கஒ சொ2-முஒ])] (மீதி சொல்2))) 159 | 160 | (மற்றும் (உயிரெழுத்தா? சொ2-முஒ) 161 | (மெய்யெழுத்தா? 
சொ1-கஒ)) 162 | (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்1) [(ஒலியன்கள்->எழுத்து [சொ1-கஒ சொ2-முஒ])] (மீதி சொல்2))) 163 | 164 | :அன்றி 165 | (தொடை சொல்1 சொல்2) 166 | 167 | ))) 168 | 169 | ;; வேற்றுமை 170 | ;; noun cases 171 | 172 | (வரையறு-செயல்கூறு வேற்றுமை-முன்-மாற்றம் 173 | "ஒரு பெயர்ச்சொல்லுக்கு வேற்றுமை விகுதி சேர்க்கும் முன் செய்யவேண்டிய மாற்றம் 174 | change that is required before adding a case suffix to a noun" 175 | [சொல்] 176 | (வைத்துக்கொள் [எழுத்துகள் (தொடை->எழுத்துகள் சொல்) 177 | ஒலியன்கள் (தொடை->ஒலியன்கள் சொல்) 178 | கஎ (கடைசி எழுத்துகள்) 179 | கஒ (கடைசி ஒலியன்கள்)] 180 | (பொறுத்து 181 | 182 | (= "ம்" (கடைசி எழுத்துகள்)) 183 | (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்) ["த்த்"])) 184 | 185 | (மற்றும் (பெறு #{"டு" "று"} கஎ) 186 | (அல்லது (மற்றும் (= 2 (எண்ணு எழுத்துகள்)) 187 | (ஒவ்வொன்றுமா? குறிலா? எழுத்துகள்)) 188 | (மெய்யெழுத்தா? (கடைசி (கடைசியின்றி எழுத்துகள்))))) 189 | சொல் 190 | 191 | (= "டு" கஎ) 192 | (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்) ["ட்ட்"])) 193 | 194 | (= "று" கஎ) 195 | (செயல்படுத்து தொடை (தொடு (கடைசியின்றி எழுத்துகள்) ["ற்ற்"])) 196 | 197 | :அன்றி 198 | சொல்))) 199 | 200 | (வரையறு-செயல்கூறு வேற்றுமை 201 | "ஒரு பெயர்ச்சொல்லுக்கு ஒரு வேற்றுமை விகுதியைச் சேர்த்தல் 202 | adds a case suffix to a noun" 203 | [சொல் வே] 204 | (வைத்துக்கொள் [எழுத்துகள் (தொடை->எழுத்துகள் சொல்) 205 | ஒலியன்கள் (தொடை->ஒலியன்கள் சொல்)] 206 | (எனில் (மற்றும் (= "உக்கு" வே) 207 | (அல்லது (பெறு #{"இ" "ஈ" "ஐ"} (கடைசி ஒலியன்கள்)) 208 | (எதாவது (செயல்கூறு [தொடை] (பின்னொட்டா? சொல் தொடை)) 209 | ["ஆய்"]))) 210 | (வேற்றுமை சொல் "க்கு") 211 | (-> சொல் 212 | வேற்றுமை-முன்-மாற்றம் 213 | (சந்தி வே))))) 214 | -------------------------------------------------------------------------------- /examples/clj/clj-español/LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). 
ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 
45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. 
REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. 
While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor to control, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone.
Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. 
GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. 
The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | -------------------------------------------------------------------------------- /examples/clj/clj-spanish/LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. 
DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 
45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. 
REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. 
While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor to control, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone.
Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. 
GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. 
The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 
215 | -------------------------------------------------------------------------------- /test/clj_thamil/format_test.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.format-test 2 | (:use clojure.test 3 | clj-thamil.format 4 | clj-thamil.core)) 5 | 6 | (def words ["பந்து" "பந்தி" "பத்து" "பந்துகள்" "பந்தயம்" "பந்தாடு" "பந்தல்"]) 7 | 8 | (deftest trie-test 9 | (let [first-word (first words) 10 | first-two-words (take 2 words)] 11 | (testing "creating a trie" 12 | (testing "creating a trie from a sequence of words (default val is attached to terminus)" 13 | (testing "boundary case" 14 | (is (= {} (make-trie [])))) 15 | (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} (make-trie [first-word]))) 16 | (is (= (make-trie [first-word]) (make-trie (take 1 words)))) 17 | (testing "words that share some prefix" 18 | (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil} \u0BBF {nil nil}}}}}} (make-trie first-two-words)))) 19 | (testing "words that have no shared prefix" 20 | (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}} \வ {\u0BC6 {\ற {\u0BCD {\ற {\u0BBF {nil nil}}}}}}} (make-trie [first-word "வெற்றி"]))))) 21 | (testing "creating a trie from a map of word->terminus-attached-val" 22 | (testing "boundary case" 23 | (is (= {} (make-trie {})))) 24 | (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 1}}}}}} (make-trie {first-word 1}))) 25 | (is (= {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 0 26 | \க {\ள {\u0BCD {nil 3}}}} 27 | \u0BBF {nil 1} 28 | \ய {\ம {\u0BCD {nil 4}}} 29 | \ல {\u0BCD {nil 6}} 30 | \u0BBE {\ட {\u0BC1 {nil 5}}}}}} 31 | \த {\u0BCD {\த {\u0BC1 {nil 2}}}}}} 32 | (make-trie (zipmap words (range))))))) 33 | (testing "trie lookup fns" 34 | (testing "nil as valued attached to terminus of input sequences" 35 | (is (= true (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {\க {\ள {\u0BCD {nil nil}}}}}}}}} "பந்துகள்"))) 36 | (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {\க {\ள {\u0BCD {nil nil}}}}}}}}} "ப"))) 37 | (is (= false (in-trie? 
{\ப {\ந {\u0BCD {\த {\u0BC1 {\க {\ள {\u0BCD {nil nil}}}}}}}}} "பந்துக"))) 38 | (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {\க {\ள {\u0BCD {nil nil}}}}}}}}} "பந்து"))) 39 | (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "பந்துகள்"))) 40 | (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "ப"))) 41 | (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "பந்துக"))) 42 | (is (= true (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "பந்து"))) 43 | (is (= false (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "பந்து")))) 44 | (is (= false (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "ப")))) 45 | (is (= true (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "பந்துகள்")))) 46 | (is (= true (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil nil}}}}}} "கோடு"))))) 47 | (testing "non-nil values attached to terminus of input sequences" 48 | (is (= true (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 1}}}}}} "பந்து"))) 49 | (is (= false (in-trie? {\ப {\ந {\u0BCD {\த {\u0BC1 {nil 3.14159}}}}}} "ப"))) 50 | (is (= false (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil \a}}}}}} "பந்து")))) 51 | (is (= false (nil? (trie-prefix-subtree {\ப {\ந {\u0BCD {\த {\u0BC1 {nil true}}}}}} "ப")))) 52 | (testing "splitting words directly into phonemes using phoneme trie" 53 | (is (= ["வ்" "அ" "ண்" "அ" "க்" "க்" "அ" "ம்"] (str->phonemes "வணக்கம்"))) 54 | (is (empty? (str->phonemes nil))) 55 | (is (empty? 
(str->phonemes ""))) 56 | (is (= ["அ"] (str->phonemes "அ"))) 57 | (is (= ["க்"] (str->phonemes "க்"))) 58 | (is (= ["க்" "அ"] (str->phonemes "க"))) 59 | (is (= ["க்" "ஊ"] (str->phonemes "கூ"))) 60 | (is (= ["வ்" "இ" "ட்" "உ" "த்" "அ" "ல்" "ஐ"] (str->phonemes "விடுதலை"))))) 61 | (testing "inverting maps for creating tries for inverse conversion" 62 | (is (= "பக்கம்" (phonemes->str "ப்அக்க்அம்"))) 63 | (is (= "தலைய்123ஈடு" (phonemes->str "த்அல்ஐய்123ஈட்உ"))) 64 | (is (= "நடு" (phonemes->str "ந்அடு"))))))) 65 | 66 | (deftest word-letter-test 67 | (testing "splitting strings of தமிழ் characters into constituent தமிழ் characters" 68 | (is (= [] (str->letters ""))) 69 | (is (= [] (str->letters nil))) 70 | (is (= ["த"] (str->letters "த"))) 71 | (is (= [" " "த"] (str->letters " த"))) 72 | (is (= ["த" " "] (str->letters "த "))) 73 | (is (= ["த்"] (str->letters "த்"))) 74 | (is (= ["த" "மி" "ழ்"] (str->letters "தமிழ்"))) 75 | (is (= ["த" "மி" "ழ்" " "] (str->letters "தமிழ் "))) 76 | (is (= ["s" "o" "f" "t" "w" "a" "r" "e" "=" "மெ" "ன்" "பொ" "ரு" "ள்" "," "." "." "."] (str->letters "software=மென்பொருள்,...")))) 77 | (testing "letter ordering" 78 | (testing "boundary cases" 79 | (is (= true (letter-before? nil nil))) 80 | (is (= true (letter-before? nil ""))) 81 | (is (= false (letter-before? "" nil))) 82 | (is (= true (letter-before? nil "அ"))) 83 | (is (= true (letter-before? nil "a"))) 84 | (is (= true (letter-before? "a" "அ")))) 85 | (testing "equal inputs" 86 | (is (= false (letter-before? "அ" "அ")))) 87 | (testing "தமிழ்" 88 | (is (= true (letter-before? "அ" "ஆ"))) 89 | (is (= true (letter-before? "ஆ" "இ"))) 90 | (is (= true (letter-before? "அ" "ஔ"))) 91 | (is (= true (letter-before? "ஔ" "ஃ"))) 92 | (is (= true (letter-before? "ஃ" "க்"))) 93 | (is (= true (letter-before? "க்" "க"))) 94 | (is (= true (letter-before? "க" "கா"))) 95 | (is (= true (letter-before? "க்" "கௌ"))) 96 | (is (= false (letter-before? "க்" "ஃ"))) 97 | (is (= true (letter-before? 
"கௌ" "ங்"))) 98 | (is (= false (letter-before? "ங்" "கௌ")))) 99 | (testing "ASCII" 100 | (is (= true (letter-before? "a" "z"))) 101 | (is (= true (letter-before? "A" "Z"))) 102 | (is (= true (letter-before? "Z" "a"))) 103 | (is (= true (letter-before? "0" "9"))) 104 | (is (= true (letter-before? "9" "A")))) 105 | (testing "comparator / sorting" 106 | (is (= ["அ" "ஆ" "இ" "ஒ" "ஓ" "ஔ" "ஃ" "க்" "க" "ன்" "ன" "னா" "னு" "னௌ"] (sort-by identity letter-comp ["இ" "க" "ஃ" "ன" "னு" "னௌ" "னா" "ஆ" "க்" "அ" "ஔ" "ஓ" "ன்" "ஒ"]))))) 107 | (testing "word ordering" 108 | (testing "equal inputs" 109 | (is (= false (word-before? "அ" "அ")))) 110 | (testing "extra letters in one word" 111 | (is (= false (word-before? "அது" "அ"))) 112 | (is (= true (word-before? "அ" "அது")))) 113 | (testing "Unicode 'consonant' vs. Unicode 'consonant+ligature' - ஒருங்குறியில் தமிழ் மெய்யெழுத்து+அகரம் மற்றும் அதே மெய்யெழுத்து {வெறுமன்; அதோடு வேறொரு உயிரெழுத்து}" 114 | (is (= true (word-before? "படம்" "பாடம்"))) 115 | (is (= false (word-before? "பாடம்" "படம்"))) 116 | (is (= false (word-before? "படம்" "பட்டம்"))) 117 | (is (= true (word-before? "பட்டம்" "படம்"))) 118 | (is (= false (word-before? "கடமை" "கட்டம்"))) 119 | (is (= true (word-before? "கட்டம்" "கடமை"))) 120 | (is (= true (word-before? "கட்டு" "கெட்டு"))) 121 | (is (= false (word-before? "கெட்டு" "கட்டு"))) 122 | (is (= false (word-before? "பைந்தமிழ்" "பந்தல்"))) 123 | (is (= true (word-before? "பந்தல்" "பைந்தமிழ்")))) 124 | (testing "order of consonants" 125 | (is (= true (word-before? "பாடம்" "பாதம்"))) 126 | (is (= false (word-before? "பாதம்" "பாடம்")))) 127 | (testing "order of vowels" 128 | (is (= true (word-before? "அப்பம்" "ஆப்பம்"))) 129 | (is (= false (word-before? "ஆப்பம்" "அப்பம்")))) 130 | (testing "order of vowel vs. consonant, and order of two உயிர்மெய்யெழுத்துகள்" 131 | (is (= false (word-before? "நுளம்பு" "கொசு"))) 132 | (is (= true (word-before? "கொசு" "நுளம்பு"))) 133 | (is (= true (word-before? 
"ஈ" "கொசு"))) 134 | (is (= false (word-before? "கொசு" "ஈ")))))) 135 | 136 | (deftest util-fn-test 137 | (let [s "abcqwertyuiop"] 138 | (testing "seq-prefix" 139 | (is (= [] (seq-prefix nil nil))) 140 | (is (= [] (seq-prefix nil []))) 141 | (is (= [] (seq-prefix [] nil))) 142 | (is (= [] (seq-prefix nil [1 2]))) 143 | (is (= [\a \b \c] (seq-prefix "abcdefgh" s))) 144 | (is (= [\a \b] (seq-prefix "abbb" s))) 145 | (is (= [] (seq-prefix "zyx" s)))) 146 | (testing "seq-prefix?" 147 | (is (= false (seq-prefix? nil nil))) 148 | (is (= false (seq-prefix? nil []))) 149 | (is (= false (seq-prefix? [] nil))) 150 | (is (= false (seq-prefix? nil [1 2]))) 151 | (is (= false (seq-prefix? "abcdefgh" s))) 152 | (is (= false (seq-prefix? "abbb" s))) 153 | (is (= false (seq-prefix? "zyx" s))) 154 | (is (= false (seq-prefix? "abc" s))) 155 | (is (= true (seq-prefix? s "abc"))) 156 | (is (= true (seq-prefix? s "a"))) 157 | (is (= true (seq-prefix? s ""))) 158 | (is (= true (seq-prefix? s []))) 159 | (is (= true (seq-prefix? s nil)))) 160 | (testing "seq-index-of" 161 | (let [check-seq-index-of (fn [s1 s2] (= (.indexOf s1 s2) 162 | (seq-index-of s1 s2)))] 163 | (is (= true (check-seq-index-of "abc" "a"))) 164 | (is (true? (check-seq-index-of "a" "abc"))) 165 | (is (true? (check-seq-index-of "" "abc"))))) 166 | (testing "prefix?" 167 | (is (true? (prefix? "வந்தான்" ""))) 168 | (is (true? (prefix? "வந்தான்" "வ்"))) 169 | (is (true? (prefix? "வந்தான்" "வ"))) 170 | (is (true? (prefix? "வந்தான்" "வந்"))) 171 | (is (false? (prefix? "வந்தான்" "வந"))) 172 | (is (true? (prefix? "வந்தான்" "வந்த்"))) 173 | (is (false? (prefix? "வந்தான்" "வந்து"))) 174 | (is (true? (prefix? "வந்தான்" "வந்தா"))) 175 | (is (true? (prefix? "வந்தான்" "வந்தான்"))) 176 | (is (false? (prefix? "வந்தான்" "வந்தானே"))) 177 | (is (true? (prefix? 
"வந்தானே" "வந்தான்")))))) 178 | 179 | (deftest word-char-traits-test 180 | (testing "word and char traits" 181 | (testing "char traits" 182 | (let [ws-chars [\space \tab \newline] 183 | wordy-chars [\a \Z \0 ] 184 | punct-chars [\- \* \^ \$ \+ \. \_ \; ] 185 | தமிழ்-எழுத்து-unicode-chars [\அ \ஆ \இ \ஔ \ஃ \க \ங \ன] 186 | தமிழ்-எழுத்து-துணை-குறி-unicode-chars [\u0BCD \u0BBE \u0BBF \u0BC0 \u0BC1 \u0BC2 \u0BC6 \u0BC7 \u0BC8 \u0BCA \u0BCB \u0BCC]] 187 | (is (= true (every? true? (map whitespace? ws-chars)))) 188 | (is (= true (every? true? (map wordy-char? wordy-chars)))) 189 | (is (= true (every? true? (map wordy-char? தமிழ்-எழுத்து-unicode-chars)))) 190 | (is (= true (every? true? (map wordy-char? தமிழ்-எழுத்து-துணை-குறி-unicode-chars)))) 191 | (is (= true (every? true? (map wordy-char? தமிழ்-எழுத்து-துணை-குறி-unicode-chars)))) 192 | (is (= true (every? false? (map wordy-char? punct-chars)))))) 193 | (testing "word boundaries" 194 | (let [s1 "aldsk சிக்கல் sdfsdf234234lsdflksjdf Zürich" 195 | s2 " alsfjs" 196 | s3 "" 197 | s4 nil] 198 | (is (= ["aldsk" "சிக்கல்" "sdfsdf234234lsdflksjdf" "Zürich"] (wordy-seq s1))) 199 | (is (= ["alsfjs"] (wordy-seq s2))) 200 | (is (= [] (wordy-seq s3))) 201 | (is (= nil (wordy-seq s4))))))) 202 | 203 | (deftest cursor-pos-test 204 | (let [s1 "aldsk சிக்கல் sdfsdf234234lsdflksjdf Zürich" 205 | s2 " alsfjs" 206 | s3 "a b" 207 | s4 "சிக்கல்"] 208 | (testing "cursor position" 209 | (testing "wordy chunk under cursor" 210 | (is (= "aldsk" (wordy-chunk-under s1 0))) 211 | (is (= "aldsk" (wordy-chunk-under s1 1))) 212 | (is (= "aldsk" (wordy-chunk-under s1 5))) 213 | (is (= "சிக்கல்" (wordy-chunk-under s1 6))) 214 | (is (= "Zürich" (wordy-chunk-under s1 (count s1)))) 215 | (is (= "Zürich" (wordy-chunk-under s1 (- (count s1) (count "Zürich"))))) 216 | (is (nil? (wordy-chunk-under s2 0))) 217 | (is (nil? 
(wordy-chunk-under s2 1))) 218 | (is (= "alsfjs" (wordy-chunk-under s2 2))) 219 | (is (= "a" (wordy-chunk-under s3 0))) 220 | (is (= "a" (wordy-chunk-under s3 1))) 221 | (is (nil? (wordy-chunk-under s3 2)))) 222 | (testing "cursor position within wordy chunk" 223 | (is (= ["aldsk" 0] (wordy-chunk-and-cursor-pos s1 0))) 224 | (is (= ["aldsk" 1] (wordy-chunk-and-cursor-pos s1 1))) 225 | (is (= ["aldsk" 5] (wordy-chunk-and-cursor-pos s1 5))) 226 | (is (= ["சிக்கல்" 0] (wordy-chunk-and-cursor-pos s1 6))) 227 | (is (= ["Zürich" 6] (wordy-chunk-and-cursor-pos s1 (count s1)))) 228 | (is (= ["Zürich" 0] (wordy-chunk-and-cursor-pos s1 (- (count s1) (count "Zürich"))))) 229 | (is (nil? (wordy-chunk-and-cursor-pos s2 0))) 230 | (is (nil? (wordy-chunk-and-cursor-pos s2 1))) 231 | (is (= ["alsfjs" 0] (wordy-chunk-and-cursor-pos s2 2))) 232 | (is (= ["a" 0] (wordy-chunk-and-cursor-pos s3 0))) 233 | (is (= ["a" 1] (wordy-chunk-and-cursor-pos s3 1))) 234 | (is (nil? (wordy-chunk-and-cursor-pos s3 2)))) 235 | (testing "cursor adjust" 236 | (is (= 2 (cursor-adjust s4 3 :to-first))) 237 | (is (= 4 (cursor-adjust s4 3 :to-last))) 238 | (is (= 4 (cursor-adjust s4 3 nil))) 239 | (is (= 2 (cursor-adjust s4 2 :to-first))) 240 | (is (= 2 (cursor-adjust s4 2 :to-last))) 241 | (is (= 2 (cursor-adjust s4 2 nil))) 242 | (is (= 0 (cursor-adjust s4 0 :to-first))) 243 | (is (= 0 (cursor-adjust s4 0 :to-last))) 244 | (is (= 0 (cursor-adjust s4 0 nil))) 245 | (is (= 7 (cursor-adjust s4 7 :to-first))) 246 | (is (= 7 (cursor-adjust s4 7 :to-last))) 247 | (is (= 7 (cursor-adjust s4 7 nil))))))) 248 | -------------------------------------------------------------------------------- /src/clj_thamil/format.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.format 2 | (:require [clojure.set :as set]) 3 | #?(:clj (:use clj-thamil.core))) 4 | 5 | ;;;;;;;;;; 6 | ;; letters 7 | ;;;;;;;;;; 8 | 9 | (def letters [["ஃ" "அ" "ஆ" "இ" "ஈ" "உ" "ஊ" "எ" "ஏ" "ஐ" "ஒ" 
"ஓ" "ஔ"] 10 | ["க்" "க" "கா" "கி" "கீ" "கு" "கூ" "கெ" "கே" "கை" "கொ" "கோ" "கௌ"] 11 | ["ங்" "ங" "ஙா" "ஙி" "ஙீ" "ஙு" "ஙூ" "ஙெ" "ஙே" "ஙை" "ஙொ" "ஙோ" "ஙௌ"] 12 | ["ச்" "ச" "சா" "சி" "சீ" "சு" "சூ" "செ" "சே" "சை" "சொ" "சோ" "சௌ"] 13 | ["ஞ்" "ஞ" "ஞா" "ஞி" "ஞீ" "ஞு" "ஞூ" "ஞெ" "ஞே" "ஞை" "ஞொ" "ஞோ" "ஞௌ"] 14 | ["ட்" "ட" "டா" "டி" "டீ" "டு" "டூ" "டெ" "டே" "டை" "டொ" "டோ" "டௌ"] 15 | ["ண்" "ண" "ணா" "ணி" "ணீ" "ணு" "ணூ" "ணெ" "ணே" "ணை" "ணொ" "ணோ" "ணௌ"] 16 | ["த்" "த" "தா" "தி" "தீ" "து" "தூ" "தெ" "தே" "தை" "தொ" "தோ" "தௌ"] 17 | ["ந்" "ந" "நா" "நி" "நீ" "நு" "நூ" "நெ" "நே" "நை" "நொ" "நோ" "நௌ"] 18 | ["ப்" "ப" "பா" "பி" "பீ" "பு" "பூ" "பெ" "பே" "பை" "பொ" "போ" "பௌ"] 19 | ["ம்" "ம" "மா" "மி" "மீ" "மு" "மூ" "மெ" "மே" "மை" "மொ" "மோ" "மௌ"] 20 | ["ய்" "ய" "யா" "யி" "யீ" "யு" "யூ" "யெ" "யே" "யை" "யொ" "யோ" "யௌ"] 21 | ["ர்" "ர" "ரா" "ரி" "ரீ" "ரு" "ரூ" "ரெ" "ரே" "ரை" "ரொ" "ரோ" "ரௌ"] 22 | ["ல்" "ல" "லா" "லி" "லீ" "லு" "லூ" "லெ" "லே" "லை" "லொ" "லோ" "லௌ"] 23 | ["வ்" "வ" "வா" "வி" "வீ" "வு" "வூ" "வெ" "வே" "வை" "வொ" "வோ" "வௌ"] 24 | ["ழ்" "ழ" "ழா" "ழி" "ழீ" "ழு" "ழூ" "ழெ" "ழே" "ழை" "ழொ" "ழோ" "ழௌ"] 25 | ["ள்" "ள" "ளா" "ளி" "ளீ" "ளு" "ளூ" "ளெ" "ளே" "ளை" "ளொ" "ளோ" "ளௌ"] 26 | ["ற்" "ற" "றா" "றி" "றீ" "று" "றூ" "றெ" "றே" "றை" "றொ" "றோ" "றௌ"] 27 | ["ன்" "ன" "னா" "னி" "னீ" "னு" "னூ" "னெ" "னே" "னை" "னொ" "னோ" "னௌ"]]) 28 | 29 | (def vowels 30 | (let [vowel-row (first letters)] 31 | (concat (rest vowel-row) [(first vowel-row)]))) 32 | 33 | (def c-cv-letters (rest letters)) 34 | 35 | (def consonants (map first c-cv-letters)) 36 | 37 | ;;;;;;;;;;; 38 | ;; trie fns 39 | ;;;;;;;;;;; 40 | 41 | (defn- trie-add-seq 42 | "take a trie (represented as a nested map) and add a sequence, with an optional value attached to its terminus" 43 | ([trie-map s] 44 | (trie-add-seq trie-map s nil)) 45 | ([trie-map s term-val] 46 | (loop [idx (count s) 47 | tm trie-map] 48 | (when-not (neg? idx) 49 | (if (zero? 
idx) 50 | (if (= 1 (count s)) 51 | (assoc-in tm s {nil term-val}) 52 | (update-in tm (vec s) assoc-in [nil] term-val)) 53 | (let [[pre post] (split-at idx s)] 54 | (if (get-in tm pre) 55 | (update-in tm pre assoc-in (concat post [nil]) term-val) 56 | (recur (dec idx) tm)))))))) 57 | 58 | (defn make-trie 59 | "take a sequence (may be nested) of input sequences, or else takes a map (single-level) where keys are sequences and vals are attached to the terminus in trie. fn creates a trie, represented as a nested map." 60 | [sequence] 61 | (if (map? sequence) 62 | (reduce (partial apply trie-add-seq) {} sequence) 63 | (let [s (flatten sequence)] 64 | (reduce trie-add-seq {} s)))) 65 | 66 | (def ^{:private true 67 | :doc "a trie that contains all strings representing the individual letters in தமிழ்"} 68 | letter-trie (make-trie letters)) 69 | 70 | (defn trie-prefix-subtree 71 | "take a trie and a sequence, look up the sequence in the trie, and return the subtree" 72 | [trie sq] 73 | (get-in trie sq)) 74 | 75 | (defn in-trie? 76 | "return whether the sequence exists in the trie" 77 | ([sq] 78 | (in-trie? letter-trie sq)) 79 | ([trie sq] 80 | (-> (trie-prefix-subtree trie sq) 81 | (find nil) 82 | boolean))) 83 | 84 | (defn get-in-trie 85 | "return the corresponding value from the trie -- either the combined version of the input seq, or the value attached to the terminus of the input seq in the trie" 86 | [trie sq] 87 | (if (in-trie? trie sq) 88 | (let [subtree (trie-prefix-subtree trie sq)] 89 | (if (nil? (get subtree nil)) 90 | (apply str sq) 91 | (get subtree nil))) 92 | (apply str sq))) 93 | 94 | (defn- backfill-new-chars 95 | "a helper fn for str->elems that takes the new-chars array (after knowing that the next character cannot be added to it because the resultant char path would not be in the trie) as input. we now need to process the new-chars array to test whether it (or else, its substrings) are themselves in the trie. 
we need to work backwards to find the maximally long substring (char seq) that is also in trie. 96 | this fn is set up as O(n^2) on the assumption that input sequences won't be too big (the sequences that make up the paths of the trie don't have too many shared long sequences that start at the trie root). 97 | this fn might be needed to distinguish, for example, between a 3-elem chunk and 2 smaller chnks (ex: \"ksh\" vs \"k\" + \"sh\" -- ignore the fact that க்ஷ் and ஸ் aren't originally Thamil). in fact, this fn probably isn't necessary for original Thamil letters, since they only need 2 codepoints, and may be only an issue for English transliteration of Grantha letters, or more of an issue for others languages which require 3+ chars to form a letter)" 98 | [trie new-chars & [{:keys [flat-output] :as opts}]] 99 | (loop [chars new-chars 100 | in-trie-letters [] 101 | idx (count chars)] 102 | (condp = idx 103 | 0 (if-not flat-output (flatten in-trie-letters) in-trie-letters) 104 | 1 (recur (drop 1 chars) (conj in-trie-letters (get-in-trie trie (take 1 chars))) (count (drop 1 chars))) 105 | ;; else 106 | (if (in-trie? trie (take idx chars)) 107 | (recur (drop idx chars) (conj in-trie-letters (get-in-trie trie (take idx chars))) (count (drop idx chars))) 108 | (recur chars in-trie-letters (dec idx)))))) 109 | 110 | (defn str->elems 111 | "take a string and split it into chunks based on the input trie. for every maximally long sequence in the trie that is detected in the input string, the terminus-attached value is added to the output sequence if it exists (ex: useful for transliteration / format conversion), or else the string chunk itself is added." 
112 | ([s] 113 | (str->elems letter-trie s)) 114 | ([trie s & [{:keys [transform] :as opts}]] 115 | ;; loop is like a procedural for loop or while loop 116 | ;; this loop is like a for loop, where 0 <= idx < (count s) 117 | (loop [idx 0 118 | new-chars [] 119 | letters []] 120 | ;; test if we've consumed our entire input string 121 | (if (= idx (count s)) 122 | ;; test whether we have handled entire input string, or if 123 | ;; there are still chars still not fully processed 124 | (if (empty? new-chars) 125 | letters 126 | (concat letters (backfill-new-chars trie new-chars))) 127 | ;; start next iteration 128 | (let [next-char (.charAt s idx)] 129 | ;; if adding the next character makes a prefix in trie no 130 | ;; longer in trie, then we have our maximally long prefix. 131 | ;; if not, just add the char and continue 132 | (if (nil? (trie-prefix-subtree trie (apply str (conj new-chars next-char)))) 133 | ;; test whether this is just because we're at the 134 | ;; beginning of our string. if not, return our prefix 135 | ;; and reset our next prefix starting with the new char 136 | (if (empty? new-chars) 137 | (recur (inc idx) (conj new-chars next-char) letters) 138 | (recur (inc idx) [next-char] (concat letters (backfill-new-chars trie new-chars)))) 139 | (recur (inc idx) (conj new-chars next-char) letters))))))) 140 | 141 | ;;;;;;;;;;; 142 | ;; letters & phonemes 143 | ;;;;;;;;;;; 144 | 145 | (defn str->letters 146 | "take a string and split it into its constitutent தமிழ் + non-complex letters (non-complex = all left-to-right, 1-to-1 codepoint-to-glyph encodings -- this includes all Western languages)" 147 | [s] 148 | (str->elems letter-trie s)) 149 | 150 | (def ^{:doc "a map whose keys are தமிழ் letters and whose values are sequences of the constituent phonemes (represented as strings) of those letters. 
letters are from the set {உயிர்-, மெய்-, உயிர்மெய்-}எழுத்துகள், phonemes are from the set {உயிர்-,மெய்-}எழுத்துகள்"} 151 | phoneme-map 152 | {"ஃ" ["ஃ"], 153 | "அ" ["அ"], 154 | "ஆ" ["ஆ"], 155 | "இ" ["இ"], 156 | "ஈ" ["ஈ"], 157 | "உ" ["உ"], 158 | "ஊ" ["ஊ"], 159 | "எ" ["எ"], 160 | "ஏ" ["ஏ"], 161 | "ஐ" ["ஐ"], 162 | "ஒ" ["ஒ"], 163 | "ஓ" ["ஓ"], 164 | "ஔ" ["ஔ"], 165 | "க்" ["க்"], 166 | "க" ["க்" "அ"], 167 | "கா" ["க்" "ஆ"], 168 | "கி" ["க்" "இ"], 169 | "கீ" ["க்" "ஈ"], 170 | "கு" ["க்" "உ"], 171 | "கூ" ["க்" "ஊ"], 172 | "கெ" ["க்" "எ"], 173 | "கே" ["க்" "ஏ"], 174 | "கை" ["க்" "ஐ"], 175 | "கொ" ["க்" "ஒ"], 176 | "கோ" ["க்" "ஓ"], 177 | "கௌ" ["க்" "ஔ"], 178 | "ங்" ["ங்"], 179 | "ங" ["ங்" "அ"], 180 | "ஙா" ["ங்" "ஆ"], 181 | "ஙி" ["ங்" "இ"], 182 | "ஙீ" ["ங்" "ஈ"], 183 | "ஙு" ["ங்" "உ"], 184 | "ஙூ" ["ங்" "ஊ"], 185 | "ஙெ" ["ங்" "எ"], 186 | "ஙே" ["ங்" "ஏ"], 187 | "ஙை" ["ங்" "ஐ"], 188 | "ஙொ" ["ங்" "ஒ"], 189 | "ஙோ" ["ங்" "ஓ"], 190 | "ஙௌ" ["ங்" "ஔ"], 191 | "ச்" ["ச்"], 192 | "ச" ["ச்" "அ"], 193 | "சா" ["ச்" "ஆ"], 194 | "சி" ["ச்" "இ"], 195 | "சீ" ["ச்" "ஈ"], 196 | "சு" ["ச்" "உ"], 197 | "சூ" ["ச்" "ஊ"], 198 | "செ" ["ச்" "எ"], 199 | "சே" ["ச்" "ஏ"], 200 | "சை" ["ச்" "ஐ"], 201 | "சொ" ["ச்" "ஒ"], 202 | "சோ" ["ச்" "ஓ"], 203 | "சௌ" ["ச்" "ஔ"], 204 | "ஞ்" ["ஞ்"], 205 | "ஞ" ["ஞ்" "அ"], 206 | "ஞா" ["ஞ்" "ஆ"], 207 | "ஞி" ["ஞ்" "இ"], 208 | "ஞீ" ["ஞ்" "ஈ"], 209 | "ஞு" ["ஞ்" "உ"], 210 | "ஞூ" ["ஞ்" "ஊ"], 211 | "ஞெ" ["ஞ்" "எ"], 212 | "ஞே" ["ஞ்" "ஏ"], 213 | "ஞை" ["ஞ்" "ஐ"], 214 | "ஞொ" ["ஞ்" "ஒ"], 215 | "ஞோ" ["ஞ்" "ஓ"], 216 | "ஞௌ" ["ஞ்" "ஔ"], 217 | "ட்" ["ட்"], 218 | "ட" ["ட்" "அ"], 219 | "டா" ["ட்" "ஆ"], 220 | "டி" ["ட்" "இ"], 221 | "டீ" ["ட்" "ஈ"], 222 | "டு" ["ட்" "உ"], 223 | "டூ" ["ட்" "ஊ"], 224 | "டெ" ["ட்" "எ"], 225 | "டே" ["ட்" "ஏ"], 226 | "டை" ["ட்" "ஐ"], 227 | "டொ" ["ட்" "ஒ"], 228 | "டோ" ["ட்" "ஓ"], 229 | "டௌ" ["ட்" "ஔ"], 230 | "ண்" ["ண்"], 231 | "ண" ["ண்" "அ"], 232 | "ணா" ["ண்" "ஆ"], 233 | "ணி" ["ண்" "இ"], 234 | "ணீ" ["ண்" "ஈ"], 235 | "ணு" ["ண்" "உ"], 236 | "ணூ" ["ண்" "ஊ"], 237 | "ணெ" ["ண்" 
"எ"], 238 | "ணே" ["ண்" "ஏ"], 239 | "ணை" ["ண்" "ஐ"], 240 | "ணொ" ["ண்" "ஒ"], 241 | "ணோ" ["ண்" "ஓ"], 242 | "ணௌ" ["ண்" "ஔ"], 243 | "த்" ["த்"], 244 | "த" ["த்" "அ"], 245 | "தா" ["த்" "ஆ"], 246 | "தி" ["த்" "இ"], 247 | "தீ" ["த்" "ஈ"], 248 | "து" ["த்" "உ"], 249 | "தூ" ["த்" "ஊ"], 250 | "தெ" ["த்" "எ"], 251 | "தே" ["த்" "ஏ"], 252 | "தை" ["த்" "ஐ"], 253 | "தொ" ["த்" "ஒ"], 254 | "தோ" ["த்" "ஓ"], 255 | "தௌ" ["த்" "ஔ"], 256 | "ந்" ["ந்"], 257 | "ந" ["ந்" "அ"], 258 | "நா" ["ந்" "ஆ"], 259 | "நி" ["ந்" "இ"], 260 | "நீ" ["ந்" "ஈ"], 261 | "நு" ["ந்" "உ"], 262 | "நூ" ["ந்" "ஊ"], 263 | "நெ" ["ந்" "எ"], 264 | "நே" ["ந்" "ஏ"], 265 | "நை" ["ந்" "ஐ"], 266 | "நொ" ["ந்" "ஒ"], 267 | "நோ" ["ந்" "ஓ"], 268 | "நௌ" ["ந்" "ஔ"], 269 | "ப்" ["ப்"], 270 | "ப" ["ப்" "அ"], 271 | "பா" ["ப்" "ஆ"], 272 | "பி" ["ப்" "இ"], 273 | "பீ" ["ப்" "ஈ"], 274 | "பு" ["ப்" "உ"], 275 | "பூ" ["ப்" "ஊ"], 276 | "பெ" ["ப்" "எ"], 277 | "பே" ["ப்" "ஏ"], 278 | "பை" ["ப்" "ஐ"], 279 | "பொ" ["ப்" "ஒ"], 280 | "போ" ["ப்" "ஓ"], 281 | "பௌ" ["ப்" "ஔ"], 282 | "ம்" ["ம்"], 283 | "ம" ["ம்" "அ"], 284 | "மா" ["ம்" "ஆ"], 285 | "மி" ["ம்" "இ"], 286 | "மீ" ["ம்" "ஈ"], 287 | "மு" ["ம்" "உ"], 288 | "மூ" ["ம்" "ஊ"], 289 | "மெ" ["ம்" "எ"], 290 | "மே" ["ம்" "ஏ"], 291 | "மை" ["ம்" "ஐ"], 292 | "மொ" ["ம்" "ஒ"], 293 | "மோ" ["ம்" "ஓ"], 294 | "மௌ" ["ம்" "ஔ"], 295 | "ய்" ["ய்"], 296 | "ய" ["ய்" "அ"], 297 | "யா" ["ய்" "ஆ"], 298 | "யி" ["ய்" "இ"], 299 | "யீ" ["ய்" "ஈ"], 300 | "யு" ["ய்" "உ"], 301 | "யூ" ["ய்" "ஊ"], 302 | "யெ" ["ய்" "எ"], 303 | "யே" ["ய்" "ஏ"], 304 | "யை" ["ய்" "ஐ"], 305 | "யொ" ["ய்" "ஒ"], 306 | "யோ" ["ய்" "ஓ"], 307 | "யௌ" ["ய்" "ஔ"], 308 | "ர்" ["ர்"], 309 | "ர" ["ர்" "அ"], 310 | "ரா" ["ர்" "ஆ"], 311 | "ரி" ["ர்" "இ"], 312 | "ரீ" ["ர்" "ஈ"], 313 | "ரு" ["ர்" "உ"], 314 | "ரூ" ["ர்" "ஊ"], 315 | "ரெ" ["ர்" "எ"], 316 | "ரே" ["ர்" "ஏ"], 317 | "ரை" ["ர்" "ஐ"], 318 | "ரொ" ["ர்" "ஒ"], 319 | "ரோ" ["ர்" "ஓ"], 320 | "ரௌ" ["ர்" "ஔ"], 321 | "ல்" ["ல்"], 322 | "ல" ["ல்" "அ"], 323 | "லா" ["ல்" "ஆ"], 324 | "லி" ["ல்" "இ"], 325 | "லீ" ["ல்" "ஈ"], 326 
| "லு" ["ல்" "உ"], 327 | "லூ" ["ல்" "ஊ"], 328 | "லெ" ["ல்" "எ"], 329 | "லே" ["ல்" "ஏ"], 330 | "லை" ["ல்" "ஐ"], 331 | "லொ" ["ல்" "ஒ"], 332 | "லோ" ["ல்" "ஓ"], 333 | "லௌ" ["ல்" "ஔ"], 334 | "வ்" ["வ்"], 335 | "வ" ["வ்" "அ"], 336 | "வா" ["வ்" "ஆ"], 337 | "வி" ["வ்" "இ"], 338 | "வீ" ["வ்" "ஈ"], 339 | "வு" ["வ்" "உ"], 340 | "வூ" ["வ்" "ஊ"], 341 | "வெ" ["வ்" "எ"], 342 | "வே" ["வ்" "ஏ"], 343 | "வை" ["வ்" "ஐ"], 344 | "வொ" ["வ்" "ஒ"], 345 | "வோ" ["வ்" "ஓ"], 346 | "வௌ" ["வ்" "ஔ"], 347 | "ழ்" ["ழ்"], 348 | "ழ" ["ழ்" "அ"], 349 | "ழா" ["ழ்" "ஆ"], 350 | "ழி" ["ழ்" "இ"], 351 | "ழீ" ["ழ்" "ஈ"], 352 | "ழு" ["ழ்" "உ"], 353 | "ழூ" ["ழ்" "ஊ"], 354 | "ழெ" ["ழ்" "எ"], 355 | "ழே" ["ழ்" "ஏ"], 356 | "ழை" ["ழ்" "ஐ"], 357 | "ழொ" ["ழ்" "ஒ"], 358 | "ழோ" ["ழ்" "ஓ"], 359 | "ழௌ" ["ழ்" "ஔ"], 360 | "ள்" ["ள்"], 361 | "ள" ["ள்" "அ"], 362 | "ளா" ["ள்" "ஆ"], 363 | "ளி" ["ள்" "இ"], 364 | "ளீ" ["ள்" "ஈ"], 365 | "ளு" ["ள்" "உ"], 366 | "ளூ" ["ள்" "ஊ"], 367 | "ளெ" ["ள்" "எ"], 368 | "ளே" ["ள்" "ஏ"], 369 | "ளை" ["ள்" "ஐ"], 370 | "ளொ" ["ள்" "ஒ"], 371 | "ளோ" ["ள்" "ஓ"], 372 | "ளௌ" ["ள்" "ஔ"], 373 | "ற்" ["ற்"], 374 | "ற" ["ற்" "அ"], 375 | "றா" ["ற்" "ஆ"], 376 | "றி" ["ற்" "இ"], 377 | "றீ" ["ற்" "ஈ"], 378 | "று" ["ற்" "உ"], 379 | "றூ" ["ற்" "ஊ"], 380 | "றெ" ["ற்" "எ"], 381 | "றே" ["ற்" "ஏ"], 382 | "றை" ["ற்" "ஐ"], 383 | "றொ" ["ற்" "ஒ"], 384 | "றோ" ["ற்" "ஓ"], 385 | "றௌ" ["ற்" "ஔ"], 386 | "ன்" ["ன்"], 387 | "ன" ["ன்" "அ"], 388 | "னா" ["ன்" "ஆ"], 389 | "னி" ["ன்" "இ"], 390 | "னீ" ["ன்" "ஈ"], 391 | "னு" ["ன்" "உ"], 392 | "னூ" ["ன்" "ஊ"], 393 | "னெ" ["ன்" "எ"], 394 | "னே" ["ன்" "ஏ"], 395 | "னை" ["ன்" "ஐ"], 396 | "னொ" ["ன்" "ஒ"], 397 | "னோ" ["ன்" "ஓ"], 398 | "னௌ" ["ன்" "ஔ"]}) 399 | 400 | (def ^{:doc "a trie of the individual letters in தமிழ், whose terminus-attached values are sequences of each letter's phonemes -- this trie can be used in str->elems for directly splitting a word into its phonemes"} 401 | phoneme-trie (make-trie phoneme-map)) 402 | 403 | (def inverse-phoneme-map (set/map-invert phoneme-map)) 404 | 405 
(defn str->phonemes
  "Take a string and split it into a flat seq of its constituent தமிழ்
  phonemes (each phoneme is a string), by running str->elems over
  phoneme-trie. Characters that are not தமிழ் letters are passed through
  unchanged, one element per character.
  ex: \"கூ\" => [\"க்\" \"ஊ\"]"
  [s]
  (str->elems phoneme-trie s))

;; TODO: create a make-inverse-trie fn
;; TODO: turn str->elem into seq->elem, use that to refactor phonemes->str

(def ^{:private true
       :doc "a trie whose paths are the concatenated phoneme strings of each தமிழ் letter (ex: \"க்அ\") and whose terminus-attached values are the corresponding combined letters (ex: \"க\"). built once at load time so that phonemes->str does not have to rebuild it on every call."}
  inverse-concat-phoneme-trie
  (make-trie (into {} (for [[phonemes letter] inverse-phoneme-map]
                        ;; keys of inverse-phoneme-map are seqs of phoneme
                        ;; strings; the trie needs a single string per path
                        [(apply str phonemes) letter]))))

(defn phonemes->str
  "Given a seq of phonemes (or a string of already-concatenated phonemes),
  create a string where the phonemes are combined into their proper letters.
  Anything that is not a recognized phoneme sequence is passed through as-is.
  ex: \"ப்அக்க்அம்\" => \"பக்கம்\""
  [phoneme-seq]
  (->> (apply str phoneme-seq)
       (str->elems inverse-concat-phoneme-trie)
       (apply str)))

;;;;;;;;;;;;;;
;; sorting fns
;;;;;;;;;;;;;;

(def ^{:private false
       :doc "a flattened seq of all தமிழ் letters in lexicographical (alphabetical) order -- put another way, in the order of அகர முதல் னகர இறுவாய் as the 2500 yr old grammatical compendium தொல்காப்பியம் states in its outset"}
  letter-seq (flatten (concat vowels c-cv-letters)))

(def ^{:doc "a map where the key is a தமிழ் letter, and the value is a number indicating its relative position in sort order"}
  sort-map (zipmap letter-seq (range)))

(defn letter-before?
  "A 2-arg predicate indicating whether the first string comes before the
  second string, assuming that each string represents a single letter.

  Ordering rules, in order of precedence:
  - two nils => true
  - neither input is a தமிழ் letter (not a key of sort-map) => plain
    `compare` ordering of the two inputs
  - only the first is not a தமிழ் letter => true (non-தமிழ் sorts first)
  - only the second is not a தமிழ் letter => false
  - otherwise => compare positions in sort-map"
  [s1 s2]
  (let [rank1 (get sort-map s1)
        rank2 (get sort-map s2)]
    (cond (and (nil? s1) (nil? s2)) true
          (and (nil? rank1) (nil? rank2)) (boolean (neg? (compare s1 s2)))
          (nil? rank1) true
          (nil? rank2) false
          :else (< rank1 rank2))))

(def ^{:doc "a comparator for strings that represent a single letter that respects தமிழ் alphabetical order"}
  letter-comp (comparator letter-before?))

(defn word-before?
  "A 2-arg predicate indicating whether the first string comes before the
  second string lexicographically, handling தமிழ் letters in addition to
  1-to-1 codepoint-to-letter encodings. Both strings are split into letters
  and compared letter by letter with letter-before?; a word that is a strict
  prefix of the other comes first, and equal words yield false."
  [str1 str2]
  (loop [s1 (str->elems str1)
         s2 (str->elems str2)]
    (cond (not (seq s1)) (boolean (seq s2)) ; s1 exhausted: before only if s2 has more
          (not (seq s2)) false              ; s2 exhausted first: s1 is longer
          (not= (first s1) (first s2)) (letter-before? (first s1) (first s2))
          :else (recur (rest s1) (rest s2)))))

(def ^{:doc "a comparator for lexicographical comparisons of arbitrary strings (consisting of தமிழ் letters and letters from 1-to-1 encodings)"}
  word-comp (comparator word-before?))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; word & character traits fns
;; position fns
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defn whitespace?
  "Returns whether a Java Character a.k.a. Unicode codepoint is whitespace or
  not (according to Java's understanding of Unicode on :clj, and according to
  the \\s regex class on :cljs). Returns nil for a nil input."
  [ch]
  (when ch
    #?(:clj (Character/isWhitespace ch)
       :cljs (boolean (re-seq #"\s" (str ch))))))

(defn wordy-char?
  "Take a Java Character a.k.a. Unicode codepoint and return whether it
  represents a character that might go into a word or identifier. In other
  words, it is for Unicode what \\w is in regular expressions for ASCII
  characters -- alpha-numeric characters. `$` and `_` are explicitly not
  wordy. Returns nil for a nil input."
  [ch]
  (when ch
    (and
     (not (get #{\$ \_} ch))
     ;; NOTE: the :cljs branch is only an approximation -- it treats every
     ;; non-whitespace character (including punctuation) as wordy
     #?(:clj (Character/isJavaIdentifierPart ch)
        :cljs (not (whitespace? (str ch)))))))

;; TODO: DRY on seq-prefix & seq-prefix? -- is there a Clojure implementation?
(defn seq-prefix
  "Return, as a vector, the longest prefix shared by the 2 input sequences.
  nil inputs are treated as empty sequences."
  [seq1 seq2]
  (loop [s1 seq1
         s2 seq2
         comm-prefix []]
    (let [f1 (first s1)
          f2 (first s2)]
      (if (or (empty? s1)
              (empty? s2)
              (not= f1 f2))
        comm-prefix
        (recur (rest s1) (rest s2) (conj comm-prefix f1))))))

(defn seq-prefix?
  "Return whether the query seq is a prefix of the target seq. An empty (or
  nil) query is a prefix of any non-empty target; an empty (or nil) target
  has no prefixes at all, so the result is then always false."
  [tgt qry]
  (let [pfx (seq-prefix tgt qry)]
    (boolean
     (and (seq tgt)
          (or (= (seq qry) pfx)
              ;; (seq qry) is nil for an empty query, which is not= to []
              (and (empty? qry) (empty? pfx)))))))

(defn prefix?
  "return whether the 2nd word is a prefix of the 1st word, based on தமிழ் phonemes"
  [str1 str2]
  (let [phonemes1 (str->elems phoneme-trie str1)
        phonemes2 (str->elems phoneme-trie str2)]
    (seq-prefix? phonemes1 phonemes2)))

(defn suffix?
  "return whether the 2nd word is a suffix of the 1st word, based on தமிழ் phonemes"
  [str1 str2]
  (let [phonemes1 (str->elems phoneme-trie str1)
        phonemes2 (str->elems phoneme-trie str2)]
    ;; a suffix is a prefix of the reversed phoneme sequences
    (seq-prefix? (reverse phonemes1) (reverse phonemes2))))

;; TODO: DRY on seq-index-of -- is there already a Clojure implementation?

(defn seq-index-of
  "Given a target seq and a query seq, return the 0-based index of the first
  occurrence of the query seq appearing inside the target seq, or else return
  -1 (is that Clojure-y, or is returning nil more Clojure-y?). An empty target
  always yields -1, even for an empty query.
  Calls seq-prefix? at every index -- only realizes the target seq as needed,
  pulls query seq into memory."
  [tgt qry]
  (let [qlen (count qry)]
    (loop [ts tgt
           idx 0]
      (if (or (empty? ts)
              ;; fewer than qlen elements remain, so no match is possible
              (< (count (take qlen ts)) qlen))
        -1
        (if (seq-prefix? ts qry)
          idx
          (recur (rest ts) (inc idx)))))))

(def ^{:doc "a wrapper around the native fn call that gives the index of the first occurrence of a particular substring"}
  index-of
  #?(:cljs seq-index-of
     :clj (fn [tgt qry]
            (.indexOf tgt qry))))

(defn wordy-seq
  "Take a string and produce a seq of the Unicode-aware version of the \\w+
  regex pattern - basically, split the input string into all maximal chunks
  of wordy characters (see wordy-char?). Returns nil for a nil input.
  Originally, I called this fn word-seq, but that is not true for all
  languages and/or throughout time where there was no separation between
  words (ex: Thai, Chinese, Japanese, Latin manuscripts, ancient Thamil stone
  inscriptions, etc.)"
  [s]
  (when s
    (let [chunks (partition-by wordy-char? s)
          word-chunks (filter (comp wordy-char? first) chunks)
          words (map (partial apply str) word-chunks)]
      words)))

(defn wordy-chunk-and-cursor-pos
  "Given a string and an index number that the cursor is on or before, return
  a pair [chunk pos]: the wordy chunk that the cursor touches, and the cursor
  position relative to the start of that chunk. The cursor touches a chunk
  when it is inside it, immediately before its first character, or
  immediately after its last character. If the cursor touches no wordy chunk
  (ex: it sits between two whitespace characters), return nil. Accepts idx
  being at the end of the string (idx == (count s))."
  [s idx]
  (assert (<= 0 idx) (str "cursor postiion out of range [idx =" idx "]"))
  (assert (<= idx (count s)) (str "cursor postiion out of range [idx =" idx "], [str len =" (count s) "]"))
  (let [before (subs s 0 idx)
        after (subs s idx)
        ;; turn a run of chars into a string, but only if it is a wordy run
        wordy-run (fn [run] (when (wordy-char? (first run)) (apply str run)))
        ;; only the runs adjacent to the cursor matter. taking them directly
        ;; from the partitions (instead of searching for the chunk text with
        ;; index-of) keeps this correct when the same chunk text also occurs
        ;; earlier in the string (ex: \"ab ab\" with the cursor at the end)
        prev-chunk (wordy-run (last (partition-by wordy-char? before)))
        next-chunk (wordy-run (first (partition-by wordy-char? after)))]
    (cond
      (and prev-chunk next-chunk) [(str prev-chunk next-chunk) (count prev-chunk)]
      prev-chunk [prev-chunk (count prev-chunk)]
      next-chunk [next-chunk 0]
      :else nil)))

(def wordy-chunk-under (comp first wordy-chunk-and-cursor-pos))

(defn cursor-adjust
  "Given a string, a cursor position (idx), and a direction, give the new
  position of the cursor (as an index into the whole string) that is on the
  boundary of the actual letters of the wordy chunk under the cursor.
  direction :to-first / :முதல்-நோக்கி moves to the nearest letter boundary at
  or before idx; :to-last / :பின்-நோக்கி (and any other value, including nil)
  moves to the nearest letter boundary at or after idx. If the cursor is
  already on a letter boundary, or is not touching any wordy chunk, idx is
  returned unchanged."
  [s idx direction]
  (let [[wordy-chunk rel-idx] (wordy-chunk-and-cursor-pos s idx)]
    (if-not wordy-chunk
      idx
      (let [;; where the chunk starts within the whole string
            chunk-start (- idx rel-idx)
            ;; letter boundaries, as offsets relative to the chunk start
            boundaries (reductions + 0 (map count (str->letters wordy-chunk)))
            before-idx (->> boundaries
                            (take-while #(<= % rel-idx))
                            last)
            after-idx (->> boundaries
                           (drop-while #(< % rel-idx))
                           first)]
        ;; translate the chunk-relative boundary back to a whole-string index
        (+ chunk-start
           (case direction
             (:to-first :முதல்-நோக்கி) before-idx
             (:to-last :பின்-நோக்கி) after-idx
             after-idx))))))
-------------------------------------------------------------------------------- /src/clj_thamil/format/convert.cljc: -------------------------------------------------------------------------------- 1 | (ns clj-thamil.format.convert 2 | (:require ;; [clojure.algo.generic.functor :as ftor] 3 | [clojure.set :as set] 4 | [clj-thamil.format :as fmt]) 5 | #?(:clj (:gen-class))) 6 | 7 | ;; A general note about the conversion and transliteration schemes 8 | ;; defined by the maps in this namespace: 9
| ;; 10 | ;; There may be multiple English letter sequences mapping to the same 11 | ;; தமிழ் letter. Also note that we get the mapping for the reverse conversion 12 | ;; by inverting the map (keys become values, and values become keys). 13 | ;; When multiple keys map to the same value, and you invert the map, 14 | ;; the inverse will have the old value pointing to a single old key 15 | ;; which is determined non-deterministically. Therefore, the inverse 16 | ;; map may need to be "manually adjusted" in that case to select a 17 | ;; default mapping in the inverse map. 18 | 19 | 20 | ;;;;;;;; 21 | ;; தமிழ் <-> Romanized 22 | ;;;;;;;; 23 | 24 | (def ^{:doc "a map of English strings to their தமிழ் phonemes (and consonant clusters)."} 25 | romanized-தமிழ்-phoneme-map 26 | {"g" "க்" 27 | "s" "ச்" 28 | "d" "ட்" 29 | "w" "ந்" 30 | "b" "ப்" 31 | "z" "ழ்" 32 | "mb" "ம்ப்" 33 | "nth" "ந்த்" 34 | "nr" "ன்ற்" 35 | "nd" "ண்ட்" 36 | 37 | "a" "அ" 38 | "aa" "ஆ" 39 | "A" "ஆ" 40 | "i" "இ" 41 | "ii" "ஈ" 42 | "I" "ஈ" 43 | "u" "உ" 44 | "uu" "ஊ" 45 | "U" "ஊ" 46 | "e" "எ" 47 | "ee" "ஏ" 48 | "E" "ஏ" 49 | "ai" "ஐ" 50 | "o" "ஒ" 51 | "oo" "ஓ" 52 | "O" "ஓ" 53 | "au" "ஔ" 54 | "q" "ஃ" 55 | "k" "க்" 56 | "ng" "ங்" 57 | "ch" "ச்" 58 | "nj" "ஞ்" 59 | "t" "ட்" 60 | "N" "ண்" 61 | "th" "த்" 62 | "n-" "ந்" 63 | "p" "ப்" 64 | "m" "ம்" 65 | "y" "ய்" 66 | "r" "ர்" 67 | "l" "ல்" 68 | "v" "வ்" 69 | "zh" "ழ்" 70 | "L" "ள்" 71 | "R" "ற்" 72 | "n" "ன்"}) 73 | 74 | (def ^{:doc "designates specific transliterations of phonemes / phoneme clusters in the தமிழ்->English direction (ex: resolving situations where multiple English sequences map to a single தமிழ் phoneme)"} 75 | தமிழ்-romanized-phoneme-overrides 76 | {"ஓ" "O" 77 | "ஏ" "E" 78 | "க்" "k" 79 | "ச்" "ch" 80 | "ட்" "t" 81 | "ந்" "n" 82 | "ப்" "p" 83 | "ழ்" "zh" 84 | "ங்க்" "ng" 85 | "ஆ" "aa" 86 | "ஈ" "ii" 87 | "ஊ" "uu" 88 | "ன்ப்" "nb" 89 | "ண்ப்" "nb"}) 90 | 91 | (def ^{:doc "an inverse of romanized-தமிழ்-phoneme-map, but with a few manual mappings for 
certain தமிழ் letters that can be input in multiple ways (or whose transliteration into English should be different then how it is input via English)"} 92 | தமிழ்-romanized-phoneme-map 93 | (merge (set/map-invert romanized-தமிழ்-phoneme-map) 94 | தமிழ்-romanized-phoneme-overrides)) 95 | 96 | (def romanized-தமிழ்-phoneme-trie (fmt/make-trie romanized-தமிழ்-phoneme-map)) 97 | 98 | (def தமிழ்-romanized-phoneme-trie (fmt/make-trie தமிழ்-romanized-phoneme-map)) 99 | 100 | (defn romanized->தமிழ் 101 | "transliterates a string of English (transliterated தமிழ்) into the தமிழ் that it represents" 102 | [s] 103 | (fmt/phonemes->str (fmt/str->elems romanized-தமிழ்-phoneme-trie s))) 104 | 105 | (defn தமிழ்->romanized 106 | "transliterates a தமிழ் string into English (transliterated தமிழ்)" 107 | [s] 108 | (->> (fmt/str->phonemes s) 109 | (apply str) 110 | (fmt/str->elems தமிழ்-romanized-phoneme-trie) 111 | (apply str))) 112 | 113 | ;;;;;;;; 114 | ;; தமிழ் <-> TAB 115 | ;;;;;;;; 116 | 117 | (def tab-map 118 | {"அ" "Ü" 119 | "ஆ" "Ý" 120 | "இ" "Þ" 121 | "ஈ" "ß" 122 | "உ" "à" 123 | "ஊ" "á" 124 | "எ" "â" 125 | "ஏ" "ã" 126 | "ஐ" "ä" 127 | "ஒ" "å" 128 | "ஓ" "æ" 129 | "ஔ" "å÷" 130 | "ஃ" "ç" 131 | "க்" "è¢" 132 | "க" "è" 133 | "கா" "è£" 134 | "கி" "è¤" 135 | "கீ" "è¦" 136 | "கு" "°" 137 | "கூ" "Ã" 138 | "கெ" "ªè" 139 | "கே" "«è" 140 | "கை" "¬è" 141 | "கொ" "ªè£" 142 | "கோ" "«è£" 143 | "கௌ" "ªè÷" 144 | "ங்" "é¢" 145 | "ங" "é" 146 | "ஙா" "é£" 147 | "ஙி" "é¤" 148 | "ஙீ" "é¦" 149 | "ஙு" "±" 150 | "ஙூ" "Ä" 151 | "ஙெ" "ªé" 152 | "ஙே" "«é" 153 | "ஙை" "¬é" 154 | "ஙொ" "ªé£" 155 | "ஙோ" "«é£" 156 | "ஙௌ" "ªé÷" 157 | "ச்" "ê¢" 158 | "ச" "ê" 159 | "சா" "ê£" 160 | "சி" "ê¤" 161 | "சீ" "ê¦" 162 | "சு" "²" 163 | "சூ" "Å" 164 | "செ" "ªê" 165 | "சே" "«ê" 166 | "சை" "¬ê" 167 | "சொ" "ªê£" 168 | "சோ" "«ê£" 169 | "சௌ" "ªê÷" 170 | "ஞ்" "ë¢" 171 | "ஞ" "ë" 172 | "ஞா" "ë£" 173 | "ஞி" "ë¤" 174 | "ஞீ" "ë¦" 175 | "ஞு" "³" 176 | "ஞூ" "Æ" 177 | "ஞெ" "ªë" 178 | "ஞே" "«ë" 179 | "ஞை" "¬ë" 180 | "ஞொ" "ªë£" 181 | "ஞோ" 
"«ë£" 182 | "ஞௌ" "ªë÷" 183 | "ட்" "ì¢" 184 | "ட" "ì" 185 | "டா" "ì£" 186 | "டி" "®" 187 | "டீ" "ì¦" 188 | "டு" "´" 189 | "டூ" "Ç" 190 | "டெ" "ªì" 191 | "டே" "«ì" 192 | "டை" "¬ì" 193 | "டொ" "ªì£" 194 | "டோ" "«ì£" 195 | "டௌ" "ªì÷" 196 | "ண்" "í¢" 197 | "ண" "í" 198 | "ணா" "í£" 199 | "ணி" "í¤" 200 | "ணீ" "í¦" 201 | "ணு" "µ" 202 | "ணூ" "È" 203 | "ணெ" "ªí" 204 | "ணே" "«í" 205 | "ணை" "¬í" 206 | "ணொ" "ªí£" 207 | "ணோ" "«í£" 208 | "ணௌ" "ªí÷" 209 | "த்" "î¢" 210 | "த" "î" 211 | "தா" "î£" 212 | "தி" "î¤" 213 | "தீ" "î¦" 214 | "து" "¶" 215 | "தூ" "É" 216 | "தெ" "ªî" 217 | "தே" "«î" 218 | "தை" "¬î" 219 | "தொ" "ªî£" 220 | "தோ" "«î£" 221 | "தௌ" "ªî÷" 222 | "ந்" "ï¢" 223 | "ந" "ï" 224 | "நா" "ï£" 225 | "நி" "ï¤" 226 | "நீ" "ï¦" 227 | "நு" "¸" 228 | "நூ" "Ë" 229 | "நெ" "ªï" 230 | "நே" "«ï" 231 | "நை" "¬ï" 232 | "நொ" "ªï£" 233 | "நோ" "«ï£" 234 | "நௌ" "ªï÷" 235 | "ப்" "ð¢" 236 | "ப" "ð" 237 | "பா" "ð£" 238 | "பி" "ð¤" 239 | "பீ" "ð¦" 240 | "பு" "¹" 241 | "பூ" "Ì" 242 | "பெ" "ªð" 243 | "பே" "«ð" 244 | "பை" "¬ð" 245 | "பொ" "ªð£" 246 | "போ" "«ð£" 247 | "பௌ" "ªð÷" 248 | "ம்" "ñ¢" 249 | "ம" "ñ" 250 | "மா" "ñ£" 251 | "மி" "ñ¤" 252 | "மீ" "ñ¦" 253 | "மு" "º" 254 | "மூ" "Í" 255 | "மெ" "ªñ" 256 | "மே" "«ñ" 257 | "மை" "¬ñ" 258 | "மொ" "ªñ£" 259 | "மோ" "«ñ£" 260 | "மௌ" "ªñ÷" 261 | "ய்" "ò¢" 262 | "ய" "ò" 263 | "யா" "ò£" 264 | "யி" "ò¤" 265 | "யீ" "ò¦" 266 | "யு" "»" 267 | "யூ" "Î" 268 | "யெ" "ªò" 269 | "யே" "«ò" 270 | "யை" "¬ò" 271 | "யொ" "ªò£" 272 | "யோ" "«ò£" 273 | "யௌ" "ªò÷" 274 | "ர்" "ó¢" 275 | "ர" "ó" 276 | "ரா" "ó£" 277 | "ரி" "ó¤" 278 | "ரீ" "ó¦" 279 | "ரு" "¼" 280 | "ரூ" "Ï" 281 | "ரெ" "ªó" 282 | "ரே" "«ó" 283 | "ரை" "¬ó" 284 | "ரொ" "ªó£" 285 | "ரோ" "«ó£" 286 | "ரௌ" "ªó÷" 287 | "ல்" "ô¢" 288 | "ல" "ô" 289 | "லா" "ô£" 290 | "லி" "ô¤" 291 | "லீ" "ô¦" 292 | "லு" "½" 293 | "லூ" "Ö" 294 | "லெ" "ªô" 295 | "லே" "«ô" 296 | "லை" "¬ô" 297 | "லொ" "ªô£" 298 | "லோ" "«ô£" 299 | "லௌ" "ªô÷" 300 | "வ்" "õ¢" 301 | "வ" "õ" 302 | "வா" "õ£" 303 | "வி" "õ¤" 304 | "வீ" "õ¦" 305 | "வு" "¾" 306 | "வூ" "×" 307 | 
"வெ" "ªõ" 308 | "வே" "«õ" 309 | "வை" "¬õ" 310 | "வொ" "ªõ£" 311 | "வோ" "«õ£" 312 | "வௌ" "ªõ÷" 313 | "ழ்" "ö¢" 314 | "ழ" "ö" 315 | "ழா" "ö£" 316 | "ழி" "ö¤" 317 | "ழீ" "ö¦" 318 | "ழு" "¿" 319 | "ழூ" "Ø" 320 | "ழெ" "ªö" 321 | "ழே" "«ö" 322 | "ழை" "¬ö" 323 | "ழொ" "ªö£" 324 | "ழோ" "«ö£" 325 | "ழௌ" "ªö÷" 326 | "ள்" "÷¢" 327 | "ள" "÷" 328 | "ளா" "÷£" 329 | "ளி" "÷¤" 330 | "ளீ" "÷¦" 331 | "ளு" "À" 332 | "ளூ" "Ù" 333 | "ளெ" "ª÷" 334 | "ளே" "«÷" 335 | "ளை" "¬÷" 336 | "ளொ" "ª÷£" 337 | "ளோ" "«÷£" 338 | "ளௌ" "ª÷÷" 339 | "ற்" "ø¢" 340 | "ற" "ø" 341 | "றா" "ø£" 342 | "றி" "ø¤" 343 | "றீ" "ø¦" 344 | "று" "Á" 345 | "றூ" "Ú" 346 | "றெ" "ªø" 347 | "றே" "«ø" 348 | "றை" "¬ø" 349 | "றொ" "ªø£" 350 | "றோ" "«ø£" 351 | "றௌ" "ªø÷" 352 | "ன்" "ù¢" 353 | "ன" "ù" 354 | "னா" "ù£" 355 | "னி" "ù¤" 356 | "னீ" "ù¦" 357 | "னு" "Â" 358 | "னூ" "Û" 359 | "னெ" "ªù" 360 | "னே" "«ù" 361 | "னை" "¬ù" 362 | "னொ" "ªù£" 363 | "னோ" "«ù£" 364 | "னௌ" "ªù÷"}) 365 | 366 | ;;;;;;;; 367 | ;; தமிழ் <-> Bamini 368 | ;;;;;;;; 369 | 370 | (def bamini-map 371 | {"அ" "m" 372 | "ஆ" "M" 373 | "இ" "," 374 | "ஈ" "<" 375 | "உ" "c" 376 | "ஊ" "C" 377 | "எ" "v" 378 | "ஏ" "V" 379 | "ஐ" "I" 380 | "ஒ" "x" 381 | "ஓ" "X" 382 | "ஔ" "xs" 383 | "ஃ" "/" 384 | "க்" "f;" 385 | "க" "f" 386 | "கா" "fh" 387 | "கி" "fp" 388 | "கீ" "fP" 389 | "கு" "F" 390 | "கூ" "$" 391 | "கெ" "nf" 392 | "கே" "Nf" 393 | "கை" "if" 394 | "கொ" "nfh" 395 | "கோ" "Nfh" 396 | "கௌ" "nfs" 397 | "ங்" "q;" 398 | "ங" "q" 399 | "ஙா" "qh" 400 | "ஙி" "qp" 401 | "ஙீ" "qP" 402 | ;; "ஙு" nil 403 | ;; "ஙூ" nil 404 | "ஙெ" "nq" 405 | "ஙே" "Nq" 406 | "ஙை" "iq" 407 | "ஙொ" "nqh" 408 | "ஙோ" "Nqh" 409 | "ஙௌ" "nqs" 410 | "ச்" "r;" 411 | "ச" "r" 412 | "சா" "rh" 413 | "சி" "rp" 414 | "சீ" "rP" 415 | "சு" "R" 416 | "சூ" "#" 417 | "செ" "nr" 418 | "சே" "Nr" 419 | "சை" "ir" 420 | "சொ" "nrh" 421 | "சோ" "Nrh" 422 | "சௌ" "nrs" 423 | "ஞ்" "Q;" 424 | "ஞ" "Q" 425 | "ஞா" "Qh" 426 | "ஞி" "Qp" 427 | "ஞீ" "QP" 428 | ;; "ஞு" nil 429 | ;; "ஞூ" nil 430 | "ஞெ" "nQ" 431 | "ஞே" "NQ" 432 | "ஞை" "iQ" 433 | "ஞொ" 
"nQh" 434 | "ஞோ" "NQh" 435 | "ஞௌ" "nQs" 436 | "ட்" "l;" 437 | "ட" "l" 438 | "டா" "lh" 439 | "டி" "b" 440 | "டீ" "B" 441 | "டு" "L" 442 | "டூ" "^" 443 | "டெ" "nl" 444 | "டே" "Nl" 445 | "டை" "il" 446 | "டொ" "nlh" 447 | "டோ" "Nlh" 448 | "டௌ" "nls" 449 | "ண்" "z;" 450 | "ண" "z" 451 | "ணா" "zh" 452 | "ணி" "zp" 453 | "ணீ" "zP" 454 | "ணு" "Z" 455 | "ணூ" "Z}" 456 | "ணெ" "nz" 457 | "ணே" "Nz" 458 | "ணை" "iz" 459 | "ணொ" "nzh" 460 | "ணோ" "Nzh" 461 | "ணௌ" "nzs" 462 | "த்" "j;" 463 | "த" "j" 464 | "தா" "jh" 465 | "தி" "jp" 466 | "தீ" "jP" 467 | "து" "J" 468 | "தூ" "J}" 469 | "தெ" "nj" 470 | "தே" "Nj" 471 | "தை" "ij" 472 | "தொ" "njh" 473 | "தோ" "Njh" 474 | "தௌ" "njs" 475 | "ந்" "e;" 476 | "ந" "e" 477 | "நா" "eh" 478 | "நி" "ep" 479 | "நீ" "eP" 480 | "நு" "E" 481 | "நூ" "E}" 482 | "நெ" "ne" 483 | "நே" "Ne" 484 | "நை" "ie" 485 | "நொ" "neh" 486 | "நோ" "Neh" 487 | "நௌ" "nes" 488 | "ப்" "g;" 489 | "ப" "g" 490 | "பா" "gh" 491 | "பி" "gp" 492 | "பீ" "gP" 493 | "பு" "G" 494 | "பூ" "G+" 495 | "பெ" "ng" 496 | "பே" "Ng" 497 | "பை" "ig" 498 | "பொ" "ngh" 499 | "போ" "Ngh" 500 | "பௌ" "ngs" 501 | "ம்" "k;" 502 | "ம" "k" 503 | "மா" "kh" 504 | "மி" "kp" 505 | "மீ" "kP" 506 | "மு" "K" 507 | "மூ" "%" 508 | "மெ" "nk" 509 | "மே" "Nk" 510 | "மை" "ik" 511 | "மொ" "nkh" 512 | "மோ" "Nkh" 513 | "மௌ" "nks" 514 | "ய்" "a;" 515 | "ய" "a" 516 | "யா" "ah" 517 | "யி" "ap" 518 | "யீ" "aP" 519 | "யு" "A" 520 | "யூ" "A+" 521 | "யெ" "na" 522 | "யே" "Na" 523 | "யை" "ia" 524 | "யொ" "nah" 525 | "யோ" "Nah" 526 | "யௌ" "nas" 527 | "ர்" "u;" 528 | "ர" "u" 529 | "ரா" "uh" 530 | "ரி" "up" 531 | "ரீ" "uP" 532 | "ரு" "U" 533 | "ரூ" "&" 534 | "ரெ" "nu" 535 | "ரே" "Nu" 536 | "ரை" "iu" 537 | "ரொ" "nuh" 538 | "ரோ" "Nuh" 539 | "ரௌ" "nus" 540 | "ல்" "y;" 541 | "ல" "y" 542 | "லா" "yh" 543 | "லி" "yp" 544 | "லீ" "yP" 545 | "லு" "Y" 546 | "லூ" "Y}" 547 | "லெ" "ny" 548 | "லே" "Ny" 549 | "லை" "iy" 550 | "லொ" "nyh" 551 | "லோ" "Nyh" 552 | "லௌ" "nys" 553 | "வ்" "t;" 554 | "வ" "t" 555 | "வா" "th" 556 | "வி" "tp" 557 | "வீ" "tP" 558 | "வு" "T" 
;; bamini-map, continued (Unicode letter -> Bamini keystrokes).
;; Fixed in this stretch: the ௌ forms of வ and ழ were "ntt" / "noo"; every
;; other consonant in this table spells ௌ as "n" + consonant + "s", so they
;; are now "nts" / "nos".
   "வூ" "T+"
   "வெ" "nt"
   "வே" "Nt"
   "வை" "it"
   "வொ" "nth"
   "வோ" "Nth"
   "வௌ" "nts"
   "ழ்" "o;"
   "ழ" "o"
   "ழா" "oh"
   "ழி" "op"
   "ழீ" "oP"
   "ழு" "O"
   "ழூ" "*"
   "ழெ" "no"
   "ழே" "No"
   "ழை" "io"
   "ழொ" "noh"
   "ழோ" "Noh"
   "ழௌ" "nos"
   "ள்" "s;"
   "ள" "s"
   "ளா" "sh"
   "ளி" "sp"
   "ளீ" "sP"
   "ளு" "S"
   "ளூ" "Sh"
   "ளெ" "ns"
   "ளே" "Ns"
   "ளை" "is"
   "ளொ" "nsh"
   "ளோ" "Nsh"
   "ளௌ" "nss"
   "ற்" "w;"
   "ற" "w"
   "றா" "wh"
   "றி" "wp"
   "றீ" "wP"
   "று" "W"
   "றூ" "W}"
   "றெ" "nw"
   "றே" "Nw"
   "றை" "iw"
   "றொ" "nwh"
   "றோ" "Nwh"
   "றௌ" "nws"
   "ன்" "d;"
   "ன" "d"
   "னா" "dh"
   "னி" "dp"
   "னீ" "dP"
   "னு" "D"
   "னூ" "D}"
   "னெ" "nd"
   "னே" "Nd"
   "னை" "id"
   "னொ" "ndh"
   "னோ" "Ndh"
   "னௌ" "nds"

   ;; Grantha consonants
   "ஜ்" "[;"
   "ஜ" "["
   "ஜா" "[h"
   "ஜி" "[p"
   "ஜீ" "[P"
   "ஜு" "[{"
   "ஜூ" "[\""
   "ஜெ" "n["
   "ஜே" "N["
   "ஜை" "i["
   "ஜொ" "n[h"
   "ஜோ" "N[h"
   "ஜௌ" "n[s"

   "ஷ்" "\\;"
   "ஷ" "\\"
   "ஷா" "\\h"
   "ஷி" "\\p"
   "ஷீ" "\\P"
   "ஷு" "\\{"
   "ஷூ" "\\\""
   "ஷெ" "n\\"
   "ஷே" "N\\"
   "ஷை" "i\\"
   "ஷொ" "n\\h"
   "ஷோ" "N\\h"
   "ஷௌ" "n\\s"

   "ஸ்" "];"
   "ஸ" "]"
   "ஸா" "]h"
   "ஸி" "]p"
   "ஸீ" "]P"
   "ஸு" "]{"
   "ஸூ" "]\""
   "ஸெ" "n]"
   "ஸே" "N]"
   "ஸை" "i]"
   "ஸொ" "n]h"
   "ஸோ" "N]h"
   "ஸௌ" "n]s"

   "ஹ்" "`;"
   "ஹ" "`"
   "ஹா" "`h"
   "ஹி" "`p"
   "ஹீ" "`P"
   "ஹு" "`{"
   "ஹூ" "`\""
   "ஹெ" "n`"
   "ஹே" "N`"
   "ஹை" "i`"
   "ஹொ" "n`h"
   "ஹோ" "N`h"
   "ஹௌ" "n`s"

   "க்ஷ்" "~;"

   "ஶ்ரீ" "="

   })

;;;;;;;;
;; தமிழ் <-> TSCII
;;;;;;;;

;; Unicode letter -> TSCII text (bytes shown as Latin-1 / CP1252 characters).
(def tscii-map
  {"அ" "«"
   "ஆ"
"¬" 688 | "இ" "­" 689 | "ஈ" "®" 690 | "உ" "¯" 691 | "ஊ" "°" 692 | "எ" "±" 693 | "ஏ" "²" 694 | "ஐ" "³" 695 | "ஒ" "´" 696 | "ஓ" "µ" 697 | "ஔ" "¶" 698 | "ஃ" "∙" 699 | "க்" "ì" 700 | "க" "¸" 701 | "கா" "¸¡" 702 | "கி" "¸¢" 703 | "கீ" "¸£" 704 | "கு" "Ì" 705 | "கூ" "Ü" 706 | "கெ" "¦¸" 707 | "கே" "§¸" 708 | "கை" "¨¸" 709 | "கொ" "¦¸¡" 710 | "கோ" "§¸¡" 711 | "கௌ" "¦¸ª" 712 | "ங்" "í" 713 | "ங" "¹" 714 | "ஙா" "¹¡" 715 | "ஙி" "¹¢" 716 | "ஙீ" "¹£" 717 | "ஙு" "™" 718 | "ஙூ" "›" 719 | "ஙெ" "¦¹" 720 | "ஙே" "§¹" 721 | "ஙை" "¨¹" 722 | "ஙொ" "¦¹¡" 723 | "ஙோ" "§¹¡" 724 | "ஙௌ" "¦¹ª" 725 | "ச்" "î" 726 | "ச" "º" 727 | "சா" "º¡" 728 | "சி" "º¢" 729 | "சீ" "º£" 730 | "சு" "Í" 731 | "சூ" "Ý" 732 | "செ" "¦º" 733 | "சே" "§º" 734 | "சை" "¨º" 735 | "சொ" "¦º¡" 736 | "சோ" "§º¡" 737 | "சௌ" "¦ºª" 738 | "ஞ்" "ï" 739 | "ஞ" "»" 740 | "ஞா" "»¡" 741 | "ஞி" "»¢" 742 | "ஞீ" "»£" 743 | "ஞு" "š" 744 | "ஞூ" "œ" 745 | "ஞெ" "¦»" 746 | "ஞே" "§»" 747 | "ஞை" "¨»" 748 | "ஞொ" "¦»¡" 749 | "ஞோ" "§»¡" 750 | "ஞௌ" "¦»ª" 751 | "ட்" "ð" 752 | "ட" "¼" 753 | "டா" "¼¡" 754 | "டி" "Ê" 755 | "டீ" "Ë" 756 | "டு" "Î" 757 | "டூ" "Þ" 758 | "டெ" "¦¼" 759 | "டே" "§¼" 760 | "டை" "¨¼" 761 | "டொ" "¦¼¡" 762 | "டோ" "§¼¡" 763 | "டௌ" "¦¼ª" 764 | "ண்" "ñ" 765 | "ண" "½" 766 | "ணா" "½¡" 767 | "ணி" "½¢" 768 | "ணீ" "½£" 769 | "ணு" "Ï" 770 | "ணூ" "ß" 771 | "ணெ" "¦½" 772 | "ணே" "§½" 773 | "ணை" "¨½" 774 | "ணொ" "¦½¡" 775 | "ணோ" "§½¡" 776 | "ணௌ" "¦½ª" 777 | "த்" "ò" 778 | "த" "¾" 779 | "தா" "¾¡" 780 | "தி" "¾¢" 781 | "தீ" "¾£" 782 | "து" "Ð" 783 | "தூ" "à" 784 | "தெ" "¦¾" 785 | "தே" "§¾" 786 | "தை" "¨¾" 787 | "தொ" "¦¾¡" 788 | "தோ" "§¾¡" 789 | "தௌ" "¦¾ª" 790 | "ந்" "ó" 791 | "ந" "¿" 792 | "நா" "¿¡" 793 | "நி" "¿¢" 794 | "நீ" "¿£" 795 | "நு" "Ñ" 796 | "நூ" "á" 797 | "நெ" "¦¿" 798 | "நே" "§¿" 799 | "நை" "¨¿" 800 | "நொ" "¦¿¡" 801 | "நோ" "§¿¡" 802 | "நௌ" "¦¿ª" 803 | "ப்" "ô" 804 | "ப" "À" 805 | "பா" "À¡" 806 | "பி" "À¢" 807 | "பீ" "À£" 808 | "பு" "Ò" 809 | "பூ" "â" 810 | "பெ" "¦À" 811 | "பே" "§À" 812 | "பை" "¨À" 813 | "பொ" "¦À¡" 814 | "போ" "§À¡" 815 | 
"பௌ" "¦Àª" 816 | "ம்" "õ" 817 | "ம" "Á" 818 | "மா" "Á¡" 819 | "மி" "Á¢" 820 | "மீ" "Á£" 821 | "மு" "Ó" 822 | "மூ" "ã" 823 | "மெ" "¦Á" 824 | "மே" "§Á" 825 | "மை" "¨Á" 826 | "மொ" "¦Á¡" 827 | "மோ" "§Á¡" 828 | "மௌ" "¦Áª" 829 | "ய்" "ö" 830 | "ய" "Â" 831 | "யா" "¡" 832 | "யி" "¢" 833 | "யீ" "£" 834 | "யு" "Ô" 835 | "யூ" "ä" 836 | "யெ" "¦Â" 837 | "யே" "§Â" 838 | "யை" "¨Â" 839 | "யொ" "¦Â¡" 840 | "யோ" "§Â¡" 841 | "யௌ" "¦Âª" 842 | "ர்" "÷" 843 | "ர" "Ã" 844 | "ரா" "á" 845 | "ரி" "â" 846 | "ரீ" "ã" 847 | "ரு" "Õ" 848 | "ரூ" "å" 849 | "ரெ" "¦Ã" 850 | "ரே" "§Ã" 851 | "ரை" "¨Ã" 852 | "ரொ" "¦Ã¡" 853 | "ரோ" "§Ã¡" 854 | "ரௌ" "¦Ãª" 855 | "ல்" "ø" 856 | "ல" "Ä" 857 | "லா" "Ä¡" 858 | "லி" "Ä¢" 859 | "லீ" "Ä£" 860 | "லு" "Ö" 861 | "லூ" "æ" 862 | "லெ" "¦Ä" 863 | "லே" "§Ä" 864 | "லை" "¨Ä" 865 | "லொ" "¦Ä¡" 866 | "லோ" "§Ä¡" 867 | "லௌ" "¦Äª" 868 | "வ்" "ù" 869 | "வ" "Å" 870 | "வா" "Å¡" 871 | "வி" "Å¢" 872 | "வீ" "Å£" 873 | "வு" "×" 874 | "வூ" "ç" 875 | "வெ" "¦Å" 876 | "வே" "§Å" 877 | "வை" "¨Å" 878 | "வொ" "¦Å¡" 879 | "வோ" "§Å¡" 880 | "வௌ" "¦Åª" 881 | "ழ்" "ú" 882 | "ழ" "Æ" 883 | "ழா" "Æ¡" 884 | "ழி" "Æ¢" 885 | "ழீ" "Æ£" 886 | "ழு" "Ø" 887 | "ழூ" "è" 888 | "ழெ" "¦Æ" 889 | "ழே" "§Æ" 890 | "ழை" "¨Æ" 891 | "ழொ" "¦Æ¡" 892 | "ழோ" "§Æ¡" 893 | "ழௌ" "¦Æª" 894 | "ள்" "û" 895 | "ள" "Ç" 896 | "ளா" "Ç¡" 897 | "ளி" "Ç¢" 898 | "ளீ" "Ç£" 899 | "ளு" "Ù" 900 | "ளூ" "é" 901 | "ளெ" "¦Ç" 902 | "ளே" "§Ç" 903 | "ளை" "¨Ç" 904 | "ளொ" "¦Ç¡" 905 | "ளோ" "§Ç¡" 906 | "ளௌ" "¦Çª" 907 | "ற்" "ü" 908 | "ற" "È" 909 | "றா" "È¡" 910 | "றி" "È¢" 911 | "றீ" "È£" 912 | "று" "Ú" 913 | "றூ" "ê" 914 | "றெ" "¦È" 915 | "றே" "§È" 916 | "றை" "¨È" 917 | "றொ" "¦È¡" 918 | "றோ" "§È¡" 919 | "றௌ" "¦Èª" 920 | "ன்" "ý" 921 | "ன" "É" 922 | "னா" "É¡" 923 | "னி" "É¢" 924 | "னீ" "É£" 925 | "னு" "Û" 926 | "னூ" "ë" 927 | "னெ" "¦É" 928 | "னே" "§É" 929 | "னை" "¨É" 930 | "னொ" "¦É¡" 931 | "னோ" "§É¡" 932 | "னௌ" "¦Éª"}) 933 | 934 | ;;;;;;;; 935 | ;; தமிழ் <-> Webulagam 936 | ;;;;;;;; 937 | 938 | (def webulagam-map 939 | {"அ" "m" 940 | "ஆ" "M" 941 | "இ" "ï" 
942 | "ஈ" "<" 943 | "உ" "c" 944 | "ஊ" "C" 945 | "எ" "v" 946 | "ஏ" "V" 947 | "ஐ" "I" 948 | "ஒ" "x" 949 | "ஓ" "X" 950 | "ஔ" "xs" 951 | "ஃ" "~" 952 | "க்" "¡" 953 | "க" "f" 954 | "கா" "fh" 955 | "கி" "»" 956 | "கீ" "Ñ" 957 | "கு" "F" 958 | "கூ" "T" 959 | "கெ" "bf" 960 | "கே" "nf" 961 | "கை" "if" 962 | "கொ" "bfh" 963 | "கோ" "nfh" 964 | "கௌ" "bfs" 965 | "ங்" "§" 966 | "ங" "‡" 967 | "ஙா" "‡h" 968 | "ஙி" "À" 969 | "ஙீ" "†" 970 | "ஙு" "¼" 971 | "ஙூ" "½" 972 | "ஙெ" "b‡" 973 | "ஙே" "n‡" 974 | "ஙை" "i‡" 975 | "ஙொ" "b‡h" 976 | "ஙோ" "n‡h" 977 | "ஙௌ" "b‡s" 978 | "ச்" "¢" 979 | "ச" "r" 980 | "சா" "rh" 981 | "சி" "á" 982 | "சீ" "Ó" 983 | "சு" "R" 984 | "சூ" "N" 985 | "செ" "br" 986 | "சே" "nr" 987 | "சை" "ir" 988 | "சொ" "brh" 989 | "சோ" "nrh" 990 | "சௌ" "brs" 991 | "ஞ்" "Š" 992 | "ஞ" "P" 993 | "ஞா" "Ph" 994 | "ஞி" "Á" 995 | "ஞீ" "Ø" 996 | "ஞு" "|" 997 | "ஞூ" "ú" 998 | "ஞெ" "bP" 999 | "ஞே" "nP" 1000 | "ஞை" "iP" 1001 | "ஞொ" "bPh" 1002 | "ஞோ" "nPh" 1003 | "ஞௌ" "bPs" 1004 | "ட்" "£" 1005 | "ட" "l" 1006 | "டா" "lh" 1007 | "டி" "o" 1008 | "டீ" "O" 1009 | "டு" "L" 1010 | "டூ" "^" 1011 | "டெ" "bl" 1012 | "டே" "nl" 1013 | "டை" "il" 1014 | "டொ" "blh" 1015 | "டோ" "nlh" 1016 | "டௌ" "bls" 1017 | "ண்" "©" 1018 | "ண" "z" 1019 | "ணா" "zh" 1020 | "ணி" "Â" 1021 | "ணீ" "Ù" 1022 | "ணு" "Q" 1023 | "ணூ" "û" 1024 | "ணெ" "bz" 1025 | "ணே" "nz" 1026 | "ணை" "iz" 1027 | "ணொ" "bzh" 1028 | "ணோ" "nzh" 1029 | "ணௌ" "bzs" 1030 | "த்" "¤" 1031 | "த" "j" 1032 | "தா" "jh" 1033 | "தி" "â" 1034 | "தீ" "Ô" 1035 | "து" "J" 1036 | "தூ" "ö" 1037 | "தெ" "bj" 1038 | "தே" "nj" 1039 | "தை" "ij" 1040 | "தொ" "bjh" 1041 | "தோ" "njh" 1042 | "தௌ" "bjs" 1043 | "ந்" "ª" 1044 | "ந" "e" 1045 | "நா" "eh" 1046 | "நி" "Ã" 1047 | "நீ" "Ú" 1048 | "நு" "E" 1049 | "நூ" "ü" 1050 | "நெ" "be" 1051 | "நே" "ne" 1052 | "நை" "ie" 1053 | "நொ" "beh" 1054 | "நோ" "neh" 1055 | "நௌ" "bes" 1056 | "ப்" "¥" 1057 | "ப" "g" 1058 | "பா" "gh" 1059 | "பி" "ã" 1060 | "பீ" "Õ" 1061 | "பு" "ò" 1062 | "பூ" "ó" 1063 | "பெ" "bg" 1064 | "பே" "ng" 1065 | "பை" "ig" 1066 | 
"பொ" "bgh" 1067 | "போ" "ngh" 1068 | "பௌ" "bgs" 1069 | "ம்" "«" 1070 | "ம" "k" 1071 | "மா" "kh" 1072 | "மி" "Ä" 1073 | "மீ" "Û" 1074 | "மு" "K" 1075 | "மூ" "_" 1076 | "மெ" "bk" 1077 | "மே" "nk" 1078 | "மை" "ik" 1079 | "மொ" "bkh" 1080 | "மோ" "nkh" 1081 | "மௌ" "bks" 1082 | "ய்" "Œ" 1083 | "ய" "a" 1084 | "யா" "ah" 1085 | "யி" "Æ" 1086 | "யீ" "p" 1087 | "யு" "í" 1088 | "யூ" "ô" 1089 | "யெ" "ba" 1090 | "யே" "na" 1091 | "யை" "ia" 1092 | "யொ" "bah" 1093 | "யோ" "nah" 1094 | "யௌ" "bas" 1095 | "ர்" "®" 1096 | "ர" "u" 1097 | "ரா" "uh" 1098 | "ரி" "Ç" 1099 | "ரீ" "ß" 1100 | "ரு" "U" 1101 | "ரூ" "%" 1102 | "ரெ" "bu" 1103 | "ரே" "nu" 1104 | "ரை" "iu" 1105 | "ரொ" "buh" 1106 | "ரோ" "nuh" 1107 | "ரௌ" "bus" 1108 | "ல்" "š" 1109 | "ல" "y" 1110 | "லா" "yh" 1111 | "லி" "È" 1112 | "லீ" "ä" 1113 | "லு" "Y" 1114 | "லூ" "ÿ" 1115 | "லெ" "by" 1116 | "லே" "ny" 1117 | "லை" "iy" 1118 | "லொ" "byh" 1119 | "லோ" "nyh" 1120 | "லௌ" "bys" 1121 | "வ்" "›" 1122 | "வ" "t" 1123 | "வா" "th" 1124 | "வி" "É" 1125 | "வீ" "å" 1126 | "வு" "î" 1127 | "வூ" "ñ" 1128 | "வெ" "bt" 1129 | "வே" "nt" 1130 | "வை" "it" 1131 | "வொ" "bth" 1132 | "வோ" "nth" 1133 | "வௌ" "bts" 1134 | "ழ்" "œ" 1135 | "ழ" "H" 1136 | "ழா" "Hh" 1137 | "ழி" "Ê" 1138 | "ழீ" "æ" 1139 | "ழு" "G" 1140 | "ழூ" ">" 1141 | "ழெ" "bH" 1142 | "ழே" "nH" 1143 | "ழை" "iH" 1144 | "ழொ" "bHh" 1145 | "ழோ" "nHh" 1146 | "ழௌ" "bHs" 1147 | "ள்" "Ÿ" 1148 | "ள" "s" 1149 | "ளா" "sh" 1150 | "ளி" "Ë" 1151 | "ளீ" "ç" 1152 | "ளு" "S" 1153 | "ளூ" "q" 1154 | "ளெ" "bs" 1155 | "ளே" "ns" 1156 | "ளை" "is" 1157 | "ளொ" "bsh" 1158 | "ளோ" "nsh" 1159 | "ளௌ" "bss" 1160 | "ற்" "‰" 1161 | "ற" "w" 1162 | "றா" "wh" 1163 | "றி" "¿" 1164 | "றீ" "Ö" 1165 | "று" "W" 1166 | "றூ" "ù" 1167 | "றெ" "bw" 1168 | "றே" "nw" 1169 | "றை" "iw" 1170 | "றொ" "bwh" 1171 | "றோ" "nwh" 1172 | "றௌ" "bws" 1173 | "ன்" "‹" 1174 | "ன" "d" 1175 | "னா" "dh" 1176 | "னி" "Å" 1177 | "னீ" "Ü" 1178 | "னு" "D" 1179 | "னூ" "}" 1180 | "னெ" "bd" 1181 | "னே" "nd" 1182 | "னை" "id" 1183 | "னொ" "bdh" 1184 | "னோ" "ndh" 1185 | "னௌ" 
"bds"}) 1186 | 1187 | 1188 | ;;;;;;;; 1189 | ;; all character sets togeter 1190 | ;;;;;;;; 1191 | 1192 | (defn fill-in-bamini-to-unic-map 1193 | "Add in the entries in the bamini -> unicode conversion map 1194 | that represents the normal way that ர் ரி ரீ get written by hand" 1195 | [to-unic-map] 1196 | (let [;; c-with-அ-letters (map second fmt/c-cv-letters) 1197 | letters fmt/letters 1198 | entries (for [letter (flatten letters) 1199 | r-letter ["ர்" "ரி" "ரீ"]] 1200 | (let [new-val (str letter r-letter) 1201 | new-key (str (get bamini-map letter) 1202 | (get {"ர்" "h;" 1203 | "ரி" "hp" 1204 | "ரீ" "hP"} r-letter))] 1205 | [new-key new-val])) 1206 | extra-entries-map (into {} entries)] 1207 | (merge to-unic-map extra-entries-map))) 1208 | 1209 | (defn fill-charset-map 1210 | [{:keys [from-unic-map to-unic-map] :as m}] 1211 | (let [from-unic-trie (fmt/make-trie from-unic-map) 1212 | to-unic-trie (fmt/make-trie to-unic-map) 1213 | from-unic (fn [s] 1214 | (->> (fmt/str->elems from-unic-trie s) 1215 | (apply str))) 1216 | to-unic (fn [s] 1217 | (->> (fmt/str->elems to-unic-trie s) 1218 | (apply str)))] 1219 | {:to-unicode to-unic 1220 | :from-unicode from-unic})) 1221 | 1222 | (def init-charsets {:tab {:from-unic-map tab-map 1223 | :to-unic-map (set/map-invert tab-map)} 1224 | :bamini {:from-unic-map bamini-map 1225 | :to-unic-map (-> (set/map-invert bamini-map) 1226 | fill-in-bamini-to-unic-map 1227 | (assoc ">" ",") 1228 | (assoc "xsp" "ஒளி") 1229 | (assoc "R+" "சூ") 1230 | (assoc "@" ";"))} 1231 | :tscii {:from-unic-map tscii-map 1232 | :to-unic-map (set/map-invert tscii-map)} 1233 | :webulagam {:from-unic-map webulagam-map 1234 | :to-unic-map (set/map-invert webulagam-map)}}) 1235 | 1236 | (defn mmap-vals 1237 | "given a map and a fn, map the fn over the maps vals keeping keys same" 1238 | [f m] 1239 | (letfn [(reduce-fn [curr-map kv] 1240 | (assoc curr-map (first kv) (f (second kv))))] 1241 | (reduce reduce-fn {} m))) 1242 | 1243 | (def charsets (-> (mmap-vals 
fill-charset-map init-charsets) 1244 | ;; (ftor/fmap fill-charset-map init-charsets) 1245 | ;;(reduce-kv #(%1 %2 (fill-charset-map %3)) {} init-charsets) 1246 | (assoc :romanized {:to-unic romanized->தமிழ் 1247 | :from-unic தமிழ்->romanized}))) 1248 | 1249 | ;;;;;;;; 1250 | ;; named fns for convert fns 1251 | ;;;;;;;; 1252 | 1253 | ;; TAB 1254 | 1255 | (def ^{:doc "convert தமிழ் text from unicode to TAB format"} 1256 | தமிழ்->tab (get-in charsets [:tab :from-unicode])) 1257 | 1258 | (def ^{:doc "convert தமிழ் text from TAB to unicode format"} 1259 | tab->தமிழ் (get-in charsets [:tab :to-unicode])) 1260 | 1261 | ;; Bamini 1262 | 1263 | (def ^{:doc "convert தமிழ் text from unicode to Bamini format"} 1264 | தமிழ்->bamini (get-in charsets [:bamini :from-unicode])) 1265 | 1266 | (def ^{:doc "convert தமிழ் text from Bamini to unicode format"} 1267 | bamini->தமிழ் (get-in charsets [:bamini :to-unicode])) 1268 | 1269 | ;; TSCII 1270 | 1271 | (def ^{:doc "convert தமிழ் text from unicode to TSCII format"} 1272 | தமிழ்->tscii (get-in charsets [:tscii :from-unicode])) 1273 | 1274 | (def ^{:doc "convert தமிழ் text from TSCII to unicode format"} 1275 | tscii->தமிழ் (get-in charsets [:tscii :to-unicode])) 1276 | 1277 | ;; Webulagam 1278 | 1279 | (def ^{:doc "convert தமிழ் text from unicode to Webulagam format"} 1280 | தமிழ்->webulagam (get-in charsets [:webulagam :from-unicode])) 1281 | 1282 | (def ^{:doc "convert தமிழ் text from Webulagam to unicode format"} 1283 | webulagam->தமிழ் (get-in charsets [:webulagam :to-unicode])) 1284 | 1285 | ;;;;;;;; 1286 | ;; main 1287 | ;;;;;;;; 1288 | 1289 | (def ^{:doc "version of the Mac OS X input method (keyboard) plugin"} 1290 | OSX-INPUT-METHOD-VER "1.0") 1291 | 1292 | (defn -main 1293 | "generates the output necessary for a Mac OS X 10.x input method (keyboard) plugin" 1294 | [& args] 1295 | (let [vowels (remove #(= % "ஃ") fmt/vowels) 1296 | phon-kv-parts-by-vowel (group-by 1297 | #(boolean (some #{(second %)} vowels)) 1298 | 
romanized-தமிழ்-phoneme-map) 1299 | ஃ-map {"q" "ஃ"} 1300 | vowel-map (into {} (get phon-kv-parts-by-vowel true)) 1301 | cons-map (into {} (get phon-kv-parts-by-vowel false)) 1302 | cv-map (into {} (for [[eng-c tha-c] cons-map 1303 | [eng-v tha-v] vowel-map] 1304 | [(str eng-c eng-v) (fmt/phonemes->str [tha-c tha-v])])) 1305 | letters-map (merge ஃ-map vowel-map cons-map cv-map) 1306 | letters-lines (map #(str (first %) " " (second %)) letters-map) 1307 | input-chars-str (->> letters-map 1308 | keys 1309 | (map seq) 1310 | (apply concat) 1311 | distinct 1312 | (apply str)) 1313 | max-input-code (->> letters-map 1314 | keys 1315 | (map count) 1316 | (apply max)) 1317 | lines1 ["METHOD: TABLE" 1318 | "ENCODE: Unicode" 1319 | "PROMPT: கலை" 1320 | "DELIMITER ," 1321 | (str "VERSION " OSX-INPUT-METHOD-VER) 1322 | (str "MAXINPUTCODE " max-input-code) 1323 | (str "VALIDINPUTKEY " input-chars-str) 1324 | "BEGINCHARACTER" 1325 | ""] 1326 | lines2 ["" 1327 | "ENDCHARACTER"] 1328 | all-lines (concat lines1 letters-lines lines2)] 1329 | (dorun (map println all-lines)) 1330 | 1331 | ;; (println "hello") 1332 | )) 1333 | -------------------------------------------------------------------------------- /emacs/clojure-mode.el: -------------------------------------------------------------------------------- 1 | ;;; clojure-mode.el --- Major mode for Clojure code -*- lexical-binding: t; -*- 2 | 3 | ;; Copyright © 2007-2014 Jeffrey Chu, Lennart Staflin, Phil Hagelberg 4 | ;; Copyright © 2013-2014 Bozhidar Batsov 5 | ;; 6 | ;; Authors: Jeffrey Chu 7 | ;; Lennart Staflin 8 | ;; Phil Hagelberg 9 | ;; Bozhidar Batsov 10 | ;; URL: http://github.com/clojure-emacs/clojure-mode 11 | ;; Keywords: languages clojure clojurescript lisp 12 | ;; Version: 3.0.0 13 | ;; X-Original-Version: 3.0.0 14 | ;; Package-Requires: ((emacs "24.1")) 15 | 16 | ;; This file is not part of GNU Emacs. 

;;; Commentary:

;; Provides font-lock, indentation, and navigation for the Clojure
;; programming language (http://clojure.org).

;; Using clojure-mode with paredit is highly recommended. Use paredit
;; as you would with any other minor mode; for instance:
;;
;; ;; require or autoload paredit-mode
;; (add-hook 'clojure-mode-hook 'paredit-mode)

;; See CIDER (http://github.com/clojure-emacs/cider) for
;; better interaction with subprocesses via nREPL.

;;; License:

;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License
;; as published by the Free Software Foundation; either version 3
;; of the License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;;; Code:


;;; Compatibility
(eval-and-compile
  ;; `setq-local' for Emacs 24.2 and below
  (unless (fboundp 'setq-local)
    (defmacro setq-local (var val)
      "Set variable VAR to value VAL in current buffer."
      `(set (make-local-variable ',var) ,val))))

;; Declared only to silence the byte-compiler; these are defined by other
;; libraries.
(eval-when-compile
  (defvar calculate-lisp-indent-last-sexp)
  (defvar font-lock-beg)
  (defvar font-lock-end)
  (defvar paredit-space-for-delimiter-predicates)
  (defvar paredit-version)
  (defvar paredit-mode))

;; `cl' supplies `every' and `destructuring-bind' used below.
(require 'cl)
(require 'inf-lisp)
(require 'imenu)

(declare-function lisp-fill-paragraph "lisp-mode" (&optional justify))

(defgroup clojure nil
  "Major mode for editing Clojure code."
  :prefix "clojure-"
  :group 'languages
  :link '(url-link :tag "Github" "https://github.com/clojure-emacs/clojure-mode")
  :link '(emacs-commentary-link :tag "Commentary" "clojure-mode"))

(defface clojure-keyword-face
  '((t (:inherit font-lock-constant-face)))
  "Face used to font-lock Clojure keywords (:something)."
  :group 'clojure
  :package-version '(clojure-mode . "3.0.0"))

(defface clojure-character-face
  '((t (:inherit font-lock-string-face)))
  "Face used to font-lock Clojure character literals."
  :group 'clojure
  :package-version '(clojure-mode . "3.0.0"))

(defface clojure-interop-method-face
  '((t (:inherit font-lock-preprocessor-face)))
  "Face used to font-lock interop method names (camelCase)."
  :group 'clojure
  :package-version '(clojure-mode . "3.0.0"))

(defcustom clojure-load-command "(clojure.core/load-file \"%s\")\n"
  "Format-string for building a Clojure expression to load a file.
This format string should use `%s' to substitute a file name and
should result in a Clojure expression that will command the
inferior Clojure to load that file."
  :type 'string
  :group 'clojure
  :safe 'stringp)

(defcustom clojure-inf-lisp-command "lein repl"
  "The command used by `inferior-lisp-program'."
  :type 'string
  :group 'clojure
  :safe 'stringp)

(defcustom clojure-defun-style-default-indent nil
  "When non-nil, use default indenting for functions and macros.
Otherwise check `define-clojure-indent' and `put-clojure-indent'."
  :type 'boolean
  :group 'clojure
  :safe 'booleanp)

(defcustom clojure-use-backtracking-indent t
  "When non-nil, enable context sensitive indentation."
  :type 'boolean
  :group 'clojure
  :safe 'booleanp)

(defcustom clojure-max-backtracking 3
  "Maximum amount to backtrack up a list to check for context."
  :type 'integer
  :group 'clojure
  :safe 'integerp)

(defcustom clojure-docstring-fill-column fill-column
  "Value of `fill-column' to use when filling a docstring."
  :type 'integer
  :group 'clojure
  :safe 'integerp)

(defcustom clojure-docstring-fill-prefix-width 2
  "Width of `fill-prefix' when filling a docstring.
The default value conforms with the de facto convention for
Clojure docstrings, aligning the second line with the opening
double quotes on the third column."
  :type 'integer
  :group 'clojure
  :safe 'integerp)

(defcustom clojure-omit-space-between-tag-and-delimiters '(?\[ ?\{)
  "Allowed opening delimiter characters after a reader literal tag.
For example, \[ is allowed in :db/id[:db.part/user]."
  :type '(set (const :tag "[" ?\[)
              (const :tag "{" ?\{)
              (const :tag "(" ?\()
              (const :tag "\"" ?\"))
  :group 'clojure
  :safe (lambda (value)
          (and (listp value)
               (every 'characterp value))))

(defvar clojure-mode-map
  (let ((map (make-sparse-keymap)))
    (set-keymap-parent map lisp-mode-shared-map)
    (define-key map (kbd "C-M-x") 'lisp-eval-defun)
    (define-key map (kbd "C-x C-e") 'lisp-eval-last-sexp)
    (define-key map (kbd "C-c C-e") 'lisp-eval-last-sexp)
    (define-key map (kbd "C-c C-l") 'clojure-load-file)
    (define-key map (kbd "C-c C-r") 'lisp-eval-region)
    (define-key map (kbd "C-c C-z") 'clojure-display-inferior-lisp-buffer)
    (define-key map (kbd "C-:") 'clojure-toggle-keyword-string)
    (easy-menu-define clojure-mode-menu map "Clojure Mode Menu"
      '("Clojure"
        ["Eval Top-Level Expression" lisp-eval-defun]
        ["Eval Last Expression" lisp-eval-last-sexp]
        ["Eval Region" lisp-eval-region]
        "--"
        ["Run Inferior Lisp" clojure-display-inferior-lisp-buffer]
        ["Display Inferior Lisp Buffer" clojure-display-inferior-lisp-buffer]
        ["Load File" clojure-load-file]
        "--"
        ["Toggle between string & keyword" clojure-toggle-keyword-string]
        ["Mark string" clojure-mark-string]
        ["Insert ns form at point" clojure-insert-ns-form-at-point]
        ["Insert ns form at beginning" clojure-insert-ns-form]
        ["Update ns form" clojure-update-ns]
        "--"
        ["Version" clojure-mode-display-version]))
    map)
  "Keymap for Clojure mode.
Inherits from `lisp-mode-shared-map'.")

(defvar clojure-mode-syntax-table
  (let ((table (copy-syntax-table emacs-lisp-mode-syntax-table)))
    (modify-syntax-entry ?~ "' " table)
    (modify-syntax-entry ?\{ "(}" table)
    (modify-syntax-entry ?\} "){" table)
    (modify-syntax-entry ?\[ "(]" table)
    (modify-syntax-entry ?\] ")[" table)
    (modify-syntax-entry ?^ "'" table)
    ;; Make hash a usual word character
    (modify-syntax-entry ?# "_ p" table)
    table))

(defvar clojure-prev-l/c-dir/file nil
  "Record last directory and file used in loading or compiling.
This holds a cons cell of the form `(DIRECTORY . FILE)'
describing the last `clojure-load-file' or `clojure-compile-file' command.")

(defconst clojure-mode-version "3.0.0"
  "The current version of `clojure-mode'.")

(defconst clojure--prettify-symbols-alist
  '(("fn" . ?λ)))

(defun clojure-mode-display-version ()
  "Display the current `clojure-mode-version' in the minibuffer."
  (interactive)
  (message "clojure-mode (version %s)" clojure-mode-version))

(defun clojure-space-for-delimiter-p (endp delim)
  "Prevent paredit from inserting useless spaces.
See `paredit-space-for-delimiter-predicates' for the meaning of
ENDP and DELIM."
  (if (derived-mode-p 'clojure-mode)
      (save-excursion
        (backward-char)
        (if (and (or (char-equal delim ?\()
                     (char-equal delim ?\")
                     (char-equal delim ?{))
                 (not endp))
            ;; Opening delimiter right after `#': only ask for a space when
            ;; the `#' itself follows a word or symbol character.
            (if (char-equal (char-after) ?#)
                (and (not (bobp))
                     (or (char-equal ?w (char-syntax (char-before)))
                         (char-equal ?_ (char-syntax (char-before)))))
              t)
          t))
    t))

(defun clojure-no-space-after-tag (endp delimiter)
  "Prevent inserting a space after a reader-literal tag?

When a reader-literal tag is followed by an opening delimiter
listed in `clojure-omit-space-between-tag-and-delimiters', this
function returns t.

This allows you to write things like #db/id[:db.part/user]
without inserting a space between the tag and the opening
bracket.

See `paredit-space-for-delimiter-predicates' for the meaning of
ENDP and DELIMITER."
  (if endp
      t
    (or (not (member delimiter clojure-omit-space-between-tag-and-delimiters))
        (save-excursion
          (let ((orig-point (point)))
            (not (and (re-search-backward
                       "#\\([a-zA-Z0-9._-]+/\\)?[a-zA-Z0-9._-]+"
                       (line-beginning-position)
                       t)
                      (= orig-point (match-end 0)))))))))

(defun clojure-paredit-setup ()
  "A bit of code to make `paredit-mode' play nice with `clojure-mode'."
  (when (>= paredit-version 21)
    (define-key clojure-mode-map "{" 'paredit-open-curly)
    (define-key clojure-mode-map "}" 'paredit-close-curly)
    (add-to-list 'paredit-space-for-delimiter-predicates
                 'clojure-space-for-delimiter-p)
    (add-to-list 'paredit-space-for-delimiter-predicates
                 'clojure-no-space-after-tag)))

;;;###autoload
(define-derived-mode clojure-mode prog-mode "Clojure"
  "Major mode for editing Clojure code.

\\{clojure-mode-map}"
  (setq-local imenu-create-index-function
              (lambda ()
                (imenu--generic-function '((nil clojure-match-next-def 0)))))
  (setq-local indent-tabs-mode nil)
  (lisp-mode-variables nil)
  ;; Set these buffer-locally, like every other variable here, so enabling the
  ;; mode can never overwrite a global fill function used by other buffers.
  (setq-local fill-paragraph-function 'clojure-fill-paragraph)
  (setq-local adaptive-fill-function 'clojure-adaptive-fill-function)
  (setq-local normal-auto-fill-function 'clojure-auto-fill-function)
  (setq-local comment-start-skip
              "\\(\\(^\\|[^\\\\\n]\\)\\(\\\\\\\\\\)*\\)\\(;+\\|#|\\) *")
  (setq-local indent-line-function 'clojure-indent-line)
  (setq-local lisp-indent-function 'clojure-indent-function)
  (setq-local lisp-doc-string-elt-property 'clojure-doc-string-elt)
  (setq-local inferior-lisp-program clojure-inf-lisp-command)
  (setq-local parse-sexp-ignore-comments t)
  (setq-local prettify-symbols-alist clojure--prettify-symbols-alist)
  (clojure-font-lock-setup)
  (setq-local open-paren-in-column-0-is-defun-start nil)
  (add-hook 'paredit-mode-hook 'clojure-paredit-setup))

(defsubst clojure-in-docstring-p ()
  "Check whether point is in a docstring.
The test looks at the face of the last character on the current line."
  (eq (get-text-property (1- (line-end-position)) 'face)
      'font-lock-doc-face))

(defsubst clojure-docstring-fill-prefix ()
  "The prefix string used by `clojure-fill-paragraph'.

It is simply `clojure-docstring-fill-prefix-width' number of spaces."
  (make-string clojure-docstring-fill-prefix-width ? ))

(defun clojure-adaptive-fill-function ()
  "Clojure adaptive fill function.
This only takes care of filling docstring correctly."
  (when (clojure-in-docstring-p)
    (clojure-docstring-fill-prefix)))

(defun clojure-fill-paragraph (&optional justify)
  "Like `fill-paragraph' but handle Clojure docstrings.
JUSTIFY is passed through to the underlying fill functions."
  (if (clojure-in-docstring-p)
      (let ((paragraph-start
             (concat paragraph-start
                     "\\|\\s-*\\([(;:\"[]\\|~@\\|`(\\|#'(\\)"))
            (paragraph-separate
             (concat paragraph-separate "\\|\\s-*\".*[,\\.]$"))
            (fill-column (or clojure-docstring-fill-column fill-column))
            (fill-prefix (clojure-docstring-fill-prefix)))
        (fill-paragraph justify))
    (let ((paragraph-start (concat paragraph-start
                                   "\\|\\s-*\\([(;:\"[]\\|`(\\|#'(\\)"))
          (paragraph-separate
           (concat paragraph-separate "\\|\\s-*\".*[,\\.[]$")))
      (or (fill-comment-paragraph justify)
          (fill-paragraph justify))
      ;; Always return `t'
      t)))

(defun clojure-auto-fill-function ()
  "Clojure auto-fill function."
  ;; Check if auto-filling is meaningful.
  (let ((fc (current-fill-column)))
    (when (and fc (> (current-column) fc))
      (let ((fill-column (if (clojure-in-docstring-p)
                             clojure-docstring-fill-column
                           fill-column))
            (fill-prefix (clojure-adaptive-fill-function)))
        (do-auto-fill)))))

(defun clojure-display-inferior-lisp-buffer ()
  "Display a buffer bound to `inferior-lisp-buffer'.
If that buffer does not exist, start `inferior-lisp-program' instead."
  (interactive)
  (if (and inferior-lisp-buffer (get-buffer inferior-lisp-buffer))
      (pop-to-buffer inferior-lisp-buffer t)
    (run-lisp inferior-lisp-program)))

(defun clojure-load-file (file-name)
  "Load a Clojure file FILE-NAME into the inferior Clojure process."
  (interactive (comint-get-source "Load Clojure file: "
                                  clojure-prev-l/c-dir/file
                                  '(clojure-mode) t))
  (comint-check-source file-name) ; Check to see if buffer needs saved.
  (setq clojure-prev-l/c-dir/file (cons (file-name-directory file-name)
                                        (file-name-nondirectory file-name)))
  (comint-send-string (inferior-lisp-proc)
                      (format clojure-load-command file-name))
  (switch-to-lisp t))



(defun clojure-match-next-def ()
  "Scans the buffer backwards for the next top-level definition.
Called by `imenu--generic-function'."
  (when (re-search-backward "^(def\\sw*" nil t)
    (save-excursion
      (let (found?
            (start (point)))
        (down-list)
        (forward-sexp)
        ;; Step over metadata (sexps starting with `^') until the defined
        ;; name is reached, then report its bounds as the match data.
        (while (not found?)
          (forward-sexp)
          (or (if (char-equal ?[ (char-after (point)))
                  (backward-sexp))
              (if (char-equal ?) (char-after (point)))
                  (backward-sexp)))
          (destructuring-bind (def-beg . def-end) (bounds-of-thing-at-point 'sexp)
            (if (char-equal ?^ (char-after def-beg))
                (progn (forward-sexp) (backward-sexp))
              (setq found? t)
              (set-match-data (list def-beg def-end)))))
        (goto-char start)))))

(defconst clojure-font-lock-keywords
  (eval-when-compile
    `(;; Top-level variable definition
      (,(concat "(\\(?:clojure.core/\\)?\\("
                (regexp-opt '("def" "defonce"))
                ;; variable declarations
                "\\)\\>"
                ;; Any whitespace
                "[ \r\n\t]*"
                ;; Possibly type or metadata
                "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*"
                "\\(\\sw+\\)?")
       (1 font-lock-keyword-face)
       (2 font-lock-variable-name-face nil t))
      ;; Type definition
      (,(concat "(\\(?:clojure.core/\\)?\\("
                (regexp-opt '("defstruct" "deftype" "defprotocol"
                              "defrecord"))
                ;; type declarations
                "\\)\\>"
                ;; Any whitespace
                "[ \r\n\t]*"
                ;; Possibly type or metadata
                "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*"
                "\\(\\sw+\\)?")
       (1 font-lock-keyword-face)
       (2 font-lock-type-face nil t))

      ;; clj-thamil
      ;; Function definition (anything that starts with
வரையறு and is not 417 | ;; listed above) 418 | (,(concat "(\\(?:[a-z\.-]+/\\)?\\(வரையறு\\sw*\\)" 419 | ;; Function declarations 420 | "\\>" 421 | ;; Any whitespace 422 | "[ \r\n\t]*" 423 | ;; Possibly type or metadata 424 | "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*" 425 | "\\(\\sw+\\)?") 426 | (1 font-lock-keyword-face) 427 | (2 font-lock-function-name-face nil t)) 428 | 429 | ;; Function definition (anything that starts with def and is not 430 | ;; listed above) 431 | (,(concat "(\\(?:[a-z\.-]+/\\)?\\(def\[a-z\-\]*-?\\)" 432 | ;; Function declarations 433 | "\\>" 434 | ;; Any whitespace 435 | "[ \r\n\t]*" 436 | ;; Possibly type or metadata 437 | "\\(?:#?^\\(?:{[^}]*}\\|\\sw+\\)[ \r\n\t]*\\)*" 438 | "\\(\\sw+\\)?") 439 | (1 font-lock-keyword-face) 440 | (2 font-lock-function-name-face nil t)) 441 | ;; (fn name? args ...) 442 | (,(concat "(\\(?:clojure.core/\\)?\\(fn\\)[ \t]+" 443 | ;; Possibly type 444 | "\\(?:#?^\\sw+[ \t]*\\)?" 445 | ;; Possibly name 446 | "\\(\\sw+\\)?" ) 447 | (1 font-lock-keyword-face) 448 | (2 font-lock-function-name-face nil t)) 449 | ;; lambda arguments - %, %1, %2, etc 450 | ("\\<%[1-9]?" (0 font-lock-variable-name-face)) 451 | ;; Special forms & control structures 452 | (,(concat 453 | "(\\(?:clojure.core/\\)?" 454 | (regexp-opt 455 | '("let" "letfn" "do" 456 | "case" "cond" "cond->" "cond->>" "condp" 457 | "for" "loop" "recur" 458 | "when" "when-not" "when-let" "when-first" "when-some" 459 | "if" "if-let" "if-not" "if-some" 460 | "." ".." "->" "->>" "doto" 461 | "and" "or" 462 | "dosync" "doseq" "dotimes" "dorun" "doall" 463 | "load" "import" "unimport" "ns" "in-ns" "refer" 464 | "try" "catch" "finally" "throw" 465 | "with-open" "with-local-vars" "binding" 466 | "gen-class" "gen-and-load-class" "gen-and-save-class" 467 | "handler-case" "handle" "var" "declare") t) 468 | "\\>") 469 | 1 font-lock-keyword-face) 470 | 471 | ;; clj-thamil 472 | ;; Special forms & control structures 473 | (,(concat 474 | "(\\(?:clojure.core/\\)?" 
475 | (regexp-opt 476 | '("வைத்துக்கொள்" "letfn" "செய்" 477 | "case" "பொறுத்து" "cond->" "cond->>" "condp" 478 | "ஒவ்வொன்றுக்கும்" "சுற்று" "recur" 479 | "என்னும்போது" "இல்லென்னும்போது" "when-let" "when-first" "when-some" 480 | "எனில்" "if-let" "இல்லெனில்" "if-some" 481 | "." ".." "->" "->>" "doto" 482 | "மற்றும்" "அல்லது" 483 | "dosync" "செய்வரிசை" "dotimes" "dorun" "செய்யெல்லாம்" 484 | "load" "import" "unimport" "ns" "in-ns" "refer" 485 | "try" "catch" "finally" "throw" 486 | "with-open" "with-local-vars" "binding" 487 | "gen-class" "gen-and-load-class" "gen-and-save-class" 488 | "handler-case" "handle" "var" "declare") t) 489 | "\\>") 490 | 1 font-lock-keyword-face) 491 | 492 | 493 | (,(concat 494 | "\\<" 495 | (regexp-opt 496 | '("*1" "*2" "*3" "*agent*" 497 | "*allow-unresolved-vars*" "*assert*" "*clojure-version*" 498 | "*command-line-args*" "*compile-files*" 499 | "*compile-path*" "*e" "*err*" "*file*" "*flush-on-newline*" 500 | "*in*" "*macro-meta*" "*math-context*" "*ns*" "*out*" 501 | "*print-dup*" "*print-length*" "*print-level*" 502 | "*print-meta*" "*print-readably*" 503 | "*read-eval*" "*source-path*" 504 | "*use-context-classloader*" "*warn-on-reflection*") 505 | t) 506 | "\\>") 507 | 0 font-lock-builtin-face) 508 | ;; Dynamic variables - *something* or @*something* 509 | ("\\<@?\\(\\*[a-z-]*\\*\\)\\>" 1 font-lock-variable-name-face) 510 | ;; Global constants - nil, true, false 511 | (,(concat 512 | "\\<" 513 | (regexp-opt 514 | '("true" "false" "nil") t) 515 | "\\>") 516 | 0 font-lock-constant-face) 517 | ;; Character literals - \1, \a, \newline, \u0000 518 | ;; FIXME: handle properly some punctuation characters (like commas and semicolons) 519 | ("\\\\\\([[:punct:]]\\|[a-z0-9]+\\)\\>" 0 'clojure-character-face) 520 | ;; Constant values (keywords), including as metadata e.g. 
^:static 521 | ("\\<^?\\(:\\(\\sw\\|\\s_\\)+\\(\\>\\|\\_>\\)\\)" 1 'clojure-keyword-face) 522 | ;; cljx annotations (#+clj and #+cljs) 523 | ("#\\+cljs?\\>" 0 font-lock-preprocessor-face) 524 | ;; Java interop highlighting 525 | ;; CONST SOME_CONST (optionally prefixed by /) 526 | ("\\(?:\\<\\|/\\)\\([A-Z]+\\|\\([A-Z]+_[A-Z1-9_]+\\)\\)\\>" 1 font-lock-constant-face) 527 | ;; .foo .barBaz .qux01 .-flibble .-flibbleWobble 528 | ("\\<\\.-?[a-z][a-zA-Z0-9]*\\>" 0 'clojure-interop-method-face) 529 | ;; Foo Bar$Baz Qux_ World_OpenUDP Foo. Babylon15. 530 | ("\\(?:\\<\\|\\.\\|/\\|#?^\\)\\([A-Z][a-zA-Z0-9_]*[a-zA-Z0-9$_]+\\.?\\>\\)" 1 font-lock-type-face) 531 | ;; foo.bar.baz 532 | ("\\<^?\\([a-z][a-z0-9_-]+\\.\\([a-z][a-z0-9_-]*\\.?\\)+\\)" 1 font-lock-type-face) 533 | ;; (ns namespace) - special handling for single segment namespaces 534 | (,(concat "(\\<ns\\>[ \r\n\t]*" 535 | ;; Possibly metadata 536 | "\\(?:\\^?{[^}]+}[ \r\n\t]*\\)*" 537 | ;; namespace 538 | "\\([a-z0-9-]+\\)") 539 | (1 font-lock-type-face nil t)) 540 | ;; foo/ Foo/ @Foo/ 541 | ("\\<@?\\([a-zA-Z][a-z0-9_-]*\\)/" 1 font-lock-type-face) 542 | ;; fooBar 543 | ("\\(?:\\<\\|/\\)\\([a-z]+[A-Z]+[a-zA-Z0-9$]*\\>\\)" 1 'clojure-interop-method-face) 544 | ;; Highlight grouping constructs in regular expressions 545 | (clojure-font-lock-regexp-groups 546 | (1 'font-lock-regexp-grouping-construct prepend)))) 547 | "Default expressions to highlight in Clojure mode.") 548 | 549 | (defun clojure-font-lock-syntactic-face-function (state) 550 | (if (nth 3 state) 551 | ;; This might be a (doc)string or a |...| symbol. 552 | (let ((startpos (nth 8 state))) 553 | (if (eq (char-after startpos) ?|) 554 | ;; This is not a string, but a |...| symbol. 
555 | nil 556 | (let* ((listbeg (nth 1 state)) 557 | (firstsym (and listbeg 558 | (save-excursion 559 | (goto-char listbeg) 560 | (and (looking-at "([ \t\n]*\\(\\(\\sw\\|\\s_\\)+\\)") 561 | (match-string 1))))) 562 | (docelt (and firstsym 563 | (function-get (intern-soft firstsym) 564 | lisp-doc-string-elt-property)))) 565 | (if (and docelt 566 | ;; It's a string in a form that can have a docstring. 567 | ;; Check whether it's in docstring position. 568 | (save-excursion 569 | (when (functionp docelt) 570 | (goto-char (match-end 1)) 571 | (setq docelt (funcall docelt))) 572 | (goto-char listbeg) 573 | (forward-char 1) 574 | (condition-case nil 575 | (while (and (> docelt 0) (< (point) startpos) 576 | (progn (forward-sexp 1) t)) 577 | ;; ignore metadata and type hints 578 | (unless (looking-at "[ \n\t]*\\(\\^[A-Z:].+\\|\\^?{.+\\)") 579 | (setq docelt (1- docelt)))) 580 | (error nil)) 581 | (and (zerop docelt) (<= (point) startpos) 582 | (progn (forward-comment (point-max)) t) 583 | (= (point) (nth 8 state))))) 584 | font-lock-doc-face 585 | font-lock-string-face)))) 586 | font-lock-comment-face)) 587 | 588 | (defun clojure-font-lock-setup () 589 | "Configures font-lock for editing Clojure code." 590 | (setq-local font-lock-multiline t) 591 | (add-to-list 'font-lock-extend-region-functions 592 | 'clojure-font-lock-extend-region-def t) 593 | (setq font-lock-defaults 594 | '(clojure-font-lock-keywords ; keywords 595 | nil nil 596 | (("+-*/.<>=!?$%_&~^:@" . "w")) ; syntax alist 597 | nil 598 | (font-lock-mark-block-function . mark-defun) 599 | (font-lock-syntactic-face-function 600 | . clojure-font-lock-syntactic-face-function)))) 601 | 602 | (defun clojure-font-lock-def-at-point (point) 603 | "Range between the top-most def* and the fourth element after POINT. 604 | Note that this means that there is no guarantee of proper font 605 | locking in def* forms that are not at top level." 
606 | (goto-char point) 607 | (condition-case nil 608 | (beginning-of-defun) 609 | (error nil)) 610 | 611 | (let ((beg-def (point))) 612 | (when (and (not (= point beg-def)) 613 | (looking-at "(def")) 614 | (condition-case nil 615 | (progn 616 | ;; move forward as much as possible until failure (or success) 617 | (forward-char) 618 | (dotimes (_ 4) 619 | (forward-sexp))) 620 | (error nil)) 621 | (cons beg-def (point))))) 622 | 623 | (defun clojure-font-lock-extend-region-def () 624 | "Set region boundaries to include the first four elements of def* forms." 625 | (let ((changed nil)) 626 | (let ((def (clojure-font-lock-def-at-point font-lock-beg))) 627 | (when def 628 | (destructuring-bind (def-beg . def-end) def 629 | (when (and (< def-beg font-lock-beg) 630 | (< font-lock-beg def-end)) 631 | (setq font-lock-beg def-beg 632 | changed t))))) 633 | (let ((def (clojure-font-lock-def-at-point font-lock-end))) 634 | (when def 635 | (destructuring-bind (def-beg . def-end) def 636 | (when (and (< def-beg font-lock-end) 637 | (< font-lock-end def-end)) 638 | (setq font-lock-end def-end 639 | changed t))))) 640 | changed)) 641 | 642 | (defun clojure-font-lock-regexp-groups (bound) 643 | "Highlight grouping constructs in regular expression. 644 | 645 | BOUND denotes the maximum number of characters (relative to the 646 | point) to check." 647 | (catch 'found 648 | (while (re-search-forward (concat 649 | ;; A group may start using several alternatives: 650 | "\\(\\(?:" 651 | ;; 1. (? special groups 652 | "(\\?\\(?:" 653 | ;; a) non-capturing group (?:X) 654 | ;; b) independent non-capturing group (?>X) 655 | ;; c) zero-width positive lookahead (?=X) 656 | ;; d) zero-width negative lookahead (?!X) 657 | "[:=!>]\\|" 658 | ;; e) zero-width positive lookbehind (?<=X) 659 | ;; f) zero-width negative lookbehind (?<!X) 660 | "<[=!]\\|" 661 | ;; g) named capturing group (?<name>X) 662 | "<[[:alnum:]]+>" 663 | "\\)\\|" ;; end of special groups 664 | ;; 2. normal capturing groups ( 665 | ;; 3. 
we also highlight alternative 666 | ;; separarators |, and closing parens ) 667 | "[|()]" 668 | "\\)\\)") 669 | bound t) 670 | (let ((face (get-text-property (1- (point)) 'face))) 671 | (when (and (or (and (listp face) 672 | (memq 'font-lock-string-face face)) 673 | (eq 'font-lock-string-face face)) 674 | (clojure-string-start t)) 675 | (throw 'found t)))))) 676 | 677 | ;; Docstring positions 678 | (put 'ns 'clojure-doc-string-elt 2) 679 | (put 'def 'clojure-doc-string-elt 2) 680 | (put 'defn 'clojure-doc-string-elt 2) 681 | (put 'defn- 'clojure-doc-string-elt 2) 682 | (put 'defmulti 'clojure-doc-string-elt 2) 683 | (put 'defmacro 'clojure-doc-string-elt 2) 684 | (put 'definline 'clojure-doc-string-elt 2) 685 | (put 'defprotocol 'clojure-doc-string-elt 2) 686 | 687 | ;; clj-thamil 688 | ;; Docstring positions 689 | (put 'வரையறு 'clojure-doc-string-elt 2) 690 | (put 'வரையறு-செயல்கூறு 'clojure-doc-string-elt 2) 691 | 692 | (defun clojure-indent-line () 693 | "Indent current line as Clojure code." 694 | (if (clojure-in-docstring-p) 695 | (save-excursion 696 | (beginning-of-line) 697 | (when (looking-at "^\\s-*") 698 | (replace-match (clojure-docstring-fill-prefix)))) 699 | (lisp-indent-line))) 700 | 701 | (defun clojure-indent-function (indent-point state) 702 | "This function is the normal value of the variable `lisp-indent-function'. 703 | It is used when indenting a line within a function call, to see if the 704 | called function says anything special about how to indent the line. 705 | 706 | INDENT-POINT is the position where the user typed TAB, or equivalent. 707 | Point is located at the point to indent under (for default indentation); 708 | STATE is the `parse-partial-sexp' state for that position. 709 | 710 | If the current line is in a call to a Lisp function 711 | which has a non-nil property `lisp-indent-function', 712 | that specifies how to do the indentation. 
713 | 714 | The property value can be 715 | 716 | - `defun', meaning indent `defun'-style; 717 | - an integer N, meaning indent the first N arguments specially 718 | like ordinary function arguments and then indent any further 719 | arguments like a body; 720 | - a function to call just as this function was called. 721 | If that function returns nil, that means it doesn't specify 722 | the indentation. 723 | 724 | This function also returns nil meaning don't specify the indentation." 725 | (let ((normal-indent (current-column))) 726 | (goto-char (1+ (elt state 1))) 727 | (parse-partial-sexp (point) calculate-lisp-indent-last-sexp 0 t) 728 | (if (and (elt state 2) 729 | (not (looking-at "\\sw\\|\\s_"))) 730 | ;; car of form doesn't seem to be a symbol 731 | (progn 732 | (if (not (> (save-excursion (forward-line 1) (point)) 733 | calculate-lisp-indent-last-sexp)) 734 | (progn (goto-char calculate-lisp-indent-last-sexp) 735 | (beginning-of-line) 736 | (parse-partial-sexp (point) 737 | calculate-lisp-indent-last-sexp 0 t))) 738 | ;; Indent under the list or under the first sexp on the same 739 | ;; line as calculate-lisp-indent-last-sexp. Note that first 740 | ;; thing on that line has to be complete sexp since we are 741 | ;; inside the innermost containing sexp. 
742 | (backward-prefix-chars) 743 | (if (and (eq (char-after (point)) ?\[) 744 | (eq (char-after (elt state 1)) ?\()) 745 | (+ (current-column) 2) ;; this is probably inside a defn 746 | (current-column))) 747 | (let* ((function (buffer-substring (point) 748 | (progn (forward-sexp 1) (point)))) 749 | (open-paren (elt state 1)) 750 | (method nil) 751 | (function-tail (first 752 | (last 753 | (split-string (substring-no-properties function) "/"))))) 754 | (setq method (get (intern-soft function-tail) 'clojure-indent-function)) 755 | (cond ((member (char-after open-paren) '(?\[ ?\{)) 756 | (goto-char open-paren) 757 | (1+ (current-column))) 758 | ((or (eq method 'defun) 759 | (and clojure-defun-style-default-indent 760 | ;; largely to preserve useful alignment of :require, etc in ns 761 | (not (string-match "^:" function)) 762 | (not method)) 763 | (and (null method) 764 | (> (length function) 3) 765 | (string-match "\\`\\(?:\\S +/\\)?\\(def\\|with-\\)" 766 | function))) 767 | (lisp-indent-defform state indent-point)) 768 | ((integerp method) 769 | (lisp-indent-specform method state 770 | indent-point normal-indent)) 771 | (method 772 | (funcall method indent-point state)) 773 | (clojure-use-backtracking-indent 774 | (clojure-backtracking-indent 775 | indent-point state normal-indent))))))) 776 | 777 | (defun clojure-backtracking-indent (indent-point state normal-indent) 778 | "Experimental backtracking support. 779 | 780 | Will upwards in an sexp to check for contextual indenting." 
781 | (let (indent (path) (depth 0)) 782 | (goto-char (elt state 1)) 783 | (while (and (not indent) 784 | (< depth clojure-max-backtracking)) 785 | (let ((containing-sexp (point))) 786 | (parse-partial-sexp (1+ containing-sexp) indent-point 1 t) 787 | (when (looking-at "\\sw\\|\\s_") 788 | (let* ((start (point)) 789 | (fn (buffer-substring start (progn (forward-sexp 1) (point)))) 790 | (meth (get (intern-soft fn) 'clojure-backtracking-indent))) 791 | (let ((n 0)) 792 | (when (< (point) indent-point) 793 | (condition-case () 794 | (progn 795 | (forward-sexp 1) 796 | (while (< (point) indent-point) 797 | (parse-partial-sexp (point) indent-point 1 t) 798 | (incf n) 799 | (forward-sexp 1))) 800 | (error nil))) 801 | (push n path)) 802 | (when meth 803 | (let ((def meth)) 804 | (dolist (p path) 805 | (if (and (listp def) 806 | (< p (length def))) 807 | (setq def (nth p def)) 808 | (if (listp def) 809 | (setq def (car (last def))) 810 | (setq def nil)))) 811 | (goto-char (elt state 1)) 812 | (when def 813 | (setq indent (+ (current-column) def))))))) 814 | (goto-char containing-sexp) 815 | (condition-case () 816 | (progn 817 | (backward-up-list 1) 818 | (incf depth)) 819 | (error (setq depth clojure-max-backtracking))))) 820 | indent)) 821 | 822 | ;; clojure backtracking indent is experimental and the format for these 823 | ;; entries are subject to change 824 | (put 'implement 'clojure-backtracking-indent '(4 (2))) 825 | (put 'letfn 'clojure-backtracking-indent '((2) 2)) 826 | (put 'proxy 'clojure-backtracking-indent '(4 4 (2))) 827 | (put 'reify 'clojure-backtracking-indent '((2))) 828 | (put 'deftype 'clojure-backtracking-indent '(4 4 (2))) 829 | (put 'defrecord 'clojure-backtracking-indent '(4 4 (2))) 830 | (put 'defprotocol 'clojure-backtracking-indent '(4 (2))) 831 | (put 'extend-type 'clojure-backtracking-indent '(4 (2))) 832 | (put 'extend-protocol 'clojure-backtracking-indent '(4 (2))) 833 | (put 'specify 'clojure-backtracking-indent '(4 (2))) 834 | (put 
'specify! 'clojure-backtracking-indent '(4 (2))) 835 | 836 | (defun put-clojure-indent (sym indent) "Set the `clojure-indent-function' property of SYM to INDENT." 837 | (put sym 'clojure-indent-function indent)) 838 | 839 | (defmacro define-clojure-indent (&rest kvs) "Register indentation specs; each element of KVS is a list (SYMBOL INDENT)." 840 | `(progn 841 | ,@(mapcar (lambda (x) `(put-clojure-indent 842 | (quote ,(car x)) ,(cadr x))) 843 | kvs))) 844 | 845 | (defun add-custom-clojure-indents (name value) "Set the default of NAME to VALUE and give every symbol in VALUE `defun' indentation." 846 | (custom-set-default name value) 847 | (mapc (lambda (x) 848 | (put-clojure-indent x 'defun)) 849 | value)) 850 | 851 | (defcustom clojure-defun-indents nil 852 | "List of additional symbols with defun-style indentation in Clojure. 853 | 854 | You can use this to let Emacs indent your own macros the same way 855 | that it indents built-in macros like with-open. To manually set 856 | it from Lisp code, use (put-clojure-indent 'some-symbol 'defun)." 857 | :type '(repeat symbol) 858 | :group 'clojure 859 | :set 'add-custom-clojure-indents) 860 | 861 | (define-clojure-indent 862 | ;; built-ins 863 | (ns 1) 864 | (fn 'defun) 865 | (def 'defun) 866 | (defn 'defun) 867 | (bound-fn 'defun) 868 | (if 1) 869 | (if-not 1) 870 | (case 1) 871 | (condp 2) 872 | (when 1) 873 | (while 1) 874 | (when-not 1) 875 | (when-first 1) 876 | (do 0) 877 | (future 0) 878 | (comment 0) 879 | (doto 1) 880 | (locking 1) 881 | (proxy 2) 882 | (with-open 1) 883 | (with-precision 1) 884 | (with-local-vars 1) 885 | 886 | 887 | ;; clj-thamil 888 | ;; built-ins 889 | (ns 1) 890 | (செயல்கூறு 'defun) 891 | (வரையறு 'defun) 892 | (வரையறு-செயல்கூறு 'defun) 893 | (bound-fn 'defun) 894 | (எனில் 1) 895 | (இல்லெனில் 1) 896 | (case 1) 897 | (condp 2) 898 | (என்னும்போது 1) 899 | (while 1) 900 | (இல்லென்னும்-போது 1) (இல்லென்னும்போது 1) 901 | (when-first 1) 902 | (செய் 0) 903 | (future 0) 904 | (comment 0) 905 | (doto 1) 906 | (locking 1) 907 | (proxy 2) 908 | (with-open 1) 909 | (with-precision 1) 910 | (with-local-vars 1) 911 | 912 | 913 | 914 | (reify 'defun) 915 | (deftype 2) 916 | (defrecord 2) 917 | (defprotocol 1) 918 | (extend 1) 919 | 
(extend-protocol 1) 920 | (extend-type 1) 921 | 922 | (try 0) 923 | (catch 2) 924 | (finally 0) 925 | 926 | ;; binding forms 927 | (let 1) 928 | (letfn 1) 929 | (binding 1) 930 | (loop 1) 931 | (for 1) 932 | (doseq 1) 933 | (dotimes 1) 934 | (when-let 1) 935 | (if-let 1) 936 | (when-some 1) 937 | (if-some 1) 938 | 939 | ;; clj-thamil 940 | ;; binding forms 941 | (வைத்துக்கொள் 1) 942 | (letfn 1) 943 | (binding 1) 944 | (சுற்று 1) 945 | (ஒவ்வொன்றுக்கும் 1) 946 | (செய்வரிசை 1) 947 | (dotimes 1) 948 | (when-let 1) 949 | (if-let 1) 950 | (when-some 1) 951 | (if-some 1) 952 | 953 | ;; data structures 954 | (defstruct 1) 955 | (struct-map 1) 956 | (assoc 1) 957 | 958 | (defmethod 'defun) 959 | 960 | ;; clojure.test 961 | (testing 1) 962 | (deftest 'defun) 963 | (are 1) 964 | (use-fixtures 'defun) 965 | 966 | ;; core.logic 967 | (run 'defun) 968 | (run* 'defun) 969 | (fresh 'defun) 970 | 971 | ;; core.async 972 | (alt! 0) 973 | (alt!! 0) 974 | (go 0) 975 | (go-loop 1) 976 | (thread 0)) 977 | 978 | 979 | 980 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 981 | ;; 982 | ;; Better docstring filling for clojure-mode 983 | ;; 984 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 985 | 986 | (defun clojure-string-start (&optional regex) 987 | "Return the position of the \" that begins the string at point. 988 | If REGEX is non-nil, return the position of the # that begins the 989 | regex at point. If point is not inside a string or regex, return 990 | nil." 991 | (when (nth 3 (syntax-ppss)) ;; Are we really in a string? 
992 | (save-excursion 993 | (save-match-data 994 | ;; Find a quote that appears immediately after whitespace, 995 | ;; beginning of line, hash, or an open paren, brace, or bracket 996 | (re-search-backward "\\(\\s-\\|^\\|#\\|(\\|\\[\\|{\\)\\(\"\\)") 997 | (let ((beg (match-beginning 2))) 998 | (when beg 999 | (if regex 1000 | (and (char-before beg) (char-equal ?# (char-before beg)) (1- beg)) 1001 | (unless (eq ?# (char-before beg)) 1002 | beg)))))))) 1003 | 1004 | (defun clojure-char-at-point () 1005 | "Return the char at point or nil if at buffer end." 1006 | (when (not (= (point) (point-max))) 1007 | (buffer-substring-no-properties (point) (1+ (point))))) 1008 | 1009 | (defun clojure-char-before-point () 1010 | "Return the char before point or nil if at buffer beginning." 1011 | (when (not (= (point) (point-min))) 1012 | (buffer-substring-no-properties (point) (1- (point))))) 1013 | 1014 | ;; TODO: Deal with the fact that when point is exactly at the 1015 | ;; beginning of a string, it thinks that is the end. 1016 | (defun clojure-string-end () 1017 | "Return the position of the \" that ends the string at point. 1018 | 1019 | Note that point must be inside the string - if point is 1020 | positioned at the opening quote, incorrect results will be 1021 | returned." 1022 | (save-excursion 1023 | (save-match-data 1024 | ;; If we're at the end of the string, just return point. 1025 | (if (and (string= (clojure-char-at-point) "\"") 1026 | (not (string= (clojure-char-before-point) "\\"))) 1027 | (point) 1028 | ;; We don't want to get screwed by starting out at the 1029 | ;; backslash in an escaped quote. 1030 | (when (string= (clojure-char-at-point) "\\") 1031 | (backward-char)) 1032 | ;; Look for a quote not preceded by a backslash 1033 | (re-search-forward "[^\\]\\\(\\\"\\)") 1034 | (match-beginning 1))))) 1035 | 1036 | (defun clojure-mark-string () 1037 | "Mark the string at point." 
1038 | (interactive) 1039 | (goto-char (clojure-string-start)) 1040 | (forward-char) 1041 | (set-mark (clojure-string-end))) 1042 | 1043 | (defun clojure-toggle-keyword-string () 1044 | "Convert the string or keyword at point to keyword or string." 1045 | (interactive) 1046 | (let ((original-point (point))) 1047 | (while (and (> (point) 1) 1048 | (not (equal "\"" (buffer-substring-no-properties (point) (+ 1 (point))))) 1049 | (not (equal ":" (buffer-substring-no-properties (point) (+ 1 (point)))))) 1050 | (backward-char)) 1051 | (cond 1052 | ((equal 1 (point)) 1053 | (error "Beginning of file reached, this was probably a mistake")) 1054 | ((equal "\"" (buffer-substring-no-properties (point) (+ 1 (point)))) 1055 | (insert ":" (substring (clojure-delete-and-extract-sexp) 1 -1))) 1056 | ((equal ":" (buffer-substring-no-properties (point) (+ 1 (point)))) 1057 | (insert "\"" (substring (clojure-delete-and-extract-sexp) 1) "\""))) 1058 | (goto-char original-point))) 1059 | 1060 | (defun clojure-delete-and-extract-sexp () 1061 | "Delete the sexp and return it." 1062 | (interactive) 1063 | (let ((begin (point))) 1064 | (forward-sexp) 1065 | (let ((result (buffer-substring-no-properties begin (point)))) 1066 | (delete-region begin (point)) 1067 | result))) 1068 | 1069 | 1070 | 1071 | (defconst clojure-namespace-name-regex 1072 | (rx line-start 1073 | (zero-or-more whitespace) 1074 | "(" 1075 | (zero-or-one (group (regexp "clojure.core/"))) 1076 | (zero-or-one (submatch "in-")) 1077 | "ns" 1078 | (zero-or-one "+") 1079 | (one-or-more (any whitespace "\n")) 1080 | (zero-or-more (or (submatch (zero-or-one "#") 1081 | "^{" 1082 | (zero-or-more (not (any "}"))) 1083 | "}") 1084 | (zero-or-more "^:" 1085 | (one-or-more (not (any whitespace))))) 1086 | (one-or-more (any whitespace "\n"))) 1087 | ;; why is this here? 
oh (in-ns 'foo) or (ns+ :user) 1088 | (zero-or-one (any ":'")) 1089 | (group (one-or-more (not (any "()\"" whitespace))) word-end))) 1090 | 1091 | ;; for testing clojure-namespace-name-regex, you can evaluate this code and make 1092 | ;; sure foo (or whatever the namespace name is) shows up in results. some of 1093 | ;; these currently fail. 1094 | ;; (mapcar (lambda (s) (let ((n (string-match clojure-namespace-name-regex s))) 1095 | ;; (if n (match-string 4 s)))) 1096 | ;; '("(ns foo)" 1097 | ;; "(ns 1098 | ;; foo)" 1099 | ;; "(ns foo.baz)" 1100 | ;; "(ns ^:bar foo)" 1101 | ;; "(ns ^:bar ^:baz foo)" 1102 | ;; "(ns ^{:bar true} foo)" 1103 | ;; "(ns #^{:bar true} foo)" 1104 | ;; "(ns #^{:fail {}} foo)" 1105 | ;; "(ns ^{:fail2 {}} foo.baz)" 1106 | ;; "(ns ^{} foo)" 1107 | ;; "(ns ^{:skip-wiki true} 1108 | ;; aleph.netty 1109 | ;; " 1110 | ;; "(ns 1111 | ;; foo)" 1112 | ;; "foo")) 1113 | 1114 | 1115 | 1116 | (defun clojure-expected-ns () 1117 | "Return the namespace name that the file should have." 1118 | (let* ((project-dir (file-truename 1119 | (locate-dominating-file default-directory 1120 | "project.clj"))) 1121 | (relative (substring (file-truename (buffer-file-name)) 1122 | (length project-dir) 1123 | (- (length (file-name-extension (buffer-file-name) t)))))) 1124 | (replace-regexp-in-string 1125 | "_" "-" (mapconcat 'identity (cdr (split-string relative "/")) ".")))) 1126 | 1127 | (defun clojure-insert-ns-form-at-point () 1128 | "Insert a namespace form at point." 1129 | (interactive) 1130 | (insert (format "(ns %s)" (clojure-expected-ns)))) 1131 | 1132 | (defun clojure-insert-ns-form () 1133 | "Insert a namespace form at the beginning of the buffer." 1134 | (interactive) 1135 | (widen) 1136 | (goto-char (point-min)) 1137 | (clojure-insert-ns-form-at-point)) 1138 | 1139 | (defun clojure-update-ns () 1140 | "Update the namespace of the current buffer. 1141 | Useful if a file has been renamed." 
1142 | (interactive) 1143 | (let ((nsname (clojure-expected-ns))) 1144 | (when nsname 1145 | (save-excursion 1146 | (save-match-data 1147 | (if (clojure-find-ns) 1148 | (replace-match nsname nil nil nil 4) 1149 | (error "Namespace not found"))))))) 1150 | 1151 | (defun clojure-find-ns () 1152 | "Find the namespace of the current Clojure buffer." 1153 | (let ((regexp clojure-namespace-name-regex)) 1154 | (save-excursion 1155 | (save-restriction 1156 | (widen) 1157 | (goto-char (point-min)) 1158 | (when (re-search-forward regexp nil t) 1159 | (match-string-no-properties 4)))))) 1160 | 1161 | (defun clojure-find-def () 1162 | "Find the var declaration macro and symbol name of the current form. 1163 | Returns a list pair, e.g. (\"defn\" \"abc\") or (\"deftest\" \"some-test\")." 1164 | (let ((re (concat "(\\(?:\\(?:\\sw\\|\\s_\\)+/\\)?" 1165 | ;; Declaration 1166 | "\\(def\\sw*\\)\\>" 1167 | ;; Any whitespace 1168 | "[ \r\n\t]*" 1169 | ;; Possibly type or metadata 1170 | "\\(?:#?^\\(?:{[^}]*}\\|\\(?:\\sw\\|\\s_\\)+\\)[ \r\n\t]*\\)*" 1171 | ;; Symbol name 1172 | "\\(\\(?:\\sw\\|\\s_\\)+\\)"))) 1173 | (save-excursion 1174 | (unless (looking-at re) 1175 | (beginning-of-defun)) 1176 | (when (search-forward-regexp re nil t) 1177 | (list (match-string 1) 1178 | (match-string 2)))))) 1179 | 1180 | ;;;###autoload 1181 | (add-to-list 'auto-mode-alist 1182 | '("\\.\\(clj[sx]?\\|dtm\\|edn\\)\\'" . clojure-mode)) 1183 | 1184 | (provide 'clojure-mode) 1185 | 1186 | ;; Local Variables: 1187 | ;; coding: utf-8 1188 | ;; byte-compile-warnings: (not cl-functions) 1189 | ;; indent-tabs-mode: nil 1190 | ;; End: 1191 | 1192 | ;;; clojure-mode.el ends here 1193 | --------------------------------------------------------------------------------