├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── rabbit.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.3" 5 | - "nightly" 6 | script: python test.py 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Htain Lin Shwe/Rabbit-Converter 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rabbit-Python 2 | 3 | [![Build Status](https://travis-ci.org/Rabbit-Converter/Rabbit-Python.svg?branch=master)](https://travis-ci.org/Rabbit-Converter/Rabbit-Python) 4 | 5 | Another Zawgyi <=> Unicode Converter as a Python module. Written in [Python](https://www.python.org). 6 | 7 | Current minimum requirements: Python 2.7.x or 3.3+ 8 | 9 | ## Installation 10 | 11 | You can include rabbit.py directly: 12 | 13 | ```python 14 | from rabbit import Rabbit 15 | ``` 16 | 17 | TODO: pip and easy_install 18 | 19 | ## Usage 20 | 21 | ```python 22 | from rabbit import Rabbit 23 | 24 | Rabbit.uni2zg("မင်္ဂလာပါ") # returns zg strings "မဂၤလာပါ" 25 | 26 | Rabbit.zg2uni("မဂၤလာပါ") # returns unicode strings "မင်္ဂလာပါ" 27 | ``` 28 | 29 | ## Contributing 30 | 31 | 1. Fork it ( https://github.com/Rabbit-Converter/Rabbit-Python ) 32 | 2. Create your feature branch (`git checkout -b my-new-feature`) 33 | - Install python 2.x and python 3.x 34 | - Run `python test.py` and `python3 test.py` 35 | - Add a test case to test.py 36 | - Implement the code. 37 | - Make the tests pass. 38 | 3. Commit your changes (`git commit -am 'Add some feature'`) 39 | 4. Push to the branch (`git push origin my-new-feature`) 40 | 5. Create a new Pull Request 41 | 42 | ## License 43 | MIT 44 | -------------------------------------------------------------------------------- /rabbit.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Rabbit-Converter/Rabbit-Python 3 | # Zawgyi <-> Unicode converter for Myanmar language 4 | 5 | import re 6 | 7 | class Rabbit: 8 | @staticmethod 9 | def uni2zg(text): 10 | rules = [ 11 | { "from": u"\u1004\u103a\u1039", "to": u"\u1064" }, { "from": u"\u1039\u1010\u103d", "to": u"\u1096" }, { "from": u"\u1014(?=[\u1030\u103d\u103e\u102f\u1039])", "to": u"\u108f" }, { "from": u"\u102b\u103a", "to": u"\u105a" }, { "from": u"\u100b\u1039\u100c", "to": u"\u1092" }, { "from": u"\u102d\u1036", "to": u"\u108e" }, { "from": u"\u104e\u1004\u103a\u1038", "to": u"\u104e" }, { "from": u"[\u1025\u1009](?=[\u1039\u102f\u1030])", "to": u"\u106a" }, { "from": u"[\u1025\u1009](?=[\u103a])", "to": u"\u1025" }, { "from": u"\u100a(?=[\u1039\u102f\u1030\u103d])", "to": u"\u106b" }, { "from": u"(\u1039[\u1000-\u1021])\u102f", "to": u"\\1\u1033" }, { "from": u"(\u1039[\u1000-\u1021])\u1030", "to": u"\\1\u1034" }, { "from": u"\u1039\u1000", "to": u"\u1060" }, { "from": u"\u1039\u1001", "to": u"\u1061" }, { "from": u"\u1039\u1002", "to": u"\u1062" }, { "from": u"\u1039\u1003", "to": u"\u1063" }, { "from": u"\u1039\u1005", "to": u"\u1065" }, { "from": u"\u1039\u1007", "to": u"\u1068" }, { "from": u"\u1039\u1008", "to": u"\u1069" }, { "from": u"\u100a(?=[\u1039\u102f\u1030])", "to": u"\u106b" }, { "from": u"\u1039\u100b", "to": u"\u106c" }, { "from": u"\u1039\u100c", "to": u"\u106d" }, { "from": u"\u100d\u1039\u100d", "to": u"\u106e" }, { "from": u"\u100e\u1039\u100d", "to": u"\u106f" }, { "from": u"\u1039\u100f", "to": u"\u1070" }, { "from": u"\u1039\u1010", "to": u"\u1071" }, { "from": u"\u1039\u1011", "to": u"\u1073" }, { "from": u"\u1039\u1012", "to": u"\u1075" }, { "from": u"\u1039\u1013", "to": u"\u1076" }, { "from": u"\u1039\u1013", "to": u"\u1076" }, { "from": u"\u1039\u1014", "to": u"\u1077" }, { "from": u"\u1039\u1015", "to": u"\u1078" }, { "from": u"\u1039\u1016", "to": u"\u1079" }, { "from": u"\u1039\u1017", "to": u"\u107a" }, { "from": u"\u1039\u1018", "to": u"\u107b" }, { "from": u"\u1039\u1019", "to": u"\u107c" }, { "from": u"\u1039\u101c", "to": u"\u1085" }, { "from": u"\u103f", "to": u"\u1086" }, { "from": u"(\u103c)\u103e", "to": u"\\1\u1087" }, { "from": u"\u103d\u103e", "to": u"\u108a" }, { "from": u"(\u1064)([\u1031]?)([\u103c]?)([\u1000-\u1021])\u102d", "to": u"\\2\\3\\4\u108b" }, { "from": u"(\u1064)([\u1031]?)([\u103c]?)([\u1000-\u1021])\u102e", "to": u"\\2\\3\\4\u108c" }, { "from": u"(\u1064)([\u1031]?)([\u103c]?)([\u1000-\u1021])\u1036", "to": u"\\2\\3\\4\u108d" }, { "from": u"(\u1064)([\u1031]?)([\u103c]?)([\u1000-\u1021])", "to": u"\\2\\3\\4\\1" }, { "from": u"\u101b(?=[\u102f\u1030\u103d\u108a])", "to": u"\u1090" }, { "from": u"\u100f\u1039\u100d", "to": u"\u1091" }, { "from": u"\u100b\u1039\u100b", "to": u"\u1097" }, { "from": u"([\u1000-\u1021\u1029\u1090])([\u1060-\u1069\u106c\u106d\u1070-\u107c\u1085\u108a])?([\u103b-\u103e]*)?\u1031", "to": u"\u1031\\1\\2\\3" }, { "from": u"([\u1000-\u1021\u1029])([\u1060-\u1069\u106c\u106d\u1070-\u107c\u1085])?(\u103c)", "to": u"\\3\\1\\2" }, { "from": u"\u103a", "to": u"\u1039" }, { "from": u"\u103b", "to": u"\u103a" }, { "from": u"\u103c", "to": u"\u103b" }, { "from": u"\u103d", "to": u"\u103c" }, { "from": u"\u103e", "to": u"\u103d" }, { "from": u"\u103d\u102f", "to": u"\u1088" }, { "from": u"([\u102f\u1030\u1014\u101b\u103c\u108a\u103d\u1088])([\u1032\u1036]{0,1})\u1037", "to": u"\\1\\2\u1095" }, { "from": u"\u102f\u1095", "to": u"\u102f\u1094" }, { "from": u"([\u1014\u101b])([\u1032\u1036\u102d\u102e\u108b\u108c\u108d\u108e])\u1037", "to": u"\\1\\2\u1095" }, { "from": u"\u1095\u1039", "to": u"\u1094\u1039" }, { "from": u"([\u103a\u103b])([\u1000-\u1021])([\u1036\u102d\u102e\u108b\u108c\u108d\u108e]?)\u102f", "to": u"\\1\\2\\3\u1033" }, { "from": u"([\u103a\u103b])([\u1000-\u1021])([\u1036\u102d\u102e\u108b\u108c\u108d\u108e]?)\u1030", "to": u"\\1\\2\\3\u1034" }, { "from": u"\u103a\u102f", "to": u"\u103a\u1033" }, { "from": u"\u103a([\u1036\u102d\u102e\u108b\u108c\u108d\u108e])\u102f", "to": u"\u103a\\1\u1033" }, { "from": u"([\u103a\u103b])([\u1000-\u1021])\u1030", "to": u"\\1\\2\u1034" }, { "from": u"\u103a\u1030", "to": u"\u103a\u1034" }, { "from": u"\u103a([\u1036\u102d\u102e\u108b\u108c\u108d\u108e])\u1030", "to": u"\u103a\\1\u1034" }, { "from": u"\u103d\u1030", "to": u"\u1089" }, { "from": u"\u103b([\u1000\u1003\u1006\u100f\u1010\u1011\u1018\u101a\u101c\u101a\u101e\u101f])", "to": u"\u107e\\1" }, { "from": u"\u107e([\u1000\u1003\u1006\u100f\u1010\u1011\u1018\u101a\u101c\u101a\u101e\u101f])([\u103c\u108a])([\u1032\u1036\u102d\u102e\u108b\u108c\u108d\u108e])", "to": u"\u1084\\1\\2\\3" }, { "from": u"\u107e([\u1000\u1003\u1006\u100f\u1010\u1011\u1018\u101a\u101c\u101a\u101e\u101f])([\u103c\u108a])", "to": u"\u1082\\1\\2" }, { "from": u"\u107e([\u1000\u1003\u1006\u100f\u1010\u1011\u1018\u101a\u101c\u101a\u101e\u101f])([\u1033\u1034]?)([\u1032\u1036\u102d\u102e\u108b\u108c\u108d\u108e])", "to": u"\u1080\\1\\2\\3" }, { "from": u"\u103b([\u1000-\u1021])([\u103c\u108a])([\u1032\u1036\u102d\u102e\u108b\u108c\u108d\u108e])", "to": u"\u1083\\1\\2\\3" }, { "from": u"\u103b([\u1000-\u1021])([\u103c\u108a])", "to": u"\u1081\\1\\2" }, { "from": u"\u103b([\u1000-\u1021])([\u1033\u1034]?)([\u1032\u1036\u102d\u102e\u108b\u108c\u108d\u108e])", "to": u"\u107f\\1\\2\\3" }, { "from": u"\u103a\u103d", "to": u"\u103d\u103a" }, { "from": u"\u103a([\u103c\u108a])", "to": u"\\1\u107d" }, { "from": u"([\u1033\u1034])\u1094", "to": u"\\1\u1095" } 12 | ] 13 | for rule in rules: 14 | text = re.sub(rule["from"], rule["to"], text) 15 | return text 16 | 17 | @staticmethod 18 | def zg2uni(text): 19 | rules = [ 20 | { "from": u"(\u103d|\u1087)", "to": u"\u103e" }, { "from": u"\u103c", "to": u"\u103d" }, { "from": u"(\u103b|\u107e|\u107f|\u1080|\u1081|\u1082|\u1083|\u1084)", "to": u"\u103c" }, { "from": u"(\u103a|\u107d)", "to": u"\u103b" }, { "from": u"\u1039", "to": u"\u103a" }, { "from": u"\u106a", "to": u"\u1009" }, { "from": u"\u106b", "to": u"\u100a" }, { "from": u"\u106c", "to": u"\u1039\u100b" }, { "from": u"\u106d", "to": u"\u1039\u100c" }, { "from": u"\u106e", "to": u"\u100d\u1039\u100d" }, { "from": u"\u106f", "to": u"\u100d\u1039\u100e" }, { "from": u"\u1070", "to": u"\u1039\u100f" }, { "from": u"(\u1071|\u1072)", "to": u"\u1039\u1010" }, { "from": u"\u1060", "to": u"\u1039\u1000" }, { "from": u"\u1061", "to": u"\u1039\u1001" }, { "from": u"\u1062", "to": u"\u1039\u1002" }, { "from": u"\u1063", "to": u"\u1039\u1003" }, { "from": u"\u1065", "to": u"\u1039\u1005" }, { "from": u"\u1068", "to": u"\u1039\u1007" }, { "from": u"\u1069", "to": u"\u1039\u1008" }, { "from": u"/(\u1073|\u1074)/g", "to": u"\u1039\u1011" }, { "from": u"\u1075", "to": u"\u1039\u1012" }, { "from": u"\u1076", "to": u"\u1039\u1013" }, { "from": u"\u1077", "to": u"\u1039\u1014" }, { "from": u"\u1078", "to": u"\u1039\u1015" }, { "from": u"\u1079", "to": u"\u1039\u1016" }, { "from": u"\u107a", "to": u"\u1039\u1017" }, { "from": u"\u107c", "to": u"\u1039\u1019" }, { "from": u"\u1085", "to": u"\u1039\u101c" }, { "from": u"\u1033", "to": u"\u102f" }, { "from": u"\u1034", "to": u"\u1030" }, { "from": u"\u103f", "to": u"\u1030" }, { "from": u"\u1086", "to": u"\u103f" }, { "from": u"\u1036\u1088", "to": u"\u1088\u1036" }, { "from": u"\u1088", "to": u"\u103e\u102f" }, { "from": u"\u1089", "to": u"\u103e\u1030" }, { "from": u"\u108a", "to": u"\u103d\u103e" }, { "from": u"([\u1000-\u1021])\u1064", "to": u"\u1004\u103a\u1039\\1" }, { "from": u"([\u1000-\u1021])\u108b", "to": u"\u1004\u103a\u1039\\1\u102d" }, { "from": u"([\u1000-\u1021])\u108c", "to": u"\u1004\u103a\u1039\\1\u102e" }, { "from": u"([\u1000-\u1021])\u108d", "to": u"\u1004\u103a\u1039\\1\u1036" }, { "from": u"\u108e", "to": u"\u102d\u1036" }, { "from": u"\u108f", "to": u"\u1014" }, { "from": u"\u1090", "to": u"\u101b" }, { "from": u"\u1091", "to": u"\u100f\u1039\u1091" }, { "from": u"\u1019\u102c(\u107b|\u1093)", "to": u"\u1019\u1039\u1018\u102c" }, { "from": u"(\u107b|\u1093)", "to": u"\u103a\u1018" }, { "from": u"(\u1094|\u1095)", "to": u"\u1037" }, { "from": u"\u1096", "to": u"\u1039\u1010\u103d" }, { "from": u"\u1097", "to": u"\u100b\u1039\u100b" }, { "from": u"\u103c([\u1000-\u1021])([\u1000-\u1021])?", "to": u"\\1\u103c\\2" }, { "from": u"([\u1000-\u1021])\u103c\u103a", "to": u"\u103c\\1\u103a" }, { "from": u"\u1031([\u1000-\u1021])(\u103e)?(\u103b)?", "to": u"\\1\\2\\3\u1031" }, { "from": u"([\u1000-\u1021])\u1031([\u103b\u103c\u103d\u103e]+)", "to": u"\\1\\2\u1031" }, { "from": u"\u1032\u103d", "to": u"\u103d\u1032" }, { "from": u"\u103d\u103b", "to": u"\u103b\u103d" }, { "from": u"\u103a\u1037", "to": u"\u1037\u103a" }, { "from": u"\u102f(\u102d|\u102e|\u1036|\u1037)\u102f", "to": u"\u102f\\1" }, { "from": u"\u102f\u102f", "to": u"\u102f" }, { "from": u"(\u102f|\u1030)(\u102d|\u102e)", "to": u"\\2\\1" }, { "from": u"(\u103e)(\u103b|\u1037)", "to": u"\\2\\1" }, { "from": u"\u1025(\u103a|\u102c)", "to": u"\u1009\\1" }, { "from": u"\u1025\u102e", "to": u"\u1026" }, { "from": u"\u1005\u103b", "to": u"\u1008" }, { "from": u"\u1036(\u102f|\u1030)", "to": u"\\1\u1036" }, { "from": u"\u1031\u1037\u103e", "to": u"\u103e\u1031\u1037" }, { "from": u"\u1031\u103e\u102c", "to": u"\u103e\u1031\u102c" }, { "from": u"\u105a", "to": u"\u102b\u103a" }, { "from": u"\u1031\u103b\u103e", "to": u"\u103b\u103e\u1031" }, { "from": u"(\u102d|\u102e)(\u103d|\u103e)", "to": u"\\2\\1" }, { "from": u"\u102c\u1039([\u1000-\u1021])", "to": u"\u1039\\1\u102c" }, { "from": u"\u103c\u1004\u103a\u1039([\u1000-\u1021])", "to": u"\u1004\u103a\u1039\\1\u103c" }, { "from": u"\u1039\u103c\u103a\u1039([\u1000-\u1021])", "to": u"\u103a\u1039\\1\u103c" }, { "from": u"\u103c\u1039([\u1000-\u1021])", "to": u"\u1039\\1\u103c" }, { "from": u"\u1036\u1039([\u1000-\u1021])", "to": u"\u1039\\1\u1036" }, { "from": u"\u1092", "to": u"\u100b\u1039\u100c" }, { "from": u"\u104e", "to": u"\u104e\u1004\u103a\u1038" }, { "from": u"\u1040(\u102b|\u102c|\u1036)", "to": u"\u101d\\1" }, { "from": u"\u1025\u1039", "to": u"\u1009\u1039" }, { "from": u"([\u1000-\u1021])\u103c\u1031\u103d", "to": u"\\1\u103c\u103d\u1031" }, { "from": u"([\u1000-\u1021])\u103d\u1031\u103b", "to": u"\\1\u103b\u103d\u1031" } 21 | ] 22 | for rule in rules: 23 | text = re.sub(rule["from"], rule["to"], text) 24 | return text 25 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # test.py for Rabbit-Python 3 | 4 | import unittest 5 | from rabbit import Rabbit 6 | 7 | class RabbitTests(unittest.TestCase): 8 | def setUp(self): 9 | testing = True 10 | 11 | def test_uni2zg(self): 12 | print("converting Unicode to Zawgyi") 13 | converted = Rabbit.uni2zg(u"မင်္ဂလာပါ") 14 | self.assertEqual(converted, u"မဂၤလာပါ") 15 | 16 | def test_zg2uni(self): 17 | print("converting Zawgyi to Unicode") 18 | converted = Rabbit.zg2uni(u"မဂၤလာပါ") 19 | self.assertEqual(converted, u"မင်္ဂလာပါ") 20 | 21 | if __name__ == "__main__": 22 | unittest.main() 23 | --------------------------------------------------------------------------------