├── .github └── workflows │ └── ci.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.rst ├── docs-kr ├── Makefile └── source │ ├── conf.py │ └── index.rst ├── docs ├── Makefile └── source │ ├── conf.py │ ├── index.rst │ ├── sample.rst │ └── unicode.rst ├── jamo ├── __init__.py ├── data │ ├── U+11xx.json │ ├── U+31xx.json │ └── decompositions.json └── jamo.py ├── requirements-dev.txt ├── setup.py ├── tests └── test_jamo.py ├── tools ├── check.py └── parse.py └── tox.ini /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Test and lint 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [master] 7 | pull_request: 8 | branches: 9 | - "**" 10 | 11 | jobs: 12 | test: 13 | name: Run unit tests 14 | runs-on: ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: 19 | [ 20 | "pypy-3.7", 21 | "pypy-3.8", 22 | "3.7", 23 | "3.8", 24 | "3.9", 25 | "3.10", 26 | ] 27 | 28 | steps: 29 | - uses: actions/checkout@v2 30 | 31 | - name: Set up Python ${{ matrix.python-version }} 32 | uses: actions/setup-python@v2 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | 36 | - name: Install tox 37 | run: | 38 | pip install tox tox-gh-actions 39 | 40 | - name: Run tests with tox 41 | run: tox 42 | 43 | lint: 44 | name: Run linters and formatters 45 | runs-on: ubuntu-latest 46 | 47 | steps: 48 | - uses: actions/checkout@v2 49 | 50 | - name: Set up Python 51 | uses: actions/setup-python@v2 52 | with: 53 | python-version: "3.10" 54 | 55 | - name: Install dependencies 56 | run: | 57 | pip install -r requirements-dev.txt 58 | 59 | - name: Run linters 60 | run: make lint 61 | 62 | - name: Run code formatter 63 | run: make format 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | venv/ 4 | *.py[cod] 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .cache 41 | nosetests.xml 42 | coverage.xml 43 | 44 | # Translations 45 | *.mo 46 | *.pot 47 | 48 | # Django stuff: 49 | *.log 50 | 51 | # Sphinx documentation 52 | docs/_build/ 53 | docs-kr/_build/ 54 | 55 | # PyBuilder 56 | target/ 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2017 Joshua Dong 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJECT=jamo 2 | PYTHON := /usr/bin/env python 3 | PYTHON_VERSION=$(shell $(PYTHON) -c 'import sys; print(sys.version_info[0])') 4 | JAMO_VERSION=$(shell $(PYTHON) -c 'import jamo; print(jamo.__version__)') 5 | 6 | default: 7 | @echo "install: install the package and scripts" 8 | @echo "clean: remove build/test artifacts" 9 | @echo "lint: check syntax" 10 | @echo "test: run unit tests" 11 | @echo "Python Version: $(PYTHON_VERSION)" 12 | @echo " Jamo Version: $(JAMO_VERSION)" 13 | 14 | install: 15 | python setup.py install 16 | 17 | clean: 18 | find . -name \*.pyc -exec rm -f {} \; 19 | find . -depth -type d -name __pycache__ -exec rm -rf {} \; 20 | rm -rf build dist $(PROJECT).egg-info 21 | 22 | lint: 23 | flake8 --ignore=E123,E501,F401 $(PROJECT) 24 | 25 | format: 26 | autopep8 -r --in-place --exit-code $(PROJECT) 27 | 28 | test: 29 | nose2 --with-coverage --coverage=$(PROJECT) 30 | 31 | dist/jamo-$(JAMO_VERSION).tar.gz: 32 | $(PYTHON) setup.py sdist 33 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Jamo: Hangul Character Analysis 2 | =============================== 3 | 4 | .. image:: https://github.com/JDongian/python-jamo/actions/workflows/ci.yml/badge.svg 5 | :target: https://github.com/JDongian/python-jamo/actions/workflows/ci.yml 6 | 7 | .. image:: https://readthedocs.org/projects/python-jamo/badge/?version=latest 8 | :target: https://readthedocs.org/projects/python-jamo/?badge=latest 9 | 10 | Python-jamo is a Python Hangul syllable decomposition and synthesis library 11 | for working with Hangul characters and jamo. 12 | 13 | Currently in beta release, function names are subject to change, but there is 14 | coverage for nearly all Hangul-related codepoints under Unicode 7.0. 15 | 16 | Originally designed to help students identify difficult-to-spell words 17 | containing (ㅔ,ㅐ) or (ㅗ,ㅜ), this project hopes to fill the niche of Korean 18 | phonetic and spelling analysis. 19 | 20 | 21 | Installation 22 | ------------ 23 | 24 | To install Jamo from `pypi`_, simply: 25 | 26 | .. code-block:: bash 27 | 28 | $ pip install jamo 29 | 30 | The jamo module is Python 3 only. Viva the bleeding edge! 31 | 32 | 33 | Documentation 34 | ------------- 35 | 36 | Documentation is available at ReadTheDocs in `English`_. 37 | 38 | 39 | Contributing 40 | ------------ 41 | 42 | Like this project or want to help? Take a look at the issues! I'm active on 43 | github, and will review pulls. I'm open to email as well, so please contact 44 | me if you have any ideas for this project. 45 | 46 | 47 | License 48 | ------- 49 | 50 | Apache 2.0 licensed. 51 | 52 | Anyone is free to use the software for any purpose, to distribute it, to 53 | modify it, and to distribute modified versions of the software, under the 54 | terms of the license, without concern for royalties. 55 | 56 | 57 | .. _pypi: https://pypi.python.org/pypi/jamo 58 | .. _English: http://python-jamo.readthedocs.org/en/latest/ 59 | .. _Korean: http://python-jamo.readthedocs.org/ko/latest/ 60 | -------------------------------------------------------------------------------- /docs-kr/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/acky.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/acky.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/acky" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/acky" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs-kr/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # jamo documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Jun 20 14:25:33 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = ['sphinx.ext.autodoc'] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 34 | templates_path = ['_templates'] 35 | 36 | # The suffix of source filenames. 37 | source_suffix = '.rst' 38 | 39 | # The encoding of source files. 40 | #source_encoding = 'utf-8-sig' 41 | 42 | # The master toctree document. 43 | master_doc = 'index' 44 | 45 | # General information about the project. 46 | project = u'jamo' 47 | copyright = u'2015, Joshua Dong' 48 | 49 | # The version info for the project you're documenting, acts as replacement for 50 | # |version| and |release|, also used in various other places throughout the 51 | # built documents. 52 | # 53 | # The short X.Y version. 54 | version = '0.0' 55 | # The full version, including alpha/beta/rc tags. 56 | release = '0.0' 57 | 58 | # The language for content autogenerated by Sphinx. Refer to documentation 59 | # for a list of supported languages. 60 | #language = None 61 | 62 | # There are two options for replacing |today|: either, you set today to some 63 | # non-false value, then it is used: 64 | #today = '' 65 | # Else, today_fmt is used as the format for a strftime call. 66 | #today_fmt = '%B %d, %Y' 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | exclude_patterns = ['_build'] 71 | 72 | # The reST default role (used for this markup: `text`) to use for all 73 | # documents. 74 | #default_role = None 75 | 76 | # If true, '()' will be appended to :func: etc. cross-reference text. 77 | #add_function_parentheses = True 78 | 79 | # If true, the current module name will be prepended to all description 80 | # unit titles (such as .. function::). 81 | #add_module_names = True 82 | 83 | # If true, sectionauthor and moduleauthor directives will be shown in the 84 | # output. They are ignored by default. 85 | #show_authors = False 86 | 87 | # The name of the Pygments (syntax highlighting) style to use. 88 | pygments_style = 'sphinx' 89 | 90 | # A list of ignored prefixes for module index sorting. 91 | #modindex_common_prefix = [] 92 | 93 | # If true, keep warnings as "system message" paragraphs in the built documents. 94 | #keep_warnings = False 95 | 96 | 97 | # -- Options for HTML output ---------------------------------------------- 98 | 99 | # The theme to use for HTML and HTML Help pages. See the documentation for 100 | # a list of builtin themes. 101 | html_theme = 'default' 102 | 103 | # Theme options are theme-specific and customize the look and feel of a theme 104 | # further. For a list of options available for each theme, see the 105 | # documentation. 106 | #html_theme_options = {} 107 | 108 | # Add any paths that contain custom themes here, relative to this directory. 109 | #html_theme_path = [] 110 | 111 | # The name for this set of Sphinx documents. If None, it defaults to 112 | # " v documentation". 113 | #html_title = None 114 | 115 | # A shorter title for the navigation bar. Default is the same as html_title. 116 | #html_short_title = None 117 | 118 | # The name of an image file (relative to this directory) to place at the top 119 | # of the sidebar. 120 | #html_logo = None 121 | 122 | # The name of an image file (within the static path) to use as favicon of the 123 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 124 | # pixels large. 125 | #html_favicon = None 126 | 127 | # Add any paths that contain custom static files (such as style sheets) here, 128 | # relative to this directory. They are copied after the builtin static files, 129 | # so a file named "default.css" will overwrite the builtin "default.css". 130 | html_static_path = ['_static'] 131 | 132 | # Add any extra paths that contain custom files (such as robots.txt or 133 | # .htaccess) here, relative to this directory. These files are copied 134 | # directly to the root of the documentation. 135 | #html_extra_path = [] 136 | 137 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 138 | # using the given strftime format. 139 | #html_last_updated_fmt = '%b %d, %Y' 140 | 141 | # If true, SmartyPants will be used to convert quotes and dashes to 142 | # typographically correct entities. 143 | #html_use_smartypants = True 144 | 145 | # Custom sidebar templates, maps document names to template names. 146 | #html_sidebars = {} 147 | 148 | # Additional templates that should be rendered to pages, maps page names to 149 | # template names. 150 | #html_additional_pages = {} 151 | 152 | # If false, no module index is generated. 153 | #html_domain_indices = True 154 | 155 | # If false, no index is generated. 156 | #html_use_index = True 157 | 158 | # If true, the index is split into individual pages for each letter. 159 | #html_split_index = False 160 | 161 | # If true, links to the reST sources are added to the pages. 162 | #html_show_sourcelink = True 163 | 164 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 165 | #html_show_sphinx = True 166 | 167 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 168 | #html_show_copyright = True 169 | 170 | # If true, an OpenSearch description file will be output, and all pages will 171 | # contain a tag referring to it. The value of this option must be the 172 | # base URL from which the finished HTML is served. 173 | #html_use_opensearch = '' 174 | 175 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 176 | #html_file_suffix = None 177 | 178 | # Output file base name for HTML help builder. 179 | htmlhelp_basename = 'jamodoc' 180 | 181 | 182 | # -- Options for LaTeX output --------------------------------------------- 183 | 184 | latex_elements = { 185 | # The paper size ('letterpaper' or 'a4paper'). 186 | #'papersize': 'letterpaper', 187 | 188 | # The font size ('10pt', '11pt' or '12pt'). 189 | #'pointsize': '10pt', 190 | 191 | # Additional stuff for the LaTeX preamble. 192 | #'preamble': '', 193 | } 194 | 195 | # Grouping the document tree into LaTeX files. List of tuples 196 | # (source start file, target name, title, 197 | # author, documentclass [howto, manual, or own class]). 198 | latex_documents = [ 199 | ('index', 'jamo.tex', u'jamo Documentation', 200 | u'Joshua Dong', 'manual'), 201 | ] 202 | 203 | # The name of an image file (relative to this directory) to place at the top of 204 | # the title page. 205 | #latex_logo = None 206 | 207 | # For "manual" documents, if this is true, then toplevel headings are parts, 208 | # not chapters. 209 | #latex_use_parts = False 210 | 211 | # If true, show page references after internal links. 212 | #latex_show_pagerefs = False 213 | 214 | # If true, show URL addresses after external links. 215 | #latex_show_urls = False 216 | 217 | # Documents to append as an appendix to all manuals. 218 | #latex_appendices = [] 219 | 220 | # If false, no module index is generated. 221 | #latex_domain_indices = True 222 | 223 | 224 | # -- Options for manual page output --------------------------------------- 225 | 226 | # One entry per manual page. List of tuples 227 | # (source start file, name, description, authors, manual section). 228 | man_pages = [ 229 | ('index', 'jamo', u'jamo Documentation', 230 | [u'Joshua Dong'], 1) 231 | ] 232 | 233 | # If true, show URL addresses after external links. 234 | #man_show_urls = False 235 | 236 | 237 | # -- Options for Texinfo output ------------------------------------------- 238 | 239 | # Grouping the document tree into Texinfo files. List of tuples 240 | # (source start file, target name, title, author, 241 | # dir menu entry, description, category) 242 | texinfo_documents = [ 243 | ('index', 'jamo', u'jamo Documentation', 244 | u'Joshua Dong', 'jamo', 'One line description of project.', 245 | 'Miscellaneous'), 246 | ] 247 | 248 | # Documents to append as an appendix to all manuals. 249 | #texinfo_appendices = [] 250 | 251 | # If false, no module index is generated. 252 | #texinfo_domain_indices = True 253 | 254 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 255 | #texinfo_show_urls = 'footnote' 256 | 257 | # If true, do not generate a @detailmenu in the "Top" node's menu. 258 | #texinfo_no_detailmenu = False 259 | -------------------------------------------------------------------------------- /docs-kr/source/index.rst: -------------------------------------------------------------------------------- 1 | .. _Jamo: 2 | 3 | ================== 4 | Python 자모 입문서 5 | ================== 6 | 7 | 한국어 입문서는 없어요 :( 8 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/acky.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/acky.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/acky" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/acky" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # jamo documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Jun 20 14:25:33 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = ['sphinx.ext.autodoc'] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 34 | templates_path = ['_templates'] 35 | 36 | # The suffix of source filenames. 37 | source_suffix = '.rst' 38 | 39 | # The encoding of source files. 40 | #source_encoding = 'utf-8-sig' 41 | 42 | # The master toctree document. 43 | master_doc = 'index' 44 | 45 | # General information about the project. 46 | project = u'jamo' 47 | copyright = u'2015, Joshua Dong' 48 | 49 | # The version info for the project you're documenting, acts as replacement for 50 | # |version| and |release|, also used in various other places throughout the 51 | # built documents. 52 | # 53 | # The short X.Y version. 54 | version = '0.4' 55 | # The full version, including alpha/beta/rc tags. 56 | release = '0.4-beta' 57 | 58 | # The language for content autogenerated by Sphinx. Refer to documentation 59 | # for a list of supported languages. 60 | #language = None 61 | 62 | # There are two options for replacing |today|: either, you set today to some 63 | # non-false value, then it is used: 64 | #today = '' 65 | # Else, today_fmt is used as the format for a strftime call. 66 | #today_fmt = '%B %d, %Y' 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | exclude_patterns = ['_build'] 71 | 72 | # The reST default role (used for this markup: `text`) to use for all 73 | # documents. 74 | #default_role = None 75 | 76 | # If true, '()' will be appended to :func: etc. cross-reference text. 77 | #add_function_parentheses = True 78 | 79 | # If true, the current module name will be prepended to all description 80 | # unit titles (such as .. function::). 81 | #add_module_names = True 82 | 83 | # If true, sectionauthor and moduleauthor directives will be shown in the 84 | # output. They are ignored by default. 85 | #show_authors = False 86 | 87 | # The name of the Pygments (syntax highlighting) style to use. 88 | pygments_style = 'sphinx' 89 | 90 | # A list of ignored prefixes for module index sorting. 91 | #modindex_common_prefix = [] 92 | 93 | # If true, keep warnings as "system message" paragraphs in the built documents. 94 | #keep_warnings = False 95 | 96 | 97 | # -- Options for HTML output ---------------------------------------------- 98 | 99 | # The theme to use for HTML and HTML Help pages. See the documentation for 100 | # a list of builtin themes. 101 | html_theme = 'default' 102 | 103 | # Theme options are theme-specific and customize the look and feel of a theme 104 | # further. For a list of options available for each theme, see the 105 | # documentation. 106 | #html_theme_options = {} 107 | 108 | # Add any paths that contain custom themes here, relative to this directory. 109 | #html_theme_path = [] 110 | 111 | # The name for this set of Sphinx documents. If None, it defaults to 112 | # " v documentation". 113 | #html_title = None 114 | 115 | # A shorter title for the navigation bar. Default is the same as html_title. 116 | #html_short_title = None 117 | 118 | # The name of an image file (relative to this directory) to place at the top 119 | # of the sidebar. 120 | #html_logo = None 121 | 122 | # The name of an image file (within the static path) to use as favicon of the 123 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 124 | # pixels large. 125 | #html_favicon = None 126 | 127 | # Add any paths that contain custom static files (such as style sheets) here, 128 | # relative to this directory. They are copied after the builtin static files, 129 | # so a file named "default.css" will overwrite the builtin "default.css". 130 | html_static_path = ['_static'] 131 | 132 | # Add any extra paths that contain custom files (such as robots.txt or 133 | # .htaccess) here, relative to this directory. These files are copied 134 | # directly to the root of the documentation. 135 | #html_extra_path = [] 136 | 137 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 138 | # using the given strftime format. 139 | #html_last_updated_fmt = '%b %d, %Y' 140 | 141 | # If true, SmartyPants will be used to convert quotes and dashes to 142 | # typographically correct entities. 143 | #html_use_smartypants = True 144 | 145 | # Custom sidebar templates, maps document names to template names. 146 | #html_sidebars = {} 147 | 148 | # Additional templates that should be rendered to pages, maps page names to 149 | # template names. 150 | #html_additional_pages = {} 151 | 152 | # If false, no module index is generated. 153 | #html_domain_indices = True 154 | 155 | # If false, no index is generated. 156 | #html_use_index = True 157 | 158 | # If true, the index is split into individual pages for each letter. 159 | #html_split_index = False 160 | 161 | # If true, links to the reST sources are added to the pages. 162 | #html_show_sourcelink = True 163 | 164 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 165 | #html_show_sphinx = True 166 | 167 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 168 | #html_show_copyright = True 169 | 170 | # If true, an OpenSearch description file will be output, and all pages will 171 | # contain a tag referring to it. The value of this option must be the 172 | # base URL from which the finished HTML is served. 173 | #html_use_opensearch = '' 174 | 175 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 176 | #html_file_suffix = None 177 | 178 | # Output file base name for HTML help builder. 179 | htmlhelp_basename = 'jamodoc' 180 | 181 | 182 | # -- Options for LaTeX output --------------------------------------------- 183 | 184 | latex_elements = { 185 | # The paper size ('letterpaper' or 'a4paper'). 186 | #'papersize': 'letterpaper', 187 | 188 | # The font size ('10pt', '11pt' or '12pt'). 189 | #'pointsize': '10pt', 190 | 191 | # Additional stuff for the LaTeX preamble. 192 | #'preamble': '', 193 | } 194 | 195 | # Grouping the document tree into LaTeX files. List of tuples 196 | # (source start file, target name, title, 197 | # author, documentclass [howto, manual, or own class]). 198 | latex_documents = [ 199 | ('index', 'jamo.tex', u'jamo Documentation', 200 | u'Joshua Dong', 'manual'), 201 | ] 202 | 203 | # The name of an image file (relative to this directory) to place at the top of 204 | # the title page. 205 | #latex_logo = None 206 | 207 | # For "manual" documents, if this is true, then toplevel headings are parts, 208 | # not chapters. 209 | #latex_use_parts = False 210 | 211 | # If true, show page references after internal links. 212 | #latex_show_pagerefs = False 213 | 214 | # If true, show URL addresses after external links. 215 | #latex_show_urls = False 216 | 217 | # Documents to append as an appendix to all manuals. 218 | #latex_appendices = [] 219 | 220 | # If false, no module index is generated. 221 | #latex_domain_indices = True 222 | 223 | 224 | # -- Options for manual page output --------------------------------------- 225 | 226 | # One entry per manual page. List of tuples 227 | # (source start file, name, description, authors, manual section). 228 | man_pages = [ 229 | ('index', 'jamo', u'jamo Documentation', 230 | [u'Joshua Dong'], 1) 231 | ] 232 | 233 | # If true, show URL addresses after external links. 234 | #man_show_urls = False 235 | 236 | 237 | # -- Options for Texinfo output ------------------------------------------- 238 | 239 | # Grouping the document tree into Texinfo files. List of tuples 240 | # (source start file, target name, title, author, 241 | # dir menu entry, description, category) 242 | texinfo_documents = [ 243 | ('index', 'jamo', u'jamo Documentation', 244 | u'Joshua Dong', 'jamo', 'One line description of project.', 245 | 'Miscellaneous'), 246 | ] 247 | 248 | # Documents to append as an appendix to all manuals. 249 | #texinfo_appendices = [] 250 | 251 | # If false, no module index is generated. 252 | #texinfo_domain_indices = True 253 | 254 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 255 | #texinfo_show_urls = 'footnote' 256 | 257 | # If true, do not generate a @detailmenu in the "Top" node's menu. 258 | #texinfo_no_detailmenu = False 259 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. _Jamo: 2 | 3 | ============================ 4 | A Guide to using Python-Jamo 5 | ============================ 6 | 7 | `Hangul `_ is a modern writing system 8 | that originated in 1443 to represent the Korean language. It uses an alphabet 9 | of 24 consonants and vowels, each of which are called **jamo** (자모, 字母). 10 | 11 | Let's analyze Korean phonemes by decomposing some Hangul. Using the individual 12 | jamo characters, we can construct some Hangul afterwards. 13 | 14 | 15 | Hangul Decomposition 16 | -------------------- 17 | 18 | The python jamo library aims to provide a straightforward interface to Hangul 19 | decomposition:: 20 | 21 | >>> from jamo import h2j 22 | >>> h2j("한굴") 23 | '한굴' 24 | 25 | Notice that the characters may have display issues because they are from the 26 | U+11xx jamo code block. This is because there are actually two sets of jamo in 27 | Unicode. Computers use jamo from the U+31xx code block, known as **Hangul 28 | Compatibility Jamo**, here on referenced as *HCJ*. To render HCJ instead of 29 | U+11xx jamo:: 30 | 31 | >>> from jamo import h2j, j2hcj 32 | >>> j2hcj(h2j("한굴")) 33 | 'ㅎㅏㄴㄱㅜㄹ' 34 | >>> j2hcj(h2j("자모=字母=jamo")) 35 | 'ㅈㅏㅁㅗ=字母=jamo' 36 | 37 | Here we convert the Hangul characters to U+11xx jamo characters, then convert 38 | them to HCJ for more uniform display. 39 | 40 | If you are curious, learn more about the differences between U+11xx and U+31xx 41 | jamo at :ref:`unicode_tutorial`. Related, Gernot Katzers has an excellent 42 | writeup on `Hangul representation in unicode`_ that is well worth a read. 43 | 44 | 45 | Hangul Synthesis 46 | ---------------- 47 | 48 | Hangul synthesis combines a lead, vowel, and optional tail to form a single 49 | jamo character:: 50 | 51 | >>> from jamo import j2h 52 | >>> j2h('ㅇ', 'ㅕ', 'ㅇ') 53 | 영 54 | >>> j2h('ㅇ', 'ㅓ') 55 | 어 56 | 57 | A little hack you can use is the splat operator ``*`` if your arguments are 58 | in string form:: 59 | 60 | >>> j2h(*'ㅇㅕㅇ') 61 | 영 62 | >>> j2h(*'ㅇㅓ') 63 | 어 64 | 65 | 66 | Large Texts 67 | ------------ 68 | 69 | When working with large files, we will end up with lots of output. To handle 70 | large files, it is recommended to use the provided generator functions:: 71 | 72 | >>> from jamo import hangul_to_jamo 73 | >>> long_story = open("구운몽.txt", 'r').read() 74 | >>> hangul_to_jamo(long_story) 75 | at 0xdeadbeef9001> 76 | 77 | To produce HCJ output:: 78 | 79 | >>> from jamo import hangul_to_jamo, hangul_to_hcj 80 | >>> long_story = open("구운몽.txt", 'r').read() 81 | >>> hangul_to_hcj(hangul_to_jamo(long_story)) 82 | at 0x12cafebabe34> 83 | 84 | 85 | Naming Conventions 86 | ------------------ 87 | 88 | The python-jamo module is designed to be simple and lightweight. There are no 89 | classes to wrap Hangul strings or jamo characters. Below are two important 90 | string generator pairs: 91 | 92 | +---------------------+-----------------+ 93 | | Generator Function | String Function | 94 | +=====================+=================+ 95 | | jamo_to_hcj | j2hcj | 96 | +---------------------+-----------------+ 97 | | hangul_to_jamo | h2j | 98 | +---------------------+-----------------+ 99 | 100 | Note that most functions in the module are named in pairs, where the function 101 | with the shorter name is the one best for casual use, and the function with the 102 | longer name returns a generator and is probably better for analytic 103 | applications. 104 | 105 | Module output favors characters whenever possible. 106 | 107 | 108 | Examples 109 | -------- 110 | 111 | Basic examples: :ref:`sample_usage`. 112 | 113 | .. Some example uses of jamo are shown below: 114 | .. 115 | .. * `Highlight tricky vocabulary terms` (soon) 116 | .. * `Frequency analysis of heads, vowels, and tails in Hangul` (soon) 117 | .. * `Jamo-level trigram analysis for Hangul` (soon) 118 | .. * `Jamo-level autocompletion` (soon) 119 | 120 | 121 | .. _Hangul representation in unicode: http://gernot-katzers-spice-pages.com/var/korean_hangul_unicode.html 122 | -------------------------------------------------------------------------------- /docs/source/sample.rst: -------------------------------------------------------------------------------- 1 | .. _sample_usage: 2 | 3 | 4 | ======= 5 | Samples 6 | ======= 7 | 8 | Listed here are basic use cases of the jamo module. 9 | 10 | Checking Character Types 11 | ------------------------ 12 | 13 | Functions exist to determine character types:: 14 | 15 | >>> from jamo import (is_jamo, is_jamo_modern, 16 | is_hcj, is_hcj_modern, 17 | is_hangul_char) 18 | >>> is_jamo("한") 19 | False 20 | >>> is_jamo("ㅎ") 21 | True 22 | >>> is_jamo_modern("ㆄ") 23 | False 24 | >>> is_jamo_modern("ㅍ") 25 | True 26 | >>> is_hcj(chr(0x1100)) 27 | False 28 | >>> is_hcj(chr(0x3131)) 29 | True 30 | >>> is_hcj_modern("ㄱ") 31 | True 32 | >>> is_hangul_char("한") 33 | True 34 | >>> ''.join(_ for _ in "한글=ㅎㅏㄴㄱㅡㄹ" if is_jamo(_)) 35 | 'ㅎㅏㄴㄱㅡㄹ' 36 | 37 | These functions require a single character as input. Note that ``is_jamo`` and 38 | ``is_jamo_modern`` return ``True`` for HCJ characters. 39 | 40 | 41 | Jamo Position 42 | -------------- 43 | 44 | The function ``get_jamo_class`` returns a string 45 | representing the position of the jamo character. Initial consonants are 46 | represented with ``"lead"``, vowels with ``"vowel"``, and final consonants with 47 | ``"tail"``:: 48 | 49 | >>> from jamo import get_jamo_class 50 | >>> get_jamo_class("ᄋ") 51 | 'lead' 52 | >>> get_jamo_class("ᆐ") 53 | 'vowel' 54 | >>> get_jamo_class("ᆼ") 55 | 'tail' 56 | >>> get_jamo_class("ㅁ") 57 | Could not parse jamo: U+3141 58 | Traceback (most recent call last): 59 | File "", line 1, in 60 | File "/home/joshua/git/jamo/jamo/jamo.py", line 168, in get_jamo_class 61 | raise InvalidJamoError("Invalid or classless jamo argument.", jamo) 62 | jamo.jamo.InvalidJamoError: Invalid or classless jamo argument. 63 | 64 | This function does not accept HCJ consonants, as they are ambiguous. 65 | 66 | 67 | Converting between Jamo and HCJ 68 | ------------------------------- 69 | 70 | Converting from jamo to HCJ is straightforward:: 71 | 72 | >>> from jamo import j2hcj 73 | >>> j2hcj("자모: ᄀ ᄁ ᄂ ᄃ ᄄ ᄅ") 74 | '자모: ㄱ ㄲ ㄴ ㄷ ㄸ ㄹ' 75 | 76 | The associated generator is ``jamo_to_hcj``. 77 | 78 | Converting from HCJ to jamo is less simple:: 79 | 80 | >>> from jamo import hcj2j 81 | >>> hcj2j("ㅇ", "lead") 82 | 'ᄋ' 83 | >>> hcj2j("ㅇ", "tail") 84 | 'ᆼ' 85 | >>> hcj2j("ㅏ", "vowel") 86 | 'ᅡ' 87 | >>> hcj2j("ㅏ") 88 | 'ᅡ' 89 | 90 | The class must be given for consonants, and must be either the string 91 | ``"lead"```, ``"vowel"``, or ``"tail"``. 92 | 93 | Both of these functions have corresponding generators: ``jamo_to_hcj`` and 94 | ``hcj_to_jamo``, respectively. 95 | 96 | 97 | Converting from Hangul to Jamo 98 | ------------------------------ 99 | 100 | Converting from Hangul to jamo is straightforward:: 101 | 102 | from jamo import h2j 103 | >>> h2j("What is 한글?") 104 | 'What is 한글?' 105 | 106 | or more commonly:: 107 | 108 | from jamo import h2j, j2hcj 109 | >>> j2hcj(h2j("What is 한글?")) 110 | 'What is ㅎㅏㄴㄱㅡㄹ?' 111 | 112 | This produces HCJ output and is preferable for font compatibility on the web. 113 | 114 | 115 | Building Hangul Characters 116 | -------------------------- 117 | 118 | Building Hangul from jamo is easy, but must be done character-by-character:: 119 | 120 | from jamo import j2h 121 | >>> j2h("ㅈ", "ㅏ") 122 | '자' 123 | >>> j2h("ㅎ", "ㅏ", "ㄴ") 124 | '한' 125 | 126 | Note that HCJ and jamo inputs are both supported. 127 | -------------------------------------------------------------------------------- /docs/source/unicode.rst: -------------------------------------------------------------------------------- 1 | .. _unicode_tutorial: 2 | 3 | 4 | ================================================ 5 | Introduction to Hangul Representation in Unicode 6 | ================================================ 7 | 8 | Most jamo is represented in Unicode 7.0 in the codeblocks U+11xx and U+31xx. 9 | The extended-A and extended-B blocks are in different codeblocks. 10 | U+11xx represents regular jamo, with inital, vowel, and ending positions. 11 | U+31xx jamo is designated **Hangul Compatibility Jamo**, or *HCJ*. These jamo have 12 | no distiction between initial and final positions, but are much better supported for display. 13 | -------------------------------------------------------------------------------- /jamo/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from .jamo import (JAMO_LEADS, JAMO_LEADS_MODERN, 3 | JAMO_VOWELS, JAMO_VOWELS_MODERN, 4 | JAMO_TAILS, JAMO_TAILS_MODERN, 5 | is_jamo, is_jamo_modern, 6 | is_hcj, is_hcj_modern, 7 | is_hangul_char, 8 | get_jamo_class, 9 | jamo_to_hcj, j2hcj, 10 | hcj_to_jamo, hcj2j, 11 | jamo_to_hangul, j2h, 12 | hangul_to_jamo, h2j, 13 | compose_jamo, decompose_jamo, 14 | is_jamo_compound, 15 | InvalidJamoError) 16 | __version__ = '0.4.3' 17 | -------------------------------------------------------------------------------- /jamo/data/U+11xx.json: -------------------------------------------------------------------------------- 1 | { 2 | "\u1100": "HANGUL CHOSEONG KIYEOK", 3 | "\u1101": "HANGUL CHOSEONG SSANGKIYEOK", 4 | "\u1102": "HANGUL CHOSEONG NIEUN", 5 | "\u1103": "HANGUL CHOSEONG TIKEUT", 6 | "\u1104": "HANGUL CHOSEONG SSANGTIKEUT", 7 | "\u1105": "HANGUL CHOSEONG RIEUL", 8 | "\u1106": "HANGUL CHOSEONG MIEUM", 9 | "\u1107": "HANGUL CHOSEONG PIEUP", 10 | "\u1108": "HANGUL CHOSEONG SSANGPIEUP", 11 | "\u1109": "HANGUL CHOSEONG SIOS", 12 | "\u110a": "HANGUL CHOSEONG SSANGSIOS", 13 | "\u110b": "HANGUL CHOSEONG IEUNG", 14 | "\u110c": "HANGUL CHOSEONG CIEUC", 15 | "\u110d": "HANGUL CHOSEONG SSANGCIEUC", 16 | "\u110e": "HANGUL CHOSEONG CHIEUCH", 17 | "\u110f": "HANGUL CHOSEONG KHIEUKH", 18 | "\u1110": "HANGUL CHOSEONG THIEUTH", 19 | "\u1111": "HANGUL CHOSEONG PHIEUPH", 20 | "\u1112": "HANGUL CHOSEONG HIEUH", 21 | "\u1113": "HANGUL CHOSEONG NIEUN-KIYEOK", 22 | "\u1114": "HANGUL CHOSEONG SSANGNIEUN", 23 | "\u1115": "HANGUL CHOSEONG NIEUN-TIKEUT", 24 | "\u1116": "HANGUL CHOSEONG NIEUN-PIEUP", 25 | "\u1117": "HANGUL CHOSEONG TIKEUT-KIYEOK", 26 | "\u1118": "HANGUL CHOSEONG RIEUL-NIEUN", 27 | "\u1119": "HANGUL CHOSEONG SSANGRIEUL", 28 | "\u111a": "HANGUL CHOSEONG RIEUL-HIEUH", 29 | "\u111b": "HANGUL CHOSEONG KAPYEOUNRIEUL", 30 | "\u111c": "HANGUL CHOSEONG MIEUM-PIEUP", 31 | "\u111d": "HANGUL CHOSEONG KAPYEOUNMIEUM", 32 | "\u111e": "HANGUL CHOSEONG PIEUP-KIYEOK", 33 | "\u111f": "HANGUL CHOSEONG PIEUP-NIEUN", 34 | "\u1120": "HANGUL CHOSEONG PIEUP-TIKEUT", 35 | "\u1121": "HANGUL CHOSEONG PIEUP-SIOS", 36 | "\u1122": "HANGUL CHOSEONG PIEUP-SIOS-KIYEOK", 37 | "\u1123": "HANGUL CHOSEONG PIEUP-SIOS-TIKEUT", 38 | "\u1124": "HANGUL CHOSEONG PIEUP-SIOS-PIEUP", 39 | "\u1125": "HANGUL CHOSEONG PIEUP-SSANGSIOS", 40 | "\u1126": "HANGUL CHOSEONG PIEUP-SIOS-CIEUC", 41 | "\u1127": "HANGUL CHOSEONG PIEUP-CIEUC", 42 | "\u1128": "HANGUL CHOSEONG PIEUP-CHIEUCH", 43 | "\u1129": "HANGUL CHOSEONG PIEUP-THIEUTH", 44 | "\u112a": "HANGUL CHOSEONG PIEUP-PHIEUPH", 45 | "\u112b": "HANGUL CHOSEONG KAPYEOUNPIEUP", 46 | "\u112c": "HANGUL CHOSEONG KAPYEOUNSSANGPIEUP", 47 | "\u112d": "HANGUL CHOSEONG SIOS-KIYEOK", 48 | "\u112e": "HANGUL CHOSEONG SIOS-NIEUN", 49 | "\u112f": "HANGUL CHOSEONG SIOS-TIKEUT", 50 | "\u1130": "HANGUL CHOSEONG SIOS-RIEUL", 51 | "\u1131": "HANGUL CHOSEONG SIOS-MIEUM", 52 | "\u1132": "HANGUL CHOSEONG SIOS-PIEUP", 53 | "\u1133": "HANGUL CHOSEONG SIOS-PIEUP-KIYEOK", 54 | "\u1134": "HANGUL CHOSEONG SIOS-SSANGSIOS", 55 | "\u1135": "HANGUL CHOSEONG SIOS-IEUNG", 56 | "\u1136": "HANGUL CHOSEONG SIOS-CIEUC", 57 | "\u1137": "HANGUL CHOSEONG SIOS-CHIEUCH", 58 | "\u1138": "HANGUL CHOSEONG SIOS-KHIEUKH", 59 | "\u1139": "HANGUL CHOSEONG SIOS-THIEUTH", 60 | "\u113a": "HANGUL CHOSEONG SIOS-PHIEUPH", 61 | "\u113b": "HANGUL CHOSEONG SIOS-HIEUH", 62 | "\u113c": "HANGUL CHOSEONG CHITUEUMSIOS", 63 | "\u113d": "HANGUL CHOSEONG CHITUEUMSSANGSIOS", 64 | "\u113e": "HANGUL CHOSEONG CEONGCHIEUMSIOS", 65 | "\u113f": "HANGUL CHOSEONG CEONGCHIEUMSSANGSIOS", 66 | "\u1140": "HANGUL CHOSEONG PANSIOS", 67 | "\u1141": "HANGUL CHOSEONG IEUNG-KIYEOK", 68 | "\u1142": "HANGUL CHOSEONG IEUNG-TIKEUT", 69 | "\u1143": "HANGUL CHOSEONG IEUNG-MIEUM", 70 | "\u1144": "HANGUL CHOSEONG IEUNG-PIEUP", 71 | "\u1145": "HANGUL CHOSEONG IEUNG-SIOS", 72 | "\u1146": "HANGUL CHOSEONG IEUNG-PANSIOS", 73 | "\u1147": "HANGUL CHOSEONG SSANGIEUNG", 74 | "\u1148": "HANGUL CHOSEONG IEUNG-CIEUC", 75 | "\u1149": "HANGUL CHOSEONG IEUNG-CHIEUCH", 76 | "\u114a": "HANGUL CHOSEONG IEUNG-THIEUTH", 77 | "\u114b": "HANGUL CHOSEONG IEUNG-PHIEUPH", 78 | "\u114c": "HANGUL CHOSEONG YESIEUNG", 79 | "\u114d": "HANGUL CHOSEONG CIEUC-IEUNG", 80 | "\u114e": "HANGUL CHOSEONG CHITUEUMCIEUC", 81 | "\u114f": "HANGUL CHOSEONG CHITUEUMSSANGCIEUC", 82 | "\u1150": "HANGUL CHOSEONG CEONGCHIEUMCIEUC", 83 | "\u1151": "HANGUL CHOSEONG CEONGCHIEUMSSANGCIEUC", 84 | "\u1152": "HANGUL CHOSEONG CHIEUCH-KHIEUKH", 85 | "\u1153": "HANGUL CHOSEONG CHIEUCH-HIEUH", 86 | "\u1154": "HANGUL CHOSEONG CHITUEUMCHIEUCH", 87 | "\u1155": "HANGUL CHOSEONG CEONGCHIEUMCHIEUCH", 88 | "\u1156": "HANGUL CHOSEONG PHIEUPH-PIEUP", 89 | "\u1157": "HANGUL CHOSEONG KAPYEOUNPHIEUPH", 90 | "\u1158": "HANGUL CHOSEONG SSANGHIEUH", 91 | "\u1159": "HANGUL CHOSEONG YEORINHIEUH", 92 | "\u115a": "HANGUL CHOSEONG KIYEOK-TIKEUT", 93 | "\u115b": "HANGUL CHOSEONG NIEUN-SIOS", 94 | "\u115c": "HANGUL CHOSEONG NIEUN-CIEUC", 95 | "\u115d": "HANGUL CHOSEONG NIEUN-HIEUH", 96 | "\u115e": "HANGUL CHOSEONG TIKEUT-RIEUL", 97 | "\u115f": "HANGUL CHOSEONG FILLER", 98 | "\u1160": "HANGUL JUNGSEONG FILLER", 99 | "\u1161": "HANGUL JUNGSEONG A", 100 | "\u1162": "HANGUL JUNGSEONG AE", 101 | "\u1163": "HANGUL JUNGSEONG YA", 102 | "\u1164": "HANGUL JUNGSEONG YAE", 103 | "\u1165": "HANGUL JUNGSEONG EO", 104 | "\u1166": "HANGUL JUNGSEONG E", 105 | "\u1167": "HANGUL JUNGSEONG YEO", 106 | "\u1168": "HANGUL JUNGSEONG YE", 107 | "\u1169": "HANGUL JUNGSEONG O", 108 | "\u116a": "HANGUL JUNGSEONG WA", 109 | "\u116b": "HANGUL JUNGSEONG WAE", 110 | "\u116c": "HANGUL JUNGSEONG OE", 111 | "\u116d": "HANGUL JUNGSEONG YO", 112 | "\u116e": "HANGUL JUNGSEONG U", 113 | "\u116f": "HANGUL JUNGSEONG WEO", 114 | "\u1170": "HANGUL JUNGSEONG WE", 115 | "\u1171": "HANGUL JUNGSEONG WI", 116 | "\u1172": "HANGUL JUNGSEONG YU", 117 | "\u1173": "HANGUL JUNGSEONG EU", 118 | "\u1174": "HANGUL JUNGSEONG YI", 119 | "\u1175": "HANGUL JUNGSEONG I", 120 | "\u1176": "HANGUL JUNGSEONG A-O", 121 | "\u1177": "HANGUL JUNGSEONG A-U", 122 | "\u1178": "HANGUL JUNGSEONG YA-O", 123 | "\u1179": "HANGUL JUNGSEONG YA-YO", 124 | "\u117a": "HANGUL JUNGSEONG EO-O", 125 | "\u117b": "HANGUL JUNGSEONG EO-U", 126 | "\u117c": "HANGUL JUNGSEONG EO-EU", 127 | "\u117d": "HANGUL JUNGSEONG YEO-O", 128 | "\u117e": "HANGUL JUNGSEONG YEO-U", 129 | "\u117f": "HANGUL JUNGSEONG O-EO", 130 | "\u1180": "HANGUL JUNGSEONG O-E", 131 | "\u1181": "HANGUL JUNGSEONG O-YE", 132 | "\u1182": "HANGUL JUNGSEONG O-O", 133 | "\u1183": "HANGUL JUNGSEONG O-U", 134 | "\u1184": "HANGUL JUNGSEONG YO-YA", 135 | "\u1185": "HANGUL JUNGSEONG YO-YAE", 136 | "\u1186": "HANGUL JUNGSEONG YO-YEO", 137 | "\u1187": "HANGUL JUNGSEONG YO-O", 138 | "\u1188": "HANGUL JUNGSEONG YO-I", 139 | "\u1189": "HANGUL JUNGSEONG U-A", 140 | "\u118a": "HANGUL JUNGSEONG U-AE", 141 | "\u118b": "HANGUL JUNGSEONG U-EO-EU", 142 | "\u118c": "HANGUL JUNGSEONG U-YE", 143 | "\u118d": "HANGUL JUNGSEONG U-U", 144 | "\u118e": "HANGUL JUNGSEONG YU-A", 145 | "\u118f": "HANGUL JUNGSEONG YU-EO", 146 | "\u1190": "HANGUL JUNGSEONG YU-E", 147 | "\u1191": "HANGUL JUNGSEONG YU-YEO", 148 | "\u1192": "HANGUL JUNGSEONG YU-YE", 149 | "\u1193": "HANGUL JUNGSEONG YU-U", 150 | "\u1194": "HANGUL JUNGSEONG YU-I", 151 | "\u1195": "HANGUL JUNGSEONG EU-U", 152 | "\u1196": "HANGUL JUNGSEONG EU-EU", 153 | "\u1197": "HANGUL JUNGSEONG YI-U", 154 | "\u1198": "HANGUL JUNGSEONG I-A", 155 | "\u1199": "HANGUL JUNGSEONG I-YA", 156 | "\u119a": "HANGUL JUNGSEONG I-O", 157 | "\u119b": "HANGUL JUNGSEONG I-U", 158 | "\u119c": "HANGUL JUNGSEONG I-EU", 159 | "\u119d": "HANGUL JUNGSEONG I-ARAEA", 160 | "\u119e": "HANGUL JUNGSEONG ARAEA", 161 | "\u119f": "HANGUL JUNGSEONG ARAEA-EO", 162 | "\u11a0": "HANGUL JUNGSEONG ARAEA-U", 163 | "\u11a1": "HANGUL JUNGSEONG ARAEA-I", 164 | "\u11a2": "HANGUL JUNGSEONG SSANGARAEA", 165 | "\u11a3": "HANGUL JUNGSEONG A-EU", 166 | "\u11a4": "HANGUL JUNGSEONG YA-U", 167 | "\u11a5": "HANGUL JUNGSEONG YEO-YA", 168 | "\u11a6": "HANGUL JUNGSEONG O-YA", 169 | "\u11a7": "HANGUL JUNGSEONG O-YAE", 170 | "\u11a8": "HANGUL JONGSEONG KIYEOK", 171 | "\u11a9": "HANGUL JONGSEONG SSANGKIYEOK", 172 | "\u11aa": "HANGUL JONGSEONG KIYEOK-SIOS", 173 | "\u11ab": "HANGUL JONGSEONG NIEUN", 174 | "\u11ac": "HANGUL JONGSEONG NIEUN-CIEUC", 175 | "\u11ad": "HANGUL JONGSEONG NIEUN-HIEUH", 176 | "\u11ae": "HANGUL JONGSEONG TIKEUT", 177 | "\u11af": "HANGUL JONGSEONG RIEUL", 178 | "\u11b0": "HANGUL JONGSEONG RIEUL-KIYEOK", 179 | "\u11b1": "HANGUL JONGSEONG RIEUL-MIEUM", 180 | "\u11b2": "HANGUL JONGSEONG RIEUL-PIEUP", 181 | "\u11b3": "HANGUL JONGSEONG RIEUL-SIOS", 182 | "\u11b4": "HANGUL JONGSEONG RIEUL-THIEUTH", 183 | "\u11b5": "HANGUL JONGSEONG RIEUL-PHIEUPH", 184 | "\u11b6": "HANGUL JONGSEONG RIEUL-HIEUH", 185 | "\u11b7": "HANGUL JONGSEONG MIEUM", 186 | "\u11b8": "HANGUL JONGSEONG PIEUP", 187 | "\u11b9": "HANGUL JONGSEONG PIEUP-SIOS", 188 | "\u11ba": "HANGUL JONGSEONG SIOS", 189 | "\u11bb": "HANGUL JONGSEONG SSANGSIOS", 190 | "\u11bc": "HANGUL JONGSEONG IEUNG", 191 | "\u11bd": "HANGUL JONGSEONG CIEUC", 192 | "\u11be": "HANGUL JONGSEONG CHIEUCH", 193 | "\u11bf": "HANGUL JONGSEONG KHIEUKH", 194 | "\u11c0": "HANGUL JONGSEONG THIEUTH", 195 | "\u11c1": "HANGUL JONGSEONG PHIEUPH", 196 | "\u11c2": "HANGUL JONGSEONG HIEUH", 197 | "\u11c3": "HANGUL JONGSEONG KIYEOK-RIEUL", 198 | "\u11c4": "HANGUL JONGSEONG KIYEOK-SIOS-KIYEOK", 199 | "\u11c5": "HANGUL JONGSEONG NIEUN-KIYEOK", 200 | "\u11c6": "HANGUL JONGSEONG NIEUN-TIKEUT", 201 | "\u11c7": "HANGUL JONGSEONG NIEUN-SIOS", 202 | "\u11c8": "HANGUL JONGSEONG NIEUN-PANSIOS", 203 | "\u11c9": "HANGUL JONGSEONG NIEUN-THIEUTH", 204 | "\u11ca": "HANGUL JONGSEONG TIKEUT-KIYEOK", 205 | "\u11cb": "HANGUL JONGSEONG TIKEUT-RIEUL", 206 | "\u11cc": "HANGUL JONGSEONG RIEUL-KIYEOK-SIOS", 207 | "\u11cd": "HANGUL JONGSEONG RIEUL-NIEUN", 208 | "\u11ce": "HANGUL JONGSEONG RIEUL-TIKEUT", 209 | "\u11cf": "HANGUL JONGSEONG RIEUL-TIKEUT-HIEUH", 210 | "\u11d0": "HANGUL JONGSEONG SSANGRIEUL", 211 | "\u11d1": "HANGUL JONGSEONG RIEUL-MIEUM-KIYEOK", 212 | "\u11d2": "HANGUL JONGSEONG RIEUL-MIEUM-SIOS", 213 | "\u11d3": "HANGUL JONGSEONG RIEUL-PIEUP-SIOS", 214 | "\u11d4": "HANGUL JONGSEONG RIEUL-PIEUP-HIEUH", 215 | "\u11d5": "HANGUL JONGSEONG RIEUL-KAPYEOUNPIEUP", 216 | "\u11d6": "HANGUL JONGSEONG RIEUL-SSANGSIOS", 217 | "\u11d7": "HANGUL JONGSEONG RIEUL-PANSIOS", 218 | "\u11d8": "HANGUL JONGSEONG RIEUL-KHIEUKH", 219 | "\u11d9": "HANGUL JONGSEONG RIEUL-YEORINHIEUH", 220 | "\u11da": "HANGUL JONGSEONG MIEUM-KIYEOK", 221 | "\u11db": "HANGUL JONGSEONG MIEUM-RIEUL", 222 | "\u11dc": "HANGUL JONGSEONG MIEUM-PIEUP", 223 | "\u11dd": "HANGUL JONGSEONG MIEUM-SIOS", 224 | "\u11de": "HANGUL JONGSEONG MIEUM-SSANGSIOS", 225 | "\u11df": "HANGUL JONGSEONG MIEUM-PANSIOS", 226 | "\u11e0": "HANGUL JONGSEONG MIEUM-CHIEUCH", 227 | "\u11e1": "HANGUL JONGSEONG MIEUM-HIEUH", 228 | "\u11e2": "HANGUL JONGSEONG KAPYEOUNMIEUM", 229 | "\u11e3": "HANGUL JONGSEONG PIEUP-RIEUL", 230 | "\u11e4": "HANGUL JONGSEONG PIEUP-PHIEUPH", 231 | "\u11e5": "HANGUL JONGSEONG PIEUP-HIEUH", 232 | "\u11e6": "HANGUL JONGSEONG KAPYEOUNPIEUP", 233 | "\u11e7": "HANGUL JONGSEONG SIOS-KIYEOK", 234 | "\u11e8": "HANGUL JONGSEONG SIOS-TIKEUT", 235 | "\u11e9": "HANGUL JONGSEONG SIOS-RIEUL", 236 | "\u11ea": "HANGUL JONGSEONG SIOS-PIEUP", 237 | "\u11eb": "HANGUL JONGSEONG PANSIOS", 238 | "\u11ec": "HANGUL JONGSEONG IEUNG-KIYEOK", 239 | "\u11ed": "HANGUL JONGSEONG IEUNG-SSANGKIYEOK", 240 | "\u11ee": "HANGUL JONGSEONG SSANGIEUNG", 241 | "\u11ef": "HANGUL JONGSEONG IEUNG-KHIEUKH", 242 | "\u11f0": "HANGUL JONGSEONG YESIEUNG", 243 | "\u11f1": "HANGUL JONGSEONG YESIEUNG-SIOS", 244 | "\u11f2": "HANGUL JONGSEONG YESIEUNG-PANSIOS", 245 | "\u11f3": "HANGUL JONGSEONG PHIEUPH-PIEUP", 246 | "\u11f4": "HANGUL JONGSEONG KAPYEOUNPHIEUPH", 247 | "\u11f5": "HANGUL JONGSEONG HIEUH-NIEUN", 248 | "\u11f6": "HANGUL JONGSEONG HIEUH-RIEUL", 249 | "\u11f7": "HANGUL JONGSEONG HIEUH-MIEUM", 250 | "\u11f8": "HANGUL JONGSEONG HIEUH-PIEUP", 251 | "\u11f9": "HANGUL JONGSEONG YEORINHIEUH", 252 | "\u11fa": "HANGUL JONGSEONG KIYEOK-NIEUN", 253 | "\u11fb": "HANGUL JONGSEONG KIYEOK-PIEUP", 254 | "\u11fc": "HANGUL JONGSEONG KIYEOK-CHIEUCH", 255 | "\u11fd": "HANGUL JONGSEONG KIYEOK-KHIEUKH", 256 | "\u11fe": "HANGUL JONGSEONG KIYEOK-HIEUH", 257 | "\u11ff": "HANGUL JONGSEONG SSANGNIEUN" 258 | } -------------------------------------------------------------------------------- /jamo/data/U+31xx.json: -------------------------------------------------------------------------------- 1 | { 2 | "\u3131": "HANGUL LETTER KIYEOK", 3 | "\u3132": "HANGUL LETTER SSANGKIYEOK", 4 | "\u3133": "HANGUL LETTER KIYEOK-SIOS", 5 | "\u3134": "HANGUL LETTER NIEUN", 6 | "\u3135": "HANGUL LETTER NIEUN-CIEUC", 7 | "\u3136": "HANGUL LETTER NIEUN-HIEUH", 8 | "\u3137": "HANGUL LETTER TIKEUT", 9 | "\u3138": "HANGUL LETTER SSANGTIKEUT", 10 | "\u3139": "HANGUL LETTER RIEUL", 11 | "\u313a": "HANGUL LETTER RIEUL-KIYEOK", 12 | "\u313b": "HANGUL LETTER RIEUL-MIEUM", 13 | "\u313c": "HANGUL LETTER RIEUL-PIEUP", 14 | "\u313d": "HANGUL LETTER RIEUL-SIOS", 15 | "\u313e": "HANGUL LETTER RIEUL-THIEUTH", 16 | "\u313f": "HANGUL LETTER RIEUL-PHIEUPH", 17 | "\u3140": "HANGUL LETTER RIEUL-HIEUH", 18 | "\u3141": "HANGUL LETTER MIEUM", 19 | "\u3142": "HANGUL LETTER PIEUP", 20 | "\u3143": "HANGUL LETTER SSANGPIEUP", 21 | "\u3144": "HANGUL LETTER PIEUP-SIOS", 22 | "\u3145": "HANGUL LETTER SIOS", 23 | "\u3146": "HANGUL LETTER SSANGSIOS", 24 | "\u3147": "HANGUL LETTER IEUNG", 25 | "\u3148": "HANGUL LETTER CIEUC", 26 | "\u3149": "HANGUL LETTER SSANGCIEUC", 27 | "\u314a": "HANGUL LETTER CHIEUCH", 28 | "\u314b": "HANGUL LETTER KHIEUKH", 29 | "\u314c": "HANGUL LETTER THIEUTH", 30 | "\u314d": "HANGUL LETTER PHIEUPH", 31 | "\u314e": "HANGUL LETTER HIEUH", 32 | "\u314f": "HANGUL LETTER A", 33 | "\u3150": "HANGUL LETTER AE", 34 | "\u3151": "HANGUL LETTER YA", 35 | "\u3152": "HANGUL LETTER YAE", 36 | "\u3153": "HANGUL LETTER EO", 37 | "\u3154": "HANGUL LETTER E", 38 | "\u3155": "HANGUL LETTER YEO", 39 | "\u3156": "HANGUL LETTER YE", 40 | "\u3157": "HANGUL LETTER O", 41 | "\u3158": "HANGUL LETTER WA", 42 | "\u3159": "HANGUL LETTER WAE", 43 | "\u315a": "HANGUL LETTER OE", 44 | "\u315b": "HANGUL LETTER YO", 45 | "\u315c": "HANGUL LETTER U", 46 | "\u315d": "HANGUL LETTER WEO", 47 | "\u315e": "HANGUL LETTER WE", 48 | "\u315f": "HANGUL LETTER WI", 49 | "\u3160": "HANGUL LETTER YU", 50 | "\u3161": "HANGUL LETTER EU", 51 | "\u3162": "HANGUL LETTER YI", 52 | "\u3163": "HANGUL LETTER I", 53 | "\u3164": "HANGUL FILLER", 54 | "\u3165": "HANGUL LETTER SSANGNIEUN", 55 | "\u3166": "HANGUL LETTER NIEUN-TIKEUT", 56 | "\u3167": "HANGUL LETTER NIEUN-SIOS", 57 | "\u3168": "HANGUL LETTER NIEUN-PANSIOS", 58 | "\u3169": "HANGUL LETTER RIEUL-KIYEOK-SIOS", 59 | "\u316a": "HANGUL LETTER RIEUL-TIKEUT", 60 | "\u316b": "HANGUL LETTER RIEUL-PIEUP-SIOS", 61 | "\u316c": "HANGUL LETTER RIEUL-PANSIOS", 62 | "\u316d": "HANGUL LETTER RIEUL-YEORINHIEUH", 63 | "\u316e": "HANGUL LETTER MIEUM-PIEUP", 64 | "\u316f": "HANGUL LETTER MIEUM-SIOS", 65 | "\u3170": "HANGUL LETTER MIEUM-PANSIOS", 66 | "\u3171": "HANGUL LETTER KAPYEOUNMIEUM", 67 | "\u3172": "HANGUL LETTER PIEUP-KIYEOK", 68 | "\u3173": "HANGUL LETTER PIEUP-TIKEUT", 69 | "\u3174": "HANGUL LETTER PIEUP-SIOS-KIYEOK", 70 | "\u3175": "HANGUL LETTER PIEUP-SIOS-TIKEUT", 71 | "\u3176": "HANGUL LETTER PIEUP-CIEUC", 72 | "\u3177": "HANGUL LETTER PIEUP-THIEUTH", 73 | "\u3178": "HANGUL LETTER KAPYEOUNPIEUP", 74 | "\u3179": "HANGUL LETTER KAPYEOUNSSANGPIEUP", 75 | "\u317a": "HANGUL LETTER SIOS-KIYEOK", 76 | "\u317b": "HANGUL LETTER SIOS-NIEUN", 77 | "\u317c": "HANGUL LETTER SIOS-TIKEUT", 78 | "\u317d": "HANGUL LETTER SIOS-PIEUP", 79 | "\u317e": "HANGUL LETTER SIOS-CIEUC", 80 | "\u317f": "HANGUL LETTER PANSIOS", 81 | "\u3180": "HANGUL LETTER SSANGIEUNG", 82 | "\u3181": "HANGUL LETTER YESIEUNG", 83 | "\u3182": "HANGUL LETTER YESIEUNG-SIOS", 84 | "\u3183": "HANGUL LETTER YESIEUNG-PANSIOS", 85 | "\u3184": "HANGUL LETTER KAPYEOUNPHIEUPH", 86 | "\u3185": "HANGUL LETTER SSANGHIEUH", 87 | "\u3186": "HANGUL LETTER YEORINHIEUH", 88 | "\u3187": "HANGUL LETTER YO-YA", 89 | "\u3188": "HANGUL LETTER YO-YAE", 90 | "\u3189": "HANGUL LETTER YO-I", 91 | "\u318a": "HANGUL LETTER YU-YEO", 92 | "\u318b": "HANGUL LETTER YU-YE", 93 | "\u318c": "HANGUL LETTER YU-I", 94 | "\u318d": "HANGUL LETTER ARAEA", 95 | "\u318e": "HANGUL LETTER ARAEAE" 96 | } -------------------------------------------------------------------------------- /jamo/data/decompositions.json: -------------------------------------------------------------------------------- 1 | { 2 | "\u1101": ["\u1100", "\u1100"], 3 | "\u1104": ["\u1103", "\u1103"], 4 | "\u1108": ["\u1107", "\u1107"], 5 | "\u110a": ["\u1109", "\u1109"], 6 | "\u110d": ["\u110c", "\u110c"], 7 | "\u1113": ["\u1102", "\u1100"], 8 | "\u1114": ["\u1102", "\u1102"], 9 | "\u1115": ["\u1102", "\u1103"], 10 | "\u1116": ["\u1102", "\u1107"], 11 | "\u1117": ["\u1103", "\u1100"], 12 | "\u1118": ["\u1105", "\u1102"], 13 | "\u1119": ["\u1105", "\u1105"], 14 | "\u111a": ["\u1105", "\u1112"], 15 | "\u111b": ["\u1105", "\u110b"], 16 | "\u111c": ["\u1106", "\u1107"], 17 | "\u111d": ["\u1106", "\u110b"], 18 | "\u111e": ["\u1107", "\u1100"], 19 | "\u111f": ["\u1107", "\u1102"], 20 | "\u1120": ["\u1107", "\u1103"], 21 | "\u1121": ["\u1107", "\u1109"], 22 | "\u1122": ["\u1107", "\u1109", "\u1100"], 23 | "\u1123": ["\u1107", "\u1109", "\u1103"], 24 | "\u1124": ["\u1107", "\u1109", "\u1107"], 25 | "\u1125": ["\u1107", "\u1109", "\u1109"], 26 | "\u1126": ["\u1107", "\u1109", "\u110c"], 27 | "\u1127": ["\u1107", "\u110c"], 28 | "\u1128": ["\u1107", "\u110e"], 29 | "\u1129": ["\u1107", "\u1110"], 30 | "\u112a": ["\u1107", "\u1111"], 31 | "\u112b": ["\u1107", "\u110b"], 32 | "\u112c": ["\u1107", "\u1107", "\u110b"], 33 | "\u112d": ["\u1109", "\u1100"], 34 | "\u112e": ["\u1109", "\u1102"], 35 | "\u112f": ["\u1109", "\u1103"], 36 | "\u1130": ["\u1109", "\u1105"], 37 | "\u1131": ["\u1109", "\u1106"], 38 | "\u1132": ["\u1109", "\u1107"], 39 | "\u1133": ["\u1109", "\u1107", "\u1100"], 40 | "\u1134": ["\u1109", "\u1109", "\u1109"], 41 | "\u1135": ["\u1109", "\u114c"], 42 | "\u1136": ["\u1109", "\u110c"], 43 | "\u1137": ["\u1109", "\u110e"], 44 | "\u1138": ["\u1109", "\u110f"], 45 | "\u1139": ["\u1109", "\u1110"], 46 | "\u113a": ["\u1109", "\u1111"], 47 | "\u113b": ["\u1109", "\u1112"], 48 | "\u113d": ["\u113c", "\u113c"], 49 | "\u113f": ["\u113e", "\u113e"], 50 | "\u1141": ["\u114c", "\u1100"], 51 | "\u1142": ["\u114c", "\u1103"], 52 | "\u1143": ["\u114c", "\u1106"], 53 | "\u1144": ["\u114c", "\u1107"], 54 | "\u1145": ["\u114c", "\u1109"], 55 | "\u1146": ["\u114c", "\u1140"], 56 | "\u1147": ["\u110b", "\u110b"], 57 | "\u1148": ["\u114c", "\u110c"], 58 | "\u1149": ["\u114c", "\u110e"], 59 | "\u114a": ["\u114c", "\u1110"], 60 | "\u114b": ["\u114c", "\u1111"], 61 | "\u114d": ["\u110c", "\u114c"], 62 | "\u114f": ["\u114e", "\u114e"], 63 | "\u1151": ["\u1150", "\u1150"], 64 | "\u1152": ["\u110e", "\u110f"], 65 | "\u1153": ["\u110e", "\u1112"], 66 | "\u1156": ["\u1111", "\u1107"], 67 | "\u1157": ["\u1111", "\u110b"], 68 | "\u1158": ["\u1112", "\u1112"], 69 | "\u115a": ["\u1100", "\u1103"], 70 | "\u115b": ["\u1102", "\u1109"], 71 | "\u115c": ["\u1102", "\u110c"], 72 | "\u115d": ["\u1102", "\u1112"], 73 | "\u115e": ["\u1103", "\u1105"], 74 | "\u1162": ["\u1161", "\u1175"], 75 | "\u1164": ["\u1163", "\u1175"], 76 | "\u1166": ["\u1165", "\u1175"], 77 | "\u1168": ["\u1167", "\u1175"], 78 | "\u116a": ["\u1169", "\u1161"], 79 | "\u116b": ["\u1169", "\u1161", "\u1175"], 80 | "\u116c": ["\u1169", "\u1175"], 81 | "\u116f": ["\u116e", "\u1165"], 82 | "\u1170": ["\u116e", "\u1165", "\u1175"], 83 | "\u1171": ["\u116e", "\u1175"], 84 | "\u1174": ["\u1173", "\u1175"], 85 | "\u1176": ["\u1161", "\u1169"], 86 | "\u1177": ["\u1161", "\u116e"], 87 | "\u1178": ["\u1163", "\u1169"], 88 | "\u1179": ["\u1163", "\u116d"], 89 | "\u117a": ["\u1165", "\u1169"], 90 | "\u117b": ["\u1165", "\u116e"], 91 | "\u117c": ["\u1165", "\u1173"], 92 | "\u117d": ["\u1167", "\u1169"], 93 | "\u117e": ["\u1167", "\u116e"], 94 | "\u117f": ["\u1169", "\u1165"], 95 | "\u1180": ["\u1169", "\u1165", "\u1175"], 96 | "\u1181": ["\u1169", "\u1167", "\u1175"], 97 | "\u1182": ["\u1169", "\u1169"], 98 | "\u1183": ["\u1169", "\u116e"], 99 | "\u1184": ["\u116d", "\u1163"], 100 | "\u1185": ["\u116d", "\u1163", "\u1175"], 101 | "\u1186": ["\u116d", "\u1167"], 102 | "\u1187": ["\u116d", "\u1169"], 103 | "\u1188": ["\u116d", "\u1175"], 104 | "\u1189": ["\u116e", "\u1161"], 105 | "\u118a": ["\u116e", "\u1161", "\u1175"], 106 | "\u118b": ["\u116e", "\u1165", "\u1173"], 107 | "\u118c": ["\u116e", "\u1167", "\u1175"], 108 | "\u118d": ["\u116e", "\u116e"], 109 | "\u118e": ["\u1172", "\u1161"], 110 | "\u118f": ["\u1172", "\u1165"], 111 | "\u1190": ["\u1172", "\u1165", "\u1175"], 112 | "\u1191": ["\u1172", "\u1167"], 113 | "\u1192": ["\u1172", "\u1167", "\u1175"], 114 | "\u1193": ["\u1172", "\u116e"], 115 | "\u1194": ["\u1172", "\u1175"], 116 | "\u1195": ["\u1173", "\u116e"], 117 | "\u1196": ["\u1173", "\u1173"], 118 | "\u1197": ["\u1173", "\u1175", "\u116e"], 119 | "\u1198": ["\u1175", "\u1161"], 120 | "\u1199": ["\u1175", "\u1163"], 121 | "\u119a": ["\u1175", "\u1169"], 122 | "\u119b": ["\u1175", "\u116e"], 123 | "\u119c": ["\u1175", "\u1173"], 124 | "\u119d": ["\u1175", "\u119e"], 125 | "\u119f": ["\u119e", "\u1165"], 126 | "\u11a0": ["\u119e", "\u116e"], 127 | "\u11a1": ["\u119e", "\u1175"], 128 | "\u11a2": ["\u119e", "\u119e"], 129 | "\u11a3": ["\u1161", "\u1173"], 130 | "\u11a4": ["\u1163", "\u116e"], 131 | "\u11a5": ["\u1167", "\u1163"], 132 | "\u11a6": ["\u1169", "\u1163"], 133 | "\u11a7": ["\u1169", "\u1164"], 134 | "\u11a9": ["\u11a8", "\u11a8"], 135 | "\u11aa": ["\u11a8", "\u11ba"], 136 | "\u11ac": ["\u11ab", "\u11bd"], 137 | "\u11ad": ["\u11ab", "\u11c2"], 138 | "\u11b0": ["\u11af", "\u11a8"], 139 | "\u11b1": ["\u11af", "\u11b7"], 140 | "\u11b2": ["\u11af", "\u11b8"], 141 | "\u11b3": ["\u11af", "\u11ba"], 142 | "\u11b4": ["\u11af", "\u11c0"], 143 | "\u11b5": ["\u11af", "\u11c1"], 144 | "\u11b6": ["\u11af", "\u11c2"], 145 | "\u11b9": ["\u11b8", "\u11ba"], 146 | "\u11bb": ["\u11ba", "\u11ba"], 147 | "\u11c3": ["\u11a8", "\u11af"], 148 | "\u11c4": ["\u11a8", "\u11ba", "\u11a8"], 149 | "\u11c5": ["\u11ab", "\u11a8"], 150 | "\u11c6": ["\u11ab", "\u11ae"], 151 | "\u11c7": ["\u11ab", "\u11ba"], 152 | "\u11c8": ["\u11ab", "\u11eb"], 153 | "\u11c9": ["\u11ab", "\u11c0"], 154 | "\u11ca": ["\u11ae", "\u11a8"], 155 | "\u11cb": ["\u11ae", "\u11af"], 156 | "\u11cc": ["\u11af", "\u11a8", "\u11ba"], 157 | "\u11cd": ["\u11af", "\u11ab"], 158 | "\u11ce": ["\u11af", "\u11ae"], 159 | "\u11cf": ["\u11af", "\u11ae", "\u11c2"], 160 | "\u11d0": ["\u11af", "\u11af"], 161 | "\u11d1": ["\u11af", "\u11b7", "\u11a8"], 162 | "\u11d2": ["\u11af", "\u11b7", "\u11ba"], 163 | "\u11d3": ["\u11af", "\u11b8", "\u11ba"], 164 | "\u11d4": ["\u11af", "\u11b8", "\u11c2"], 165 | "\u11d5": ["\u11af", "\u11b8", "\u11bc"], 166 | "\u11d6": ["\u11af", "\u11ba", "\u11ba"], 167 | "\u11d7": ["\u11af", "\u11eb"], 168 | "\u11d8": ["\u11af", "\u11bf"], 169 | "\u11d9": ["\u11af", "\u11f9"], 170 | "\u11da": ["\u11b7", "\u11a8"], 171 | "\u11db": ["\u11b7", "\u11af"], 172 | "\u11dc": ["\u11b7", "\u11b8"], 173 | "\u11dd": ["\u11b7", "\u11ba"], 174 | "\u11de": ["\u11b7", "\u11ba", "\u11ba"], 175 | "\u11df": ["\u11b7", "\u11eb"], 176 | "\u11e0": ["\u11b7", "\u11be"], 177 | "\u11e1": ["\u11b7", "\u11c2"], 178 | "\u11e2": ["\u11b7", "\u11bc"], 179 | "\u11e3": ["\u11b8", "\u11af"], 180 | "\u11e4": ["\u11b8", "\u11c1"], 181 | "\u11e5": ["\u11b8", "\u11c2"], 182 | "\u11e6": ["\u11b8", "\u11bc"], 183 | "\u11e7": ["\u11ba", "\u11a8"], 184 | "\u11e8": ["\u11ba", "\u11ae"], 185 | "\u11e9": ["\u11ba", "\u11af"], 186 | "\u11ea": ["\u11ba", "\u11b8"], 187 | "\u11ec": ["\u11f0", "\u11a8"], 188 | "\u11ed": ["\u11f0", "\u11a8", "\u11a8"], 189 | "\u11ee": ["\u11f0", "\u11f0"], 190 | "\u11ef": ["\u11f0", "\u11bf"], 191 | "\u11f1": ["\u11f0", "\u11ba"], 192 | "\u11f2": ["\u11f0", "\u11eb"], 193 | "\u11f3": ["\u11c1", "\u11b8"], 194 | "\u11f4": ["\u11c1", "\u11bc"], 195 | "\u11f5": ["\u11c2", "\u11ab"], 196 | "\u11f6": ["\u11c2", "\u11af"], 197 | "\u11f7": ["\u11c2", "\u11b7"], 198 | "\u11f8": ["\u11c2", "\u11b8"], 199 | "\u11fa": ["\u11a8", "\u11ab"], 200 | "\u11fb": ["\u11a8", "\u11b8"], 201 | "\u11fc": ["\u11a8", "\u11be"], 202 | "\u11fd": ["\u11a8", "\u11bf"], 203 | "\u11fe": ["\u11a8", "\u11c2"], 204 | "\u11ff": ["\u11ab", "\u11ab"], 205 | "\u3132": ["\u3131", "\u3131"], 206 | "\u3133": ["\u3131", "\u3145"], 207 | "\u3135": ["\u3134", "\u3148"], 208 | "\u3136": ["\u3134", "\u314e"], 209 | "\u3138": ["\u3137", "\u3137"], 210 | "\u313a": ["\u3139", "\u3131"], 211 | "\u313b": ["\u3139", "\u3141"], 212 | "\u313c": ["\u3139", "\u3142"], 213 | "\u313d": ["\u3139", "\u3145"], 214 | "\u313e": ["\u3139", "\u314c"], 215 | "\u313f": ["\u3139", "\u314d"], 216 | "\u3140": ["\u3139", "\u314e"], 217 | "\u3143": ["\u3142", "\u3142"], 218 | "\u3144": ["\u3142", "\u3145"], 219 | "\u3146": ["\u3145", "\u3145"], 220 | "\u3149": ["\u3148", "\u3148"], 221 | "\u3150": ["\u314f", "\u3163"], 222 | "\u3152": ["\u3151", "\u3163"], 223 | "\u3154": ["\u3153", "\u3163"], 224 | "\u3156": ["\u3155", "\u3163"], 225 | "\u3158": ["\u3157", "\u314f"], 226 | "\u3159": ["\u3157", "\u314f", "\u3163"], 227 | "\u315a": ["\u3157", "\u3163"], 228 | "\u315d": ["\u315c", "\u3153"], 229 | "\u315e": ["\u315c", "\u3153", "\u3163"], 230 | "\u315f": ["\u315c", "\u3163"], 231 | "\u3162": ["\u3161", "\u3163"], 232 | "\u3165": ["\u3134", "\u3134"], 233 | "\u3166": ["\u3134", "\u3137"], 234 | "\u3167": ["\u3134", "\u3145"], 235 | "\u3168": ["\u3134", "\u317f"], 236 | "\u3169": ["\u3139", "\u3131", "\u3145"], 237 | "\u316a": ["\u3139", "\u3137"], 238 | "\u316b": ["\u3139", "\u3142", "\u3145"], 239 | "\u316c": ["\u3139", "\u317f"], 240 | "\u316d": ["\u3139", "\u3186"], 241 | "\u316e": ["\u3141", "\u3142"], 242 | "\u316f": ["\u3141", "\u3145"], 243 | "\u3170": ["\u3141", "\u317f"], 244 | "\u3171": ["\u3141", "\u3147"], 245 | "\u3172": ["\u3142", "\u3131"], 246 | "\u3173": ["\u3142", "\u3137"], 247 | "\u3174": ["\u3142", "\u3145", "\u3131"], 248 | "\u3175": ["\u3142", "\u3145", "\u3137"], 249 | "\u3176": ["\u3142", "\u3148"], 250 | "\u3177": ["\u3142", "\u314c"], 251 | "\u3178": ["\u3142", "\u3147"], 252 | "\u3179": ["\u3142", "\u3142", "\u3147"], 253 | "\u317a": ["\u3145", "\u3131"], 254 | "\u317b": ["\u3145", "\u3134"], 255 | "\u317c": ["\u3145", "\u3137"], 256 | "\u317d": ["\u3145", "\u3142"], 257 | "\u317e": ["\u3145", "\u3148"], 258 | "\u3180": ["\u3147", "\u3147"], 259 | "\u3182": ["\u3181", "\u3145"], 260 | "\u3183": ["\u3181", "\u317f"], 261 | "\u3184": ["\u314d", "\u3147"], 262 | "\u3185": ["\u314e", "\u314e"], 263 | "\u3187": ["\u315b", "\u3151"], 264 | "\u3188": ["\u315b", "\u3151", "\u3163"], 265 | "\u3189": ["\u315b", "\u3163"], 266 | "\u318a": ["\u3160", "\u3155"], 267 | "\u318b": ["\u3160", "\u3155", "\u3163"], 268 | "\u318c": ["\u3160", "\u3163"], 269 | "\u318e": ["\u318d", "\u3163"], 270 | "\ua960": ["\u1103", "\u1106"], 271 | "\ua961": ["\u1103", "\u1107"], 272 | "\ua962": ["\u1103", "\u1109"], 273 | "\ua963": ["\u1103", "\u110c"], 274 | "\ua964": ["\u1105", "\u1100"], 275 | "\ua965": ["\u1105", "\u1100", "\u1100"], 276 | "\ua966": ["\u1105", "\u1103"], 277 | "\ua967": ["\u1105", "\u1103", "\u1103"], 278 | "\ua968": ["\u1105", "\u1106"], 279 | "\ua969": ["\u1105", "\u1107"], 280 | "\ua96a": ["\u1105", "\u1107", "\u1107"], 281 | "\ua96b": ["\u1105", "\u112b"], 282 | "\ua96c": ["\u1105", "\u1109"], 283 | "\ua96d": ["\u1105", "\u110c"], 284 | "\ua96e": ["\u1105", "\u110f"], 285 | "\ua96f": ["\u1106", "\u1100"], 286 | "\ua970": ["\u1106", "\u1103"], 287 | "\ua971": ["\u1106", "\u1109"], 288 | "\ua972": ["\u1107", "\u1109", "\u1110"], 289 | "\ua973": ["\u1107", "\u110f"], 290 | "\ua974": ["\u1107", "\u1112"], 291 | "\ua975": ["\u1109", "\u1109", "\u1107"], 292 | "\ua976": ["\u110b", "\u1105"], 293 | "\ua977": ["\u110b", "\u1112"], 294 | "\ua978": ["\u110c", "\u110c", "\u1112"], 295 | "\ua979": ["\u1110", "\u1110"], 296 | "\ua97a": ["\u1111", "\u1112"], 297 | "\ua97b": ["\u1112", "\u1109"], 298 | "\ua97c": ["\u1159", "\u1159"], 299 | "\ud7b0": ["\u1169", "\u1167"], 300 | "\ud7b1": ["\u1169", "\u1169", "\u1175"], 301 | "\ud7b2": ["\u116d", "\u1161"], 302 | "\ud7b3": ["\u116d", "\u1162"], 303 | "\ud7b4": ["\u116d", "\u1165"], 304 | "\ud7b5": ["\u116e", "\u1167"], 305 | "\ud7b6": ["\u116e", "\u1175", "\u1175"], 306 | "\ud7b7": ["\u1172", "\u1162"], 307 | "\ud7b8": ["\u1172", "\u1169"], 308 | "\ud7b9": ["\u1173", "\u1161"], 309 | "\ud7ba": ["\u1173", "\u1165"], 310 | "\ud7bb": ["\u1173", "\u1166"], 311 | "\ud7bc": ["\u1173", "\u1169"], 312 | "\ud7bd": ["\u1175", "\u1163", "\u1169"], 313 | "\ud7be": ["\u1175", "\u1164"], 314 | "\ud7bf": ["\u1175", "\u1167"], 315 | "\ud7c0": ["\u1175", "\u1168"], 316 | "\ud7c1": ["\u1175", "\u1169", "\u1175"], 317 | "\ud7c2": ["\u1175", "\u116d"], 318 | "\ud7c3": ["\u1175", "\u1172"], 319 | "\ud7c4": ["\u1175", "\u1175"], 320 | "\ud7c5": ["\u119e", "\u1161"], 321 | "\ud7c6": ["\u119e", "\u1166"], 322 | "\ud7cb": ["\u11ab", "\u11af"], 323 | "\ud7cc": ["\u11ab", "\u11be"], 324 | "\ud7cd": ["\u11ae", "\u11ae"], 325 | "\ud7ce": ["\u11b8", "\u11ae", "\u11ae"], 326 | "\ud7cf": ["\u11ae", "\u11b8"], 327 | "\ud7d0": ["\u11ae", "\u11ba"], 328 | "\ud7d1": ["\u11ae", "\u11ba", "\u11a8"], 329 | "\ud7d2": ["\u11ae", "\u11bd"], 330 | "\ud7d3": ["\u11ae", "\u11be"], 331 | "\ud7d4": ["\u11ae", "\u11c0"], 332 | "\ud7d5": ["\u11af", "\u11a8", "\u11a8"], 333 | "\ud7d6": ["\u11af", "\u11a8", "\u11c2"], 334 | "\ud7d7": ["\u11af", "\u11af", "\u11bf"], 335 | "\ud7d8": ["\u11af", "\u11b7", "\u11c2"], 336 | "\ud7d9": ["\u11af", "\u11b8", "\u11ae"], 337 | "\ud7da": ["\u11af", "\u11b8", "\u11c1"], 338 | "\ud7db": ["\u11af", "\u11f0"], 339 | "\ud7dc": ["\u11af", "\u11f9", "\u11c2"], 340 | "\ud7de": ["\u11b7", "\u11ab"], 341 | "\ud7df": ["\u11b7", "\u11ab", "\u11ab"], 342 | "\ud7e0": ["\u11b7", "\u11b7"], 343 | "\ud7e1": ["\u11b7", "\u11b8", "\u11ba"], 344 | "\ud7e2": ["\u11b7", "\u11bd"], 345 | "\ud7e3": ["\u11b8", "\u11ae"], 346 | "\ud7e4": ["\u11b8", "\u11af", "\u11c1"], 347 | "\ud7e5": ["\u11b8", "\u11b7"], 348 | "\ud7e6": ["\u11b8", "\u11b8"], 349 | "\ud7e7": ["\u11b8", "\u11ba", "\u11ae"], 350 | "\ud7e8": ["\u11b8", "\u11bd"], 351 | "\ud7e9": ["\u11b8", "\u11be"], 352 | "\ud7ea": ["\u11ba", "\u11b7"], 353 | "\ud7eb": ["\u11ba", "\u11b8", "\u11bc"], 354 | "\ud7ec": ["\u11ba", "\u11ba", "\u11a8"], 355 | "\ud7ed": ["\u11ba", "\u11ba", "\u11ae"], 356 | "\ud7ee": ["\u11ba", "\u11eb"], 357 | "\ud7ef": ["\u11ba", "\u11bd"], 358 | "\ud7f0": ["\u11ba", "\u11be"], 359 | "\ud7f1": ["\u11ba", "\u11c0"], 360 | "\ud7f2": ["\u11ba", "\u11c2"], 361 | "\ud7f3": ["\u11eb", "\u11b8"], 362 | "\ud7f4": ["\u11eb", "\u11e6"], 363 | "\ud7f5": ["\u11f0", "\u11b7"], 364 | "\ud7f6": ["\u11f0", "\u11c2"], 365 | "\ud7f7": ["\u11bd", "\u11b8"], 366 | "\ud7f8": ["\u11bd", "\u11b8", "\u11b8"], 367 | "\ud7f9": ["\u11bd", "\u11bd"], 368 | "\ud7fa": ["\u11c1", "\u11ba"], 369 | "\ud7fb": ["\u11c1", "\u11c0"], 370 | "\ud7dd": ["\u1105", "\u110b"] 371 | } 372 | -------------------------------------------------------------------------------- /jamo/jamo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Syllable and jamo analysis for Korean. Default internal exchange form is 3 | Hangul characters, not codepoints. Jamo exchange form is U+11xx characters, 4 | not U+3xxx Hangul Compatibility Jamo (HCJ) characters or codepoints. 5 | 6 | For more information, see: 7 | http://python-jamo.readthedocs.org/ko/latest/ 8 | """ 9 | 10 | import os 11 | from sys import stderr 12 | from itertools import chain 13 | import json 14 | import re 15 | 16 | 17 | _ROOT = os.path.abspath(os.path.dirname(__file__)) 18 | 19 | _JAMO_OFFSET = 44032 20 | _JAMO_LEAD_OFFSET = 0x10ff 21 | _JAMO_VOWEL_OFFSET = 0x1160 22 | _JAMO_TAIL_OFFSET = 0x11a7 23 | 24 | with open(os.path.join(_ROOT, 'data', "U+11xx.json"), 'r') as namedata: 25 | _JAMO_TO_NAME = json.load(namedata) 26 | _JAMO_REVERSE_LOOKUP = {name: char for char, name in _JAMO_TO_NAME.items()} 27 | with open(os.path.join(_ROOT, 'data', "U+31xx.json"), 'r') as namedata: 28 | _HCJ_TO_NAME = json.load(namedata) 29 | _HCJ_REVERSE_LOOKUP = {name: char for char, name in _HCJ_TO_NAME.items()} 30 | with open(os.path.join(_ROOT, 'data', "decompositions.json"), 'r') as namedata: 31 | _JAMO_TO_COMPONENTS = json.load(namedata) 32 | _COMPONENTS_REVERSE_LOOKUP = {tuple(comps): char for char, 33 | comps in _JAMO_TO_COMPONENTS.items()} 34 | 35 | JAMO_LEADS = [chr(_) for _ in range(0x1100, 0x115F)] 36 | JAMO_LEADS_MODERN = [chr(_) for _ in range(0x1100, 0x1113)] 37 | JAMO_VOWELS = [chr(_) for _ in range(0x1161, 0x11A8)] 38 | JAMO_VOWELS_MODERN = [chr(_) for _ in range(0x1161, 0x1176)] 39 | JAMO_TAILS = [chr(_) for _ in range(0x11A8, 0x1200)] 40 | JAMO_TAILS_MODERN = [chr(_) for _ in range(0x11A8, 0x11C3)] 41 | JAMO_COMPOUNDS = _JAMO_TO_COMPONENTS.keys() 42 | 43 | 44 | class InvalidJamoError(Exception): 45 | """jamo is a U+11xx codepoint.""" 46 | 47 | def __init__(self, message, jamo): 48 | super(InvalidJamoError, self).__init__(message) 49 | self.jamo = hex(ord(jamo)) 50 | 51 | 52 | def _hangul_char_to_jamo(syllable): 53 | """Return a 3-tuple of lead, vowel, and tail jamo characters. 54 | Note: Non-Hangul characters are echoed back. 55 | """ 56 | if is_hangul_char(syllable): 57 | rem = ord(syllable) - _JAMO_OFFSET 58 | tail = rem % 28 59 | vowel = 1 + ((rem - tail) % 588) // 28 60 | lead = 1 + rem // 588 61 | if tail: 62 | return (chr(lead + _JAMO_LEAD_OFFSET), 63 | chr(vowel + _JAMO_VOWEL_OFFSET), 64 | chr(tail + _JAMO_TAIL_OFFSET)) 65 | else: 66 | return (chr(lead + _JAMO_LEAD_OFFSET), 67 | chr(vowel + _JAMO_VOWEL_OFFSET)) 68 | else: 69 | return syllable 70 | 71 | 72 | def _jamo_to_hangul_char(lead, vowel, tail=0): 73 | """Return the Hangul character for the given jamo characters. 74 | """ 75 | lead = ord(lead) - _JAMO_LEAD_OFFSET 76 | vowel = ord(vowel) - _JAMO_VOWEL_OFFSET 77 | tail = ord(tail) - _JAMO_TAIL_OFFSET if tail else 0 78 | return chr(tail + (vowel - 1) * 28 + (lead - 1) * 588 + _JAMO_OFFSET) 79 | 80 | 81 | def _jamo_char_to_hcj(char): 82 | if is_jamo(char): 83 | hcj_name = re.sub(r"(?<=HANGUL )(\w+)", 84 | "LETTER", 85 | _get_unicode_name(char)) 86 | if hcj_name in _HCJ_REVERSE_LOOKUP.keys(): 87 | return _HCJ_REVERSE_LOOKUP[hcj_name] 88 | return char 89 | 90 | 91 | def _get_unicode_name(char): 92 | """Fetch the unicode name for jamo characters. 93 | """ 94 | if char not in _JAMO_TO_NAME.keys() and char not in _HCJ_TO_NAME.keys(): 95 | raise InvalidJamoError("Not jamo or nameless jamo character", char) 96 | else: 97 | if is_hcj(char): 98 | return _HCJ_TO_NAME[char] 99 | return _JAMO_TO_NAME[char] 100 | 101 | 102 | def is_jamo(character): 103 | """Test if a single character is a jamo character. 104 | Valid jamo includes all modern and archaic jamo, as well as all HCJ. 105 | Non-assigned code points are invalid. 106 | """ 107 | code = ord(character) 108 | return 0x1100 <= code <= 0x11FF or\ 109 | 0xA960 <= code <= 0xA97C or\ 110 | 0xD7B0 <= code <= 0xD7C6 or 0xD7CB <= code <= 0xD7FB or\ 111 | is_hcj(character) 112 | 113 | 114 | def is_jamo_modern(character): 115 | """Test if a single character is a modern jamo character. 116 | Modern jamo includes all U+11xx jamo in addition to HCJ in modern usage, 117 | as defined in Unicode 7.0. 118 | WARNING: U+1160 is NOT considered a modern jamo character, but it is listed 119 | under 'Medial Vowels' in the Unicode 7.0 spec. 120 | """ 121 | code = ord(character) 122 | return 0x1100 <= code <= 0x1112 or\ 123 | 0x1161 <= code <= 0x1175 or\ 124 | 0x11A8 <= code <= 0x11C2 or\ 125 | is_hcj_modern(character) 126 | 127 | 128 | def is_hcj(character): 129 | """Test if a single character is a HCJ character. 130 | HCJ is defined as the U+313x to U+318x block, sans two non-assigned code 131 | points. 132 | """ 133 | return 0x3131 <= ord(character) <= 0x318E and ord(character) != 0x3164 134 | 135 | 136 | def is_hcj_modern(character): 137 | """Test if a single character is a modern HCJ character. 138 | Modern HCJ is defined as HCJ that corresponds to a U+11xx jamo character 139 | in modern usage. 140 | """ 141 | code = ord(character) 142 | return 0x3131 <= code <= 0x314E or\ 143 | 0x314F <= code <= 0x3163 144 | 145 | 146 | def is_hangul_char(character): 147 | """Test if a single character is in the U+AC00 to U+D7A3 code block, 148 | excluding unassigned codes. 149 | """ 150 | return 0xAC00 <= ord(character) <= 0xD7A3 151 | 152 | 153 | def is_jamo_compound(character): 154 | """Test if a single character is a compound, i.e., a consonant 155 | cluster, double consonant, or dipthong. 156 | """ 157 | if len(character) != 1: 158 | return False 159 | # Consider instead: 160 | # raise TypeError('is_jamo_compound() expected a single character') 161 | if is_jamo(character): 162 | return character in JAMO_COMPOUNDS 163 | return False 164 | 165 | 166 | def get_jamo_class(jamo): 167 | """Determine if a jamo character is a lead, vowel, or tail. 168 | Integers and U+11xx characters are valid arguments. HCJ consonants are not 169 | valid here. 170 | 171 | get_jamo_class should return the class ["lead" | "vowel" | "tail"] of a 172 | given character or integer. 173 | 174 | Note: jamo class directly corresponds to the Unicode 7.0 specification, 175 | thus includes filler characters as having a class. 176 | """ 177 | # TODO: Perhaps raise a separate error for U+3xxx jamo. 178 | if jamo in JAMO_LEADS or jamo == chr(0x115F): 179 | return "lead" 180 | if jamo in JAMO_VOWELS or jamo == chr(0x1160) or\ 181 | 0x314F <= ord(jamo) <= 0x3163: 182 | return "vowel" 183 | if jamo in JAMO_TAILS: 184 | return "tail" 185 | else: 186 | raise InvalidJamoError("Invalid or classless jamo argument.", jamo) 187 | 188 | 189 | def jamo_to_hcj(data): 190 | """Convert jamo to HCJ. 191 | Arguments may be iterables or single characters. 192 | 193 | jamo_to_hcj should convert every jamo character into HCJ in a given input, 194 | if possible. Anything else is unchanged. 195 | 196 | jamo_to_hcj is the generator version of j2hcj, the string version. Passing 197 | a character to jamo_to_hcj will still return a generator. 198 | """ 199 | return (_jamo_char_to_hcj(_) for _ in data) 200 | 201 | 202 | def j2hcj(jamo): 203 | """Convert jamo into HCJ. 204 | Arguments may be iterables or single characters. 205 | 206 | j2hcj should convert every jamo character into HCJ in a given input, if 207 | possible. Anything else is unchanged. 208 | 209 | j2hcj is the string version of jamo_to_hcj, the generator version. 210 | """ 211 | return ''.join(jamo_to_hcj(jamo)) 212 | 213 | 214 | def hcj_to_jamo(hcj_char, position="vowel"): 215 | """Convert a HCJ character to a jamo character. 216 | Arguments may be single characters along with the desired jamo class 217 | (lead, vowel, tail). Non-mappable input will raise an InvalidJamoError. 218 | """ 219 | if position == "lead": 220 | jamo_class = "CHOSEONG" 221 | elif position == "vowel": 222 | jamo_class = "JUNGSEONG" 223 | elif position == "tail": 224 | jamo_class = "JONGSEONG" 225 | else: 226 | raise InvalidJamoError("No mapping from input to jamo.", hcj_char) 227 | jamo_name = re.sub(r"(?<=HANGUL )(\w+)", 228 | jamo_class, 229 | _get_unicode_name(hcj_char)) 230 | # TODO: add tests that test non entries. 231 | if jamo_name in _JAMO_REVERSE_LOOKUP.keys(): 232 | return _JAMO_REVERSE_LOOKUP[jamo_name] 233 | return hcj_char 234 | 235 | 236 | def hcj2j(hcj_char, position="vowel"): 237 | """Convert a HCJ character to a jamo character. 238 | Identical to hcj_to_jamo. 239 | """ 240 | return hcj_to_jamo(hcj_char, position) 241 | 242 | 243 | def hangul_to_jamo(hangul_string): 244 | """Convert a string of Hangul to jamo. 245 | Arguments may be iterables of characters. 246 | 247 | hangul_to_jamo should split every Hangul character into U+11xx jamo 248 | characters for any given string. Non-hangul characters are not changed. 249 | 250 | hangul_to_jamo is the generator version of h2j, the string version. 251 | """ 252 | return (_ for _ in 253 | chain.from_iterable(_hangul_char_to_jamo(_) for _ in 254 | hangul_string)) 255 | 256 | 257 | def h2j(hangul_string): 258 | """Convert a string of Hangul to jamo. 259 | Arguments may be iterables of characters. 260 | 261 | h2j should split every Hangul character into U+11xx jamo for any given 262 | string. Non-hangul characters are not touched. 263 | 264 | h2j is the string version of hangul_to_jamo, the generator version. 265 | """ 266 | return ''.join(hangul_to_jamo(hangul_string)) 267 | 268 | 269 | def jamo_to_hangul(lead, vowel, tail=''): 270 | """Return the Hangul character for the given jamo input. 271 | Integers corresponding to U+11xx jamo codepoints, U+11xx jamo characters, 272 | or HCJ are valid inputs. 273 | 274 | Outputs a one-character Hangul string. 275 | 276 | This function is identical to j2h. 277 | """ 278 | # Internally, we convert everything to a jamo char, 279 | # then pass it to _jamo_to_hangul_char 280 | lead = hcj_to_jamo(lead, "lead") 281 | vowel = hcj_to_jamo(vowel, "vowel") 282 | if not tail or ord(tail) == 0: 283 | tail = None 284 | elif is_hcj(tail): 285 | tail = hcj_to_jamo(tail, "tail") 286 | if (is_jamo(lead) and get_jamo_class(lead) == "lead") and\ 287 | (is_jamo(vowel) and get_jamo_class(vowel) == "vowel") and\ 288 | ((not tail) or (is_jamo(tail) and get_jamo_class(tail) == "tail")): 289 | result = _jamo_to_hangul_char(lead, vowel, tail) 290 | if is_hangul_char(result): 291 | return result 292 | raise InvalidJamoError("Could not synthesize characters to Hangul.", 293 | '\x00') 294 | 295 | 296 | def j2h(lead, vowel, tail=0): 297 | """Arguments may be integers corresponding to the U+11xx codepoints, the 298 | actual U+11xx jamo characters, or HCJ. 299 | 300 | Outputs a one-character Hangul string. 301 | 302 | This function is defined solely for naming conisistency with 303 | jamo_to_hangul. 304 | """ 305 | return jamo_to_hangul(lead, vowel, tail) 306 | 307 | 308 | def decompose_jamo(compound): 309 | """Return a tuple of jamo character constituents of a compound. 310 | Note: Non-compound characters are echoed back. 311 | 312 | WARNING: Archaic jamo compounds will raise NotImplementedError. 313 | """ 314 | if len(compound) != 1: 315 | raise TypeError("decompose_jamo() expects a single character,", 316 | "but received", type(compound), "length", 317 | len(compound)) 318 | if compound not in JAMO_COMPOUNDS: 319 | # Strict version: 320 | # raise TypeError("decompose_jamo() expects a compound jamo,", 321 | # "but received", compound) 322 | return compound 323 | return _JAMO_TO_COMPONENTS.get(compound, compound) 324 | 325 | 326 | def compose_jamo(*parts): 327 | """Return the compound jamo for the given jamo input. 328 | Integers corresponding to U+11xx jamo codepoints, U+11xx jamo 329 | characters, or HCJ are valid inputs. 330 | 331 | Outputs a one-character jamo string. 332 | """ 333 | # Internally, we convert everything to a jamo char, 334 | # then pass it to _jamo_to_hangul_char 335 | # NOTE: Relies on hcj_to_jamo not strictly requiring "position" arg. 336 | for p in parts: 337 | if not (type(p) == str and len(p) == 1 and 2 <= len(parts) <= 3): 338 | raise TypeError("compose_jamo() expected 2-3 single characters " 339 | "but received " + str(parts), 340 | '\x00') 341 | hcparts = [j2hcj(_) for _ in parts] 342 | hcparts = tuple(hcparts) 343 | if hcparts in _COMPONENTS_REVERSE_LOOKUP: 344 | return _COMPONENTS_REVERSE_LOOKUP[hcparts] 345 | raise InvalidJamoError( 346 | "Could not synthesize characters to compound: " + ", ".join( 347 | str(_) + "(U+" + str(hex(ord(_)))[2:] + ")" for _ in parts), '\x00') 348 | 349 | 350 | def synth_hangul(string): 351 | """Convert jamo characters in a string into hcj as much as possible.""" 352 | raise NotImplementedError 353 | return ''.join([''.join(''.join(jamo_to_hcj(_)) for _ in string)]) 354 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | nose2[coverage_plugin] 2 | flake8 3 | autopep8 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from setuptools import setup, find_packages 3 | from jamo import __version__ 4 | import sys 5 | 6 | if sys.version_info <= (3, 0): 7 | print("ERROR: jamo requires Python 3.0 or later " 8 | "(bleeding edge preferred)", file=sys.stderr) 9 | sys.exit(1) 10 | 11 | with open('README.rst', encoding='utf8') as f: 12 | long_description = f.read() 13 | 14 | setup( 15 | name="jamo", 16 | version=__version__, 17 | description="A Hangul syllable and jamo analyzer.", 18 | long_description=long_description, 19 | url="https://github.com/jdongian/python-jamo", 20 | author="Joshua Dong", 21 | author_email="jdong42@gmail.com", 22 | license="http://www.apache.org/licenses/LICENSE-2.0", 23 | classifiers=[ 24 | "License :: OSI Approved :: Apache Software License", 25 | "Programming Language :: Python :: 3", 26 | "Programming Language :: Python :: 3.7", 27 | "Programming Language :: Python :: 3.8", 28 | "Programming Language :: Python :: 3.9", 29 | "Programming Language :: Python :: 3.10", 30 | "Programming Language :: Python :: 3 :: Only", 31 | "Programming Language :: Python :: Implementation :: PyPy", 32 | ], 33 | keywords="Korean Hangul jamo syllable nlp", 34 | packages=find_packages(), 35 | package_dir={'jamo': 'jamo'}, 36 | package_data={'jamo': ['data/*.json']}, 37 | ) 38 | -------------------------------------------------------------------------------- /tests/test_jamo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Unit tests for functional tests on Hangul <-> jamo toolkit. 3 | """ 4 | import unittest 5 | import random 6 | import itertools 7 | import io 8 | import jamo 9 | 10 | # See http://www.unicode.org/charts/PDF/U1100.pdf 11 | _JAMO_LEADS_MODERN = [chr(_) for _ in range(0x1100, 0x1113)] 12 | _JAMO_VOWELS_MODERN = [chr(_) for _ in range(0x1161, 0x1176)] 13 | _JAMO_TAILS_MODERN = [chr(_) for _ in range(0x11a8, 0x11c3)] 14 | 15 | # Corresponding HCJ for all valid leads in modern Hangul. 16 | _HCJ_LEADS_MODERN = [_ for _ in "ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ"] 17 | # Corresponding HCJ for all valid vowels in modern Hangul. 18 | # "ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ" 19 | # See http://www.unicode.org/charts/PDF/U3130.pdf 20 | _HCJ_VOWELS_MODERN = [chr(_) for _ in range(0x314f, 0x3164)] 21 | # Corresponding HCJ for all valid tails in modern Hangul. 22 | _HCJ_TAILS_MODERN = "ㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ" 23 | 24 | 25 | def _get_random_hangul(count=(0xd7a4 - 0xac00)): 26 | """Generate a sequence of random, unique, valid Hangul characters. 27 | Returns all possible modern Hangul characters by default. 28 | """ 29 | valid_hangul = [chr(_) for _ in range(0xac00, 0xd7a4)] 30 | return random.sample(valid_hangul, count) 31 | 32 | 33 | class TestJamo(unittest.TestCase): 34 | def test_is_jamo(self): 35 | """is_jamo tests 36 | Test if a single character is a jamo character. 37 | Valid jamo includes all modern and archaic jamo, as well as all HCJ. 38 | Non-assigned code points are invalid. 39 | """ 40 | 41 | # See http://www.unicode.org/charts/PDF/U1100.pdf 42 | valid_jamo = (chr(_) for _ in range(0x1100, 0x1200)) 43 | # See http://www.unicode.org/charts/PDF/U3130.pdf 44 | valid_hcj = itertools.chain((chr(_) for _ in range(0x3131, 0x3164)), 45 | (chr(_) for _ in range(0x3165, 0x318f))) 46 | # See http://www.unicode.org/charts/PDF/UA960.pdf 47 | valid_extA = (chr(_) for _ in range(0xa960, 0xa97d)) 48 | # See http://www.unicode.org/charts/PDF/UD7B0.pdf 49 | valid_extB = itertools.chain((chr(_) for _ in range(0xd7b0, 0xd7c7)), 50 | (chr(_) for _ in range(0xd7cb, 0xd7fc))) 51 | 52 | invalid_edge_cases = (chr(0x10ff), chr(0x1200), 53 | chr(0x3130), chr(0x3164), chr(0x318f), 54 | chr(0xa95f), chr(0xa07d), 55 | chr(0xd7af), chr(0xd7c7), 56 | chr(0xd7ca), chr(0xd7fc)) 57 | invalid_hangul = _get_random_hangul(20) 58 | invalid_other = "abABzyZY ,.:;~`―—–/!@#$%^&*()[]{}" 59 | 60 | # Positive tests 61 | for _ in itertools.chain(valid_jamo, valid_hcj, 62 | valid_extA, valid_extB): 63 | assert jamo.is_jamo(_),\ 64 | ("Incorrectly decided U+{} " 65 | "was not jamo.").format(hex(ord(_))[2:]) 66 | # Negative tests 67 | for _ in itertools.chain(invalid_edge_cases, 68 | invalid_hangul, 69 | invalid_other): 70 | assert not jamo.is_jamo(_),\ 71 | ("Incorrectly decided U+{} " 72 | "was jamo.").format(hex(ord(_))[2:]) 73 | 74 | def test_is_jamo_modern(self): 75 | """is_jamo_modern tests 76 | Test if a single character is a modern jamo character. 77 | Modern jamo includes all U+11xx jamo in addition to HCJ in usage. 78 | """ 79 | 80 | modern_jamo = itertools.chain(_JAMO_LEADS_MODERN, 81 | _JAMO_VOWELS_MODERN, 82 | _JAMO_TAILS_MODERN) 83 | modern_hcj = itertools.chain(_HCJ_LEADS_MODERN, 84 | _HCJ_VOWELS_MODERN, 85 | _HCJ_TAILS_MODERN) 86 | 87 | invalid_edge_cases = (chr(0x10ff), chr(0x1113), 88 | chr(0x1160), chr(0x1176), 89 | chr(0x11a7), chr(0x11c3)) 90 | invalid_hangul = _get_random_hangul(20) 91 | invalid_other = "abABzyZY ,.:;~`―—–/!@#$%^&*()[]{}ᄓ" 92 | 93 | # Positive tests 94 | for _ in itertools.chain(modern_jamo, modern_hcj): 95 | assert jamo.is_jamo_modern(_),\ 96 | ("Incorrectly decided U+{} " 97 | "was not modern jamo.").format(hex(ord(_))[2:]) 98 | # Negative tests 99 | for _ in itertools.chain(invalid_edge_cases, 100 | invalid_hangul, 101 | invalid_other): 102 | assert not jamo.is_jamo_modern(_),\ 103 | ("Incorrectly decided U+{} " 104 | "was modern jamo.").format(hex(ord(_))[2:]) 105 | 106 | def test_is_hcj(self): 107 | """is_hcj tests 108 | Test if a single character is a HCJ character. 109 | HCJ is defined as the U+313x to U+318x block, sans two non-assigned 110 | code points. 111 | """ 112 | 113 | # Note: The chaeum filler U+3164 is not considered HCJ, but a special 114 | # character as defined in http://www.unicode.org/charts/PDF/U3130.pdf. 115 | valid_hcj = itertools.chain((chr(_) for _ in range(0x3131, 0x3164)), 116 | (chr(_) for _ in range(0x3165, 0x318f))) 117 | 118 | invalid_edge_cases = (chr(0x3130), chr(0x3164), chr(0x318f)) 119 | invalid_hangul = _get_random_hangul(20) 120 | invalid_other = "abABzyZY ,.:;~`―—–/!@#$%^&*()[]{}ᄀᄓᅡᅶᆨᇃᇿ" 121 | 122 | # Positive tests 123 | for _ in valid_hcj: 124 | assert jamo.is_hcj(_),\ 125 | ("Incorrectly decided U+{} " 126 | "was not hcj.").format(hex(ord(_))[2:]) 127 | # Negative tests 128 | for _ in itertools.chain(invalid_edge_cases, 129 | invalid_hangul, 130 | invalid_other): 131 | assert not jamo.is_hcj(_),\ 132 | ("Incorrectly decided U+{} " 133 | "was hcj.").format(hex(ord(_))[2:]) 134 | 135 | def test_is_hcj_modern(self): 136 | """is_hcj_modern tests 137 | Test if a single character is a modern HCJ character. 138 | Modern HCJ is defined as HCJ that corresponds to a U+11xx jamo 139 | character in modern usage. 140 | """ 141 | 142 | # Note: The chaeum filler U+3164 is not considered HCJ, but a special 143 | # character as defined in http://www.unicode.org/charts/PDF/U3130.pdf. 144 | valid_hcj_modern = (chr(_) for _ in range(0x3131, 0x3164)) 145 | 146 | invalid_edge_cases = (chr(0x3130), chr(0x3164)) 147 | invalid_hangul = _get_random_hangul(20) 148 | invalid_other = "abABzyZY ,.:;~`―—–/!@#$%^&*()[]{}ᄀᄓᅡᅶᆨᇃᇿㆎㅥ" 149 | 150 | # Positive tests 151 | for _ in valid_hcj_modern: 152 | assert jamo.is_hcj_modern(_),\ 153 | ("Incorrectly decided U+{} " 154 | "was not modern hcj.").format(hex(ord(_))[2:]) 155 | # Negative tests 156 | for _ in itertools.chain(invalid_edge_cases, 157 | invalid_hangul, 158 | invalid_other): 159 | assert not jamo.is_hcj_modern(_),\ 160 | ("Incorrectly decided U+{} " 161 | "was modern hcj.").format(hex(ord(_))[2:]) 162 | 163 | def test_is_hangul_char(self): 164 | """is_hangul_char tests 165 | Test if a single character is in the U+AC00 to U+D7A3 code block, 166 | excluding unassigned codes. 167 | """ 168 | 169 | harcoded_tests = "가나다한글한극어힣" 170 | 171 | invalid_edge_cases = (chr(0xabff), chr(0xd7a4)) 172 | invalid_other = "ㄱㄴㅓabABzyZY ,.:;~`―—–/!@#$%^&*()[]{}ᄀᄓᅡᅶᆨᇃᇿㆎㅥ" 173 | 174 | for _ in itertools.chain(harcoded_tests, _get_random_hangul(1024)): 175 | assert jamo.is_hangul_char(_),\ 176 | ("Incorrectly decided U+{} " 177 | "was not a hangul character.").format(hex(ord(_))[2:]) 178 | for _ in itertools.chain(invalid_edge_cases, invalid_other): 179 | assert not jamo.is_hangul_char(_),\ 180 | ("Incorrectly decided U+{} " 181 | "was a hangul character.").format(hex(ord(_))[2:]) 182 | 183 | def test_get_jamo_class(self): 184 | """get_jamo_class tests 185 | Valid arguments are U+11xx characters (not HCJ). An InvalidJamoError 186 | exception is thrown if invalid input is used. 187 | 188 | get_jamo_class should return the class ["lead" | "vowel" | "tail"] of 189 | a given character. 190 | 191 | Note: strict adherence to Unicode 7.0 192 | """ 193 | 194 | # Note: Fillers are considered initial consonants according to 195 | # www.unicode.org/charts/PDF/U1100.pdf 196 | leads = (chr(_) for _ in range(0x1100, 0x1160)) 197 | lead_targets = ("lead" for _ in range(0x1100, 0x1161)) 198 | vowels = (chr(_) for _ in range(0x1160, 0x11a8)) 199 | vowel_targets = ("vowel" for _ in range(0x1160, 0x11a8)) 200 | tails = (chr(_) for _ in range(0x11a8, 0x1200)) 201 | tail_targets = ("tail" for _ in range(0x11a8, 0x1200)) 202 | 203 | invalid_cases = [chr(0x10ff), chr(0x1200), 'a', '~'] 204 | invalid_other_cases = ['', "ᄂᄃ"] 205 | 206 | all_tests = itertools.chain(zip(leads, lead_targets), 207 | zip(vowels, vowel_targets), 208 | zip(tails, tail_targets)) 209 | 210 | # Test characters 211 | for test, target in all_tests: 212 | try: 213 | trial = jamo.get_jamo_class(test) 214 | except jamo.InvalidJamoError: 215 | assert False,\ 216 | ("Thought U+{code} " 217 | "was invalid input.").format(code=hex(ord(test))[2:]) 218 | assert trial == target,\ 219 | ("Incorrectly decided {test} " 220 | "was a {trial}. " 221 | "(it's a {target})").format(test=hex(ord(test)), 222 | trial=trial, 223 | target=target) 224 | 225 | # Negative tests 226 | _stderr = jamo.jamo.stderr 227 | jamo.jamo.stderr = io.StringIO() 228 | for _ in invalid_cases: 229 | try: 230 | jamo.get_jamo_class(_) 231 | assert False, "Did not catch invalid jamo." 232 | except jamo.InvalidJamoError: 233 | pass 234 | for _ in invalid_other_cases: 235 | try: 236 | jamo.get_jamo_class(_) 237 | assert False, "Accepted bad input without throwing exception." 238 | except (AssertionError, TypeError): 239 | pass 240 | jamo.jamo.stderr = _stderr 241 | 242 | def test_jamo_to_hcj(self): 243 | """jamo_to_hcj tests 244 | Arguments may be iterables or single characters. 245 | 246 | jamo_to_hcj should convert every U+11xx jamo character into U+31xx HCJ 247 | in a given input. Anything else is unchanged. 248 | """ 249 | 250 | test_chars = itertools.chain(_JAMO_LEADS_MODERN, 251 | _JAMO_VOWELS_MODERN, 252 | _JAMO_TAILS_MODERN) 253 | target_chars = itertools.chain(_HCJ_LEADS_MODERN, 254 | _HCJ_VOWELS_MODERN, 255 | _HCJ_TAILS_MODERN) 256 | # TODO: Complete archaic jamo coverage 257 | test_archaic = ["ᄀᄁᄂᄃᇹᇫ"] 258 | target_archaic = ["ㄱㄲㄴㄷㆆㅿ"] 259 | test_strings_idempotent = ["", "aAzZ ,.:;~`―—–/!@#$%^&*()[]{}", 260 | "汉语 / 漢語; Hànyǔ or 中文; Zhōngwén", 261 | "ㄱㆎ"] 262 | target_strings_idempotent = test_strings_idempotent 263 | # TODO: Add more tests for unmapped jamo characters. 264 | test_strings_unmapped = ["ᅶᅷᅸᅹᅺᅻᅼᅽᅾᅿᆆ", 265 | ""] 266 | target_strings_unmapped = test_strings_unmapped 267 | 268 | all_tests = itertools.chain(zip(test_chars, target_chars), 269 | zip(test_archaic, target_archaic), 270 | zip(test_strings_idempotent, 271 | target_strings_idempotent), 272 | zip(test_strings_unmapped, 273 | target_strings_unmapped)) 274 | 275 | for test, target in all_tests: 276 | trial = jamo.jamo_to_hcj(test) 277 | assert trial.__name__ == "",\ 278 | "jamo_to_hcj didn't return an instance of a generator." 279 | trial, target = ''.join(trial), ''.join(target) 280 | assert trial == target,\ 281 | ("Matched {test} to {trial}, but " 282 | "expected {target}.").format(test=''.join(test), 283 | trial=trial, 284 | target=target) 285 | 286 | def test_j2hcj(self): 287 | """j2hcj tests 288 | Arguments may be iterables or single characters. 289 | 290 | j2hcj should convert every U+11xx jamo character into U+31xx HCJ in a 291 | given input. Anything else is unchanged. 292 | """ 293 | 294 | test_strings = ["", "test123", "ᄀᄁᄂᄃᇹᇫ"] 295 | target_strings = ["", "test123", "ㄱㄲㄴㄷㆆㅿ"] 296 | 297 | all_tests = itertools.chain(zip(test_strings, target_strings)) 298 | 299 | for test, target in all_tests: 300 | trial = jamo.j2hcj(test) 301 | assert trial == target,\ 302 | ("Matched {test} to {trial}, but " 303 | "expected {target}.").format(test=''.join(test), 304 | trial=trial, 305 | target=target) 306 | 307 | def test_hcj_to_jamo(self): 308 | """hcj_to_jamo tests 309 | Arguments may be single characters along with the desired jamo class 310 | (lead, vowel, tail). 311 | """ 312 | test_args = [("ㄱ", "lead"), ("ㄱ", "tail"), 313 | ("ㅎ", "lead"), ("ㅎ", "tail"), 314 | ("ㅹ", "lead"), ("ㅥ", "tail"), 315 | ("ㅏ", "vowel"), ("ㅣ", "vowel")] 316 | targets = [chr(0x1100), chr(0x11a8), 317 | chr(0x1112), chr(0x11c2), 318 | chr(0x112c), chr(0x11ff), 319 | chr(0x1161), chr(0x1175)] 320 | 321 | all_tests = itertools.chain(zip(test_args, targets)) 322 | 323 | for (jamo_class, jamo_char), target in all_tests: 324 | trial = jamo.hcj_to_jamo(jamo_class, jamo_char) 325 | assert trial == target,\ 326 | ("Converted {jamo_char} as {jamo_class} to {trial}, but " 327 | "expected {target}.").format(jamo_char=hex(ord(jamo_char)), 328 | jamo_class=jamo_class, 329 | trial=hex(ord(trial)), 330 | target=hex(ord(target))) 331 | 332 | def test_hcj2j(self): 333 | """hcj2j tests 334 | Arguments may be single characters along with the desired jamo class 335 | (lead, vowel, tail). 336 | """ 337 | test_args = [("ㄱ", "lead"), ("ㄱ", "tail"), 338 | ("ㅎ", "lead"), ("ㅎ", "tail"), 339 | ("ㅹ", "lead"), ("ㅥ", "tail"), 340 | ("ㅏ", "vowel"), ("ㅣ", "vowel")] 341 | targets = [chr(0x1100), chr(0x11a8), 342 | chr(0x1112), chr(0x11c2), 343 | chr(0x112c), chr(0x11ff), 344 | chr(0x1161), chr(0x1175)] 345 | 346 | all_tests = itertools.chain(zip(test_args, targets)) 347 | 348 | for args, target in all_tests: 349 | jamo_class, jamo_char = args 350 | trial = jamo.hcj2j(jamo_class, jamo_char) 351 | assert trial == target,\ 352 | ("Converted {jamo_char} as {jamo_class} to {trial}, but " 353 | "expected {target}.").format(jamo_char=hex(ord(jamo_char)), 354 | jamo_class=jamo_class, 355 | trial=hex(ord(trial)), 356 | target=hex(ord(target))) 357 | 358 | def test_hangul_to_jamo(self): 359 | """hangul_to_jamo tests 360 | Arguments may be iterables or characters. 361 | 362 | hangul_to_jamo should split every Hangul character into U+11xx jamo 363 | for any given string. Anything else is unchanged. 364 | """ 365 | 366 | test_cases = ["자", 367 | "모", 368 | "한", 369 | "글", 370 | "서", 371 | "울", 372 | "평", 373 | "양", 374 | "한굴", 375 | "Do you speak 한국어?", 376 | "자모=字母"] 377 | desired_jamo = [(chr(0x110c), chr(0x1161)), 378 | (chr(0x1106), chr(0x1169)), 379 | (chr(0x1112), chr(0x1161), chr(0x11ab)), 380 | (chr(0x1100), chr(0x1173), chr(0x11af)), 381 | (chr(0x1109), chr(0x1165)), 382 | (chr(0x110b), chr(0x116e), chr(0x11af)), 383 | (chr(0x1111), chr(0x1167), chr(0x11bc)), 384 | (chr(0x110b), chr(0x1163), chr(0x11bc)), 385 | (chr(0x1112), chr(0x1161), chr(0x11ab), 386 | chr(0x1100), chr(0x116e), chr(0x11af)), 387 | tuple(_ for _ in "Do you speak ") + 388 | (chr(0x1112), chr(0x1161), chr(0x11ab), 389 | chr(0x1100), chr(0x116e), chr(0x11a8), 390 | chr(0x110b), chr(0x1165)) + ('?',), 391 | (chr(0x110c), chr(0x1161), chr(0x1106), chr(0x1169), 392 | "=", "字", "母")] 393 | 394 | for hangul, target in zip(test_cases, desired_jamo): 395 | trial = jamo.hangul_to_jamo(hangul) 396 | assert trial.__name__ == "",\ 397 | ("hangul_to_jamo didn't return" 398 | "an instance of a generator.") 399 | trial = tuple(trial) 400 | assert target == trial,\ 401 | ("Converted {hangul} to {failure}, but expected " 402 | "({lead}, {vowel}, " 403 | "{tail}).").format(hangul=hangul, 404 | lead=hex(ord(target[0])), 405 | vowel=hex(ord(target[1])), 406 | tail=hex(ord(target[2])) 407 | if len(target) == 3 else "", 408 | failure=tuple([hex(ord(_)) for _ in 409 | trial]))\ 410 | if len(hangul) == 1 else\ 411 | ("Incorrectly converted {hangul} to " 412 | "{failure}.".format(hangul=hangul, 413 | failure=[hex(ord(_)) for _ in trial])) 414 | 415 | def test_h2j(self): 416 | """h2j tests 417 | Arguments may be iterables or characters. 418 | 419 | h2j should split every Hangul character into U+11xx jamo for any given 420 | string. Anything else is unchanged. 421 | """ 422 | tests = ["한굴", "자모=字母"] 423 | targets = ["한굴", "자모=字母"] 424 | tests_idempotent = ["", "test123~", "ㄱㄲㄴㄷㆆㅿ"] 425 | targets_idempotent = tests_idempotent 426 | 427 | all_tests = itertools.chain(zip(tests, targets), 428 | zip(tests_idempotent, targets_idempotent)) 429 | 430 | for test, target in all_tests: 431 | trial = jamo.h2j(test) 432 | assert trial == target,\ 433 | ("Converted {test} to {trial}, but " 434 | "expected {target}.").format(test=test, 435 | trial=trial, 436 | target=target) 437 | 438 | def test_jamo_to_hangul(self): 439 | """jamo_to_hangul tests 440 | Arguments may be jamo characters including HCJ. Throws an 441 | InvalidJamoError if there is no corresponding Hangul character to the 442 | inputs. 443 | 444 | Outputs a single Hangul character. 445 | """ 446 | 447 | # Support jamo -> Hangul conversion. 448 | chr_cases = ((chr(0x110c), chr(0x1161), chr(0)), 449 | (chr(0x1106), chr(0x1169), chr(0)), 450 | (chr(0x1112), chr(0x1161), chr(0x11ab)), 451 | (chr(0x1100), chr(0x1173), chr(0x11af)), 452 | (chr(0x1109), chr(0x1165), chr(0)), 453 | (chr(0x110b), chr(0x116e), chr(0x11af)), 454 | (chr(0x1111), chr(0x1167), chr(0x11bc)), 455 | (chr(0x110b), chr(0x1163), chr(0x11bc))) 456 | # Support HCJ -> Hangul conversion. 457 | hcj_cases = (('ㅈ', 'ㅏ', ''), 458 | ('ㅁ', 'ㅗ', ''), 459 | ('ㅎ', 'ㅏ', 'ㄴ'), 460 | ('ㄱ', 'ㅡ', 'ㄹ'), 461 | ('ㅅ', 'ㅓ', ''), 462 | ('ㅇ', 'ㅜ', 'ㄹ'), 463 | ('ㅍ', 'ㅕ', 'ㅇ'), 464 | ('ㅇ', 'ㅑ', 'ㅇ')) 465 | desired_hangul1 = ("자", 466 | "모", 467 | "한", 468 | "글", 469 | "서", 470 | "울", 471 | "평", 472 | "양") 473 | # Test the arity 2 version. 474 | arity2_cases = (('ㅎ', 'ㅏ'),) 475 | desired_hangul2 = ("하",) 476 | # Support mixed jamo and hcj conversion. 477 | mixed_cases = (('ᄒ', 'ㅏ', 'ㄴ'),) 478 | desired_hangul3 = ("한",) 479 | 480 | invalid_cases = [('a', 'b', 'c'), ('a', 'b'), 481 | ('ㄴ', 'ㄴ', 'ㄴ'), ('ㅏ', 'ㄴ')] 482 | 483 | all_tests = itertools.chain(zip(chr_cases, desired_hangul1), 484 | zip(hcj_cases, desired_hangul1), 485 | zip(arity2_cases, desired_hangul2), 486 | zip(mixed_cases, desired_hangul3)) 487 | 488 | for args, hangul in all_tests: 489 | trial = jamo.jamo_to_hangul(*args) 490 | assert hangul == trial,\ 491 | ("Conversion from hcj to Hangul failed. " 492 | "Incorrect conversion from" 493 | "({lead}, {vowel}, {tail}) to " 494 | "({hangul}). " 495 | "Got {failure}.").format(lead=lead, 496 | vowel=vowel, 497 | tail=tail, 498 | hangul=hangul, 499 | failure=trial) 500 | 501 | # Negative tests 502 | _stderr = jamo.jamo.stderr 503 | jamo.jamo.stderr = io.StringIO() 504 | for _ in invalid_cases: 505 | try: 506 | # print(_) 507 | jamo.jamo_to_hangul(*_) 508 | assert False, "Accepted bad input without throwing exception." 509 | except jamo.InvalidJamoError: 510 | pass 511 | jamo.jamo.stderr = _stderr 512 | 513 | def test_j2h(self): 514 | """j2h hardcoded tests. 515 | Arguments may be integers corresponding to the U+11xx codepoints, the 516 | actual U+11xx jamo characters, or HCJ. 517 | 518 | Outputs a one-character Hangul string. 519 | 520 | This function is defined solely for naming conisistency with 521 | jamo_to_hangul. 522 | """ 523 | 524 | assert jamo.j2h('ㅎ', 'ㅏ', 'ㄴ') == "한",\ 525 | "j2h doesn't work. Hint: it's the same as jamo_to_hangul." 526 | 527 | assert jamo.j2h('ㅎ', 'ㅏ') == "하",\ 528 | "j2h doesn't work. Hint: it's the same as jamo_to_hangul." 529 | 530 | def test_decompose_jamo(self): 531 | """decompose_jamo tests 532 | Arguments should be compound jamo - double consonants, consonant 533 | clusters, or dipthongs. 534 | 535 | Should output a tuple of non-compound jamo for every compound 536 | jamo. 537 | """ 538 | invalid_hangul = _get_random_hangul(20) 539 | invalid_other = "abABzyZY ,.:;~`―—–/!@#$%^&*()[]{}" 540 | 541 | # TODO: Expand tests to be more comprehensive, maybe use unicode names. 542 | test_chars = ["ㄸ", "ㅢ"] 543 | target_chars = [("ㄷ", "ㄷ"), ("ㅡ", "ㅣ")] 544 | 545 | test_chars_idempotent = list(itertools.chain(invalid_hangul, 546 | invalid_other)) 547 | target_chars_idempotent = test_chars_idempotent 548 | 549 | # Invalid 550 | invalid_strings = ["ab", "ㄸㄲ"] 551 | 552 | # Not implemented 553 | not_implemented_archaics = ["ᇑ", "ᇲ", "ퟡ"] 554 | 555 | all_tests = itertools.chain(zip(test_chars, target_chars), 556 | zip(test_chars_idempotent, 557 | target_chars_idempotent)) 558 | 559 | for test, target in all_tests: 560 | trial = jamo.decompose_jamo(test) 561 | assert not jamo.is_jamo_compound(trial),\ 562 | "decompose_jamo returned a compound" 563 | # Test for strict version of decompose_jamo(): 564 | # assert 2 <= len(trial) <= 3,\ 565 | # "decompose_jamo failed to return a tuple of 2-3 jamo " +\ 566 | # "and instead returned " + str(trial) + " for " + str(test) 567 | if trial != test: # for lenient version ONLY 568 | for trial_char in trial: 569 | assert jamo.is_jamo(trial_char),\ 570 | "decompose_jamo returned non-jamo character" 571 | trial, target = ''.join(trial), ''.join(target) 572 | assert trial == target,\ 573 | ("Matched {test} to {trial}, but " 574 | "expected {target}.").format(test=''.join(test), 575 | trial=trial, 576 | target=target) 577 | 578 | # Negative tests 579 | _stderr = jamo.jamo.stderr 580 | jamo.jamo.stderr = io.StringIO() 581 | for test_string in invalid_strings: 582 | try: 583 | jamo.decompose_jamo(test_string) 584 | assert False, "Accepted bad input without throwing exception." 585 | except (AssertionError, TypeError): 586 | pass 587 | for not_implemented_archaic in not_implemented_archaics: 588 | try: 589 | jamo.decompose_jamo(not_implemented_archaic) 590 | assert False, "Accepted archaic jamo without throwing " +\ 591 | "exception." 592 | except (AssertionError, NotImplementedError): 593 | pass 594 | jamo.jamo.stderr = _stderr 595 | 596 | def test_compose_jamo(self): 597 | """compose_jamo tests 598 | Arguments should be non-compound jamo that combine to form valid 599 | double consonants, consonant clusters, or dipthongs. 600 | 601 | Should output a compound jamo for every valid combination of 602 | components and raise InvalidJamoError in all other cases. 603 | """ 604 | 605 | # TODO: Expand tests to be more comprehensive, maybe use unicode names 606 | test_chars = [("ㄷ", "ㄷ"), ("ᄃ", "ㄷ"), ("ᄃ", "ᄃ"), ("ㅡ", "ㅣ")] 607 | target_chars = ["ㄸ", "ㄸ", "ㄸ", "ㅢ"] 608 | 609 | # Invalid 610 | invalid_cases = [("ㄷ", "ㄷ", "ㄷ"), ("ㅡ", "ㄷ")] 611 | 612 | # Not implemented 613 | not_implemented_archaics = [("ㄹ", "ㅁ", "ㄱ"), ("ㄹ", "ㄹ")] 614 | 615 | all_tests = zip(test_chars, target_chars) 616 | for test, target in all_tests: 617 | trial = jamo.compose_jamo(*test) 618 | assert jamo.is_jamo(trial),\ 619 | "compose_jamo returned non-jamo character" 620 | assert jamo.is_jamo_compound(trial),\ 621 | "compose_jamo returned non-compound" 622 | trial, target = ''.join(trial), ''.join(target) 623 | assert trial == target,\ 624 | ("Matched {test} to {trial}, but " 625 | "expected {target}.").format(test=''.join(test), 626 | trial=trial, 627 | target=target) 628 | 629 | # Negative tests 630 | _stderr = jamo.jamo.stderr 631 | jamo.jamo.stderr = io.StringIO() 632 | for invalid_case in invalid_cases: 633 | try: 634 | jamo.compose_jamo(*invalid_case) 635 | assert False, "Accepted bad input without throwing exception." 636 | except (AssertionError, TypeError, jamo.InvalidJamoError): 637 | pass 638 | for not_implemented_archaic in not_implemented_archaics: 639 | try: 640 | jamo.compose_jamo(*not_implemented_archaic) 641 | assert False, "Accepted unimplemented archaic input without" +\ 642 | " throwing exception." 643 | except (AssertionError, TypeError, jamo.InvalidJamoError): 644 | pass 645 | jamo.jamo.stderr = _stderr 646 | 647 | def test_is_jamo_compound(self): 648 | """Returns True for modern or archaic jamo compounds and False 649 | for others, raising a TypeError if receiving more than one 650 | character as input. 651 | """ 652 | valid_compounds = "ᄁᄄᄈᄊᄍᄓᄔᄕᄖᄗᄘᄙᄚᄛᄜᄝᄞᄟᄠᄡᄢᄣᄤᄥᄦᄧᄨᄩᄪᄫᄬᄭᄮᄯᄰᄱᄲᄳᄴᄵᄶᄷᄸᄹᄺᄻᄽᄿ" +\ 653 | "ᅁᅂᅃᅄᅅᅆᅇᅈᅉᅊᅋᅍᅏᅑᅒᅓᅖᅗᅘᅚᅛᅜᅝᅞᅪᅫᅬᅯᅰᅱᅴᅶᅷᅸᅹᅺᅻᅼᅽᅾᅿᆀᆁᆂᆃᆄᆅᆆ" +\ 654 | "ᆇᆈᆉᆊᆋᆌᆍᆎᆏᆐᆑᆒᆓᆔᆕᆖᆗᆘᆙᆚᆛᆜᆝᆟᆠᆡᆢᆣᆤᆥᆦᆧᆩᆪᆬᆭᆰᆱᆲᆳᆴᆵᆶᆹᆻᇃᇄᇅ" +\ 655 | "ᇆᇇᇈᇉᇊᇋᇌᇍᇎᇏᇐᇑᇒᇓᇔᇕᇖᇗᇘᇙᇚᇛᇜᇝᇞᇟᇠᇡᇢᇣᇤᇥᇦᇧᇨᇩᇪᇬᇭᇮᇯᇱᇲᇳᇴᇵᇶᇷ" +\ 656 | "ᇸᇺᇻᇼᇽᇾᇿㄲㄳㄵㄶㄸㄺㄻㄼㄽㄾㄿㅀㅃㅄㅆㅉㅘㅙㅚㅝㅞㅟㅢㅥㅦㅧㅨㅩㅪㅫㅬㅭㅮㅯㅰㅱㅲㅳㅴㅵㅶ" +\ 657 | "ㅷㅸㅹㅺㅻㅼㅽㅾㆀㆂㆃㆄㆅㆇㆈㆉㆊㆋㆌㆎꥠꥡꥢꥣꥤꥥꥦꥧꥨꥩꥪꥫꥬꥭꥮꥯꥰꥱꥲꥳꥴꥵꥶꥷꥸꥹꥺ" +\ 658 | "ꥻꥼힰힱힲힳힴힵힶힷힸힹힺힻힼힽힾힿퟀퟁퟂퟃퟄퟅퟆퟋퟌퟍퟎퟏퟐퟑퟒퟓퟔퟕퟖퟗퟘퟙퟚퟛퟜퟝퟞퟟퟠퟡ" +\ 659 | "ퟢퟣퟤퟥퟦퟧퟨퟩퟪퟫퟬퟭퟮퟯퟰퟱퟲퟳퟴퟵퟶퟷퟸퟹퟺퟻᅢᅤᅦᅨㅐㅒㅔㅖ" 660 | 661 | non_compound_jamo = "ᄀᄂᄃᄅᄆᄇᄉᄋᄌᄎᄏᄐᄑᄒᄼᄾᅀᅌᅎᅐᅔᅕᅟᅠᅡᅣᅥᅧᅩᅭᅮᅲᅳᅵᆞᆨᆫᆮᆯᆷᆸᆺᆼᆽᆾᆿ" +\ 662 | "ᇀᇁᇂᇫᇰㄱㄴㄷㄹㅁ ㅂㅅㅇㅈㅊㅋㅌㅍㅎㅏㅑㅓㅕㅗㅛㅜㅠㅡㅣㅿㆁㆍ" 663 | invalid_hangul = _get_random_hangul(20) 664 | invalid_other = "abABzyZY ,.:;~`―—–/!@#$%^&*()[]{}" 665 | 666 | # Positive tests 667 | for valid_compound in itertools.chain(valid_compounds): 668 | assert jamo.is_jamo_compound(valid_compound),\ 669 | ("Incorrectly decided U+{} was not a " + 670 | "jamo compound.").format(hex(ord(valid_compound))[2:]) 671 | # Negative tests 672 | for invalid_case in itertools.chain(non_compound_jamo, 673 | invalid_hangul, 674 | invalid_other): 675 | assert not jamo.is_jamo_compound(invalid_case),\ 676 | ("Incorrectly decided U+{} " 677 | "was jamo.").format(hex(ord(invalid_case))[2:]) 678 | 679 | def test_synth_hangul(self): 680 | # To be implemented in a future version 681 | pass 682 | 683 | 684 | if __name__ == "__main__": 685 | unittest.main() # verbosity = 2) 686 | -------------------------------------------------------------------------------- /tools/check.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def validate(filename): 5 | with open(filename, 'r') as fin: 6 | last_code = 0 7 | for line in fin: 8 | try: 9 | results = re.findall('([\dABCDEF]{4})(.*)(HANGUL .+)', line) 10 | if results: 11 | code, char, name = (int(results[0][0], 16), 12 | results[0][1].strip(), 13 | results[0][2]) 14 | if last_code == 0: 15 | last_code = code 16 | else: 17 | if code != last_code+1: 18 | return False, line, "Skipped code." 19 | if code != ord(char): 20 | return False, line, "Code mismatch." 21 | last_code += 1 22 | except: 23 | print("Error checking line: \"{}\"".format(line)) 24 | exit(1) 25 | return True 26 | 27 | 28 | if __name__ == "__main__": 29 | from sys import argv 30 | status = validate(argv[1]) 31 | if status == True: 32 | print("{ok, %s}" % argv[1]) 33 | else: 34 | print("{error, {reason, \"%s\"}, {line, \"%s\"}}" % 35 | (status[2], status[1])) 36 | -------------------------------------------------------------------------------- /tools/parse.py: -------------------------------------------------------------------------------- 1 | from sys import argv 2 | import re 3 | import json 4 | import check 5 | 6 | if __name__ == "__main__": 7 | filein, fileout = argv[1], argv[2] 8 | status = check.validate(filein) 9 | if status == True or len(argv) == 4 and argv[3] == '--nocheck': 10 | results = [] 11 | with open(filein, 'r') as fin: 12 | results = re.findall('([\dABCDEF]{4}).*(HANGUL .+)', fin.read()) 13 | with open(fileout, 'w') as fout: 14 | json.dump({chr(int(code, 16)): name.strip()\ 15 | for code, name in results}, fout, 16 | sort_keys=True, indent=2) 17 | print("{ok, %s}" % filein) 18 | else: 19 | print("{error, {reason, \"%s\"}, {line, \"%s\"}}" % 20 | (status[2], status[1])) 21 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = 3 | pypy{3.7,3.8} 4 | py{37,38,39,310} 5 | 6 | [gh-actions] 7 | python = 8 | pypy-3.7: pypy3.7 9 | pypy-3.8: pypy3.8 10 | 3.7: py37 11 | 3.8: py38 12 | 3.9: py39 13 | 3.10: py310 14 | 15 | [testenv] 16 | deps = 17 | nose2 18 | 19 | commands = 20 | nose2 21 | --------------------------------------------------------------------------------