├── .circleci
└── config.yml
├── .gitignore
├── LICENSE
├── README.md
├── README.txt
├── hgtk
├── .DS_Store
├── __init__.py
├── checker.py
├── const.py
├── exception.py
├── josa.py
├── letter.py
└── text.py
├── requirements.txt
├── setup.cfg
├── setup.py
└── tests
├── __init__.py
├── test_checker.py
├── test_josa.py
├── test_letter.py
└── test_text.py
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | # Use the latest 2.1 version of CircleCI pipeline process engine.
2 | # See: https://circleci.com/docs/2.0/configuration-reference
3 | version: 2.1
4 |
5 | # Orbs are reusable packages of CircleCI configuration that you may share across projects, enabling you to create encapsulated, parameterized commands, jobs, and executors that can be used across multiple projects.
6 | # See: https://circleci.com/docs/2.0/orb-intro/
7 | orbs:
8 | # The python orb contains a set of prepackaged CircleCI configuration you can use repeatedly in your configuration files
9 | # Orb commands and jobs help you with common scripting around a language/tool
10 | # so you don't have to copy and paste it everywhere.
11 | # See the orb documentation here: https://circleci.com/developer/orbs/orb/circleci/python
12 | python: circleci/python@1.5.0
13 |
14 | # Define a job to be invoked later in a workflow.
15 | # See: https://circleci.com/docs/2.0/configuration-reference/#jobs
16 | jobs:
17 | test-37: &test-template
18 | docker:
19 | - image: cimg/python:3.7
20 | # Checkout the code as the first step. This is a dedicated CircleCI step.
21 | # The python orb's install-packages step will install the dependencies from a Pipfile via Pipenv by default.
22 | # Here we're making sure we just use the system-wide pip. By default it uses the project root's requirements.txt.
23 | # Then run your tests!
24 | # CircleCI will report the results back to your VCS provider.
25 | steps:
26 | - checkout
27 | - restore_cache:
28 | keys:
29 | - v1-dependencies-{{ checksum "requirements.txt" }}
30 | - v1-dependencies-
31 |
32 | - run:
33 | name: install dependencies
34 | command: |
35 | pip install -r requirements.txt
36 | pip install coverage
37 | pip install pytest
38 | - save_cache:
39 | paths:
40 | - ./venv
41 | key: v1-dependencies-{{ checksum "requirements.txt" }}
42 | - run:
43 | name: Run tests
44 | command: |
45 | coverage run -m pytest tests
46 | coverage html -d test-reports
47 | - store_artifacts:
48 | path: test-reports
49 |
50 | test-38:
51 | <<: *test-template
52 | docker:
53 | - image: cimg/python:3.8
54 |
55 | test-39:
56 | <<: *test-template
57 | docker:
58 | - image: cimg/python:3.9
59 |
60 | test-310:
61 | <<: *test-template
62 | docker:
63 | - image: cimg/python:3.10
64 |
65 | test-311:
66 | <<: *test-template
67 | docker:
68 | - image: cimg/python:3.11
69 |
70 | test-312:
71 | <<: *test-template
72 | docker:
73 | - image: cimg/python:3.12
74 |
75 | test-27:
76 | <<: *test-template
77 | docker:
78 | - image: cimg/python:2.7
79 |
80 | test-pypy2:
81 | <<: *test-template
82 | docker:
83 | - image: pypy:2
84 |
85 | test-pypy3:
86 | <<: *test-template
87 | docker:
88 | - image: pypy:3
89 |
90 | # Invoke jobs via workflows
91 | # See: https://circleci.com/docs/2.0/configuration-reference/#workflows
92 | workflows:
93 | testing: # This is the name of the workflow, feel free to change it to better match your workflow.
94 | # Inside the workflow, you define the jobs you want to run.
95 | jobs:
96 | - test-27
97 | - test-37
98 | - test-38
99 | - test-39
100 | - test-310
101 | - test-311
102 | - test-312
103 | - test-pypy2
104 | - test-pypy3
105 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | .DS_Store
6 | .idea/
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | env/
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *,cover
48 | .hypothesis/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 |
58 | # Flask stuff:
59 | instance/
60 | .webassets-cache
61 |
62 | # Scrapy stuff:
63 | .scrapy
64 |
65 | # Sphinx documentation
66 | docs/_build/
67 |
68 | # PyBuilder
69 | target/
70 |
71 | # IPython Notebook
72 | .ipynb_checkpoints
73 |
74 | # pyenv
75 | .python-version
76 |
77 | # celery beat schedule file
78 | celerybeat-schedule
79 |
80 | # dotenv
81 | .env
82 |
83 | # virtualenv
84 | venv/
85 | ENV/
86 |
87 | # Spyder project settings
88 | .spyderproject
89 |
90 | # Rope project settings
91 | .ropeproject
92 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [CircleCI build status](https://dl.circleci.com/status-badge/redirect/gh/bluedisk/hangul-toolkit/tree/main)
2 |
3 | Simple Toolkit for Hangul
4 | =========================
5 | base code forked from https://github.com/rhobot/Hangulpy
6 |
7 | 한글 자모 분해, 조합(오토마타), 조사 붙이기, 초/중/종 분해조합, 한글/한자/영문 여부 체크 등을 지원합니다.
8 |
9 | ## INSTALL
10 | ```
11 | pip install hgtk
12 | ```
13 |
14 | ## Samples
15 | ### Letter
16 | #### Decompose character
17 | ```python
18 | >>> hgtk.letter.decompose('감')
19 | ('ㄱ', 'ㅏ', 'ㅁ')
20 | ```
21 | #### Compose character
22 | ```python
23 | >>> hgtk.letter.compose('ㄱ', 'ㅏ', 'ㅁ')
24 | '감'
25 | ```
26 |
27 | ### Text
28 | #### Decompose text
29 | ```python
30 | >>> hgtk.text.decompose('학교종이 땡땡땡! hello world 1234567890 ㅋㅋ!')
31 | 'ㅎㅏㄱᴥㄱㅛᴥㅈㅗㅇᴥㅇㅣᴥ ㄸㅐㅇᴥㄸㅐㅇᴥㄸㅐㅇᴥ! hello world 1234567890 ㅋᴥㅋᴥ!'
32 | ```
33 |
34 | 기본 조합 완료 기호는 ᴥ이고, 아래와 같이 compose_code 옵션으로 변경 가능합니다.
35 | ```python
36 | >>> hgtk.text.decompose('학교종이 땡땡땡! hello world 1234567890 ㅋㅋ!', compose_code='/')
37 | 'ㅎㅏㄱ/ㄱㅛ/ㅈㅗㅇ/ㅇㅣ/ㄸㅐㅇ/ㄸㅐㅇ/ㄸㅐㅇ/! hello world 1234567890 ㅋ/ㅋ/!'
38 | ```
39 | 기본 조합기호의 의미는 곰돌이 입니다. 👇
40 |
41 |
42 | #### Compose text (Automata)
43 | ```python
44 | >>> hgtk.text.compose('ㅎㅏㄱᴥㄱㅛᴥㅈㅗㅇᴥㅇㅣᴥ ㄸㅐㅇᴥㄸㅐㅇᴥㄸㅐㅇᴥ! hello world 1234567890 ㅋᴥㅋᴥ!')
45 | '학교종이 땡땡땡! hello world 1234567890 ㅋㅋ!'
46 | ```
47 |
48 | ### Checker
49 |
50 | #### is hangul text
51 | ```python
52 | >>> hgtk.checker.is_hangul('한글입니다')
53 | True
54 | >>> hgtk.checker.is_hangul('no한글입니다')
55 | False
56 | >>> hgtk.checker.is_hangul('it is english')
57 | False
58 | ```
59 |
60 | #### is hanja text
61 | ```python
62 | >>> hgtk.checker.is_hanja('大韓民國')
63 | True
64 | >>> hgtk.checker.is_hanja('大한민국')
65 | False
66 | >>> hgtk.checker.is_hanja('대한민국')
67 | False
68 | ```
69 |
70 | #### is latin1 text
71 | ```python
72 | >>> hgtk.checker.is_latin1('abcdefghijklmnopqrstuvwxyz')
73 | True
74 | >>> hgtk.checker.is_latin1('한글latin1한')
75 | False
76 | ```
77 |
78 | #### has batchim
79 | ```python
80 | >>> hgtk.checker.has_batchim('한') # '한' has batchim 'ㄴ'
81 | True
82 | >>> hgtk.checker.has_batchim('하')
83 | False
84 | ```
85 |
86 |
87 | ### Josa
88 | #### EUN_NEUN - 은/는
89 | ```python
90 | >>> hgtk.josa.attach('하늘', hgtk.josa.EUN_NEUN)
91 | '하늘은'
92 | >>> hgtk.josa.attach('바다', hgtk.josa.EUN_NEUN)
93 | '바다는'
94 | ```
95 | #### I_GA - 이/가
96 | ```python
97 | >>> hgtk.josa.attach('하늘', hgtk.josa.I_GA)
98 | '하늘이'
99 | >>> hgtk.josa.attach('바다', hgtk.josa.I_GA)
100 | '바다가'
101 | ```
102 | #### EUL_REUL - 을/를
103 | ```python
104 | >>> hgtk.josa.attach('하늘', hgtk.josa.EUL_REUL)
105 | '하늘을'
106 | >>> hgtk.josa.attach('바다', hgtk.josa.EUL_REUL)
107 | '바다를'
108 | ```
109 | #### GWA_WA - 과/와
110 | ```python
111 | >>> hgtk.josa.attach('하늘', hgtk.josa.GWA_WA)
112 | '하늘과'
113 | >>> hgtk.josa.attach('바다', hgtk.josa.GWA_WA)
114 | '바다와'
115 | ```
116 | #### IDA_DA - 이다/다
117 | ```python
118 | >>> hgtk.josa.attach('하늘', hgtk.josa.IDA_DA)
119 | '하늘이다'
120 | >>> hgtk.josa.attach('바다', hgtk.josa.IDA_DA)
121 | '바다다'
122 | ```
123 | #### EURO_RO - 으로/로
124 | ```python
125 | >>> hgtk.josa.attach('하늘', hgtk.josa.EURO_RO)
126 | '하늘로'
127 | >>> hgtk.josa.attach('바다', hgtk.josa.EURO_RO)
128 | '바다로'
129 | >>> hgtk.josa.attach('태양', hgtk.josa.EURO_RO)
130 | '태양으로'
131 | ```
132 | #### RYUL_YUL - 률/율
133 | ```python
134 | >>> hgtk.josa.attach('방어', hgtk.josa.RYUL_YUL)
135 | '방어율'
136 | >>> hgtk.josa.attach('공격', hgtk.josa.RYUL_YUL)
137 | '공격률'
138 | >>> hgtk.josa.attach('반환', hgtk.josa.RYUL_YUL)
139 | '반환율'
140 | ```
141 |
142 | ### Const
143 | * CHO: 초성 리스트
144 | * JOONG: 중성 리스트
145 | * JONG: 종성 리스트, 종성이 없는 경우를 대비해 공백 문자가 추가됨
146 |
147 | * JAMO: 공백을 제외한 모든 자모(비조합문자)
148 |
149 | * NUM_CHO: 초성 개수
150 | * NUM_JOONG: 중성 개수
151 | * NUM_JONG: 종성 개수
152 |
153 | * FIRST_HANGUL_UNICODE: 유니코드 상의 한글 코드(조합문자) 시작 시점
154 | * LAST_HANGUL_UNICODE: 유니코드 상의 한글 코드(조합문자) 종료 시점
155 |
156 | ### Exception
157 | 예외 처리를 위한 Exception들, 의미는 보이는 대로..
158 | * NotHangulException
159 | * NotLetterException
160 | * NotWordException
161 |
162 |
163 | ## Tested in
164 | - python 2.7
165 |
166 | - python 3.7
167 | - python 3.8
168 | - python 3.9
169 | - python 3.10
170 | - python 3.11
171 | - python 3.12
172 |
173 | - PyPy 2.x
174 | - PyPy 3.x
175 |
176 |
177 | ----
178 |
179 | Apache 2.0 License
180 |
--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
1 | 한글 분해/조합/편집 종합 툴킷
2 | =========================
3 |
4 | - 한글 자모 분해, 조합
5 | - 문장 분해, 조합(오토마타)
6 | - 조사 붙이기
7 | - 초/중/종 분해조합
8 | - 한글/한자/영문 여부 체크
9 |
10 |
11 | Simple Toolkit for Hangul
12 | =========================
13 |
14 | - compose/decompose a character to/from jamo
15 | - text decomposing & composing(automata)
16 | - attach postposition to word
17 | - get cho/joong/jong from a character
18 | - check whether text is Hangul / Hanja (Chinese characters used in Korean) / Latin
19 |
20 |
21 | Tested in
22 | - python 2.7
23 | - python 3.7
24 | - python 3.8
25 | - python 3.9
26 | - python 3.10
27 | - python 3.11
28 | - python 3.12
29 |
30 | - PyPy 2
31 | - PyPy 3
32 |
33 | Apache 2.0 License
34 |
--------------------------------------------------------------------------------
/hgtk/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bluedisk/hangul-toolkit/5346aacc8b0f504db87f3d234b9af17e314ca122/hgtk/.DS_Store
--------------------------------------------------------------------------------
/hgtk/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import division
4 | from __future__ import unicode_literals
5 |
6 | from . import checker
7 | from . import josa
8 | from . import letter
9 | from . import text
10 |
--------------------------------------------------------------------------------
/hgtk/checker.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import division
4 | from __future__ import unicode_literals
5 |
6 | from . import letter as lt
7 | from .const import JAMO, FIRST_HANGUL_UNICODE, LAST_HANGUL_UNICODE, NUM_JONG
8 | from .exception import NotHangulException
9 |
# Hanja and Latin-1 character ranges (by bluedisk).
# Each FIRST_*/LAST_* pair is an inclusive code point range used by the
# is_hanja() / is_latin1() checks below.
FIRST_HANJA_UNICODE = 0x4E00
LAST_HANJA_UNICODE = 0x9FFF

FIRST_HANJA_EXT_A_UNICODE = 0x3400
LAST_HANJA_EXT_A_UNICODE = 0x4DBF

FIRST_LATIN1_UNICODE = 0x0000  # NUL
LAST_LATIN1_UNICODE = 0x00FF  # 'ÿ'


# Hanja extension ranges B through E are ignored.
22 |
23 | ################################################################################
24 | # Boolean Hangul functions
25 | ################################################################################
26 |
27 |
def is_hangul(phrase):  # TODO: need tuning!!
    """Return True when every character of `phrase` is Hangul.

    A character counts as Hangul when its code point lies between
    FIRST_HANGUL_UNICODE and LAST_HANGUL_UNICODE (inclusive) or when
    is_jamo() accepts it. An empty `phrase` yields True because there is no
    character that could fail the check.
    """
    return all(
        FIRST_HANGUL_UNICODE <= ord(char) <= LAST_HANGUL_UNICODE or is_jamo(char)
        for char in phrase
    )
35 |
36 |
def is_jamo(letter):
    """Return True when `letter` is one of the entries of the JAMO table."""
    return letter in JAMO
39 |
40 |
def is_hanja(phrase):
    """Return True when every character of `phrase` is Hanja.

    A character qualifies when its code point falls inside either the
    FIRST_HANJA_UNICODE..LAST_HANJA_UNICODE range or the
    FIRST_HANJA_EXT_A_UNICODE..LAST_HANJA_EXT_A_UNICODE range (both
    inclusive). An empty `phrase` yields True.
    """
    for char in phrase:
        code_point = ord(char)
        in_main_range = FIRST_HANJA_UNICODE <= code_point <= LAST_HANJA_UNICODE
        in_ext_a_range = FIRST_HANJA_EXT_A_UNICODE <= code_point <= LAST_HANJA_EXT_A_UNICODE
        if not (in_main_range or in_ext_a_range):
            return False
    return True
47 |
48 |
def is_latin1(phrase):
    """Return True when every character of `phrase` is in the Latin-1 range.

    The range is FIRST_LATIN1_UNICODE..LAST_LATIN1_UNICODE, inclusive.
    An empty `phrase` yields True.
    """
    return all(
        FIRST_LATIN1_UNICODE <= ord(char) <= LAST_LATIN1_UNICODE
        for char in phrase
    )
54 |
55 |
def has_jongsung(letter):
    """Check whether this letter contains Jongsung (a final consonant).

    @param letter a string holding exactly one Hangul letter
    @return True when the letter carries a jongsung, False otherwise
    @raise Exception if `letter` is not exactly one character long
    @raise NotHangulException if `letter` is not accepted by is_hangul()
    """
    if len(letter) != 1:
        raise Exception('The target string must be one letter.')
    if not is_hangul(letter):
        raise NotHangulException('The target string must be Hangul')

    if is_jamo(letter):
        # A bare jamo passes is_hangul() but lies below FIRST_HANGUL_UNICODE,
        # so its syllable index is negative and the modulo below would give
        # an arbitrary answer. Use the jongsung slot reported by
        # letter.decompose() instead.
        return lt.decompose(letter)[2] != ''

    # Within the syllable block the jongsung index is the remainder of the
    # syllable index by NUM_JONG; remainder 0 means "no jongsung".
    return lt.hangul_index(letter) % NUM_JONG > 0
65 |
66 |
def has_batchim(letter):
    """This method is the same as has_jongsung().

    It forwards `letter` unchanged and returns (or raises) whatever
    has_jongsung() does.
    """
    return has_jongsung(letter)
70 |
71 | # DEPRECATED !
72 | # def has_approximant(letter):
73 | # """Approximant makes complex vowels, such as ones starting with y or w.
74 | # In Korean there is a unique approximant euㅡ making uiㅢ, but ㅢ does not make many irregularities."""
75 | # if len(letter) != 1:
76 | # raise Exception('The target string must be one letter.')
77 | # if not is_hangul(letter):
78 | # raise NotHangulException('The target string must be Hangul')
79 | #
80 | # jaso = lt.decompose(letter)
81 | # diphthong = (u'ㅑ', u'ㅒ', u'ㅕ', u'ㅖ', u'ㅘ', u'ㅙ', u'ㅛ', u'ㅝ', u'ㅞ', u'ㅠ')
82 | # # excluded 'ㅢ' because y- and w-based complex vowels are irregular.
83 | # # vowels with umlauts (ㅐ, ㅔ, ㅚ, ㅟ) are not considered complex vowels.
84 | # return jaso[1] in diphthong
85 | #
86 |
--------------------------------------------------------------------------------
/hgtk/const.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import division
4 | from __future__ import unicode_literals
5 |
6 | ################################################################################
7 | # Hangul Unicode Variables
8 | ################################################################################
9 |
10 | # Code = 0xAC00 + (Chosung_index * NUM_JOONG * NUM_JONG) + (Joongsung_index * NUM_JONG) + (Jongsung_index)
11 |
# Chosung table; a jamo's position here is its Chosung_index.
CHO = (
    u'ㄱ', u'ㄲ', u'ㄴ', u'ㄷ', u'ㄸ',
    u'ㄹ', u'ㅁ', u'ㅂ', u'ㅃ', u'ㅅ',
    u'ㅆ', u'ㅇ', u'ㅈ', u'ㅉ', u'ㅊ',
    u'ㅋ', u'ㅌ', u'ㅍ', u'ㅎ',
)

# Joongsung table; a jamo's position here is its Joongsung_index.
JOONG = (
    u'ㅏ', u'ㅐ', u'ㅑ', u'ㅒ', u'ㅓ', u'ㅔ', u'ㅕ',
    u'ㅖ', u'ㅗ', u'ㅘ', u'ㅙ', u'ㅚ', u'ㅛ', u'ㅜ',
    u'ㅝ', u'ㅞ', u'ㅟ', u'ㅠ', u'ㅡ', u'ㅢ', u'ㅣ',
)

# Jongsung table; index 0 is the empty string, i.e. "no jongsung".
JONG = (
    u'',
    u'ㄱ', u'ㄲ', u'ㄳ', u'ㄴ', u'ㄵ', u'ㄶ', u'ㄷ', u'ㄹ', u'ㄺ',
    u'ㄻ', u'ㄼ', u'ㄽ', u'ㄾ', u'ㄿ', u'ㅀ', u'ㅁ', u'ㅂ', u'ㅄ',
    u'ㅅ', u'ㅆ', u'ㅇ', u'ㅈ', u'ㅊ', u'ㅋ', u'ㅌ', u'ㅍ', u'ㅎ',
)

# Every jamo from the three tables, without the empty jongsung placeholder.
JAMO = CHO + JOONG + JONG[1:]

# Table sizes (19, 21 and 28), derived from the tables themselves.
NUM_CHO = len(CHO)
NUM_JOONG = len(JOONG)
NUM_JONG = len(JONG)

FIRST_HANGUL_UNICODE = 0xAC00  # '가'
LAST_HANGUL_UNICODE = 0xD7A3  # '힣'

# Upper-case Latin letter -> Korean consonant. Letters without an entry have
# no substitute.
ENG_KOR_SUBSTITUENT = {
    'B': 'ㅂ',
    'C': 'ㄱ',
    'K': 'ㄱ',
    'L': 'ㄹ',
    'M': 'ㅁ',
    'N': 'ㄴ',
    'R': 'ㄹ',
    'T': 'ㅅ',
}
38 |
--------------------------------------------------------------------------------
/hgtk/exception.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 |
4 | ################################################################################
5 | # Exceptions
6 | ################################################################################
7 |
class NotHangulException(Exception):
    """Raised when a value that has to be Hangul is not."""
10 |
11 |
class NotLetterException(Exception):
    """Raised when a value that has to be a letter is not."""
14 |
15 |
class NotWordException(Exception):
    """Error type for non-word input.

    NOTE(review): presumably raised when a value that has to be a word is
    not; confirm against the call sites.
    """
18 |
--------------------------------------------------------------------------------
/hgtk/josa.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import division
4 | from __future__ import unicode_literals
5 |
6 | from . import letter
7 | from .exception import NotHangulException
8 |
9 | ################################################################################
10 | # Josa Type Parameters
11 | ################################################################################
12 |
# Every josa type is a dict with three keys:
#   'has'    - form attached when the word's last letter has no jongsung,
#              or when its jongsung equals the 'except' jamo
#   'not'    - form attached in every other case
#   'except' - jongsung that still selects the 'has' form (None: no exception)
EUN_NEUN = {'not': u'은', 'has': u'는', 'except': None}
I_GA = {'not': u'이', 'has': u'가', 'except': None}
EUL_REUL = {'not': u'을', 'has': u'를', 'except': None}
GWA_WA = {'not': u'과', 'has': u'와', 'except': None}
IDA_DA = {'not': u'이다', 'has': u'다', 'except': None}

EURO_RO = {'not': u'으로', 'has': u'로', 'except': u'ㄹ'}
RYUL_YUL = {'not': u'률', 'has': u'율', 'except': u'ㄴ'}

JOSA_TYPES = (EUN_NEUN, I_GA, EUL_REUL, GWA_WA, IDA_DA, EURO_RO, RYUL_YUL)

# Reverse lookup: each surface form ('not' and 'has') -> its josa type dict.
JOSAS = {
    form: josa_type
    for josa_type in JOSA_TYPES
    for form in (josa_type['not'], josa_type['has'])
}
24 |
25 |
26 | ################################################################################
27 | # Josa functions
28 | ################################################################################
29 |
def get_josa_type(word):
    """Return the josa type dict registered in JOSAS for `word`.

    Returns None when `word` is not a key of JOSAS.
    """
    return JOSAS.get(word)
32 |
33 |
def attach(word, josa=EUN_NEUN):
    """Append the fitting form of `josa` to `word` and return the result.

    The choice is driven by the last character of `word` after stripping
    surrounding whitespace:
      * a Hangul character contributes its jongsung (via letter.decompose);
      * any other character contributes the consonant returned by
        letter.get_substituent_of.
    When that consonant is empty or equals josa['except'], josa['has'] is
    appended; otherwise josa['not'] is. The form is appended to the original,
    unstripped `word`.
    """
    final_char = word.strip()[-1]

    try:
        final_consonant = letter.decompose(final_char)[2]
    except NotHangulException:
        final_consonant = letter.get_substituent_of(final_char)

    form_key = 'has' if final_consonant in ('', josa['except']) else 'not'
    return word + josa[form_key]
46 |
--------------------------------------------------------------------------------
/hgtk/letter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import division
4 | from __future__ import unicode_literals
5 |
6 | from six import unichr
7 |
8 | from .const import CHO, JOONG, JONG, FIRST_HANGUL_UNICODE, NUM_JOONG, NUM_JONG, ENG_KOR_SUBSTITUENT
9 | from .exception import NotHangulException, NotLetterException
10 |
11 |
12 | ################################################################################
13 | # Decomposition & Combination
14 | ################################################################################
15 |
16 |
def compose(chosung, joongsung, jongsung=u''):
    """Build one Hangul letter out of a chosung, a joongsung and a jongsung.

    @param chosung the initial jamo; has to be an entry of CHO
    @param joongsung the medial jamo; has to be an entry of JOONG
    @param jongsung the terminal jamo; has to be an entry of JONG. Optional:
        both the default u'' and None mean "no jongsung".
    @return the composed letter as a one-character string
    @raise NotHangulException if any of the lookups fails
    """
    terminal = u'' if jongsung is None else jongsung

    try:
        # Accumulate (cho * NUM_JOONG + joong) * NUM_JONG + jong step by step.
        offset = CHO.index(chosung)
        offset = offset * NUM_JOONG + JOONG.index(joongsung)
        offset = offset * NUM_JONG + JONG.index(terminal)
    except Exception:
        raise NotHangulException('No valid Hangul character index')

    return unichr(FIRST_HANGUL_UNICODE + offset)
33 |
34 |
def hangul_index(letter):
    """Return the offset of `letter`'s code point from FIRST_HANGUL_UNICODE.

    The result is negative for characters whose code point is below
    FIRST_HANGUL_UNICODE.
    """
    return ord(letter) - FIRST_HANGUL_UNICODE
37 |
38 |
def decompose_index(code):
    """Split a syllable index into (cho, joong, jong) table indices.

    `code` is an offset as produced by hangul_index(). The divisions are
    true divisions (the module enables `from __future__ import division`),
    so intermediate values are floats that are truncated with int().

    @return a tuple (cho, joong, jong) of ints
    """
    jong = int(code % NUM_JONG)
    remainder = code / NUM_JONG
    joong = int(remainder % NUM_JOONG)
    cho = int(remainder / NUM_JOONG)
    return cho, joong, jong
47 |
48 |
def decompose(hangul_letter):
    """Decompose one Hangul letter into a (chosung, joongsung, jongsung) tuple.

    A bare jamo is returned in the slot of the first table that contains it
    (CHO, then JOONG, then JONG) with the other two slots empty. A composed
    syllable is split by index arithmetic; a missing jongsung is reported as
    the empty string.

    @param hangul_letter a string holding exactly one Hangul letter
    @return a 3-tuple of strings
    @raise NotLetterException if the input is empty or longer than one letter
    @raise NotHangulException if the input is not accepted by checker.is_hangul
    """
    # Imported here rather than at module level: checker imports this module.
    from . import checker

    if len(hangul_letter) < 1:
        raise NotLetterException('decompose() needs one letter, got an empty string')
    if not checker.is_hangul(hangul_letter):
        raise NotHangulException('decompose() needs a Hangul letter')
    if len(hangul_letter) > 1:
        # Previously this input reached ord() and failed with a TypeError.
        raise NotLetterException('decompose() needs exactly one letter')

    if hangul_letter in CHO:
        return hangul_letter, '', ''

    if hangul_letter in JOONG:
        return '', hangul_letter, ''

    if hangul_letter in JONG:
        return '', '', hangul_letter

    # Every jamo was handled above, so the letter lies inside the syllable
    # block and all three indices are valid for their tables.
    cho, joong, jong = decompose_index(hangul_index(hangul_letter))
    return CHO[cho], JOONG[joong], JONG[jong]
80 |
81 |
def get_substituent_of(letter):
    """Look up the upper-cased letter in ENG_KOR_SUBSTITUENT.
    @param letter the letter to look up; it is upper-cased before the lookup.
    @return the mapped value, or '' when the table has no entry for it."""
    key = letter.upper()
    return ENG_KOR_SUBSTITUENT.get(key, '')
84 |
--------------------------------------------------------------------------------
/hgtk/text.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import division
4 | from __future__ import unicode_literals
5 |
6 | from . import checker
7 | from . import letter
8 | from .const import CHO, JOONG, JONG
9 |
# Unicode jamo are used as keys instead of numeric indices, for coding
# efficiency and readability. (by bluedisk)
# Layout: first jongsung jamo -> {following jamo: combined jongsung}.
JONG_COMP = {
    u'ㄱ': {u'ㄱ': u'ㄲ', u'ㅅ': u'ㄳ'},
    u'ㄴ': {u'ㅈ': u'ㄵ', u'ㅎ': u'ㄶ'},
    u'ㄹ': {
        u'ㄱ': u'ㄺ', u'ㅁ': u'ㄻ', u'ㅂ': u'ㄼ', u'ㅅ': u'ㄽ',
        u'ㅌ': u'ㄾ', u'ㅍ': u'ㄿ', u'ㅎ': u'ㅀ',
    },
    u'ㅂ': {u'ㅅ': u'ㅄ'},
    u'ㅅ': {u'ㅅ': u'ㅆ'},
}

# Default marker appended after every decomposed letter.
DEFAULT_COMPOSE_CODE = u'ᴥ'
38 |
39 |
40 | ################################################################################
41 | # Hangul Automata functions by bluedisk@gmail.com
42 | ################################################################################
43 |
44 |
def decompose(text, latin_filter=True, compose_code=DEFAULT_COMPOSE_CODE):
    """Decompose every Hangul character of text into jamo.
    @param text the text to decompose.
    @param latin_filter when true, a non-Hangul character is kept only if
           checker.is_latin1() accepts it (Hangul + English); when false,
           every non-Hangul character is kept.
    @param compose_code the marker appended after each decomposed Hangul character.
    @return the decomposed text."""
    pieces = []

    for ch in text:
        if checker.is_hangul(ch):
            # A lone jamo is already decomposed; it only needs the marker.
            jamo = ch if checker.is_jamo(ch) else "".join(letter.decompose(ch))
            pieces.append(jamo + compose_code)
        elif not latin_filter or checker.is_latin1(ch):
            pieces.append(ch)

    return u"".join(pieces)
64 |
65 |
# States of the automaton in compose(): which part of a letter is expected next.
STATUS_CHO, STATUS_JOONG, STATUS_JONG1, STATUS_JONG2 = range(4)
70 |
71 |
def compose(text, compose_code=DEFAULT_COMPOSE_CODE):
    """Compose the jamo sequences in text back into Hangul letters.

    The text is scanned one character at a time by a small automaton
    (STATUS_CHO -> STATUS_JOONG -> STATUS_JONG1 -> STATUS_JONG2). Characters
    that are not part of a letter are copied through, and compose_code
    markers only delimit letters and are dropped.

    A letter that is still pending when the text ends (i.e. the text does not
    finish with compose_code) is emitted as well instead of being lost.

    @param text the decomposed text, e.g. the output of decompose().
    @param compose_code the marker that terminates each decomposed letter.
    @return the composed text."""
    pieces = []
    status = STATUS_CHO
    chosung = joongsung = jongsung = None

    for c in text:

        if status == STATUS_CHO:
            if c in CHO:
                chosung = c
                status = STATUS_JOONG
            elif c != compose_code:
                pieces.append(c)

        elif status == STATUS_JOONG:
            if c != compose_code and c in JOONG:
                joongsung = c
                status = STATUS_JONG1
            else:
                # No joongsung followed, so the pending chosung stands alone.
                pieces.append(chosung)

                if c in CHO:
                    chosung = c
                    status = STATUS_JOONG
                else:
                    if c != compose_code:
                        pieces.append(c)
                    status = STATUS_CHO

        elif status == STATUS_JONG1:
            if c != compose_code and c in JONG:
                jongsung = c

                if c in JONG_COMP:
                    # This jongsung may still combine with the next jamo.
                    status = STATUS_JONG2
                else:
                    pieces.append(letter.compose(chosung, joongsung, jongsung))
                    status = STATUS_CHO
            else:
                pieces.append(letter.compose(chosung, joongsung))

                if c in CHO:
                    chosung = c
                    status = STATUS_JOONG
                else:
                    if c != compose_code:
                        pieces.append(c)
                    status = STATUS_CHO

        elif status == STATUS_JONG2:
            if c != compose_code and c in JONG_COMP[jongsung]:
                # c is absorbed into the combined jongsung, so it is not emitted again.
                jongsung = JONG_COMP[jongsung][c]
                pieces.append(letter.compose(chosung, joongsung, jongsung))
                status = STATUS_CHO
            else:
                pieces.append(letter.compose(chosung, joongsung, jongsung))

                # Same as the other states: a chosung right after a finished
                # letter starts the next letter rather than being emitted raw.
                if c != compose_code and c in CHO:
                    chosung = c
                    status = STATUS_JOONG
                else:
                    if c != compose_code:
                        pieces.append(c)
                    status = STATUS_CHO

    # Flush whatever is still pending when the text ends without a marker.
    if status == STATUS_JOONG:
        pieces.append(chosung)
    elif status == STATUS_JONG1:
        pieces.append(letter.compose(chosung, joongsung))
    elif status == STATUS_JONG2:
        pieces.append(letter.compose(chosung, joongsung, jongsung))

    return u"".join(pieces)
141 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | six==1.16.0
2 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | # This flag says that the code is written to work on both Python 2 and Python
3 | # 3. If at all possible, it is good practice to do this. If you cannot, you
4 | # will need to generate wheels for each Python version that you support.
5 | universal=1
6 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """A setuptools based setup module.
2 |
3 | See:
4 | https://packaging.python.org/en/latest/distributing.html
5 | https://github.com/pypa/sampleproject
6 | """
7 |
8 | # To use a consistent encoding
9 | from codecs import open
10 | from os import path
11 |
12 | # Always prefer setuptools over distutils
13 | from setuptools import setup, find_packages
14 |
here = path.abspath(path.dirname(__file__))

# Get the long description from the README file
with open(path.join(here, 'README.txt'), encoding='utf-8') as f:
    long_description = f.read()

setup(
    name='hgtk',

    # Versions should comply with PEP440. For a discussion on single-sourcing
    # the version across setup.py and the project code, see
    # https://packaging.python.org/en/latest/single_source_version.html
    version='0.2.1',

    description='Toolkit for Hangul composing, decomposing and etc...',
    long_description=long_description,

    # The project's main homepage.
    url='https://github.com/bluedisk/hangul-toolkit',

    # Author details
    author='Wonwoo, lee',
    author_email='bluedisk@gmail.com',

    # Choose your license
    license='Apache 2.0',

    # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
    classifiers=[
        # How mature is this project? Common values are
        # 3 - Alpha
        # 4 - Beta
        # 5 - Production/Stable
        'Development Status :: 4 - Beta',

        # Indicate who your project is intended for
        'Intended Audience :: Developers',
        'Topic :: Text Processing',

        # Pick your license as you wish (should match "license" above)
        'License :: OSI Approved :: Apache Software License',

        # Specify the Python versions you support here. In particular, ensure
        # that you indicate whether you support Python 2, Python 3 or both.
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: Implementation :: PyPy'
    ],

    # What does your project relate to?
    keywords='hangul character jamo automata composing decomposing josa',

    # You can just specify the packages manually here if your project is
    # simple. Or you can use find_packages().
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),

    # Alternatively, if you want to distribute just a my_module.py, uncomment
    # this:
    # py_modules=["my_module"],

    # List run-time dependencies here. These will be installed by pip when
    # your project is installed. For an analysis of "install_requires" vs pip's
    # requirements files see:
    # https://packaging.python.org/en/latest/requirements.html
    #
    # six is pinned in requirements.txt, and hgtk.letter calls unichr(), which
    # is not a builtin on Python 3. Left unpinned here so that applications can
    # choose their own version.
    # NOTE(review): confirm that hgtk imports unichr from six at run time.
    install_requires=['six'],

    # List additional groups of dependencies here (e.g. development
    # dependencies). You can install these using the following syntax,
    # for example:
    # $ pip install -e .[dev,test]
    extras_require={},

    # If there are data files included in your packages that need to be
    # installed, specify them here. If using Python 2.6 or less, then these
    # have to be included in MANIFEST.in as well.
    package_data={},

    # Although 'package_data' is the preferred approach, in some case you may
    # need to place data files outside of your packages. See:
    # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa
    # In this case, 'data_file' will be installed into '/my_data'
    data_files=[],

    # To provide executable scripts, use entry points in preference to the
    # "scripts" keyword. Entry points provide cross-platform support and allow
    # pip to create the appropriate form of executable for the target platform.
    entry_points={},
)
112 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bluedisk/hangul-toolkit/5346aacc8b0f504db87f3d234b9af17e314ca122/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_checker.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import division
4 | from __future__ import unicode_literals
5 |
6 | import hgtk
7 |
8 |
9 | # hangul test - true/false
def test_is_hangul_1():
    """Text made up only of Hangul is reported as Hangul."""
    sample = '한글입니다'
    assert hgtk.checker.is_hangul(sample)
12 |
13 |
def test_is_hangul_2():
    """Text that mixes Latin letters with Hangul is not reported as Hangul."""
    sample = 'no한글입니다'
    assert not hgtk.checker.is_hangul(sample)
16 |
17 |
18 | # hanja test - true/false
def test_is_hanja_1():
    """Text made up only of Hanja is reported as Hanja."""
    sample = '大韓民國'
    assert hgtk.checker.is_hanja(sample)
21 |
22 |
def test_is_hanja_2():
    """Text that mixes Hanja with Hangul is not reported as Hanja."""
    sample = '大한민국'
    assert not hgtk.checker.is_hanja(sample)
25 |
26 |
27 | # latin test - true/false
def test_is_latin1_1():
    """The lower-case ASCII alphabet is reported as Latin-1."""
    sample = 'abcdefghijklmnopqrstuvwxyz'
    assert hgtk.checker.is_latin1(sample)
30 |
31 |
def test_is_latin1_2():
    """Text that contains Hangul is not reported as Latin-1."""
    sample = '한글latin1한'
    assert not hgtk.checker.is_latin1(sample)
34 |
35 |
36 | # batchim test - true/false
def test_has_batchim_1():
    """A letter with a final consonant has a batchim."""
    sample = '한'
    assert hgtk.checker.has_batchim(sample)
39 |
40 |
def test_has_batchim_2():
    """A letter without a final consonant has no batchim."""
    sample = '하'
    assert not hgtk.checker.has_batchim(sample)
43 |
44 | # DEPRECATED! - not a general function
45 | # def test_has_approximant_1():
46 | # assert hgtk.checker.has_approximant('롹')
47 |
48 | # def test_has_approximant_2():
49 | # assert hgtk.checker.has_approximant('락') == False
50 |
--------------------------------------------------------------------------------
/tests/test_josa.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import division
4 | from __future__ import unicode_literals
5 |
6 | import hgtk
7 |
8 |
9 | # 은/는
def test_eun_neun_1():
    """EUN_NEUN: a word ending in a final consonant gets the 'eun' form."""
    attached = hgtk.josa.attach('하늘', hgtk.josa.EUN_NEUN)
    assert attached == '하늘은'
12 |
13 |
def test_eun_neun_2():
    """EUN_NEUN: a word ending in a vowel gets the 'neun' form."""
    attached = hgtk.josa.attach('바다', hgtk.josa.EUN_NEUN)
    assert attached == '바다는'
16 |
17 |
18 | # 이/가
def test_i_ga_1():
    """I_GA: a word ending in a final consonant gets the 'i' form."""
    attached = hgtk.josa.attach('하늘', hgtk.josa.I_GA)
    assert attached == '하늘이'
21 |
22 |
def test_i_ga_2():
    """I_GA: a word ending in a vowel gets the 'ga' form."""
    attached = hgtk.josa.attach('바다', hgtk.josa.I_GA)
    assert attached == '바다가'
25 |
26 |
27 | # 을/를
def test_eul_reul_1():
    """EUL_REUL: a word ending in a final consonant gets the 'eul' form."""
    attached = hgtk.josa.attach('하늘', hgtk.josa.EUL_REUL)
    assert attached == '하늘을'
30 |
31 |
def test_eul_reul_2():
    """EUL_REUL: a word ending in a vowel gets the 'reul' form."""
    attached = hgtk.josa.attach('바다', hgtk.josa.EUL_REUL)
    assert attached == '바다를'
34 |
35 |
36 | # 과/와
def test_gwa_wa_1():
    """GWA_WA: a word ending in a final consonant gets the 'gwa' form."""
    attached = hgtk.josa.attach('하늘', hgtk.josa.GWA_WA)
    assert attached == '하늘과'
39 |
40 |
def test_gwa_wa_2():
    """GWA_WA: a word ending in a vowel gets the 'wa' form."""
    attached = hgtk.josa.attach('바다', hgtk.josa.GWA_WA)
    assert attached == '바다와'
43 |
44 |
45 | # 이다/다
def test_ida_da_1():
    """IDA_DA: a word ending in a final consonant gets the 'ida' form."""
    attached = hgtk.josa.attach('하늘', hgtk.josa.IDA_DA)
    assert attached == '하늘이다'
48 |
49 |
def test_ida_da_2():
    """IDA_DA: a word ending in a vowel gets the 'da' form."""
    attached = hgtk.josa.attach('바다', hgtk.josa.IDA_DA)
    assert attached == '바다다'
52 |
53 |
54 | # 으로/로
def test_euro_ro_1():
    """EURO_RO: a word ending in the final consonant rieul gets the 'ro' form."""
    attached = hgtk.josa.attach('하늘', hgtk.josa.EURO_RO)
    assert attached == '하늘로'
57 |
58 |
def test_euro_ro_2():
    """EURO_RO: a word ending in a vowel gets the 'ro' form."""
    attached = hgtk.josa.attach('바다', hgtk.josa.EURO_RO)
    assert attached == '바다로'
61 |
62 |
def test_euro_ro_3():
    """EURO_RO: a word ending in another final consonant gets the 'euro' form."""
    attached = hgtk.josa.attach('태양', hgtk.josa.EURO_RO)
    assert attached == '태양으로'
65 |
66 |
67 | # 률/율
def test_ryul_yul_1():
    """RYUL_YUL: a word ending in a vowel gets the 'yul' form."""
    attached = hgtk.josa.attach('방어', hgtk.josa.RYUL_YUL)
    assert attached == '방어율'
70 |
71 |
def test_ryul_yul_2():
    """RYUL_YUL: a word ending in the final consonant giyeok gets the 'ryul' form."""
    attached = hgtk.josa.attach('공격', hgtk.josa.RYUL_YUL)
    assert attached == '공격률'
74 |
75 |
def test_ryul_yul_3():
    """RYUL_YUL: a word ending in the final consonant nieun gets the 'yul' form."""
    attached = hgtk.josa.attach('반환', hgtk.josa.RYUL_YUL)
    assert attached == '반환율'
78 |
--------------------------------------------------------------------------------
/tests/test_letter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import division
4 | from __future__ import unicode_literals
5 |
6 | import hgtk
7 |
8 |
def test_compose():
    """Three jamo are composed into the expected single letter."""
    composed = hgtk.letter.compose('ㄱ', 'ㅏ', 'ㅁ')
    assert composed == '감'
11 |
12 |
def test_decompose():
    """A composed letter is split into its (chosung, joongsung, jongsung) tuple."""
    parts = hgtk.letter.decompose('감')
    assert parts == ('ㄱ', 'ㅏ', 'ㅁ')
15 |
--------------------------------------------------------------------------------
/tests/test_text.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from __future__ import division
4 | from __future__ import unicode_literals
5 |
6 | import hgtk
7 |
# Fixture pair shared by the tests below: the composed text and its
# decomposed form, in which each Hangul letter is followed by the marker.
COMPOSED = '학교종이 땡땡땡! hello world 1234567890 ㅋㅋ!'
DECOMPOSED = 'ㅎㅏㄱᴥㄱㅛᴥㅈㅗㅇᴥㅇㅣᴥ ㄸㅐㅇᴥㄸㅐㅇᴥㄸㅐㅇᴥ! hello world 1234567890 ㅋᴥㅋᴥ!'
10 |
11 |
def test_compose():
    """Composing the DECOMPOSED fixture gives back COMPOSED."""
    print("compose", hgtk.text.compose(DECOMPOSED))
    composed = hgtk.text.compose(DECOMPOSED)
    assert composed == COMPOSED
15 |
16 |
def test_decompose():
    """Decomposing the COMPOSED fixture gives DECOMPOSED."""
    print("decompose", hgtk.text.decompose(COMPOSED))
    decomposed = hgtk.text.decompose(COMPOSED)
    assert decomposed == DECOMPOSED
20 |
21 |
def test_regresstion_pr_1():
    """Regression check: jamo pairs forming a combined jongsung are recombined."""
    example = 'ㅇㅓㅂㅅᴥㄷㅏᴥㅇㅣㅅㅅᴥㄷㅏᴥ'
    expected = "없다있다"
    print("compose", hgtk.text.compose(example))
    composed = hgtk.text.compose(example)
    assert composed == expected
27 |
--------------------------------------------------------------------------------