├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── koparadigm ├── __init__.py ├── endings.tsv ├── koparadigm.py ├── koparadigm.xlsx ├── paradigm.xlsx ├── template.xls └── verbs.tsv └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | working/ 2 | koparadigm/*.tsv 3 | koparadigm/*.xls 4 | koparadigm/paradigm.xlsx 5 | 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 
| .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include koparadigm/koparadigm.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KoParadigm: A Korean Conjugation Paradigm Generator 2 | 3 | This is the official repo for our paper: [KoParadigm: A Korean Conjugation Paradigm Generator](https://arxiv.org/abs/2004.13221) 4 | 5 | An (inflectional) paradigm is the set of all the inflected forms of a word. For example, the English verb "look" has inflected forms like "look", "look-s", "look-ed", and "look-ing", as all of you know. 6 | Paradigms are widely used in corpus linguistics or search engines. 7 | To create the full paradigm set of a language is sometimes tricky. It is particularly so when we deal with a morphologically rich language like Korean. 8 | Inflection of Korean verbs is notoriously complicated. Typically, a Korean verb can combine with more than 100 endings. 
What is worse, the combination rules are not simple at all. 9 | They are determined by the sound of the verb/ending, and the part-of-speech of the verb (action / descriptive). That's why, I think, there has been no open-source Korean paradigm generator so far. 10 | Here's the first one. With KoParadigm, you can easily get the full paradigm of a Korean verb. 11 | 12 | ## Dependencies 13 | * python >=3.6 14 | * jamo >=0.4.1 15 | * xlrd == 1.2.0 16 | 17 | ## Installation 18 | ``` 19 | pip install koparadigm 20 | ``` 21 | 22 | ## Usage 23 | ``` 24 | >>> from koparadigm import Paradigm, prettify 25 | >>> p = Paradigm() 26 | >>> verb = "곱" # Note that you must drop the final ending 다 27 | >>> paradigms = p.conjugate(verb) # this returns a list of lists 28 | >>> print(paradigms) 29 | [['Action Verb', [('거나', '곱거나'), ('거늘', '곱거늘'), ('거니', '곱거니') ...]]] 30 | >>> prettify(paradigms) 31 | POS = Action Verb 32 | • ending = 거나 form = 곱거나 33 | • ending = 거늘 form = 곱거늘 34 | • ending = 거니 form = 곱거니 35 | ... 36 | ==================== 2 ==================== 37 | POS = Descriptive Verb 38 | • ending = 거나 form = 곱거나 39 | • ending = 거늘 form = 곱거늘 40 | • ending = 거니 form = 곱거니 41 | • ending = 거니와 form = 곱거니와 42 | ... 
43 | 44 | ``` 45 | ## References 46 | If you use our software for research, please cite: 47 | 48 | ``` 49 | @article{park2020KoParadigm, 50 | author = {Park, Kyubyong }, 51 | title={KoParadigm: A Korean Conjugation Paradigm Generator}, 52 | journal={arXiv preprint arXiv:2004.13221}, 53 | year={2020} 54 | } 55 | ``` 56 | -------------------------------------------------------------------------------- /koparadigm/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | r"""KoParadigm 3 | """ 4 | from __future__ import absolute_import 5 | 6 | from .koparadigm import Paradigm, prettify -------------------------------------------------------------------------------- /koparadigm/endings.tsv: -------------------------------------------------------------------------------- 1 | Num Ending Class 2 | 1 거나 1 3 | 2 거늘 1 4 | 3 거니 1 5 | 4 거니와 1 6 | 5 거드면 1 7 | 6 거든 1 8 | 7 거라 9 9 | 8 건 1 10 | 9 건대 9 11 | 10 건마는 1 12 | 11 것다 1 13 | 12 게 1 14 | 13 게 4 15 | 14 게 9 16 | 15 게끔 9 17 | 16 게나 9 18 | 17 겠 1 19 | 18 고 1 20 | 19 고는 4 21 | 20 고는 9 22 | 21 고도 1 23 | 22 고말고 1 24 | 23 고서 9 25 | 24 고야 9 26 | 25 고자 12 27 | 26 곤 9 28 | 27 관데 1 29 | 28 구나 16 30 | 29 구려 9 31 | 30 구려 16 32 | 31 구먼 9 33 | 32 군 16 34 | 33 기 1 35 | 34 기로 1 36 | 35 기로니 1 37 | 36 기로서 1 38 | 37 기로서니 1 39 | 38 기로선들 1 40 | 39 기에 1 41 | 40 ㄴ 10 42 | 41 ㄴ 23 43 | 42 ㄴ가 18 44 | 43 ㄴ감 18 45 | 44 ㄴ걸 2 46 | 45 ㄴ고 18 47 | 46 ㄴ다 10 48 | 47 ㄴ다고 10 49 | 48 ㄴ다나 10 50 | 49 ㄴ다네 10 51 | 50 ㄴ다느니 10 52 | 51 ㄴ다니 10 53 | 52 ㄴ다니까 10 54 | 53 ㄴ다더라 10 55 | 54 ㄴ다마는 10 56 | 55 ㄴ다며 10 57 | 56 ㄴ다면 10 58 | 57 ㄴ다면서 10 59 | 58 ㄴ다손 10 60 | 59 ㄴ다오 10 61 | 60 ㄴ다지 10 62 | 61 ㄴ단다 10 63 | 62 ㄴ담 10 64 | 63 ㄴ답니까 10 65 | 64 ㄴ답니다 10 66 | 65 ㄴ답시고 10 67 | 66 ㄴ대 10 68 | 67 ㄴ대요 10 69 | 68 ㄴ데 18 70 | 69 ㄴ뎁쇼 17 71 | 70 ㄴ들 2 72 | 71 ㄴ바 2 73 | 72 ㄴ즉 2 74 | 73 ㄴ즉슨 2 75 | 74 ㄴ지 18 76 | 75 나 2 77 | 76 나 9 78 | 77 나니 9 79 | 78 나마 2 80 | 79 나이까 12 81 | 80 나이다 12 82 | 81 남 9 83 | 82 냐 18 84 | 83 냐고 18 85 | 84 
너라 23 86 | 85 네 1 87 | 86 노니 9 88 | 87 노라 9 89 | 88 노라고 9 90 | 89 노라니 12 91 | 90 노라니까 12 92 | 91 노라면 12 93 | 92 뇨 17 94 | 93 누 4 95 | 94 누나 9 96 | 95 누먼 9 97 | 96 느냐 12 98 | 97 느냐고 12 99 | 98 느뇨 12 100 | 99 느니 12 101 | 100 느니라 12 102 | 101 느니만 12 103 | 102 느니만치 12 104 | 103 느니만큼 12 105 | 104 느라 9 106 | 105 느라고 9 107 | 106 는 12 108 | 107 는 12 109 | 108 는가 12 110 | 109 는감 12 111 | 110 는걸 12 112 | 111 는고 12 113 | 112 는구나 9 114 | 113 는구려 9 115 | 114 는군 9 116 | 115 는다 14 117 | 116 는다고 14 118 | 117 는다나 14 119 | 118 는다네 14 120 | 119 는다느니 14 121 | 120 는다니 14 122 | 121 는다니까 14 123 | 122 는다더라 14 124 | 123 는다마는 14 125 | 124 는다며 14 126 | 125 는다면 14 127 | 126 는다면서 14 128 | 127 는다손 14 129 | 128 는다오 14 130 | 129 는다지 14 131 | 130 는단다 14 132 | 131 는담 14 133 | 132 는답니까 14 134 | 133 는답니다 14 135 | 134 는답시고 14 136 | 135 는대 14 137 | 136 는대요 14 138 | 137 는데 12 139 | 138 는뎁쇼 12 140 | 139 는바 12 141 | 140 는지 12 142 | 141 는지고 12 143 | 142 는지라 12 144 | 143 니 1 145 | 144 니 2 146 | 145 니 18 147 | 146 니까 2 148 | 147 니까는 2 149 | 148 니라 18 150 | 149 니만치 2 151 | 150 니만큼 2 152 | 151 다 1 153 | 152 다가 1 154 | 153 다가는 1 155 | 154 다간 1 156 | 155 다고 19 157 | 156 다나 19 158 | 157 다네 19 159 | 158 다느니 19 160 | 159 다니 1 161 | 160 다니 19 162 | 161 다니까 19 163 | 162 다더라 19 164 | 163 다마는 19 165 | 164 다마다 1 166 | 165 다며 19 167 | 166 다면 19 168 | 167 다면서 19 169 | 168 다손 19 170 | 169 다지 19 171 | 170 단다 19 172 | 171 담 19 173 | 172 답니까 19 174 | 173 답니다 19 175 | 174 답시고 19 176 | 175 대 19 177 | 176 대요 19 178 | 177 더 1 179 | 178 더구나 1 180 | 179 더구려 1 181 | 180 더군 1 182 | 181 더냐 1 183 | 182 더뇨 1 184 | 183 더니 1 185 | 184 더니라 1 186 | 185 더니마는 1 187 | 186 더니만 1 188 | 187 더니이까 1 189 | 188 더니이다 1 190 | 189 더라 1 191 | 190 더라나 1 192 | 191 더라니 1 193 | 192 더라니까 1 194 | 193 더라도 1 195 | 194 더라며 1 196 | 195 더라면 1 197 | 196 더라면서 1 198 | 197 더라손 1 199 | 198 더라지 1 200 | 199 더람 1 201 | 200 더이까 1 202 | 201 더이다 1 203 | 202 던 1 204 | 203 던가 1 205 | 204 던감 1 206 | 205 던걸 1 207 | 206 던고 1 208 | 207 던데 1 209 | 208 던바 1 210 | 209 던지 1 211 | 210 데 
1 212 | 211 데요 1 213 | 212 도다 1 214 | 213 도록 9 215 | 214 되 1 216 | 215 든 1 217 | 216 든가 1 218 | 217 든지 1 219 | 218 듯 1 220 | 219 듯이 1 221 | 220 디 4 222 | 221 디 19 223 | 222 ㄹ 2 224 | 223 ㄹ거나 10 225 | 224 ㄹ걸 2 226 | 225 ㄹ게 10 227 | 226 ㄹ까 2 228 | 227 ㄹ깝쇼 10 229 | 228 ㄹ는지 2 230 | 229 ㄹ라 2 231 | 230 ㄹ라고 2 232 | 231 ㄹ라치면 10 233 | 232 ㄹ락 10 234 | 233 ㄹ래 10 235 | 234 ㄹ러니 2 236 | 235 ㄹ러라 2 237 | 236 ㄹ런고 2 238 | 237 ㄹ레 2 239 | 238 ㄹ레라 2 240 | 239 ㄹ망정 2 241 | 240 ㄹ밖에 2 242 | 241 ㄹ뿐더러 2 243 | 242 ㄹ사 2 244 | 243 ㄹ새 2 245 | 244 ㄹ세 2 246 | 245 ㄹ세 5 247 | 246 ㄹ세라 2 248 | 247 ㄹ세말이지 2 249 | 248 ㄹ수록 2 250 | 249 ㄹ시 2 251 | 250 ㄹ시고 18 252 | 251 ㄹ쏘냐 2 253 | 252 ㄹ쏜가 2 254 | 253 ㄹ작시면 10 255 | 254 ㄹ지 2 256 | 255 ㄹ지나 2 257 | 256 ㄹ지니 2 258 | 257 ㄹ지니라 2 259 | 258 ㄹ지라 2 260 | 259 ㄹ지라도 2 261 | 260 ㄹ지로다 2 262 | 261 ㄹ지며 10 263 | 262 ㄹ지어다 2 264 | 263 ㄹ지언정 2 265 | 264 ㄹ진대 2 266 | 265 ㄹ진대는 2 267 | 266 ㄹ진댄 2 268 | 267 ㄹ진저 2 269 | 268 라 22 270 | 269 라고 10 271 | 270 라고 22 272 | 271 라나 22 273 | 272 라네 22 274 | 273 라니 22 275 | 274 라니까 22 276 | 275 라도 22 277 | 276 라며 22 278 | 277 라면 22 279 | 278 라면서 22 280 | 279 라서 22 281 | 280 라야 22 282 | 281 라야만 22 283 | 282 라오 22 284 | 283 라지 22 285 | 284 락 5 286 | 285 란다 22 287 | 286 랍니까 22 288 | 287 랍니다 22 289 | 288 랍시고 22 290 | 289 래 22 291 | 290 래요 22 292 | 291 랴 10 293 | 292 러 10 294 | 293 러니 22 295 | 294 러니라 22 296 | 295 러니이까 22 297 | 296 러니이다 22 298 | 297 러라 22 299 | 298 러이까 22 300 | 299 러이다 22 301 | 300 런가 22 302 | 301 런들 22 303 | 302 려 10 304 | 303 려거든 2 305 | 304 려고 2 306 | 305 려나 2 307 | 306 려니 2 308 | 307 려니와 2 309 | 308 려든 10 310 | 309 려면 2 311 | 310 려무나 10 312 | 311 련마는 2 313 | 312 련만 2 314 | 313 렴 10 315 | 314 렷다 2 316 | 315 로고 22 317 | 316 로구나 22 318 | 317 로구려 22 319 | 318 로군 22 320 | 319 로다 22 321 | 320 로되 22 322 | 321 로라 22 323 | 322 로서니 22 324 | 323 로세 22 325 | 324 리 2 326 | 325 리까 2 327 | 326 리니 2 328 | 327 리니라 2 329 | 328 리다 2 330 | 329 리라 2 331 | 330 리로다 2 332 | 331 리만치 5 333 | 332 리만큼 5 334 | 333 리오 2 335 | 334 ㅁ 2 336 | 335 ㅁ세 10 337 | 336 ㅁ에도 
2 338 | 337 ㅁ에랴 2 339 | 338 마 10 340 | 339 매 2 341 | 340 며 2 342 | 341 면 2 343 | 342 면서 2 344 | 343 므로 2 345 | 344 ㅂ네 2 346 | 345 ㅂ늰다 2 347 | 346 ㅂ니까 2 348 | 347 ㅂ니다 2 349 | 348 ㅂ디까 2 350 | 349 ㅂ디다 2 351 | 350 ㅂ딘다 2 352 | 351 ㅂ시다 10 353 | 352 ㅂ시사 10 354 | 353 ㅂ시오 10 355 | 354 ㅂ죠 16 356 | 355 ㅂ지요 16 357 | 356 사 5 358 | 357 사이다 10 359 | 358 세 10 360 | 359 세나 9 361 | 360 세요 2 362 | 361 셔요 2 363 | 362 소 4 364 | 363 소서 5 365 | 364 소이까 1 366 | 365 습네 8 367 | 366 습늰다 8 368 | 367 습니까 8 369 | 368 습니다 8 370 | 369 습디까 8 371 | 370 습디다 8 372 | 371 습딘다 8 373 | 372 습죠 8 374 | 373 습지요 8 375 | 374 시 2 376 | 375 시라 10 377 | 376 시압 10 378 | 377 시어요 2 379 | 378 십사 10 380 | 379 십시다 10 381 | 380 십시오 10 382 | 381 아 6 383 | 382 아다 11 384 | 383 아다가 11 385 | 384 아도 6 386 | 385 아라 6 387 | 386 아서 6 388 | 387 아야 6 389 | 388 아야만 6 390 | 389 아야지 6 391 | 390 아요 6 392 | 391 아지이다 6 393 | 392 았 6 394 | 393 았었 6 395 | 394 았자 6 396 | 395 야 22 397 | 396 어 3 398 | 397 어다 15 399 | 398 어다가 15 400 | 399 어도 3 401 | 400 어라 3 402 | 401 어서 3 403 | 402 어야 3 404 | 403 어야만 3 405 | 404 어야지 3 406 | 405 어요 3 407 | 406 어지이다 3 408 | 407 언마는 22 409 | 408 언정 22 410 | 409 었 3 411 | 410 었었 3 412 | 411 에요 21 413 | 412 여 24 414 | 413 여도 24 415 | 414 여라 24 416 | 415 여서 24 417 | 416 여야 24 418 | 417 여야지 24 419 | 418 여요 24 420 | 419 여지이다 24 421 | 420 였 24 422 | 421 였었 24 423 | 422 였자 24 424 | 423 오 5 425 | 424 오니까 5 426 | 425 오리까 5 427 | 426 오리다 5 428 | 427 오리이까 5 429 | 428 오리이다 5 430 | 429 오이다 5 431 | 430 올시다 22 432 | 431 옵 5 433 | 432 옵나이까 5 434 | 433 옵나이다 5 435 | 434 옵니까 5 436 | 435 옵니다 5 437 | 436 옵디까 5 438 | 437 옵디다 5 439 | 438 옵소서 5 440 | 439 옵시 5 441 | 440 와 5 442 | 441 외다 5 443 | 442 요 22 444 | 443 우 19 445 | 444 으나 7 446 | 445 으나마 7 447 | 446 으냐 20 448 | 447 으냐고 20 449 | 448 으뇨 20 450 | 449 으니 7 451 | 450 으니 20 452 | 451 으니까 7 453 | 452 으니까는 7 454 | 453 으니라 20 455 | 454 으니만치 7 456 | 455 으니만큼 7 457 | 456 으라 13 458 | 457 으라고 13 459 | 458 으라나 13 460 | 459 으라느니 13 461 | 460 으라니까 13 462 | 461 으라며 13 463 | 462 으라면서 13 464 | 
463 으라손 13 465 | 464 으락 13 466 | 465 으란 13 467 | 466 으람 13 468 | 467 으랴 13 469 | 468 으러 13 470 | 469 으려 13 471 | 470 으려거든 13 472 | 471 으려고 13 473 | 472 으려나 13 474 | 473 으려니 13 475 | 474 으려니와 7 476 | 475 으려든 13 477 | 476 으려면 7 478 | 477 으려무나 13 479 | 478 으련 13 480 | 479 으련마는 7 481 | 480 으렴 13 482 | 481 으렵니까 13 483 | 482 으렵니다 13 484 | 483 으렷다 7 485 | 484 으리 7 486 | 485 으리까 7 487 | 486 으리니 7 488 | 487 으리니라 7 489 | 488 으리다 7 490 | 489 으리라 7 491 | 490 으리로다 7 492 | 491 으리만치 7 493 | 492 으리만큼 7 494 | 493 으리오 7 495 | 494 으마 13 496 | 495 으매 7 497 | 496 으며 7 498 | 497 으면 7 499 | 498 으면서 7 500 | 499 으므로 7 501 | 500 으사 7 502 | 501 으사이다 13 503 | 502 으세 13 504 | 503 으세요 7 505 | 504 으셔요 7 506 | 505 으소서 7 507 | 506 으시 7 508 | 507 으시라 13 509 | 508 으시압 13 510 | 509 으시어요 7 511 | 510 으십사 13 512 | 511 으십시다 13 513 | 512 으십시오 13 514 | 513 으오 7 515 | 514 으오니까 7 516 | 515 으오리까 7 517 | 516 으오리다 7 518 | 517 으오리이까 7 519 | 518 으오리이다 7 520 | 519 으오이다 7 521 | 520 으옵 7 522 | 521 으옵나이까 7 523 | 522 으옵나이다 7 524 | 523 으옵니까 7 525 | 524 으옵니다 7 526 | 525 으옵디까 7 527 | 526 으옵디다 7 528 | 527 으옵소서 7 529 | 528 으옵시 7 530 | 529 으와 7 531 | 530 으우 7 532 | 531 으이 20 533 | 532 은 7 534 | 533 은가 20 535 | 534 은감 20 536 | 535 은걸 7 537 | 536 은고 20 538 | 537 은데 20 539 | 538 은뎁쇼 20 540 | 539 은들 7 541 | 540 은즉 7 542 | 541 은즉슨 7 543 | 542 은지 20 544 | 543 을 7 545 | 544 을거나 13 546 | 545 을걸 7 547 | 546 을게 13 548 | 547 을까 7 549 | 548 을깝쇼 13 550 | 549 을는지 7 551 | 550 을데라니 20 552 | 551 을라 7 553 | 552 을라고 7 554 | 553 을라치면 13 555 | 554 을락 13 556 | 555 을래 13 557 | 556 을러니 7 558 | 557 을러라 7 559 | 558 을런가 7 560 | 559 을런고 7 561 | 560 을레 7 562 | 561 을레라 7 563 | 562 을망정 7 564 | 563 을밖에 7 565 | 564 을뿐더러 7 566 | 565 을새 7 567 | 566 을세 7 568 | 567 을세라 7 569 | 568 을세말이지 7 570 | 569 을수록 7 571 | 570 을시 7 572 | 571 을시고 20 573 | 572 을쏘냐 7 574 | 573 을쏜가 7 575 | 574 을작시면 13 576 | 575 을지 7 577 | 576 을지나 7 578 | 577 을지니 7 579 | 578 을지니라 7 580 | 579 을지라 7 581 | 580 을지라도 7 582 | 581 을지로다 7 583 | 582 을지며 7 584 | 583 을지어다 7 585 | 584 을지언정 7 586 | 585 을진대 
7 587 | 586 을진대는 7 588 | 587 을진저 7 589 | 588 음 7 590 | 589 음세 13 591 | 590 음에도 7 592 | 591 음에랴 7 593 | 592 읍시다 13 594 | 593 읍시사 13 595 | 594 읍시오 7 596 | 595 자 9 597 | 596 자고 9 598 | 597 자꾸나 9 599 | 598 자느니 9 600 | 599 자니까 9 601 | 600 자마자 9 602 | 601 자며 9 603 | 602 자면 9 604 | 603 자면서 9 605 | 604 자손 9 606 | 605 죠 1 607 | 606 지 4 608 | 607 지마는 1 609 | 608 지만 1 610 | 609 지요 1 -------------------------------------------------------------------------------- /koparadigm/koparadigm.py: -------------------------------------------------------------------------------- 1 | import xlrd 2 | from jamo import h2j, j2h, hcj_to_jamo, is_hcj 3 | import re 4 | import os 5 | 6 | RESOURCE = xlrd.open_workbook(os.path.dirname(os.path.abspath(__file__)) + "/koparadigm.xlsx") 7 | 8 | class Paradigm(object): 9 | def __init__(self): 10 | self.verb2verb_classes = self.make_verb2verb_classes() 11 | self.ending_class2endings = self.make_ending_class2endings() 12 | self.verb_class2rules = self.make_verb_class2rules() 13 | 14 | 15 | def make_verb2verb_classes(self): 16 | verb2verb_classes = dict() # e.g., {"곱": [1,2]} 17 | 18 | sh = RESOURCE.sheet_by_name("Verbs") 19 | for rx in range(1, sh.nrows): 20 | verb = sh.row(rx)[1].value 21 | verb_class = int(sh.row(rx)[2].value) 22 | if verb in verb2verb_classes: 23 | verb2verb_classes[verb].append(verb_class) 24 | else: 25 | verb2verb_classes[verb] = [verb_class] 26 | return verb2verb_classes 27 | 28 | 29 | def make_ending_class2endings(self): 30 | ending_class2endings = dict() # e.g., {1: ["어야", "어서]} 31 | 32 | sh = RESOURCE.sheet_by_name("Endings") 33 | for rx in range(1, sh.nrows): 34 | ending = sh.row(rx)[1].value 35 | ending_class = int(sh.row(rx)[2].value) 36 | if ending_class in ending_class2endings: 37 | ending_class2endings[ending_class].append(ending) 38 | else: 39 | ending_class2endings[ending_class] = [ending] 40 | return ending_class2endings 41 | 42 | 43 | def make_verb_class2rules(self): 44 | verb_class2rules = dict() 45 | sh = 
RESOURCE.sheet_by_name("Template") 46 | 47 | ending_classes = sh.row(0)[2:] 48 | for rx in range(2, sh.nrows): 49 | verb_class = int(sh.row(rx)[0].value) 50 | for i, ending_class in enumerate(ending_classes, start=2): 51 | ending_class = int(ending_class.value) 52 | rule = sh.row(rx)[i].value 53 | if rule != "": 54 | rule = rule[1:-1] # (...) 55 | rule = (ending_class, rule) 56 | 57 | if verb_class in verb_class2rules: 58 | verb_class2rules[verb_class].append(rule) 59 | else: 60 | verb_class2rules[verb_class] = [rule] 61 | 62 | return verb_class2rules 63 | 64 | 65 | def contract(self, string): 66 | '''vowel contraction''' 67 | _string = string 68 | pairs = [ 69 | ("ᅡ|ᅡ","ᅡ", "required"), 70 | ("ᅥ|ᅥ","ᅥ", "required"), 71 | ("오|ᅡ","와", "required"), 72 | ("ᅩ|ᅡ","ᅪ", "optional"), 73 | ("ᅮ|ᅥ","ᅯ", "optional"), 74 | ("ᅳ|ᅥ","ᅥ", "required"), 75 | ("ᅵ|ᅥ","ᅧ", "optional"), 76 | ("ᅢ|ᅥ","ᅢ", "optional"), 77 | ("ᅦ|ᅥ","ᅦ", "optional"), 78 | ("ᅬ|ᅥ","ᅫ", "optional"), 79 | ("하|ᅧ","해", "optional"), 80 | ] 81 | 82 | for pair in pairs: 83 | untouched, contraction, cond = pair 84 | if untouched in string: 85 | string = string.replace(untouched, contraction) 86 | if cond == "optional": 87 | string = _string.replace("|", "ᄋ") + "/" + string 88 | return string 89 | return _string.replace("|", "") 90 | 91 | def compose(self, string): 92 | string = self.contract(string) 93 | 94 | choseong = "[\u1100-\u1112]" 95 | jungseong = "[\u1161-\u1175]" 96 | jongseong = "[\u11A8-\u11C2]" 97 | 98 | # CVC first 99 | matches = re.findall(f"{choseong}{jungseong}{jongseong}", string) 100 | for match in matches: 101 | syl = j2h(*match) 102 | string = string.replace(match, syl) 103 | 104 | # CV 105 | matches = re.findall(f"{choseong}{jungseong}", string) 106 | for match in matches: 107 | syl = j2h(*match) 108 | string = string.replace(match, syl) 109 | 110 | return string 111 | 112 | def combine(self, verb, ending, rule): 113 | if not rule: 114 | return [] 115 | 116 | stop, postfix, start = rule.split(",") 
117 | stop = None if stop == "" else int(stop) 118 | start = None if start == "" else int(start) 119 | 120 | # STEP 1. Decompose verb 121 | verb = h2j(verb) # h: hangul syl. j: jamo 122 | 123 | # STEP 2. Slice 1 124 | verb = verb[:stop] 125 | 126 | # STEP 3. Merge 2 and postfix 127 | wordform = verb + postfix 128 | 129 | # STEP 4. Decompose ending 130 | ending = h2j(ending) 131 | ending = "".join(hcj_to_jamo(char, "tail") if is_hcj(char) else char for char in ending) 132 | 133 | # STEP 5. Slice 4 134 | ending = ending[start:] 135 | 136 | # STEP 6. Merge 3 and 5 137 | wordform +="|" + ending 138 | 139 | # STEP 7. Compose 6 140 | wordform = self.compose(wordform) 141 | 142 | return wordform 143 | 144 | 145 | def conjugate(self, verb): 146 | ''' 147 | Main method 148 | ''' 149 | if verb in self.verb2verb_classes: 150 | paradigms = [] 151 | verb_classes = self.verb2verb_classes[verb] 152 | for verb_class in verb_classes: 153 | # pos 154 | if verb_class < 3: 155 | pos = "Action Verb/Descriptive Verb" 156 | elif verb_class == 14: # copula 157 | pos = "Copula" 158 | elif (verb_class <= 6) or (15 <= verb_class <= 30): 159 | pos = "Action Verb" 160 | else: 161 | pos = "Descriptive Verb" 162 | # ending info. 163 | li_ending_and_form = [] 164 | rules = self.verb_class2rules[verb_class] 165 | for ending_class, rule in rules: 166 | endings = self.ending_class2endings[ending_class] 167 | for ending in endings: 168 | form = self.combine(verb=verb, ending=ending, rule=rule) 169 | if len(form) > 0: 170 | li_ending_and_form.append((ending, form)) 171 | 172 | paradigms.append([pos, li_ending_and_form]) 173 | return paradigms 174 | else: 175 | print(f"{verb} is NOT found.") 176 | 177 | 178 | def prettify(paradigms): 179 | '''utility function. pretty print. 
180 | ''' 181 | for num, paradigm in enumerate(paradigms, start=1): 182 | pos, li_ending_and_form = paradigm 183 | print("="*20, num, "="*20) 184 | print("POS =", pos) 185 | for ending, form in li_ending_and_form: 186 | print("• ending = {} form = {}".format(ending, form)) 187 | print() -------------------------------------------------------------------------------- /koparadigm/koparadigm.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kyubyong/KoParadigm/6e5ee3c5356652ea19b46bb113dda905705bf6fc/koparadigm/koparadigm.xlsx -------------------------------------------------------------------------------- /koparadigm/paradigm.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kyubyong/KoParadigm/6e5ee3c5356652ea19b46bb113dda905705bf6fc/koparadigm/paradigm.xlsx -------------------------------------------------------------------------------- /koparadigm/template.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kyubyong/KoParadigm/6e5ee3c5356652ea19b46bb113dda905705bf6fc/koparadigm/template.xls -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", mode="r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | REQUIRED_PACKAGES = [ 7 | 'jamo>=0.4.1', 8 | 'xlrd==1.2.0', 9 | ] 10 | 11 | setuptools.setup( 12 | name="koparadigm", 13 | version="0.10.0", 14 | author="Kyubyong Park", 15 | author_email="kbpark.linguist@gmail.com", 16 | description="Korean Conjugation Paradigm Generator", 17 | install_requires=REQUIRED_PACKAGES, 18 | license='Apache License 2.0', 19 | long_description=long_description, 20 | long_description_content_type="text/markdown", 21 | 
url="https://github.com/Kyubyong/paradigm", 22 | packages=setuptools.find_packages(), 23 | package_data={'koparadigm': ['koparadigm/koparadigm.xlsx']}, 24 | python_requires=">=3.6", 25 | include_package_data=True, 26 | classifiers=[ 27 | 'Development Status :: 5 - Production/Stable', 28 | 'Intended Audience :: Developers', 29 | 'Intended Audience :: Science/Research', 30 | "License :: OSI Approved :: Apache Software License", 31 | "Operating System :: OS Independent", 32 | "Programming Language :: Python :: 3", 33 | 'Programming Language :: Python :: 3.6', 34 | 'Programming Language :: Python :: 3.7', 35 | ], 36 | ) 37 | --------------------------------------------------------------------------------