├── .coveragerc ├── .github ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── README.rst ├── build.sh ├── doc_source ├── _static │ └── style.css ├── conf.py ├── index.rst └── koalanlp.rst ├── docs ├── .nojekyll ├── doctrees │ ├── environment.pickle │ ├── index.doctree │ └── koalanlp.doctree ├── html │ ├── .buildinfo │ ├── .nojekyll │ ├── _sources │ │ ├── index.rst.txt │ │ └── koalanlp.rst.txt │ ├── _static │ │ ├── basic.css │ │ ├── css │ │ │ ├── badge_only.css │ │ │ ├── fonts │ │ │ │ ├── Roboto-Slab-Bold.woff │ │ │ │ ├── Roboto-Slab-Bold.woff2 │ │ │ │ ├── Roboto-Slab-Regular.woff │ │ │ │ ├── Roboto-Slab-Regular.woff2 │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.svg │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ ├── fontawesome-webfont.woff2 │ │ │ │ ├── lato-bold-italic.woff │ │ │ │ ├── lato-bold-italic.woff2 │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-normal-italic.woff │ │ │ │ ├── lato-normal-italic.woff2 │ │ │ │ ├── lato-normal.woff │ │ │ │ └── lato-normal.woff2 │ │ │ └── theme.css │ │ ├── doctools.js │ │ ├── documentation_options.js │ │ ├── file.png │ │ ├── fonts │ │ │ ├── Inconsolata-Bold.ttf │ │ │ ├── Inconsolata-Regular.ttf │ │ │ ├── Inconsolata.ttf │ │ │ ├── Lato-Bold.ttf │ │ │ ├── Lato-Regular.ttf │ │ │ ├── Lato │ │ │ │ ├── lato-bold.eot │ │ │ │ ├── lato-bold.ttf │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-bolditalic.eot │ │ │ │ ├── lato-bolditalic.ttf │ │ │ │ ├── lato-bolditalic.woff │ │ │ │ ├── lato-bolditalic.woff2 │ │ │ │ ├── lato-italic.eot │ │ │ │ ├── lato-italic.ttf │ │ │ │ ├── lato-italic.woff │ │ │ │ ├── lato-italic.woff2 │ │ │ │ ├── lato-regular.eot │ │ │ │ ├── lato-regular.ttf │ │ │ │ ├── lato-regular.woff │ │ │ │ └── lato-regular.woff2 │ │ │ ├── RobotoSlab-Bold.ttf │ │ │ ├── RobotoSlab-Regular.ttf │ │ │ ├── RobotoSlab │ │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ │ 
├── roboto-slab-v7-bold.ttf │ │ │ │ ├── roboto-slab-v7-bold.woff │ │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ │ ├── roboto-slab-v7-regular.eot │ │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ │ └── roboto-slab-v7-regular.woff2 │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.svg │ │ │ ├── fontawesome-webfont.ttf │ │ │ ├── fontawesome-webfont.woff │ │ │ └── fontawesome-webfont.woff2 │ │ ├── jquery-3.5.1.js │ │ ├── jquery.js │ │ ├── js │ │ │ ├── badge_only.js │ │ │ ├── html5shiv-printshiv.min.js │ │ │ ├── html5shiv.min.js │ │ │ ├── modernizr.min.js │ │ │ └── theme.js │ │ ├── language_data.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── style.css │ │ ├── translations.js │ │ ├── underscore-1.13.1.js │ │ └── underscore.js │ ├── genindex.html │ ├── index.html │ ├── koalanlp.html │ ├── objects.inv │ ├── py-modindex.html │ ├── search.html │ └── searchindex.js └── index.html ├── koalanlp ├── API.py ├── ExtUtil.py ├── Util.py ├── __init__.py ├── __init__.pyc ├── data.py ├── jip │ ├── LICENSE │ ├── __init__.py │ ├── cache.py │ ├── index.py │ ├── maven.py │ ├── repository.py │ └── util.py ├── jvm.py ├── kiwi │ └── __init__.py ├── kss │ └── __init__.py ├── proc.py └── types.py ├── requirements.txt ├── scripts ├── khaiii_install.sh └── utagger_install.sh ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── data_test.py ├── dictionary_test.py ├── extension_core_spec.py ├── finalize_test.py ├── khaiii_spec.py ├── multi_processing_test.py ├── native_spec.py ├── proc_core_spec.py ├── type_core_spec.py └── utagger_spec.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = ./ 4 | 5 | [report] 6 | exclude_lines = 7 | if self.debug: 8 | pragma: no cover 9 | raise NotImplementedError 10 | if __name__ == .__main__.: 11 | ignore_errors = True 12 | omit = 13 | tests/* 14 | doc_source/* 15 | setup.py 
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### 기초정보 2 | - Python 버전이 얼마입니까? 3 | - [ ] 3.5.x 4 | - [ ] 3.6.x 5 | - KoalaNLP 버전이 얼마인가요? 6 | - [ ] 1.x 7 | - [ ] 2.x 8 | - 문제의 유형은 어떤 것인가요? 9 | - [ ] 버그리포트 10 | - [ ] 개선점제안 11 | - [ ] 사용법질문 12 | - [ ] 기타 13 | 14 | ### 재현을 위한 정보 15 | - 어떤 문장을 시도하셨습니까? 16 | ```text 17 | [여기에 문장을 넣어주세요] 18 | ``` 19 | - KoalaNLP를 사용한 코드 부분을 보여주세요. 20 | ```python 21 | # 여기에 사용한 코드부분을 보여주세요. 22 | ``` 23 | 24 | ### 본문 25 | - 아래에 본문을 입력해주세요. 26 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### 해결된 이슈를 알려주세요. 2 | 이 Pull Request는 이슈 #[번호] 를 수정하기 위한 것입니다. 3 | 4 | ### 무엇이 변경되었나요? 5 | - 어떤 파일의, 어떤 부분을, 어떻게 바꾸셨나요? 6 | 7 | ### 확인자 8 | - 누가 이 Request를 확인하고 승인해야하나요? 아래와 같이 지정해 주세요. 9 | - @[author]. 
10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .java/ 2 | .pytest_cache/ 3 | build/ 4 | dist/ 5 | koalanlp.egg-info/ 6 | py36/ 7 | venv/ 8 | .idea/ 9 | .coverage 10 | coverage.xml 11 | 12 | **/__pycache__/ 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | matrix: 3 | include: 4 | - name: "Python 3.8 on Windows (JDK11 LTS)" 5 | os: windows 6 | language: shell 7 | filter_secrets: false 8 | before_install: 9 | - choco install openjdk11 -y 10 | - choco install python --version 3.8.3 -y 11 | - export PATH=$(cmd.exe //c "refreshenv > nul & C:\Progra~1\Git\bin\bash -c 'echo \$PATH' ") 12 | - java --version 13 | - wget https://bootstrap.pypa.io/get-pip.py --no-check-certificate 14 | - $PYTHON get-pip.py --user 15 | env: 16 | - PYTHON=python 17 | - PYPIFLAG=--user 18 | - JAVA_HOME="/c/Progra~1/OpenJDK/openjdk-11.0.11_9" 19 | cache: 20 | directories: 21 | - $HOME/utagger 22 | - $HOME/khaiii-orig 23 | - $HOME/.hunter 24 | - ./.java 25 | - name: "Python 3.8 on macOS 11.2 (JDK11 LTS)" 26 | os: osx 27 | osx_image: xcode12u 28 | language: shell 29 | jdk: openjdk11 30 | before_install: 31 | - eval "$(pyenv init -)" 32 | - pyenv install 3.8.3 33 | - pyenv shell 3.8.3 34 | - $PYTHON --version 35 | - wget https://bootstrap.pypa.io/get-pip.py --no-check-certificate 36 | - PYTHON_CONFIGURE_OPTS=--enable-unicode=ucs2 $PYTHON get-pip.py $PYPIFLAG 37 | env: 38 | - LDFLAGS="${LDFLAGS} -L$(brew --prefix zlib)/lib -L$(brew --prefix readline)/lib -L$(brew --prefix openssl)/lib" 39 | - CPPFLAGS="${CPPFLAGS} -I$(brew --prefix zlib)/include -I$(brew --prefix readline)/include -I$(brew --prefix openssl)/include -I$(xcrun --show-sdk-path)/include" 40 | - PKG_CONFIG_PATH="${PKG_CONFIG_PATH} $(brew --prefix 
zlib)/lib/pkgconfig" 41 | - PATH="$HOME/.pyenv:$PATH" 42 | - PYTHON=python3.8 43 | - PYPIFLAG=--user 44 | cache: 45 | directories: 46 | - $HOME/utagger 47 | - $HOME/khaiii-orig 48 | - $HOME/.hunter 49 | - ./.java 50 | - name: "Python 3.8 on Ubuntu Focal (JDK11 LTS)" 51 | python: 3.8 52 | dist: focal 53 | jdk: openjdk11 54 | env: 55 | - PYTHON=python 56 | - PYPIFLAG= 57 | cache: 58 | directories: 59 | - $HOME/utagger 60 | - $HOME/khaiii-orig 61 | - $HOME/.hunter 62 | - ./.java 63 | install: 64 | - $PYTHON --version 65 | - $PYTHON -m pip install py4j requests $PYPIFLAG 66 | - $PYTHON -m pip install kss kiwipiepy $PYPIFLAG 67 | - $PYTHON -m pip install pytest codecov pytest-cov --upgrade $PYPIFLAG 68 | - bash ./scripts/khaiii_install.sh 69 | - bash ./scripts/utagger_install.sh 70 | script: 71 | - $PYTHON -m pytest --cov=./ tests/finalize_test.py 72 | - $PYTHON -m pytest --cov=./ --cov-append tests/multi_processing_test.py 73 | - $PYTHON -m pytest --cov=./ --cov-append tests/dictionary_test.py 74 | - $PYTHON -m pytest --cov=./ --cov-append tests/extension_core_spec.py 75 | - $PYTHON -m pytest --cov=./ --cov-append tests/proc_core_spec.py 76 | - $PYTHON -m pytest --cov=./ --cov-append tests/type_core_spec.py 77 | - $PYTHON -m pytest --cov=./ --cov-append tests/data_test.py 78 | - $PYTHON -m pytest --cov=./ --cov-append tests/native_spec.py 79 | - if [ $TRAVIS_OS_NAME != 'windows' ]; then $PYTHON -m pytest --cov=./ --cov-append tests/khaiii_spec.py; fi 80 | - if [ $TRAVIS_OS_NAME == 'windows' ]; then $PYTHON -m pytest --cov=./ --cov-append tests/utagger_spec.py; fi 81 | after_success: 82 | - if [ $TRAVIS_OS_NAME == 'linux' ]; then codecov; fi 83 | env: 84 | global: 85 | - secure: 
f/Qsj0v9bUCplwXVjM1PrDTU0Cyp1hqra4k9mwUU7JJwkX51OQO61l2PPVEixX5Q6OQzIwvYI6bxusJVLPU+WV0lQzQmTK9UPSATNmwURU8btzYNJInwZ563Q7KkTR/NuIm3CNvuj4CGgQLZS22eLdDTQsw8q41CCP3BtybWrP+bsx2kZoGY7GJ12gWkS327QY87+ZO+2vAkl+NSGghtrkbcYVLgodInq4Q6GjjhMvmJPp3WBcC/N1O3q8Q88ED+t06ok1j76xd0Ay/qQPAx0U1J2uaK0/oQbw8F978EyHOPHc3EqTKgSJLERj6zXBgWfSf7rzqtofDKkrG3qE0KT6vqU/WhMiz/wkm1/KJay0vkPKdnjhVHhw7lPbowbhm91MA4bEBKISzp7Yge3cGzLLcapa7FgiFTrCFAqiTsfWKiLLe8I+reFYvPA15PmOigf2iUXGWji2RT+HbiOLR4K14PADcmV9OheGKt0Lfanxy55RxJttAyQdJ7Bh6ICWlR185O2LV/iSmVnsDK7rAxNrjZ1TYSjp/eqlrEdt1MlawYPTZGYjMkcYP00LKynOLEFWWFBJN/XRk4wdIoUt0E1d8XBc55L8gpamFNGac9aifQxJOgEdnIHwT6Enr3mih4LXl43srbLbSyPOzifJJ7M5As0+8ZZHqbiFLAWDTTHP0= 86 | - secure: fJgd4np/MzH7IkK+vEkHEQete6DsqvfmYu50ZH8DuFLaG0iXuK/8Df4dgkKHJU9vm9/aa+Xua5SlkeWhVcU/JAOHZqCUtV2fdeqtIXWQEAYY9DNj5QRg2wzs+z3V/SyhBbJwPQWKM3U7YoO5Y8Gfp642FIcTlVoURVIN24KdbV08iuSxpVXMKiN0jeOqcty8cbKm1jZ8rH0iqvruBMxZnQRwBch2flIVef65yqHnNUVkReXh9kUQadknl7aGMBYvFFGkrT5AxGrGsStV4vRFPDf1kOhLn327zrlkenLkevGnUXPym6OH2UHJJPmZr/A6XP00VZx4uRGyX+hf4AhmAnFO9Bf6155bhXX5xkryd9utN/TGWLJgZHPKdvGBynIq8ue8YA5yTNP3out6WN9SGdRdwwUMM1qmDEx4bl5UmQnghMFp4JZq4/ZF8axzopeGekNhSM8Rtlr892Hnu3sfqhKPecOR0hoDPBR0icecUO1mQLvPfl9yIQyAPBVsLPeWwr7mXp+ubClZ5mk4wzam/4/Cee6fX3qAX3P7jXrFJj3hcgMOuCy6+NE4HD7xF/Nn4SH3zdyyRzo6gCWtjc4FATZ2oAh8Xzo6OJ0KjvOGRn5Jyo2OuP+8lx02QGplSPGLiCtRWjLjdEnmFV7J0FHggAImrBkeh/+h6i5zcC8yrD0= 87 | - KHAIII_RSC=$HOME/khaiii-orig/build/share/khaiii 88 | - KHAIII_LIB=$HOME/khaiii-orig/build/lib 89 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Bugeun Kim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = doc_source 8 | BUILDDIR = docs 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KoalaNLP (Python3 Support) 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/koalanlp.svg?style=flat-square)](https://github.com/koalanlp/python-support) 4 | [![분석기별 품사비교표](https://img.shields.io/badge/%ED%92%88%EC%82%AC-%EB%B9%84%EA%B5%90%ED%91%9C-blue.svg?style=flat-square)](https://docs.google.com/spreadsheets/d/1OGM4JDdLk6URuegFKXg1huuKWynhg_EQnZYgTmG4h0s/edit?usp=sharing) 5 | [![MIT License](https://img.shields.io/badge/license-MIT-green.svg?style=flat-square)](https://tldrlegal.com/license/mit-license) 6 | [![Sphinx doc](https://img.shields.io/badge/Python-Doc-blue.svg?style=flat-square)](https://koalanlp.github.io/python-support/html/) 7 | 8 | [![Build Status](https://img.shields.io/travis/koalanlp/python-support.svg?style=flat-square&branch=master)](https://travis-ci.com/koalanlp/python-support) 9 | [![codecov](https://codecov.io/gh/koalanlp/python-support/branch/master/graph/badge.svg)](https://codecov.io/gh/koalanlp/python-support) 10 | 11 | [![java-koalanlp](https://img.shields.io/badge/Java,Kotlin-KoalaNLP-red.svg?style=flat-square)](https://koalanlp.github.io/koalanlp) 12 | [![scala-koalanlp](https://img.shields.io/badge/Scala-KoalaNLP-blue.svg?style=flat-square)](https://koalanlp.github.io/scala-support) 13 | [![nodejs-koalanlp](https://img.shields.io/badge/Nodejs-KoalaNLP-blue.svg?style=flat-square)](https://koalanlp.github.io/nodejs-support) 14 | 15 | # 소개 16 | 한국어 형태소 및 구문 분석기의 모음인, [KoalaNLP](https://github.com/koalanlp/koalanlp)의 Python 판본입니다. 17 | 18 | 이 프로젝트는 __서로 다른 형태의 형태소 분석기를__ 모아, 19 | __동일한 인터페이스__ 아래에서 사용할 수 있도록 하는 것이 목적입니다. 
20 | 21 | * 김상준님의 [Daon 분석기](https://github.com/rasoio/daon/tree/master/daon-core) 22 | 23 | * Shineware의 [코모란 v3.3.9](https://github.com/shin285/KOMORAN) 24 | 25 | * 서울대의 [꼬꼬마 형태소/구문 분석기 v2.1](http://kkma.snu.ac.kr/documents/index.jsp) 26 | 27 | * ETRI의 [공공 인공지능 Open API](http://aiopen.etri.re.kr/) 28 | 29 | * OpenKoreanText의 [오픈 소스 한국어 처리기 v2.3.1](http://openkoreantext.org) (구 Twitter 한국어 분석기) 30 | 31 | * 은전한닢 프로젝트의 [SEunjeon(S은전) v1.5.0](https://bitbucket.org/eunjeon/seunjeon) (Mecab-ko의 Scala/Java 판본) 32 | 33 | * 이수명님의 [Arirang Morpheme Analyzer](http://cafe.naver.com/korlucene) 1-1 34 | 35 | * 최석재님의 [RHINO v3.7.8](https://github.com/SukjaeChoi/RHINO) 36 | 37 | * KAIST의 [한나눔 형태소 분석기](http://kldp.net/projects/hannanum/)와 [NLP_HUB 구문분석기](http://semanticweb.kaist.ac.kr/home/index.php/NLP_HUB) 38 | 39 | * Kakao의 [카이(Khaiii) v0.4](https://github.com/kakao/khaiii) (별도설치 필요: [설치법](https://github.com/kakao/khaiii/wiki/빌드-및-설치)) 40 | 41 | * 울산대학교의 [UTagger 2018년 10월 31일자](http://nlplab.ulsan.ac.kr/doku.php?id=start) (별도설치 필요: [설치법](https://koalanlp.github.io/koalnlp/usage/Install-UTagger.md)) 1-2 42 | 43 | * 고현웅님의 [Korean Sentence Splitter v2.5.1](https://github.com/hyunwoongko/kss) 44 | 45 | * bab2min님의 [Kiwi](https://github.com/bab2min/kiwipiepy) (별도설치 필요: pip install kiwipiepy) 46 | 47 | > 주1-1 Arirang 분석기의 출력을 형태소분석에 적합하게 조금 다듬었으므로, 원본과 약간 다른 결과를 낼 수도 있습니다. 48 | > 49 | > 주1-2 UTagger의 2019-7 버전도 공개되어 있지만, 리눅스 개발환경을 위한 라이브러리 파일이 공개되어있지 않아 지원하지 않습니다. 50 | 51 | KoalaNLP의 Contributor가 되고 싶으시다면, 언제든지 Issue에 등록해주십시오. 52 | 또한, 추가하고자 하는 새로운 프로젝트가 있으시면, Issue에 등록해주십시오. 53 | 54 | ## 사용방법 55 | 56 | * [Usage](https://koalanlp.github.io/koalanlp/usage/) 57 | * [Sphinx Doc](http://koalanlp.github.io/python-support/html/) 58 | 59 | ## 특징 60 | 61 | KoalaNLP는 다음과 같은 특징을 가지고 있습니다. 62 | 63 | 1. 복잡한 설정이 필요없는 텍스트 분석: 64 | 65 | 모델은 자동으로 Maven으로 배포되기 때문에, 각 모델을 별도로 설치할 필요가 없습니다. 66 | 67 | 2. 코드 2~3 줄로 수행하는 텍스트 처리: 68 | 69 | 모델마다 다른 복잡한 설정 과정, 초기화 과정은 필요하지 않습니다. 
Dependency에 추가하고, 객체를 생성하고, 분석 메소드를 호출하는 3줄이면 끝납니다. 70 | 71 | 3. 모델에 상관 없는 동일한 코드, 동일한 결과: 72 | 73 | 모델마다 실행 방법, 실행 결과를 표현하는 형태가 다릅니다. KoalaNLP는 이를 정부 및 관계기관의 표준안에 따라 표준화합니다. 따라서 모델에 독립적으로 응용 프로그램 개발이 가능합니다. 74 | 75 | 4. Java, Kotlin, [Scala](https://koalanlp.github.io/scala-support), [Python 3](https://koalanlp.github.io/python-support), [NodeJS](https://koalanlp.github.io/nodejs-support)에서 크게 다르지 않은 코드: 76 | 77 | KoalaNLP는 여러 프로그래밍 언어에서 사용할 수 있습니다. 어디서 개발을 하더라도 크게 코드가 다르지 않습니다. 78 | 79 | # License 조항 80 | 81 | 이 프로젝트 자체(KoalaNLP-core)와 인터페이스 통합을 위한 코드는 82 | 소스코드에 저작권 귀속에 대한 별도 지시사항이 없는 한 v1.8.0부터 [*MIT License*](https://tldrlegal.com/license/mit-license)을 따르며, 83 | 원본 분석기의 License와 저작권은 각 저작권자가 지정한 바를 따릅니다. 84 | 85 | 단, GPL의 저작권 조항에 따라, GPL 하에서 이용이 허가되는 패키지들의 저작권은 해당 저작권 규정을 따릅니다. 86 | 87 | * Hannanum 및 NLP_HUB: [GPL v3](https://tldrlegal.com/license/gnu-general-public-license-v3-(gpl-3)) 88 | 89 | * KKMA: [GPL v2](https://tldrlegal.com/license/gnu-general-public-license-v2) (GPL v2를 따르지 않더라도, 상업적 이용시 별도 협의 가능) 90 | 91 | * KOMORAN 3.x: [Apache License 2.0](https://tldrlegal.com/license/apache-license-2.0-(apache-2.0)) 92 | 93 | * Open Korean Text: [Apache License 2.0](https://tldrlegal.com/license/apache-license-2.0-(apache-2.0)) 94 | 95 | * SEunjeon: [Apache License 2.0](https://tldrlegal.com/license/apache-license-2.0-(apache-2.0)) 96 | 97 | * 아리랑: [Apache License 2.0](https://tldrlegal.com/license/apache-license-2.0-(apache-2.0)) 98 | 99 | * RHINO: [GPL v3](https://tldrlegal.com/license/gnu-general-public-license-v3-(gpl-3)) (참고: 다운로드 위치별로 조항 상이함) 100 | 101 | * Daon: 지정된 조항 없음 102 | 103 | * ETRI: 별도 API 키 발급 동의 필요 104 | 105 | * Khaiii: [Apache License 2.0](https://tldrlegal.com/license/apache-license-2.0-(apache-2.0)) 106 | 107 | * UTagger: 교육 및 연구용으로 사용시 제한 없음. 
상업용인 경우 울산대와 기술이전 등의 유료 협약 필요 108 | 109 | * Korean Sentence Splitter(KSS): [BSD 3-Clause (Revised)](https://tldrlegal.com/license/bsd-3-clause-license-(revised)) 110 | 111 | * Kiwi: [LGPL 2.1](https://tldrlegal.com/license/gnu-lesser-general-public-license-v2.1-(lgpl-2.1)) 112 | 113 | # 사용법 114 | 115 | ## Dependency 추가 116 | 우선 Java JDK 8 이상을 설치하고, JDK의 설치 위치를 `JAVA_HOME`으로 하여 환경변수에 등록해주십시오. 117 | 그런 다음, 아래와 같이 설치하십시오. (현재 python-koalanlp 버전은 [![PyPI](https://img.shields.io/pypi/v/koalanlp.svg?style=flat-square)](https://github.com/koalanlp/python-support)입니다.) 118 | 119 | * Python 3.7은 일부 의존 라이브러리의 문제로 인해 사용이 불가능합니다. 해당 라이브러리에서 문제를 해결하기 전까지는 3.6 이하를 사용해주세요. 120 | * Windows나 OSX에서 미리 설치할 항목들은 [OS별 사전설치 항목](https://koalanlp.github.io/koalanlp/PlatformInstall.md)을 참고하세요. 121 | 122 | ```bash 123 | $ pip install koalanlp 124 | ``` 125 | 126 | ### Packages 127 | 각 형태소 분석기는 별도의 패키지로 나뉘어 있습니다. 128 | 129 | | 패키지명 | 설명 | 사용 가능 버전 | License (원본) | 130 | | ------------------ | ------------------------------------------------------------------ | ---------------- | ----------------- | 131 | | API.KMR | 코모란 Wrapper, 분석범위: 형태소 | [![Ver-KMR](https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-kmr.svg?style=flat-square&label=r)](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22koalanlp-kmr%22) | Apache 2.0 | 132 | | API.EUNJEON | 은전한닢 Wrapper, 분석범위: 형태소 | [![Ver-EJN](https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-eunjeon.svg?style=flat-square&label=r)](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22koalanlp-eunjeon%22) | Apache 2.0 | 133 | | API.ARIRANG | 아리랑 Wrapper, 분석범위: 형태소 | [![Ver-ARR](https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-arirang.svg?style=flat-square&label=r)](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22koalanlp-arirang%22) | Apache 2.0 | 134 | | API.RHINO | RHINO Wrapper, 분석범위: 형태소 | 
[![Ver-RHI](https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-rhino.svg?style=flat-square&label=r)](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22koalanlp-rhino%22) | GPL v3 | 135 | | API.DAON | Daon Wrapper, 분석범위: 형태소 | [![Ver-DAN](https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-daon.svg?style=flat-square&label=r)](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22koalanlp-daon%22) | MIT(별도 지정 없음) | 136 | | API.KHAIII | Khaiii Wrapper, 분석범위: 형태소 주2-3 | [![Ver-KHA](https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-khaiii.svg?style=flat-square&label=r)](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22koalanlp-khaiii%22) | Apache 2.0 | 137 | | API.UTAGGER | 울산대 UTagger Wrapper / 분석범위: 형태소 2-4 | [![Ver-UTA](https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-utagger.svg?style=flat-square&label=r)](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22koalanlp-utagger%22) | 주2-4 | 138 | | API.OKT | Open Korean Text Wrapper, 분석범위: 문장분리, 형태소 | [![Ver-OKT](https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-okt.svg?style=flat-square&label=r)](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22koalanlp-okt%22) | Apache 2.0 | 139 | | API.KKMA | 꼬꼬마 Wrapper, 분석범위: 형태소, 의존구문 | [![Ver-KKM](https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-kkma.svg?style=flat-square&label=r)](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22koalanlp-kkma%22) | GPL v2 | 140 | | API.HNN | 한나눔 Wrapper, 분석범위: 문장분리, 형태소, 구문분석, 의존구문 | [![Ver-HNN](https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-hnn.svg?style=flat-square&label=r)](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22koalanlp-hnn%22) | GPL v3 | 141 | | API.ETRI | ETRI Open API Wrapper, 분석범위: 형태소, 구문분석, 의존구문, 개체명, 의미역 | [![Ver-ETR](https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-etri.svg?style=flat-square&label=r)](http://search.maven.org/#search%7Cga%7C1%7Ca%3A%22koalanlp-etri%22) | MIT2-2 | 142 | | API.KSS | KSS Wrapper, 분석범위: 문장분리 | 버전무관 | BSD 3 | 143 | | 
API.KIWI | Kiwi Wrapper, 분석범위: 형태소 | 버전무관 | LGPL v2.1 | 144 | 145 | > 주2-2 ETRI의 경우 Open API를 접근하기 위한 코드 부분은 KoalaNLP의 License 정책에 귀속되지만, Open API 접근 이후의 사용권에 관한 조항은 ETRI에서 별도로 정한 바를 따릅니다. 146 | > 따라서, ETRI의 사용권 조항에 동의하시고 키를 발급하셔야 하며, 다음 위치에서 발급을 신청할 수 있습니다: [키 발급 신청](http://aiopen.etri.re.kr/key_main.php) 147 | > 148 | > 주2-3 Khaiii 분석기의 경우는 Java가 아닌 C++로 구현되어 사용 전 분석기의 설치가 필요합니다. Python3.6 및 CMake 3.10+만 설치되어 있다면 설치 자체가 복잡한 편은 아니니 [여기](https://github.com/kakao/khaiii/blob/v0.1/doc/setup.md)를 참조하여 설치해보세요. (단, v0.1에서는 빌드시 'python3' 호출시 'python3.6'이 연결되어야 합니다.) 참고로, KoalaNLP가 Travis CI에서 패키지를 자동 테스트하기 위해 구현된 bash script는 [여기](https://github.com/koalanlp/koalanlp/blob/master/khaiii/install.sh)에 있습니다. 149 | > 150 | > 주2-4 UTagger 분석기의 경우에도 C/C++로 구현되어, 사용 전 분석기의 설치가 필요합니다. 윈도우와 리눅스(우분투, CentOS)용 라이브러리 파일만 제공되며, 설치 방법은 [여기](https://koalanlp.github.io/koalnlp/usage/Install-UTagger.md)를 참조하십시오. UTagger 분석기는 교육 연구용은 무료로 배포되며, 상업용은 별도 협약이 필요합니다. 151 | 152 | ### 초기화 153 | 초기화 과정에서 KoalaNLP는 필요한 Java Library를 자동으로 다운로드하여 설치합니다. 설치에는 시간이 다소 소요됩니다. 154 | 때문에, 프로그램 실행시 최초 1회에 한하여 초기화 작업이 필요합니다. 155 | 156 | > *참고*: KSS만 사용하는 경우, 초기화 과정이 필요하지 않습니다. 157 | 158 | ```python 159 | from koalanlp.Util import initialize, finalize 160 | 161 | # 꼬꼬마와 ETRI 분석기의 2.0.2 버전을 참조합니다. 162 | initialize(java_options="-Xmx4g", KKMA="2.0.2", ETRI="2.0.2") 163 | # 사용이 끝나면 반드시 finalize를 호출합니다. 164 | finalize() 165 | ``` 166 | 167 | * `java_options` 인자는 JVM을 실행하기 위한 option string입니다. 168 | * 이후 인자들은 keyword argument들로, 상단 표를 참고하여 지정하실 수 있습니다. 169 | * 키워드가 지정되지 않으면 다음 코드와 같은 효과를 나타냅니다: `KMR="LATEST"` 170 | * 나머지 문서는 초기화 과정이 모두 완료되었다고 보고 진행합니다. 171 | * API 참고: [initialize](https://koalanlp.github.io/python-support/html/koalanlp.html#koalanlp.Util.initialize) 172 | 173 | ## 간단한 예시 174 | 다음과 같이 사용합니다. 175 | ```python 176 | from koalanlp.Util import initialize, finalize 177 | from koalanlp.proc import * 178 | from koalanlp import API 179 | 180 | # 초기화 합니다. 
181 | initialize(java_options="-Xmx4g -Dfile.encoding=utf-8", KKMA="2.0.2", EUNJEON="2.0.2", ETRI="2.0.2") 182 | 183 | # 품사분석기 이용법 184 | tagger = Tagger(API.EUNJEON) 185 | tagged = tagger.tag("안녕하세요. 눈이 오는 설날 아침입니다.") 186 | print(tagged) 187 | 188 | # 의존구문분석기 이용법 189 | parser = Parser(API.KKMA) 190 | parsed = parser.analyze("안녕하세요. 눈이 오는 설날 아침입니다.") 191 | print(parsed) 192 | 193 | # ETRI API 이용법 194 | ETRI_API_KEY = "......" # ETRI에서 발급받은 키를 입력하세요. 195 | rolelabeler = RoleLabeler(API.ETRI, ETRI_API_KEY) 196 | paragraph = rolelabeler.analyze("첫 분석을 시도해봅시다!") 197 | print(paragraph) 198 | print(paragraph[0].getRoles()) 199 | 200 | # Data classes 201 | sentence = parsed[1] # 두번째 문장인, "눈이 오는 설날 아침입니다."를 선택합니다. 202 | 203 | wordAt0 = sentence[0] # 첫번째 어절을 선택해봅니다. 204 | print(wordAt0.exists(lambda m: m.isPredicate())) # 첫번째 어절에, 용언(동사/형용사)을 포함한 형태소가 있는지 확인합니다. 205 | print(sentence.exists(lambda w: w.exists(lambda m: m.isNoun()))) # 문장 전체에 체언(명사 등)을 포함한 어절이 있는지 확인합니다. 206 | print(sentence.getNouns()) # 문장에서 체언만 추출합니다. 207 | print(sentence.getVerbs()) # 문장에서 용언만 추출합니다. 208 | 209 | finalize() # KoalaNLP 사용을 종료합니다. 210 | ``` 211 | 212 | # 결과 비교 213 | [Sample:결과비교](https://koalanlp.github.io/sample/comparison)를 참조해주세요. 214 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TWINE_USERNAME=nearbydelta 4 | 5 | extract_version() 6 | { 7 | LIB_VER=$(cat setup.py | grep "version=" | cut -d\' -f2 | cut -d- -f1) 8 | LIB_VER_MAJOR=$(echo $LIB_VER | cut -d. -f1) 9 | LIB_VER_MINOR=$(echo $LIB_VER | cut -d. -f2) 10 | LIB_VER_INCRM=$(echo $LIB_VER | cut -d. 
-f3) 11 | LIB_VER_CURRENT=$LIB_VER_MAJOR.$LIB_VER_MINOR.$LIB_VER_INCRM 12 | } 13 | 14 | add_incremental_ver() 15 | { 16 | LIB_VER_NEXT=$LIB_VER_MAJOR.$LIB_VER_MINOR.$(($LIB_VER_INCRM + 1)) 17 | } 18 | 19 | add_minor_ver() 20 | { 21 | LIB_VER_NEXT=$LIB_VER_MAJOR.$(($LIB_VER_MINOR + 1)).0 22 | } 23 | 24 | set_version() 25 | { 26 | cat setup.py | sed -e "s/version=\s*'.*'/version='$1'/g" > setup.py.new 27 | rm setup.py 28 | mv setup.py.new setup.py 29 | git add setup.py 30 | 31 | cat doc_source/conf.py | sed -e "s/release\s*=\s*'.*'/release = '$1'/g" > doc_source/conf.new 32 | rm doc_source/conf.py 33 | mv doc_source/conf.new doc_source/conf.py 34 | git add doc_source/conf.py 35 | } 36 | 37 | ask_proceed() 38 | { 39 | read -p "Proceed $1 [Y/n/p]? " YN 40 | if [ "${YN,,}" = "n" ]; then 41 | exit 0 42 | fi 43 | } 44 | 45 | pip_upgrade() 46 | { 47 | read -p "Does this system requires superuser privileges for upgrade Pypi packages [y/N]? " SUDO 48 | if [ "${SUDO,,}" = "y" ]; then 49 | sudo -H pip3 install --upgrade sphinx sphinx_rtd_theme twine pytest pypandoc wheel 50 | else 51 | pip3 install --upgrade sphinx sphinx_rtd_theme twine pytest pypandoc wheel 52 | fi 53 | } 54 | 55 | ask_proceed "PIP upgrade" 56 | if [ "${YN,,}" != "p" ]; then 57 | pip_upgrade 58 | fi 59 | 60 | ask_proceed "Test" 61 | if [ "${YN,,}" != "p" ]; then 62 | python3 -m pytest tests/finalize_test.py 63 | python3 -m pytest tests/multi_processing_test.py 64 | python3 -m pytest tests/data_test.py 65 | python3 -m pytest tests/dictionary_test.py 66 | python3 -m pytest tests/extension_core_spec.py 67 | python3 -m pytest tests/proc_core_spec.py 68 | python3 -m pytest tests/type_core_spec.py 69 | python3 -m pytest tests/khaiii_spec.py 70 | python3 -m pytest tests/utagger_spec.py 71 | python3 -m pytest tests/native_spec.py 72 | fi 73 | 74 | extract_version 75 | echo $LIB_VER_CURRENT 76 | 77 | ask_proceed "Set Current Version" 78 | if [ "${YN,,}" != "p" ]; then 79 | set_version $LIB_VER_CURRENT 80 | fi 81 | 82 
| ask_proceed "Build document" 83 | if [ "${YN,,}" != "p" ]; then 84 | mv docs/.nojekyll ./ 85 | mv docs/index.html ./ 86 | make clean 87 | make html 88 | mv ./.nojekyll docs/ 89 | mv ./index.html docs/ 90 | fi 91 | 92 | ask_proceed "Build package" 93 | if [ "${YN,,}" != "p" ]; then 94 | rm -r dist/ 95 | python3 setup.py bdist_wheel 96 | 97 | git add . 98 | git commit -m "Release: v$LIB_VER_CURRENT" 99 | git tag v$LIB_VER_CURRENT 100 | fi 101 | 102 | ask_proceed "Upload package" 103 | if [ "${YN,,}" != "p" ]; then 104 | twine upload dist/koalanlp-*.whl 105 | fi 106 | 107 | ask_proceed "Set Next" 108 | if [ "${YN,,}" != "p" ]; then 109 | add_incremental_ver 110 | set_version "$LIB_VER_NEXT-SNAPSHOT" 111 | fi 112 | 113 | ask_proceed "Commit" 114 | if [ "${YN,,}" != "p" ]; then 115 | git add . 116 | git commit -m "Initial commit for v$LIB_VER_NEXT" 117 | git push --all 118 | git push --tags 119 | fi 120 | -------------------------------------------------------------------------------- /doc_source/_static/style.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: 'S-CoreDream-L'; 3 | src: url('https://cdn.jsdelivr.net/gh/projectnoonnu/noonfonts_six@1.2/S-CoreDream-3Light.woff') format('woff'); 4 | font-weight: normal; 5 | font-style: normal; 6 | } 7 | 8 | @font-face { 9 | font-family: 'S-CoreDream-B'; 10 | src: url('https://cdn.jsdelivr.net/gh/projectnoonnu/noonfonts_six@1.2/S-CoreDream-6Bold.woff') format('woff'); 11 | font-weight: normal; 12 | font-style: normal; 13 | } 14 | 15 | @font-face { 16 | font-family: 'S-CoreDream-EB'; 17 | src: url('https://cdn.jsdelivr.net/gh/projectnoonnu/noonfonts_six@1.2/S-CoreDream-8Heavy.woff') format('woff'); 18 | font-weight: normal; 19 | font-style: normal; 20 | } 21 | 22 | body { 23 | font-family: 'S-CoreDream-L', "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; 24 | line-height: 1.6; 25 | } 26 | 27 | h1 { 28 | font-family: 'S-CoreDream-EB', "Open 
Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; 29 | } 30 | 31 | h2, h3, h4 { 32 | font-family: 'S-CoreDream-B', "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; 33 | } -------------------------------------------------------------------------------- /doc_source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('..')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'KoalaNLP' 23 | copyright = '2018, KoalaNLP' 24 | author = 'KoalaNLP' 25 | 26 | # The full version, including alpha/beta/rc tags 27 | release = '2.1.8-SNAPSHOT' 28 | # The short X.Y version 29 | version = '.'.join(release.split('.')[:2]) 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.githubpages', 44 | ] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 
47 | templates_path = ['_templates'] 48 | 49 | # The suffix(es) of doc_source filenames. 50 | # You can specify multiple suffix as a list of string: 51 | # 52 | # source_suffix = ['.rst', '.md'] 53 | source_suffix = '.rst' 54 | 55 | # The master toctree document. 56 | master_doc = 'index' 57 | 58 | # The language for content autogenerated by Sphinx. Refer to documentation 59 | # for a list of supported languages. 60 | # 61 | # This is also used if you do content translation via gettext catalogs. 62 | # Usually you set "language" from the command line for these cases. 63 | language = 'ko' 64 | 65 | # List of patterns, relative to doc_source directory, that match files and 66 | # directories to ignore when looking for doc_source files. 67 | # This pattern also affects html_static_path and html_extra_path. 68 | exclude_patterns = [] 69 | 70 | # The name of the Pygments (syntax highlighting) style to use. 71 | pygments_style = None 72 | 73 | 74 | # -- Options for HTML output ------------------------------------------------- 75 | 76 | # The theme to use for HTML and HTML Help pages. See the documentation for 77 | # a list of builtin themes. 78 | # 79 | html_theme = 'sphinx_rtd_theme' 80 | 81 | # Theme options are theme-specific and customize the look and feel of a theme 82 | # further. For a list of options available for each theme, see the 83 | # documentation. 84 | # 85 | # html_theme_options = {} 86 | 87 | # Add any paths that contain custom static files (such as style sheets) here, 88 | # relative to this directory. They are copied after the builtin static files, 89 | # so a file named "default.css" will overwrite the builtin "default.css". 90 | html_static_path = ['_static'] 91 | html_css_files = ['style.css'] 92 | 93 | # Custom sidebar templates, must be a dictionary that maps document names 94 | # to template names. 95 | # 96 | # The default sidebars (for documents that don't match any pattern) are 97 | # defined by theme itself. 
Builtin themes are using these templates by 98 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 99 | # 'searchbox.html']``. 100 | # 101 | # html_sidebars = {} 102 | 103 | 104 | # -- Options for HTMLHelp output --------------------------------------------- 105 | 106 | # Output file base name for HTML help builder. 107 | htmlhelp_basename = 'KoalaNLPdoc' 108 | 109 | 110 | # -- Options for LaTeX output ------------------------------------------------ 111 | 112 | latex_elements = { 113 | # The paper size ('letterpaper' or 'a4paper'). 114 | # 115 | # 'papersize': 'letterpaper', 116 | 117 | # The font size ('10pt', '11pt' or '12pt'). 118 | # 119 | # 'pointsize': '10pt', 120 | 121 | # Additional stuff for the LaTeX preamble. 122 | # 123 | # 'preamble': '', 124 | 125 | # Latex figure (float) alignment 126 | # 127 | # 'figure_align': 'htbp', 128 | } 129 | 130 | # Grouping the document tree into LaTeX files. List of tuples 131 | # (doc_source start file, target name, title, 132 | # author, documentclass [howto, manual, or own class]). 133 | latex_documents = [ 134 | (master_doc, 'KoalaNLP.tex', 'KoalaNLP Documentation', 135 | 'KoalaNLP', 'manual'), 136 | ] 137 | 138 | 139 | # -- Options for manual page output ------------------------------------------ 140 | 141 | # One entry per manual page. List of tuples 142 | # (doc_source start file, name, description, authors, manual section). 143 | man_pages = [ 144 | (master_doc, 'koalanlp', 'KoalaNLP Documentation', 145 | [author], 1) 146 | ] 147 | 148 | 149 | # -- Options for Texinfo output ---------------------------------------------- 150 | 151 | # Grouping the document tree into Texinfo files. 
List of tuples 152 | # (doc_source start file, target name, title, author, 153 | # dir menu entry, description, category) 154 | texinfo_documents = [ 155 | (master_doc, 'KoalaNLP', 'KoalaNLP Documentation', 156 | author, 'KoalaNLP', 'One line description of project.', 157 | 'Miscellaneous'), 158 | ] 159 | 160 | 161 | # -- Options for Epub output ------------------------------------------------- 162 | 163 | # Bibliographic Dublin Core info. 164 | epub_title = project 165 | 166 | # The unique identifier of the text. This can be a ISBN number 167 | # or the project homepage. 168 | # 169 | # epub_identifier = '' 170 | 171 | # A unique identification for the text. 172 | # 173 | # epub_uid = '' 174 | 175 | # A list of files that should not be packed into the epub file. 176 | epub_exclude_files = ['search.html'] 177 | 178 | 179 | # -- Extension configuration ------------------------------------------------- 180 | -------------------------------------------------------------------------------- /doc_source/index.rst: -------------------------------------------------------------------------------- 1 | .. KoalaNLP documentation master file, created by 2 | sphinx-quickstart on Sat Jan 6 00:50:52 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | 7 | ======== 8 | KoalaNLP 9 | ======== 10 | 11 | 소개 12 | ===== 13 | 14 | 한국어 형태소 및 구문 분석기의 모음인, 15 | `KoalaNLP `__\ 의 Python 16 | 판본입니다. 17 | 18 | 이 프로젝트는 **서로 다른 형태의 형태소 분석기를** 모아, **동일한 19 | 인터페이스** 아래에서 사용할 수 있도록 하는 것이 목적입니다. 
20 | 21 | - Hannanum: KAIST의 `한나눔 형태소 22 | 분석기 `__\ 와 `NLP\_HUB 23 | 구문분석기 `__ 24 | 25 | - KKMA: 서울대의 `꼬꼬마 형태소/구문 26 | 분석기 `__ 27 | 28 | - KOMORAN: Junsoo 29 | Shin님의 `코모란 v3.3.9 `__ 30 | 31 | - OKT: OpenKoreanText의 `오픈 소스 한국어 32 | 처리기 `__ 33 | 34 | - Eunjeon: 은전한닢 프로젝트의 35 | `SEunjeon(S은전) `__ 36 | - Arirang: 37 | 이수명님의 `Arirang Morpheme 38 | Analyzer `__ 1-1 39 | 40 | - RHINO: 최석재님의 41 | `RHINO v3.7.8 `__ 42 | 43 | - Daon: 김상준님의 `Daon 분석기 `__ 44 | 45 | - ETRI: ETRI의 `공공 인공지능 Open API `__ 46 | 47 | 주1-1 Arirang 분석기의 출력을 형태소분석에 적합하게 조금 48 | 다듬었으므로, 원본과 약간 다른 결과를 낼 수도 있습니다. 49 | 50 | - Kakao의 `카이(Khaiii) v0.4 `__ (별도설치 필요: `설치법 `__) 51 | 52 | - 울산대학교의 `UTagger 2018년 10월 31일자 `__ (별도설치 필요: `설치법 `__) 53 | 54 | 주1-2 UTagger의 2019-7 버전도 공개되어 있지만, 리눅스 개발환경을 위한 라이브러리 파일이 공개되어있지 않아 지원하지 않습니다. 55 | 56 | - 고현웅님의 `Korean Sentence Splitter v2.5.1 `__ 57 | 58 | - bab2min님의 `Kiwi `__ (별도설치 필요: ``pip install kiwipiepy``) 59 | 60 | 61 | 문서 62 | ==== 63 | 64 | .. toctree:: 65 | 주요 기능 사용법 66 | koalanlp 67 | 사용법 샘플 68 | Kotlin/Java API 69 | Scala API 70 | NodeJS API 71 | 72 | 색인 73 | ==== 74 | 75 | * :ref:`genindex` 76 | * :ref:`modindex` 77 | * :ref:`search` 78 | 79 | -------------------------------------------------------------------------------- /doc_source/koalanlp.rst: -------------------------------------------------------------------------------- 1 | KoalaNLP Python3 API 2 | ==================== 3 | 4 | API 목록 5 | --------------------------- 6 | 7 | .. automodule:: koalanlp.API 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | 품사, 기능 표지자 등 13 | --------------------------- 14 | 15 | .. automodule:: koalanlp.types 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | 자료형 21 | -------------------------------- 22 | 23 | .. 
automodule:: koalanlp.data 24 | :members: 25 | :undoc-members: 26 | :inherited-members: 27 | :special-members: 28 | :show-inheritance: 29 | :include-members: , __hash__, __eq__, __repr__, __contains__, __iter__, __getitem__, __len__ 30 | 31 | 사전 및 분석기 32 | ---------------------------- 33 | 34 | .. automodule:: koalanlp.proc 35 | :members: 36 | :undoc-members: 37 | :inherited-members: 38 | :show-inheritance: 39 | 40 | 문자열 추가기능 41 | ---------------------------- 42 | 43 | .. automodule:: koalanlp.ExtUtil 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | 48 | 편의기능 49 | --------------------------- 50 | 51 | .. automodule:: koalanlp.Util 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | 56 | 57 | 58 | .. |ver_hnn| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-hnn.svg?style=flat-square&label=API 59 | :alt: API.hnn Version 60 | 61 | .. |ver_kkma| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-kkma.svg?style=flat-square&label=API 62 | :alt: API.kkma Version 63 | 64 | .. |ver_kmr| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-kmr.svg?style=flat-square&label=API 65 | :alt: API.kmr Version 66 | 67 | .. |ver_eunjeon| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-eunjeon.svg?style=flat-square&label=API 68 | :alt: API.eunjeon Version 69 | 70 | .. |ver_arirang| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-arirang.svg?style=flat-square&label=API 71 | :alt: API.arirang Version 72 | 73 | .. |ver_daon| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-daon.svg?style=flat-square&label=API 74 | :alt: API.daon Version 75 | 76 | .. |ver_rhino| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-rhino.svg?style=flat-square&label=API 77 | :alt: API.rhino Version 78 | 79 | .. 
|ver_etri| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-etri.svg?style=flat-square&label=API 80 | :alt: API.etri Version 81 | 82 | .. |ver_okt| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-okt.svg?style=flat-square&label=API 83 | :alt: API.okt Version 84 | 85 | .. |ver_khaiii| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-khaiii.svg?style=flat-square&label=API 86 | :alt: API.khaiii Version 87 | 88 | .. |ver_utagger| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-utagger.svg?style=flat-square&label=API 89 | :alt: API.utagger Version 90 | 91 | .. |ver_core| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-core.svg?style=flat-square&label=API 92 | :alt: API.core Version 93 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/.nojekyll -------------------------------------------------------------------------------- /docs/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/doctrees/koalanlp.doctree: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/doctrees/koalanlp.doctree -------------------------------------------------------------------------------- /docs/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 78f8bd433bc438f5771b9f11bea1e83f 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/html/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/.nojekyll -------------------------------------------------------------------------------- /docs/html/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. KoalaNLP documentation master file, created by 2 | sphinx-quickstart on Sat Jan 6 00:50:52 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | 7 | ======== 8 | KoalaNLP 9 | ======== 10 | 11 | 소개 12 | ===== 13 | 14 | 한국어 형태소 및 구문 분석기의 모음인, 15 | `KoalaNLP `__\ 의 Python 16 | 판본입니다. 17 | 18 | 이 프로젝트는 **서로 다른 형태의 형태소 분석기를** 모아, **동일한 19 | 인터페이스** 아래에서 사용할 수 있도록 하는 것이 목적입니다. 
20 | 21 | - Hannanum: KAIST의 `한나눔 형태소 22 | 분석기 `__\ 와 `NLP\_HUB 23 | 구문분석기 `__ 24 | 25 | - KKMA: 서울대의 `꼬꼬마 형태소/구문 26 | 분석기 `__ 27 | 28 | - KOMORAN: Junsoo 29 | Shin님의 `코모란 v3.3.9 `__ 30 | 31 | - OKT: OpenKoreanText의 `오픈 소스 한국어 32 | 처리기 `__ 33 | 34 | - Eunjeon: 은전한닢 프로젝트의 35 | `SEunjeon(S은전) `__ 36 | - Arirang: 37 | 이수명님의 `Arirang Morpheme 38 | Analyzer `__ 1-1 39 | 40 | - RHINO: 최석재님의 41 | `RHINO v3.7.8 `__ 42 | 43 | - Daon: 김상준님의 `Daon 분석기 `__ 44 | 45 | - ETRI: ETRI의 `공공 인공지능 Open API `__ 46 | 47 | 주1-1 Arirang 분석기의 출력을 형태소분석에 적합하게 조금 48 | 다듬었으므로, 원본과 약간 다른 결과를 낼 수도 있습니다. 49 | 50 | - Kakao의 `카이(Khaiii) v0.4 `__ (별도설치 필요: `설치법 `__) 51 | 52 | - 울산대학교의 `UTagger 2018년 10월 31일자 `__ (별도설치 필요: `설치법 `__) 53 | 54 | 주1-2 UTagger의 2019-7 버전도 공개되어 있지만, 리눅스 개발환경을 위한 라이브러리 파일이 공개되어있지 않아 지원하지 않습니다. 55 | 56 | - 고현웅님의 `Korean Sentence Splitter v2.5.1 `__ 57 | 58 | - bab2min님의 `Kiwi `__ (별도설치 필요: ``pip install kiwipiepy``) 59 | 60 | 61 | 문서 62 | ==== 63 | 64 | .. toctree:: 65 | 주요 기능 사용법 66 | koalanlp 67 | 사용법 샘플 68 | Kotlin/Java API 69 | Scala API 70 | NodeJS API 71 | 72 | 색인 73 | ==== 74 | 75 | * :ref:`genindex` 76 | * :ref:`modindex` 77 | * :ref:`search` 78 | 79 | -------------------------------------------------------------------------------- /docs/html/_sources/koalanlp.rst.txt: -------------------------------------------------------------------------------- 1 | KoalaNLP Python3 API 2 | ==================== 3 | 4 | API 목록 5 | --------------------------- 6 | 7 | .. automodule:: koalanlp.API 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | 품사, 기능 표지자 등 13 | --------------------------- 14 | 15 | .. automodule:: koalanlp.types 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | 자료형 21 | -------------------------------- 22 | 23 | .. 
automodule:: koalanlp.data 24 | :members: 25 | :undoc-members: 26 | :inherited-members: 27 | :special-members: 28 | :inherited-members: 29 | :include-members: , __hash__, __eq__, __repr__, __contains__, __iter__, __getitem__, __len__ 30 | 31 | 사전 및 분석기 32 | ---------------------------- 33 | 34 | .. automodule:: koalanlp.proc 35 | :members: 36 | :undoc-members: 37 | :inherited-members: 38 | :show-inheritance: 39 | 40 | 문자열 추가기능 41 | ---------------------------- 42 | 43 | .. automodule:: koalanlp.ExtUtil 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | 48 | 편의기능 49 | --------------------------- 50 | 51 | .. automodule:: koalanlp.Util 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | 56 | 57 | 58 | .. |ver_hnn| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-hnn.svg?style=flat-square&label=API 59 | :alt: API.hnn Version 60 | 61 | .. |ver_kkma| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-kkma.svg?style=flat-square&label=API 62 | :alt: API.kkma Version 63 | 64 | .. |ver_kmr| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-kmr.svg?style=flat-square&label=API 65 | :alt: API.kmr Version 66 | 67 | .. |ver_eunjeon| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-eunjeon.svg?style=flat-square&label=API 68 | :alt: API.eunjeon Version 69 | 70 | .. |ver_arirang| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-arirang.svg?style=flat-square&label=API 71 | :alt: API.arirang Version 72 | 73 | .. |ver_daon| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-daon.svg?style=flat-square&label=API 74 | :alt: API.daon Version 75 | 76 | .. |ver_rhino| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-rhino.svg?style=flat-square&label=API 77 | :alt: API.rhino Version 78 | 79 | .. 
|ver_etri| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-etri.svg?style=flat-square&label=API 80 | :alt: API.etri Version 81 | 82 | .. |ver_okt| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-okt.svg?style=flat-square&label=API 83 | :alt: API.okt Version 84 | 85 | .. |ver_khaiii| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-khaiii.svg?style=flat-square&label=API 86 | :alt: API.khaiii Version 87 | 88 | .. |ver_utagger| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-utagger.svg?style=flat-square&label=API 89 | :alt: API.utagger Version 90 | 91 | .. |ver_core| image:: https://img.shields.io/maven-central/v/kr.bydelta/koalanlp-core.svg?style=flat-square&label=API 92 | :alt: API.core Version 93 | -------------------------------------------------------------------------------- /docs/html/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li 
.fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version 
.fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/Roboto-Slab-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/Roboto-Slab-Bold.woff -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/Roboto-Slab-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/Roboto-Slab-Bold.woff2 -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/Roboto-Slab-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/Roboto-Slab-Regular.woff -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/Roboto-Slab-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/Roboto-Slab-Regular.woff2 -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/fontawesome-webfont.eot: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/lato-bold-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/lato-bold-italic.woff -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/lato-bold-italic.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/lato-bold-italic.woff2 -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/lato-bold.woff -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/lato-normal-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/lato-normal-italic.woff -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/lato-normal-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/lato-normal-italic.woff2 -------------------------------------------------------------------------------- /docs/html/_static/css/fonts/lato-normal.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/css/fonts/lato-normal.woff 
/*
 * doctools.js
 * ~~~~~~~~~~~
 *
 * Sphinx JavaScript utilities for all documentation.
 *
 * :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
 * :license: BSD, see LICENSE for details.
 *
 */

/**
 * select a different prefix for underscore
 *
 * `_` is re-bound to Documentation.gettext at the bottom of this file, so
 * the underscore library is kept reachable under `$u`.
 */
$u = _.noConflict();

/**
 * make the code below compatible with browsers without
 * an installed firebug like debugger
if (!window.console || !console.firebug) {
  var names = ["log", "debug", "info", "warn", "error", "assert", "dir",
    "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace",
    "profile", "profileEnd"];
  window.console = {};
  for (var i = 0; i < names.length; ++i)
    window.console[names[i]] = function() {};
}
 */

/**
 * small helper function to urldecode strings
 *
 * Falsy input (undefined, null, "") is returned unchanged; otherwise "+" is
 * treated as an encoded space before percent-decoding.
 *
 * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL
 */
jQuery.urldecode = function(x) {
  if (!x) {
    return x
  }
  return decodeURIComponent(x.replace(/\+/g, ' '));
};

/**
 * small helper function to urlencode strings
 */
jQuery.urlencode = encodeURIComponent;

/**
 * This function returns the parsed url parameters of the
 * current request. Multiple values per key are supported,
 * it will always return arrays of strings for the value parts.
 *
 * @param {string} [s] query string to parse; defaults to
 *     document.location.search.
 * @returns {Object} map from decoded key to an array of decoded values
 *     (a key given without "=" maps to [undefined]).
 */
jQuery.getQueryParameters = function(s) {
  if (typeof s === 'undefined')
    s = document.location.search;
  var parts = s.substr(s.indexOf('?') + 1).split('&');
  var result = {};
  for (var i = 0; i < parts.length; i++) {
    var tmp = parts[i].split('=', 2);
    var key = jQuery.urldecode(tmp[0]);
    var value = jQuery.urldecode(tmp[1]);
    if (key in result)
      result[key].push(value);
    else
      result[key] = [value];
  }
  return result;
};

/**
 * highlight a given string on a jquery object by wrapping it in
 * span elements with the given class name.
 *
 * Only the first occurrence inside each text node is wrapped. `text` is
 * compared against the lower-cased node value, so callers pass it
 * lower-cased. Text inside button/select/textarea elements, inside elements
 * that already carry `className`, or inside ".nohighlight" is skipped.
 *
 * @param {string} text lower-cased search term.
 * @param {string} className class applied to the wrapping element.
 * @returns {jQuery} the receiver, for chaining.
 */
jQuery.fn.highlightText = function(text, className) {
  function highlight(node, addItems) {
    if (node.nodeType === 3) {
      // text node: look for the term and split the node around it
      var val = node.nodeValue;
      var pos = val.toLowerCase().indexOf(text);
      if (pos >= 0 &&
          !jQuery(node.parentNode).hasClass(className) &&
          !jQuery(node.parentNode).hasClass("nohighlight")) {
        var span;
        var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg");
        if (isInSVG) {
          span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
        } else {
          span = document.createElement("span");
          span.className = className;
        }
        span.appendChild(document.createTextNode(val.substr(pos, text.length)));
        // insert "<span>match</span>rest" right after the node, then trim
        // the node itself down to the text preceding the match
        node.parentNode.insertBefore(span, node.parentNode.insertBefore(
          document.createTextNode(val.substr(pos + text.length)),
          node.nextSibling));
        node.nodeValue = val.substr(0, pos);
        if (isInSVG) {
          // inside SVG the highlight is a <rect> sized to the parent's
          // bounding box; it is queued and inserted after the traversal
          var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
          var bbox = node.parentElement.getBBox();
          rect.x.baseVal.value = bbox.x;
          rect.y.baseVal.value = bbox.y;
          rect.width.baseVal.value = bbox.width;
          rect.height.baseVal.value = bbox.height;
          rect.setAttribute('class', className);
          addItems.push({
              "parent": node.parentNode,
              "target": rect});
        }
      }
    }
    else if (!jQuery(node).is("button, select, textarea")) {
      jQuery.each(node.childNodes, function() {
        highlight(this, addItems);
      });
    }
  }
  var addItems = [];
  var result = this.each(function() {
    highlight(this, addItems);
  });
  for (var i = 0; i < addItems.length; ++i) {
    jQuery(addItems[i].parent).before(addItems[i].target);
  }
  return result;
};

/*
 * backward compatibility for jQuery.browser
 * This will be supported until firefox bug is fixed.
 */
if (!jQuery.browser) {
  jQuery.uaMatch = function(ua) {
    ua = ua.toLowerCase();

    var match = /(chrome)[ \/]([\w.]+)/.exec(ua) ||
      /(webkit)[ \/]([\w.]+)/.exec(ua) ||
      /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) ||
      /(msie) ([\w.]+)/.exec(ua) ||
      ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) ||
      [];

    return {
      browser: match[ 1 ] || "",
      version: match[ 2 ] || "0"
    };
  };
  jQuery.browser = {};
  jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
}

/**
 * Small JavaScript module for the documentation.
 */
var Documentation = {

  /**
   * Run the page-level setup steps; keyboard navigation is only wired up
   * when DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS is set.
   */
  init : function() {
    this.fixFirefoxAnchorBug();
    this.highlightSearchWords();
    this.initIndexTable();
    if (DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) {
      this.initOnKeyListeners();
    }
  },

  /**
   * i18n support
   */
  TRANSLATIONS : {},
  PLURAL_EXPR : function(n) { return n === 1 ? 0 : 1; },
  LOCALE : 'unknown',

  // gettext and ngettext don't access this so that the functions
  // can safely bound to a different name (_ = Documentation.gettext)
  gettext : function(string) {
    var translated = Documentation.TRANSLATIONS[string];
    if (typeof translated === 'undefined')
      return string;
    return (typeof translated === 'string') ? translated : translated[0];
  },

  /**
   * Return the translated plural form of `singular` for count `n`, or the
   * untranslated singular/plural when no translation is loaded.
   */
  ngettext : function(singular, plural, n) {
    var translated = Documentation.TRANSLATIONS[singular];
    if (typeof translated === 'undefined')
      return (n == 1) ? singular : plural;
    // the plural selector is stored as PLURAL_EXPR (see above and
    // addTranslations); the previous "PLURALEXPR" spelling was undefined
    // and threw a TypeError for every translated plural
    return translated[Documentation.PLURAL_EXPR(n)];
  },

  /**
   * Merge a message catalog into TRANSLATIONS and install its plural
   * expression and locale.
   */
  addTranslations : function(catalog) {
    for (var key in catalog.messages)
      this.TRANSLATIONS[key] = catalog.messages[key];
    this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')');
    this.LOCALE = catalog.locale;
  },

  /**
   * add context elements like header anchor links
   */
  addContextElements : function() {
    $('div[id] > :header:first').each(function() {
      $('<a class="headerlink">\u00B6</a>').
      attr('href', '#' + this.id).
      attr('title', _('Permalink to this headline')).
      appendTo(this);
    });
    $('dt[id]').each(function() {
      $('<a class="headerlink">\u00B6</a>').
      attr('href', '#' + this.id).
      attr('title', _('Permalink to this definition')).
      appendTo(this);
    });
  },

  /**
   * workaround a firefox stupidity
   * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075
   */
  fixFirefoxAnchorBug : function() {
    if (document.location.hash && $.browser.mozilla)
      window.setTimeout(function() {
        document.location.href += '';
      }, 10);
  },

  /**
   * highlight the search words provided in the url in the text
   */
  highlightSearchWords : function() {
    var params = $.getQueryParameters();
    var terms = (params.highlight) ? params.highlight[0].split(/\s+/) : [];
    if (terms.length) {
      var body = $('div.body');
      if (!body.length) {
        body = $('body');
      }
      window.setTimeout(function() {
        $.each(terms, function() {
          body.highlightText(this.toLowerCase(), 'highlighted');
        });
      }, 10);
      // the link targets hideSearchWords(), which fades out
      // '#searchbox .highlight-link'
      $('<p class="highlight-link"><a href="javascript:Documentation.' +
        'hideSearchWords()">' + _('Hide Search Matches') + '</a></p>')
          .appendTo($('#searchbox'));
    }
  },

  /**
   * init the domain index toggle buttons
   */
  initIndexTable : function() {
    var togglers = $('img.toggler').click(function() {
      var src = $(this).attr('src');
      var idnum = $(this).attr('id').substr(7);
      $('tr.cg-' + idnum).toggle();
      // swap the icon between ".../minus.png" (9 chars) and
      // ".../plus.png" (8 chars)
      if (src.substr(-9) === 'minus.png')
        $(this).attr('src', src.substr(0, src.length-9) + 'plus.png');
      else
        $(this).attr('src', src.substr(0, src.length-8) + 'minus.png');
    }).css('display', '');
    if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) {
        togglers.click();
    }
  },

  /**
   * helper function to hide the search marks again
   */
  hideSearchWords : function() {
    $('#searchbox .highlight-link').fadeOut(300);
    $('span.highlighted').removeClass('highlighted');
  },

  /**
   * make the url absolute
   */
  makeURL : function(relativeURL) {
    return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL;
  },

  /**
   * get the current relative url
   */
  getCurrentURL : function() {
    var path = document.location.pathname;
    var parts = path.split(/\//);
    // drop one trailing path component for every '..' in URL_ROOT
    $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() {
      if (this === '..')
        parts.pop();
    });
    var url = parts.join('/');
    return path.substring(url.lastIndexOf('/') + 1, path.length - 1);
  },

  /**
   * Navigate to the previous/next document with the left/right arrow keys,
   * using the page's <link rel="prev"> / <link rel="next"> targets.
   */
  initOnKeyListeners: function() {
    $(document).keydown(function(event) {
      var activeElementType = document.activeElement.tagName;
      // don't navigate when in search box, textarea, dropdown or button
      if (activeElementType !== 'TEXTAREA' && activeElementType !== 'INPUT' && activeElementType !== 'SELECT'
          && activeElementType !== 'BUTTON' && !event.altKey && !event.ctrlKey && !event.metaKey
          && !event.shiftKey) {
        switch (event.keyCode) {
          case 37: // left
            var prevHref = $('link[rel="prev"]').prop('href');
            if (prevHref) {
              window.location.href = prevHref;
              return false;
            }
            // without this break, "left" on a page with no previous
            // document fell through and navigated to the next one
            break;
          case 39: // right
            var nextHref = $('link[rel="next"]').prop('href');
            if (nextHref) {
              window.location.href = nextHref;
              return false;
            }
            break;
        }
      }
    });
  }
};

// quick alias for translations
_ = Documentation.gettext;

$(document).ready(function() {
  Documentation.init();
});
-------------------------------------------------------------------------------- /docs/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/file.png -------------------------------------------------------------------------------- /docs/html/_static/fonts/Inconsolata-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Inconsolata-Bold.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/Inconsolata-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Inconsolata-Regular.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/Inconsolata.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Inconsolata.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato-Bold.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato-Regular.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato-Regular.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- 
/docs/html/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-bolditalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/html/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/Lato/lato-regular.woff2 
-------------------------------------------------------------------------------- /docs/html/_static/fonts/RobotoSlab-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/RobotoSlab-Bold.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/RobotoSlab-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/RobotoSlab-Regular.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot -------------------------------------------------------------------------------- /docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /docs/html/_static/fonts/fontawesome-webfont.eot: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/html/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/html/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/html/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/html/_static/js/badge_only.js: -------------------------------------------------------------------------------- 1 | !function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return 
e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}}); -------------------------------------------------------------------------------- /docs/html/_static/js/html5shiv-printshiv.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var 
n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var 
p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/html/_static/js/html5shiv.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed 3 | */ 4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" 
")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time 
video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document); -------------------------------------------------------------------------------- /docs/html/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | !function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 
0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("
"),n("table.docutils.footnote").wrap("
"),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}t.length>0&&($(".wy-menu-vertical .current").removeClass("current"),t.addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l1").parent().addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l2").addClass("current"),t.closest("li.toctree-l3").addClass("current"),t.closest("li.toctree-l4").addClass("current"),t.closest("li.toctree-l5").addClass("current"),t[0].scrollIntoView())}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t0 63 | var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 64 | var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 65 | var s_v = "^(" + C + ")?" 
+ v; // vowel in stem 66 | 67 | this.stemWord = function (w) { 68 | var stem; 69 | var suffix; 70 | var firstch; 71 | var origword = w; 72 | 73 | if (w.length < 3) 74 | return w; 75 | 76 | var re; 77 | var re2; 78 | var re3; 79 | var re4; 80 | 81 | firstch = w.substr(0,1); 82 | if (firstch == "y") 83 | w = firstch.toUpperCase() + w.substr(1); 84 | 85 | // Step 1a 86 | re = /^(.+?)(ss|i)es$/; 87 | re2 = /^(.+?)([^s])s$/; 88 | 89 | if (re.test(w)) 90 | w = w.replace(re,"$1$2"); 91 | else if (re2.test(w)) 92 | w = w.replace(re2,"$1$2"); 93 | 94 | // Step 1b 95 | re = /^(.+?)eed$/; 96 | re2 = /^(.+?)(ed|ing)$/; 97 | if (re.test(w)) { 98 | var fp = re.exec(w); 99 | re = new RegExp(mgr0); 100 | if (re.test(fp[1])) { 101 | re = /.$/; 102 | w = w.replace(re,""); 103 | } 104 | } 105 | else if (re2.test(w)) { 106 | var fp = re2.exec(w); 107 | stem = fp[1]; 108 | re2 = new RegExp(s_v); 109 | if (re2.test(stem)) { 110 | w = stem; 111 | re2 = /(at|bl|iz)$/; 112 | re3 = new RegExp("([^aeiouylsz])\\1$"); 113 | re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 114 | if (re2.test(w)) 115 | w = w + "e"; 116 | else if (re3.test(w)) { 117 | re = /.$/; 118 | w = w.replace(re,""); 119 | } 120 | else if (re4.test(w)) 121 | w = w + "e"; 122 | } 123 | } 124 | 125 | // Step 1c 126 | re = /^(.+?)y$/; 127 | if (re.test(w)) { 128 | var fp = re.exec(w); 129 | stem = fp[1]; 130 | re = new RegExp(s_v); 131 | if (re.test(stem)) 132 | w = stem + "i"; 133 | } 134 | 135 | // Step 2 136 | re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; 137 | if (re.test(w)) { 138 | var fp = re.exec(w); 139 | stem = fp[1]; 140 | suffix = fp[2]; 141 | re = new RegExp(mgr0); 142 | if (re.test(stem)) 143 | w = stem + step2list[suffix]; 144 | } 145 | 146 | // Step 3 147 | re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; 148 | if (re.test(w)) { 149 | var fp = re.exec(w); 150 | stem = fp[1]; 151 | suffix = fp[2]; 152 | re = 
new RegExp(mgr0); 153 | if (re.test(stem)) 154 | w = stem + step3list[suffix]; 155 | } 156 | 157 | // Step 4 158 | re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; 159 | re2 = /^(.+?)(s|t)(ion)$/; 160 | if (re.test(w)) { 161 | var fp = re.exec(w); 162 | stem = fp[1]; 163 | re = new RegExp(mgr1); 164 | if (re.test(stem)) 165 | w = stem; 166 | } 167 | else if (re2.test(w)) { 168 | var fp = re2.exec(w); 169 | stem = fp[1] + fp[2]; 170 | re2 = new RegExp(mgr1); 171 | if (re2.test(stem)) 172 | w = stem; 173 | } 174 | 175 | // Step 5 176 | re = /^(.+?)e$/; 177 | if (re.test(w)) { 178 | var fp = re.exec(w); 179 | stem = fp[1]; 180 | re = new RegExp(mgr1); 181 | re2 = new RegExp(meq1); 182 | re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 183 | if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) 184 | w = stem; 185 | } 186 | re = /ll$/; 187 | re2 = new RegExp(mgr1); 188 | if (re.test(w) && re2.test(w)) { 189 | re = /.$/; 190 | w = w.replace(re,""); 191 | } 192 | 193 | // and turn initial Y back to y 194 | if (firstch == "y") 195 | w = firstch.toLowerCase() + w.substr(1); 196 | return w; 197 | } 198 | } 199 | 200 | 201 | 202 | 203 | var splitChars = (function() { 204 | var result = {}; 205 | var singles = [96, 180, 187, 191, 215, 247, 749, 885, 903, 907, 909, 930, 1014, 1648, 206 | 1748, 1809, 2416, 2473, 2481, 2526, 2601, 2609, 2612, 2615, 2653, 2702, 207 | 2706, 2729, 2737, 2740, 2857, 2865, 2868, 2910, 2928, 2948, 2961, 2971, 208 | 2973, 3085, 3089, 3113, 3124, 3213, 3217, 3241, 3252, 3295, 3341, 3345, 209 | 3369, 3506, 3516, 3633, 3715, 3721, 3736, 3744, 3748, 3750, 3756, 3761, 210 | 3781, 3912, 4239, 4347, 4681, 4695, 4697, 4745, 4785, 4799, 4801, 4823, 211 | 4881, 5760, 5901, 5997, 6313, 7405, 8024, 8026, 8028, 8030, 8117, 8125, 212 | 8133, 8181, 8468, 8485, 8487, 8489, 8494, 8527, 11311, 11359, 11687, 11695, 213 | 11703, 11711, 11719, 11727, 11735, 12448, 12539, 43010, 43014, 43019, 43587, 214 | 43696, 43713, 64286, 
64297, 64311, 64317, 64319, 64322, 64325, 65141]; 215 | var i, j, start, end; 216 | for (i = 0; i < singles.length; i++) { 217 | result[singles[i]] = true; 218 | } 219 | var ranges = [[0, 47], [58, 64], [91, 94], [123, 169], [171, 177], [182, 184], [706, 709], 220 | [722, 735], [741, 747], [751, 879], [888, 889], [894, 901], [1154, 1161], 221 | [1318, 1328], [1367, 1368], [1370, 1376], [1416, 1487], [1515, 1519], [1523, 1568], 222 | [1611, 1631], [1642, 1645], [1750, 1764], [1767, 1773], [1789, 1790], [1792, 1807], 223 | [1840, 1868], [1958, 1968], [1970, 1983], [2027, 2035], [2038, 2041], [2043, 2047], 224 | [2070, 2073], [2075, 2083], [2085, 2087], [2089, 2307], [2362, 2364], [2366, 2383], 225 | [2385, 2391], [2402, 2405], [2419, 2424], [2432, 2436], [2445, 2446], [2449, 2450], 226 | [2483, 2485], [2490, 2492], [2494, 2509], [2511, 2523], [2530, 2533], [2546, 2547], 227 | [2554, 2564], [2571, 2574], [2577, 2578], [2618, 2648], [2655, 2661], [2672, 2673], 228 | [2677, 2692], [2746, 2748], [2750, 2767], [2769, 2783], [2786, 2789], [2800, 2820], 229 | [2829, 2830], [2833, 2834], [2874, 2876], [2878, 2907], [2914, 2917], [2930, 2946], 230 | [2955, 2957], [2966, 2968], [2976, 2978], [2981, 2983], [2987, 2989], [3002, 3023], 231 | [3025, 3045], [3059, 3076], [3130, 3132], [3134, 3159], [3162, 3167], [3170, 3173], 232 | [3184, 3191], [3199, 3204], [3258, 3260], [3262, 3293], [3298, 3301], [3312, 3332], 233 | [3386, 3388], [3390, 3423], [3426, 3429], [3446, 3449], [3456, 3460], [3479, 3481], 234 | [3518, 3519], [3527, 3584], [3636, 3647], [3655, 3663], [3674, 3712], [3717, 3718], 235 | [3723, 3724], [3726, 3731], [3752, 3753], [3764, 3772], [3774, 3775], [3783, 3791], 236 | [3802, 3803], [3806, 3839], [3841, 3871], [3892, 3903], [3949, 3975], [3980, 4095], 237 | [4139, 4158], [4170, 4175], [4182, 4185], [4190, 4192], [4194, 4196], [4199, 4205], 238 | [4209, 4212], [4226, 4237], [4250, 4255], [4294, 4303], [4349, 4351], [4686, 4687], 239 | [4702, 4703], [4750, 4751], 
[4790, 4791], [4806, 4807], [4886, 4887], [4955, 4968], 240 | [4989, 4991], [5008, 5023], [5109, 5120], [5741, 5742], [5787, 5791], [5867, 5869], 241 | [5873, 5887], [5906, 5919], [5938, 5951], [5970, 5983], [6001, 6015], [6068, 6102], 242 | [6104, 6107], [6109, 6111], [6122, 6127], [6138, 6159], [6170, 6175], [6264, 6271], 243 | [6315, 6319], [6390, 6399], [6429, 6469], [6510, 6511], [6517, 6527], [6572, 6592], 244 | [6600, 6607], [6619, 6655], [6679, 6687], [6741, 6783], [6794, 6799], [6810, 6822], 245 | [6824, 6916], [6964, 6980], [6988, 6991], [7002, 7042], [7073, 7085], [7098, 7167], 246 | [7204, 7231], [7242, 7244], [7294, 7400], [7410, 7423], [7616, 7679], [7958, 7959], 247 | [7966, 7967], [8006, 8007], [8014, 8015], [8062, 8063], [8127, 8129], [8141, 8143], 248 | [8148, 8149], [8156, 8159], [8173, 8177], [8189, 8303], [8306, 8307], [8314, 8318], 249 | [8330, 8335], [8341, 8449], [8451, 8454], [8456, 8457], [8470, 8472], [8478, 8483], 250 | [8506, 8507], [8512, 8516], [8522, 8525], [8586, 9311], [9372, 9449], [9472, 10101], 251 | [10132, 11263], [11493, 11498], [11503, 11516], [11518, 11519], [11558, 11567], 252 | [11622, 11630], [11632, 11647], [11671, 11679], [11743, 11822], [11824, 12292], 253 | [12296, 12320], [12330, 12336], [12342, 12343], [12349, 12352], [12439, 12444], 254 | [12544, 12548], [12590, 12592], [12687, 12689], [12694, 12703], [12728, 12783], 255 | [12800, 12831], [12842, 12880], [12896, 12927], [12938, 12976], [12992, 13311], 256 | [19894, 19967], [40908, 40959], [42125, 42191], [42238, 42239], [42509, 42511], 257 | [42540, 42559], [42592, 42593], [42607, 42622], [42648, 42655], [42736, 42774], 258 | [42784, 42785], [42889, 42890], [42893, 43002], [43043, 43055], [43062, 43071], 259 | [43124, 43137], [43188, 43215], [43226, 43249], [43256, 43258], [43260, 43263], 260 | [43302, 43311], [43335, 43359], [43389, 43395], [43443, 43470], [43482, 43519], 261 | [43561, 43583], [43596, 43599], [43610, 43615], [43639, 43641], [43643, 43647], 262 | 
/**
 * Split a search query into tokens.
 *
 * Every character whose UTF-16 code unit is flagged in the `splitChars`
 * lookup table acts as a separator; maximal runs of non-separator
 * characters are returned in order of appearance. Empty tokens are never
 * produced.
 *
 * @param {string} query - raw query text
 * @returns {string[]} tokens found in the query
 */
function splitQuery(query) {
  var tokens = [];
  var tokenStart = -1;  // index where the current token began, -1 when outside a token
  var length = query.length;

  for (var pos = 0; pos < length; pos++) {
    var isSeparator = splitChars[query.charCodeAt(pos)];

    if (!isSeparator) {
      // First character of a new token: remember where it starts.
      if (tokenStart === -1) {
        tokenStart = pos;
      }
      continue;
    }

    // Separator reached: flush the token that was being collected, if any.
    if (tokenStart !== -1) {
      tokens.push(query.slice(tokenStart, pos));
      tokenStart = -1;
    }
  }

  // The query may end in the middle of a token.
  if (tokenStart !== -1) {
    tokens.push(query.slice(tokenStart));
  }
  return tokens;
}
pre { line-height: 125%; } 2 | td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } 3 | span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } 4 | td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } 5 | span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } 6 | .highlight .hll { background-color: #ffffcc } 7 | .highlight { background: #f8f8f8; } 8 | .highlight .c { color: #408080; font-style: italic } /* Comment */ 9 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 10 | .highlight .k { color: #008000; font-weight: bold } /* Keyword */ 11 | .highlight .o { color: #666666 } /* Operator */ 12 | .highlight .ch { color: #408080; font-style: italic } /* Comment.Hashbang */ 13 | .highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */ 14 | .highlight .cp { color: #BC7A00 } /* Comment.Preproc */ 15 | .highlight .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */ 16 | .highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */ 17 | .highlight .cs { color: #408080; font-style: italic } /* Comment.Special */ 18 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 19 | .highlight .ge { font-style: italic } /* Generic.Emph */ 20 | .highlight .gr { color: #FF0000 } /* Generic.Error */ 21 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 22 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 23 | .highlight .go { color: #888888 } /* Generic.Output */ 24 | .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ 25 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 26 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 27 | .highlight .gt { color: #0044DD } /* Generic.Traceback */ 28 | .highlight .kc { color: #008000; 
font-weight: bold } /* Keyword.Constant */ 29 | .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ 30 | .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ 31 | .highlight .kp { color: #008000 } /* Keyword.Pseudo */ 32 | .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ 33 | .highlight .kt { color: #B00040 } /* Keyword.Type */ 34 | .highlight .m { color: #666666 } /* Literal.Number */ 35 | .highlight .s { color: #BA2121 } /* Literal.String */ 36 | .highlight .na { color: #7D9029 } /* Name.Attribute */ 37 | .highlight .nb { color: #008000 } /* Name.Builtin */ 38 | .highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */ 39 | .highlight .no { color: #880000 } /* Name.Constant */ 40 | .highlight .nd { color: #AA22FF } /* Name.Decorator */ 41 | .highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */ 42 | .highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ 43 | .highlight .nf { color: #0000FF } /* Name.Function */ 44 | .highlight .nl { color: #A0A000 } /* Name.Label */ 45 | .highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ 46 | .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ 47 | .highlight .nv { color: #19177C } /* Name.Variable */ 48 | .highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ 49 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 50 | .highlight .mb { color: #666666 } /* Literal.Number.Bin */ 51 | .highlight .mf { color: #666666 } /* Literal.Number.Float */ 52 | .highlight .mh { color: #666666 } /* Literal.Number.Hex */ 53 | .highlight .mi { color: #666666 } /* Literal.Number.Integer */ 54 | .highlight .mo { color: #666666 } /* Literal.Number.Oct */ 55 | .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ 56 | .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ 57 | .highlight .sc { color: #BA2121 } /* Literal.String.Char */ 58 | .highlight 
.dl { color: #BA2121 } /* Literal.String.Delimiter */ 59 | .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ 60 | .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ 61 | .highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ 62 | .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ 63 | .highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ 64 | .highlight .sx { color: #008000 } /* Literal.String.Other */ 65 | .highlight .sr { color: #BB6688 } /* Literal.String.Regex */ 66 | .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ 67 | .highlight .ss { color: #19177C } /* Literal.String.Symbol */ 68 | .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ 69 | .highlight .fm { color: #0000FF } /* Name.Function.Magic */ 70 | .highlight .vc { color: #19177C } /* Name.Variable.Class */ 71 | .highlight .vg { color: #19177C } /* Name.Variable.Global */ 72 | .highlight .vi { color: #19177C } /* Name.Variable.Instance */ 73 | .highlight .vm { color: #19177C } /* Name.Variable.Magic */ 74 | .highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/html/_static/style.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: 'S-CoreDream-L'; 3 | src: url('https://cdn.jsdelivr.net/gh/projectnoonnu/noonfonts_six@1.2/S-CoreDream-3Light.woff') format('woff'); 4 | font-weight: normal; 5 | font-style: normal; 6 | } 7 | 8 | @font-face { 9 | font-family: 'S-CoreDream-B'; 10 | src: url('https://cdn.jsdelivr.net/gh/projectnoonnu/noonfonts_six@1.2/S-CoreDream-6Bold.woff') format('woff'); 11 | font-weight: normal; 12 | font-style: normal; 13 | } 14 | 15 | @font-face { 16 | font-family: 'S-CoreDream-EB'; 17 | src: url('https://cdn.jsdelivr.net/gh/projectnoonnu/noonfonts_six@1.2/S-CoreDream-8Heavy.woff') format('woff'); 
18 | font-weight: normal; 19 | font-style: normal; 20 | } 21 | 22 | body { 23 | font-family: 'S-CoreDream-L', "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; 24 | line-height: 1.6; 25 | } 26 | 27 | h1 { 28 | font-family: 'S-CoreDream-EB', "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; 29 | } 30 | 31 | h2, h3, h4 { 32 | font-family: 'S-CoreDream-B', "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; 33 | } -------------------------------------------------------------------------------- /docs/html/_static/translations.js: -------------------------------------------------------------------------------- 1 | Documentation.addTranslations({ 2 | "locale": "ko", 3 | "messages": { 4 | "%(filename)s — %(docstitle)s": "%(filename)s — %(docstitle)s", 5 | "© Copyright %(copyright)s.": "© \uc800\uc791\uad8c %(copyright)s.", 6 | "© Copyright %(copyright)s.": "© \uc800\uc791\uad8c %(copyright)s.", 7 | ", in ": ", \ubb38\uc11c - ", 8 | "About these documents": "\uc774 \ubb38\uc11c \uc815\ubcf4", 9 | "Automatically generated list of changes in version %(version)s": "\ubc84\uc804 %(version)s\uc758 \ubcc0\uacbd \uc0ac\ud56d (\uc790\ub3d9\uc73c\ub85c \uc0dd\uc131\ub41c \ubaa9\ub85d)", 10 | "C API changes": "C API \ubcc0\uacbd \uc0ac\ud56d", 11 | "Changes in Version %(version)s — %(docstitle)s": "\ubc84\uc804 %(version)s\uc758 \ubcc0\uacbd \uc0ac\ud56d — %(docstitle)s", 12 | "Collapse sidebar": "\uc0ac\uc774\ub4dc\ubc14 \ub2eb\uae30", 13 | "Complete Table of Contents": "\uc885\ud569 \ubaa9\ucc28", 14 | "Contents": "\ub0b4\uc6a9", 15 | "Copyright": "\uc800\uc791\uad8c", 16 | "Created using Sphinx %(sphinx_version)s.": "Sphinx %(sphinx_version)s \ubc84\uc804\uc73c\ub85c \uc0dd\uc131\ub418\uc5c8\uc2b5\ub2c8\ub2e4.", 17 | "Expand sidebar": "\uc0ac\uc774\ub4dc\ubc14 \uc5f4\uae30", 18 | "Full index on one page": "\ud55c \ud398\uc774\uc9c0\uc5d0 \uc804\uccb4 \uc0c9\uc778 \ubcf4\uae30", 19 | "General Index": "\uc804\uccb4 \uc0c9\uc778", 20 | "Global Module 
Index": "\ubaa8\ub4c8 \ucd1d \uc0c9\uc778", 21 | "Go": "\uc774\ub3d9", 22 | "Hide Search Matches": "\uac80\uc0c9 \uc77c\uce58 \uc228\uae30\uae30", 23 | "Index": "\uc0c9\uc778", 24 | "Index – %(key)s": "\uc0c9\uc778 – %(key)s", 25 | "Index pages by letter": "\uc54c\ud30c\ubcb3\ubcc4 \uc0c9\uc778", 26 | "Indices and tables:": "\uc0c9\uc778 \ubc0f \ud45c \ubaa9\ub85d:", 27 | "Last updated on %(last_updated)s.": "\ucd5c\uc885 \uc5c5\ub370\uc774\ud2b8: %(last_updated)s", 28 | "Library changes": "\ub77c\uc774\ube0c\ub7ec\ub9ac \ubcc0\uacbd \uc0ac\ud56d", 29 | "Navigation": "\ud0d0\uc0c9", 30 | "Next topic": "\ub2e4\uc74c \ud56d\ubaa9", 31 | "Other changes": "\ub2e4\ub978 \ubcc0\uacbd \uc0ac\ud56d", 32 | "Overview": "\uac1c\uc694", 33 | "Permalink to this definition": "\uc774 \uc815\uc758\uc5d0 \ub300\ud55c \ud37c\uba38\ub9c1\ud06c", 34 | "Permalink to this headline": "\uc774 \ud45c\uc81c\uc5d0 \ub300\ud55c \ud37c\uba38\ub9c1\ud06c", 35 | "Please activate JavaScript to enable the search\n functionality.": "\uac80\uc0c9 \uae30\ub2a5\uc744 \uc0ac\uc6a9\ud558\ub824\uba74 JavaScript\ub97c \ud65c\uc131\ud654\ud558\uc2ed\uc2dc\uc624.", 36 | "Preparing search...": "\uac80\uc0c9 \uc900\ube44 \uc911\u2026", 37 | "Previous topic": "\uc774\uc804 \ud56d\ubaa9", 38 | "Quick search": "\ube60\ub978 \uac80\uc0c9", 39 | "Search": "\uac80\uc0c9", 40 | "Search Page": "\uac80\uc0c9 \ud398\uc774\uc9c0", 41 | "Search Results": "\uac80\uc0c9 \uacb0\uacfc", 42 | "Search finished, found %s page(s) matching the search query.": "\uac80\uc0c9\uc774 \uc644\ub8cc\ub418\uc5c8\uc73c\uba70, \uac80\uc0c9\uc5b4\uc640 \uc77c\uce58\ud558\ub294 %s \uac1c \ud398\uc774\uc9c0\ub97c \ucc3e\uc558\uc2b5\ub2c8\ub2e4.", 43 | "Search within %(docstitle)s": "%(docstitle)s\uc5d0\uc11c \ucc3e\uae30", 44 | "Searching": "\uac80\uc0c9 \uc911", 45 | "Searching for multiple words only shows matches that contain\n all words.": "\uc5ec\ub7ec \ub2e8\uc5b4\ub97c \uac80\uc0c9\ud558\uba74 \ubaa8\ub4e0 \ub2e8\uc5b4\uac00 
\ud3ec\ud568\ub41c \uc77c\uce58 \ud56d\ubaa9\ub9cc \ud45c\uc2dc\ub429\ub2c8\ub2e4.", 46 | "Show Source": "\uc18c\uc2a4 \ubcf4\uae30", 47 | "Table of Contents": "\ubaa9\ucc28", 48 | "This Page": "\ud604\uc7ac \ubb38\uc11c", 49 | "Welcome! This is": "\ud658\uc601\ud569\ub2c8\ub2e4!", 50 | "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories.": "\uac80\uc0c9\uc5b4\uc640 \uc77c\uce58\ud558\ub294 \ubb38\uc11c\uac00 \uc5c6\uc2b5\ub2c8\ub2e4. \ubaa8\ub4e0 \ub2e8\uc5b4\uc758 \ucca0\uc790\uac00 \uc62c\ubc14\ub978\uc9c0, \ucda9\ubd84\ud55c \uce74\ud14c\uace0\ub9ac\ub97c \uc120\ud0dd\ud588\ub294\uc9c0 \ud655\uc778\ud558\uc2ed\uc2dc\uc624.", 51 | "all functions, classes, terms": "\ud568\uc218, \ud074\ub798\uc2a4 \ubc0f \uc6a9\uc5b4 \uac1c\uad00", 52 | "can be huge": "\ud070 \uacbd\uc6b0\uac00 \uc788\uc73c\ubbc0\ub85c \uc8fc\uc758", 53 | "last updated": "\ucd5c\uc885 \uc5c5\ub370\uc774\ud2b8", 54 | "lists all sections and subsections": "\ubaa8\ub4e0 \uad6c\uc5ed\uacfc \ud558\uc704 \uad6c\uc5ed \ubaa9\ub85d", 55 | "next chapter": "\ub2e4\uc74c \uc7a5", 56 | "previous chapter": "\uc774\uc804 \uc7a5", 57 | "quick access to all modules": "\ubaa8\ub4e0 \ubaa8\ub4c8 \uc870\uacac\ud45c", 58 | "search": "\uac80\uc0c9", 59 | "search this documentation": "\ubb38\uc11c \uac80\uc0c9", 60 | "the documentation for": "\ubb38\uc11c:" 61 | }, 62 | "plural_expr": "0" 63 | }); -------------------------------------------------------------------------------- /docs/html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | KoalaNLP — KoalaNLP 2.1.7 문서 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 | 110 | 111 |
112 | 113 | 114 | 120 | 121 | 122 |
123 | 124 |
125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 |
145 | 146 |
    147 | 148 |
  • »
  • 149 | 150 |
  • KoalaNLP
  • 151 | 152 | 153 |
  • 154 | 155 | 156 | View page source 157 | 158 | 159 |
  • 160 | 161 |
162 | 163 | 164 |
165 |
166 |
167 |
168 | 169 |
170 |

KoalaNLP

171 |
172 |

소개

173 |

한국어 형태소 및 구문 분석기의 모음인, 174 | KoalaNLP의 Python 175 | 판본입니다.

176 |

이 프로젝트는 서로 다른 형태의 형태소 분석기를 모아, 동일한 177 | 인터페이스 아래에서 사용할 수 있도록 하는 것이 목적입니다.

178 | 211 |
212 |
213 |

문서

214 | 232 |
233 |
234 |

색인

235 | 240 |
241 |
242 | 243 | 244 |
245 | 246 |
247 |
248 | 251 | 252 |
253 | 254 |
255 |

256 | © 저작권 2018, KoalaNLP. 257 | 258 |

259 |
260 | 261 | 262 | 263 | Built with Sphinx using a 264 | 265 | theme 266 | 267 | provided by Read the Docs. 268 | 269 |
270 |
271 |
272 | 273 |
274 | 275 |
276 | 277 | 278 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | -------------------------------------------------------------------------------- /docs/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/docs/html/objects.inv -------------------------------------------------------------------------------- /docs/html/py-modindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Python 모듈 목록 — KoalaNLP 2.1.7 문서 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
55 | 56 | 112 | 113 |
114 | 115 | 116 | 122 | 123 | 124 |
125 | 126 |
127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 |
147 | 148 |
    149 | 150 |
  • »
  • 151 | 152 |
  • Python 모듈 목록
  • 153 | 154 | 155 |
  • 156 | 157 |
  • 158 | 159 |
160 | 161 | 162 |
163 |
164 |
165 |
166 | 167 | 168 |

Python 모듈 목록

169 | 170 |
171 | k 172 |
173 | 174 | 175 | 176 | 178 | 179 | 181 | 184 | 185 | 186 | 189 | 190 | 191 | 194 | 195 | 196 | 199 | 200 | 201 | 204 | 205 | 206 | 209 | 210 | 211 | 214 |
 
177 | k
182 | koalanlp 183 |
    187 | koalanlp.API 188 |
    192 | koalanlp.data 193 |
    197 | koalanlp.ExtUtil 198 |
    202 | koalanlp.proc 203 |
    207 | koalanlp.types 208 |
    212 | koalanlp.Util 213 |
215 | 216 | 217 |
218 | 219 |
220 |
221 | 222 |
223 | 224 |
225 |

226 | © 저작권 2018, KoalaNLP. 227 | 228 |

229 |
230 | 231 | 232 | 233 | Built with Sphinx using a 234 | 235 | theme 236 | 237 | provided by Read the Docs. 238 | 239 |
240 |
241 |
242 | 243 |
244 | 245 |
246 | 247 | 248 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | -------------------------------------------------------------------------------- /docs/html/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 검색 — KoalaNLP 2.1.7 문서 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
55 | 56 | 112 | 113 |
114 | 115 | 116 | 122 | 123 | 124 |
125 | 126 |
127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 |
147 | 148 |
    149 | 150 |
  • »
  • 151 | 152 |
  • 검색
  • 153 | 154 | 155 |
  • 156 | 157 |
  • 158 | 159 |
160 | 161 | 162 |
163 |
164 |
165 |
166 | 167 | 174 | 175 | 176 |
177 | 178 |
179 | 180 |
181 | 182 |
183 |
184 | 185 |
186 | 187 |
188 |

189 | © 저작권 2018, KoalaNLP. 190 | 191 |

192 |
193 | 194 | 195 | 196 | Built with Sphinx using a 197 | 198 | theme 199 | 200 | provided by Read the Docs. 201 | 202 |
203 |
204 |
205 | 206 |
207 | 208 |
HNN = 'hnn'  #: Hannanum. The latest version is |ver_hnn|. Supports sentence splitting, POS tagging, syntactic parsing and dependency parsing.
KMR = 'kmr'  #: Komoran. The latest version is |ver_kmr|. Supports POS tagging only.
KKMA = 'kkma'  #: Kkma. The latest version is |ver_kkma|. Supports POS tagging and dependency parsing only.
EUNJEON = 'eunjeon'  #: Eunjeon. The latest version is |ver_eunjeon|. Supports POS tagging only.
ARIRANG = 'arirang'  #: Arirang. The latest version is |ver_arirang|. Supports POS tagging only.
RHINO = 'rhino'  #: Rhino. The latest version is |ver_rhino|. Supports POS tagging only.
OKT = 'okt'  #: Twitter (OKT). The latest version is |ver_okt|. Supports sentence splitting and POS tagging only.
DAON = 'daon'  #: Daon. The latest version is |ver_daon|. Supports POS tagging only.
KHAIII = 'khaiii'  #: Khaiii. The latest version is |ver_khaiii|. Supports POS tagging only.
UTAGGER = 'utagger'  #: UTagger. The latest version is |ver_utagger|.
ETRI = 'etri'  #: ETRI Open API. The latest version is |ver_etri|.
KSS = 'kss'  #: Korean Sentence Splitter. Python KoalaNLP depends on KSS directly, so specifying a version is meaningless.
KIWI = 'kiwi'  #: Kiwi. Requires `https://github.com/bab2min/kiwipiepy` to be installed in the Python environment.
CORE = 'core'  #: Analyzer interface definition library. The latest version is |ver_core|. Provides convenience features and is pulled in whenever another analyzer is referenced.

_REQUIRE_ASSEMBLY_ = [HNN, KKMA, ARIRANG, RHINO, DAON]  #: Analyzers whose artifact needs the 'assembly' classifier
_REQUIRE_PYTHON_ = [KSS, KIWI]  #: Analyzers that depend on a Python package


def is_python_native(api):
    """
    Tell whether an analyzer is backed by a Python package.

    :param str api: analyzer identifier (one of the constants of this module)
    :rtype: bool
    :return: True when ``api`` is listed in ``_REQUIRE_PYTHON_``
    """
    return api in _REQUIRE_PYTHON_


def _unsupported_error(api, type):
    """Build the exception reported when ``api`` does not provide ``type``."""
    return Exception('API.%s는 %s를 지원하지 않는 것 같습니다. '
                     'API 문서에서 지원여부를 다시 한번 확인해주시고, 지원한다고 적혀있어도 혹시 문제가 지속된다면 이슈를 올려주세요.' %
                     (api.upper(), str(type)))


def query(api: str, type: str):
    """
    Look up the implementation named ``type`` that the analyzer ``api`` provides.

    Python-native analyzers are resolved as attributes of the module
    ``koalanlp.<api>``; every other analyzer is resolved through
    ``koala_class_of`` and therefore needs a running JVM.

    :param str api: analyzer identifier; matched case-insensitively
    :param str type: name of the requested class
    :return: the attribute of the Python module, or whatever ``koala_class_of`` returns
    :raise Exception: when the JVM is required but not running, or when the lookup fails
        (the original error is attached as ``__cause__``)
    """
    api = api.lower()
    if is_python_native(api):
        try:
            module = import_module('koalanlp.%s' % api)
            return getattr(module, type)
        except Exception as e:
            # Keep the underlying import/attribute error visible in the traceback.
            raise _unsupported_error(api, type) from e

    if not is_jvm_running():
        # The last two fragments form a copy-pastable snippet, so each needs its own line.
        raise Exception("사용 전 초기화 과정이 필요합니다. 사용법의 Util.initialize 문서를 참고하여 초기화를 해주세요. "
                        "사용하신 코드를 토대로는 다음 코드의 실행을 추천해드립니다.\n"
                        "from koalanlp.Util import initialize\n"
                        "initialize(%s='LATEST')" % api)

    try:
        return koala_class_of(api, type)
    except Exception as e:
        raise _unsupported_error(api, type) from e


# ----- Define members exported -----

__all__ = ['HNN', 'KMR', 'KKMA', 'EUNJEON', 'ARIRANG', 'RHINO', 'OKT', 'DAON', 'ETRI', 'KHAIII', 'UTAGGER',
           'KSS', 'KIWI', 'CORE', 'query', 'is_python_native']
import API 11 | from .jvm import koala_class_of, string, java_list, is_jvm_running, start_jvm, check_jvm, shutdown_jvm 12 | from .types import * 13 | 14 | from koalanlp.jip.repository import RepositoryManager 15 | from koalanlp.jip.index import IndexManager 16 | from koalanlp.jip.cache import CacheManager 17 | from koalanlp.jip.maven import Artifact, Pom 18 | from koalanlp.jip.util import wait_until_download_finished 19 | 20 | # Fix for JAVA 9 encapsulation 21 | _JAVA9_FIX = '--add-opens java.base/java.lang=ALL-UNNAMED' 22 | 23 | # Logging setup 24 | logging.basicConfig(level=logging.INFO, format="[%(name)s] %(message)s") 25 | logging.getLogger("requests").setLevel(logging.WARNING) 26 | logging.getLogger("py4j").setLevel(logging.WARNING) 27 | logger = logging.getLogger('koalanlp.jip') 28 | 29 | # ------- Repository Setup -------- 30 | repos_manager = RepositoryManager() 31 | index_manager = None 32 | cache_manager = None 33 | 34 | # Local Ivy2 repo 35 | repos_manager.add_repos('local-ivy2', str(Path(Path.home(), ".ivy2", "cache").absolute()), 'local', order=1) 36 | # Sonatype repo 37 | repos_manager.add_repos('sonatype', 38 | 'https://oss.sonatype.org/content/repositories/public/', 'remote', order=2) 39 | # JCenter 40 | repos_manager.add_repos('jcenter', 'https://jcenter.bintray.com/', 'remote', order=3) 41 | 42 | # Jitpack for Komoran v3 43 | repos_manager.add_repos('jitpack.io', 'https://jitpack.io/', 'remote', order=4) 44 | 45 | # Maven Central 46 | repos_manager.add_repos('central1', 'https://repo1.maven.org/maven2/', 'remote', order=5) 47 | repos_manager.add_repos('central2', 'http://insecure.repo1.maven.org/maven2/', 'remote', order=6) 48 | 49 | 50 | # JIP 코드 참조하여 변경함. 
def _find_pom(artifact):
    """
    Find the POM of ``artifact`` together with the repository that holds it.

    The local cache is consulted first. Otherwise every repository known to
    ``repos_manager`` is asked in turn, and the first POM found is stored in
    the cache before being returned.

    :return: ``(pom, repos)`` on success, ``None`` when no repository has the POM
    """
    if cache_manager.is_artifact_in_cache(artifact):
        cached_pom = cache_manager.get_artifact_pom(artifact)
        return cached_pom, cache_manager.as_repos()

    for candidate in repos_manager.repos:
        pom = candidate.download_pom(artifact)
        if pom is None:
            # This repository does not have the artifact; try the next one.
            continue
        cache_manager.put_artifact_pom(artifact, pom)
        return pom, candidate

    return None


# Adapted from the JIP code.
def _resolve_artifacts_modified(artifacts, exclusions=None):
    """
    Walk the dependency graph of ``artifacts`` and collect what must be downloaded.

    Each artifact accepted for download is also registered with
    ``index_manager``; repositories declared by a visited POM are added to
    ``repos_manager``. Artifacts matching an entry of ``exclusions`` are skipped,
    and artifacts whose POM cannot be found are logged and ignored.

    :param artifacts: iterable of root artifacts to resolve
    :param exclusions: optional list of artifacts to leave out
    :return: list of artifacts to download, each with ``repos`` set to its source repository
    """
    if exclusions is None:
        exclusions = []

    def is_excluded(candidate):
        return any(candidate.is_same_artifact(excluded) for excluded in exclusions)

    to_download = []
    # Artifacts that still have to be examined.
    pending = set(artifacts)

    while pending:
        current = pending.pop()

        if index_manager.is_same_installed(current) and current not in to_download:
            continue

        if is_excluded(current):
            continue

        found = _find_pom(current)
        if found is None:
            if not is_excluded(current):
                logger.warning("[Warning] Artifact is not found: %s", current)
            # Ignore this unknown pom.
            continue

        if index_manager.is_installed(current):
            continue

        pom, repos = found
        current.repos = repos

        if not is_excluded(current):
            to_download.append(current)
            index_manager.add_artifact(current)

        pom_obj = Pom(pom, repos_manager, cache_manager)
        for repo_args in pom_obj.get_repositories():
            repos_manager.add_repos(*repo_args)

        for dependency in pom_obj.get_dependencies():
            # Exclusions declared on the parent also apply to its dependencies.
            dependency.exclusions.extend(current.exclusions)
            if not index_manager.is_same_installed(dependency):
                pending.add(dependency)

    return to_download
def initialize(java_options="--add-opens java.base/java.lang=ALL-UNNAMED -Xmx1g -Dfile.encoding=utf-8", lib_path=None,
               force_download=False, port=None, **packages):
    """
    Initialization function. Downloads the required Java libraries and starts the JVM.
    Once initialized, it cannot be initialized again unless :py:func:`koalanlp.Util.finalize` is called first.

    :param str java_options: JVM options (default: "--add-opens java.base/java.lang=ALL-UNNAMED -Xmx1g -Dfile.encoding=utf-8")
    :param Optional[str] lib_path: Location of the '.java' directory that stores the Java libraries. (default: None = current working directory)
    :param bool force_download: Whether to download every Java library again. (default: False)
    :param int port: When multiprocessing is used, the port on which the Python proxy talking to the Java analyzers is opened. (default: None = 25334)
    :param Dict[str,str] packages: Analyzer APIs to use, matched case-insensitively against the constants in
        :py:mod:`koalanlp.API`. (Keyword arguments; default: KMR="LATEST")
    :raise Exception: When the JVM is initialized more than once, or when the JVM self-test fails.
    :raise AttributeError: When a keyword does not name a constant of :py:mod:`koalanlp.API`.
    """
    # Refuse a second initialization before any side effect happens; otherwise
    # force_download would delete the jars of the JVM that is still running.
    if is_jvm_running():
        raise Exception("JVM cannot be initialized more than once. "
                        "Please call koalanlp.Util.finalize() when you want to re-init the JVM with other options.")

    if len(packages) == 0:
        packages = {
            'KMR': "LATEST"
        }
        logger.info("[Warning] Since no package names are specified, I'll load packages by default: %s" %
                    str(packages))

    # Resolve keyword names to API identifiers regardless of letter case.
    resolved = {getattr(API, name.upper()): version for name, version in packages.items()}

    # Python-native analyzers (KSS, KIWI) have no Java artifact to download.
    jvm_packages = {pack: version for pack, version in resolved.items()
                    if not API.is_python_native(pack)}
    if not jvm_packages:
        # Only KSS and/or KIWI were requested: load just the CORE module on the JVM.
        jvm_packages = {API.CORE: "LATEST"}
        logger.info("[Warning] KSS나 KIWI만 사용하고 있어, CORE 모듈만 자바로 초기화합니다.")

    if not lib_path:
        lib_path = Path.cwd()

    if force_download:
        clear_all_downloaded_jars(lib_path)

    # Initialize cache & index manager
    global cache_manager, index_manager
    cache_manager = CacheManager(lib_path)
    index_manager = IndexManager(lib_path)

    if _JAVA9_FIX not in java_options:
        java_options += ' ' + _JAVA9_FIX

    # split() without an argument never yields empty option strings.
    java_options = java_options.split()

    deps = [Artifact('kr.bydelta', 'koalanlp-%s' % pack, version,
                     'assembly' if pack in API._REQUIRE_ASSEMBLY_ else None)
            for pack, version in jvm_packages.items()]
    # Add py4j jar
    deps.append(Artifact('net.sf.py4j', 'py4j', '0.10.8.1'))

    exclusions = [Artifact('com.jsuereth', 'sbt-pgp', '*')]

    down_list = _resolve_artifacts_modified(deps, exclusions=exclusions)
    down_list.sort(key=lambda a: a.repos.uri)

    for artifact in down_list:
        local_path = cache_manager.get_jar_path(artifact)
        # Artifacts resolved from the cache repository are not downloaded again.
        if artifact.repos != cache_manager.as_repos():
            artifact.repos.download_jar(artifact, local_path)

    # Get all installed JAR files
    classpaths = [cache_manager.get_jar_path(artifact, filepath=True)
                  for artifact in index_manager.installed]
    wait_until_download_finished()
    start_jvm(java_options, classpaths, port=port)

    try:
        check_jvm()
    except Exception as e:
        raise Exception("JVM test failed because %s" % str(e)) from e

    # Initialize the enum entries
    POS.values()
    PhraseTag.values()
    DependencyTag.values()
    RoleType.values()
    CoarseEntityType.values()

    logger.info("JVM initialization procedure is completed.")
def finalize():
    """
    Shut down the running JVM once it is no longer needed.

    :return: True if the JVM is off after the call.
    """
    if not is_jvm_running():
        return True

    still_running = shutdown_jvm()
    return not still_running


def clear_all_downloaded_jars(lib_path=None):
    """
    Delete the downloaded Java libraries.

    :param Optional[str] lib_path: Location that contains the '.java' directory holding the Java libraries.
        (Default: None, i.e. the current working directory)
    """
    base_dir = lib_path if lib_path else Path.cwd()
    java_dir = Path(base_dir, '.java')

    if not java_dir.exists():
        return
    shutil.rmtree(java_dir)


def contains(string_list: List[str], tag) -> bool:
    """
    Check whether a phrase tag / dependency tag / semantic role / entity type is included in the given strings.

    :param List[str] string_list: Strings to search
    :param Union[PhraseTag,DependencyTag,CoarseEntityType,RoleType] tag: Tag to look for
    :rtype: bool
    :return: True if included; always False when ``tag`` is not exactly one of the four supported types
    """
    supported = (PhraseTag, DependencyTag, CoarseEntityType, RoleType)
    if not any(type(tag) is kind for kind in supported):
        return False

    java_contains = koala_class_of('Util').contains
    java_strings = java_list([string(s) for s in string_list])
    return java_contains(java_strings, tag.reference)
import API, data, ExtUtil, proc, types, Util 5 | -------------------------------------------------------------------------------- /koalanlp/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koalanlp/python-support/3240968e555b0f8ba8988dd5c17680a5e028cb68/koalanlp/__init__.pyc -------------------------------------------------------------------------------- /koalanlp/jip/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2011-2016 Sun Ning , 2 | Sergio Fernández 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | SOFTWARE. 
21 | -------------------------------------------------------------------------------- /koalanlp/jip/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import koalanlp.jip.util 5 | import koalanlp.jip.maven 6 | import koalanlp.jip.cache 7 | import koalanlp.jip.index 8 | import koalanlp.jip.repository 9 | -------------------------------------------------------------------------------- /koalanlp/jip/cache.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env jython 2 | # Copyright (C) 2011 Sun Ning 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | # SOFTWARE. 
class CacheRepository(MavenRepos):
    """Repository stored on the local disk under ``<basepath>/.java/cache``.

    Artifacts are laid out as ``<cache>/<group>/<artifact>/<artifact>-<version>.<ext>``.
    """

    def __init__(self, basepath=None):
        """Create the cache directory if needed and register it as the repository URI.

        :param basepath: directory that holds the ``.java`` folder (default: current
            working directory).
        """
        if basepath is None:
            basepath = os.getcwd()

        uri = os.path.expanduser(os.path.join(str(basepath), '.java', 'cache'))
        # exist_ok avoids the check-then-create race of the exists()/makedirs() pair.
        os.makedirs(uri, exist_ok=True)

        super(CacheRepository, self).__init__('cache', uri)

    def get_artifact_uri(self, artifact, ext):
        """Return the path of the cached file of ``artifact`` with extension ``ext``."""
        directory = self.get_artifact_dir(artifact)
        name = artifact.artifact + "-" + artifact.version + "." + ext

        # get_artifact_dir() already returns a path rooted at self.uri. Joining
        # self.uri in front of it again only worked for absolute base paths; with
        # a relative base path it produced "<uri>/<uri>/<group>/<artifact>/...".
        return os.path.join(directory, name)

    def get_artifact_dir(self, artifact):
        """Return (and create if missing) the directory holding ``artifact``'s files."""
        directory = os.path.join(self.uri, artifact.group,
                                 artifact.artifact)
        os.makedirs(directory, exist_ok=True)
        return directory

    def download_jar(self, artifact, local_path=None):
        """Copy the cached JAR of ``artifact`` to ``local_path``."""
        path = self.get_artifact_uri(artifact, 'jar')
        shutil.copy(path, local_path)

    def download_pom(self, artifact, local_path=None):
        """Return the cached POM text of ``artifact``, or ``None`` when it is not cached.

        When ``local_path`` is given, the POM file is also copied there.
        """
        path = self.get_artifact_uri(artifact, 'pom')
        if not os.path.exists(path):
            return None

        if local_path:
            shutil.copy(path, local_path)

        # The context manager closes the file even when read() fails.
        with codecs.open(path, mode='r', encoding='utf-8') as f:
            return f.read()

    def put_pom(self, artifact, data):
        """Write the POM text ``data`` of ``artifact`` into the cache (UTF-8)."""
        path = self.get_artifact_uri(artifact, 'pom')
        with codecs.open(path, mode='w', encoding='utf-8') as f:
            f.write(data)

    def put_jar(self, artifact, jarpath):
        """Copy the JAR at ``jarpath`` into the cache as ``artifact``'s JAR."""
        path = self.get_artifact_uri(artifact, 'jar')
        shutil.copy(jarpath, path)
91 | return self.cache.download_pom(artifact, topath) 92 | else: 93 | return None 94 | 95 | def get_artifact_jar(self, artifact, topath): 96 | self.cache.download_jar(artifact, topath) 97 | 98 | def put_artifact_pom(self, artifact, data): 99 | self.cache.put_pom(artifact, data) 100 | 101 | def put_artifact_jar(self, artifact, jarpath): 102 | self.cache.put_jar(artifact, jarpath) 103 | 104 | def as_repos(self): 105 | return self.cache 106 | 107 | def get_cache_path(self): 108 | return self.cache.uri 109 | 110 | def get_jar_path(self, artifact, filepath=False): 111 | if filepath: 112 | return self.cache.get_artifact_uri(artifact, 'jar') 113 | else: 114 | return self.cache.get_artifact_dir(artifact) 115 | 116 | def is_artifact_in_cache(self, artifact, jar=True): 117 | pom_in_cache = os.path.exists( 118 | self.cache.get_artifact_uri(artifact, 'pom')) 119 | jar_in_cache = os.path.exists( 120 | self.cache.get_artifact_uri(artifact, 'jar')) 121 | if jar: 122 | return pom_in_cache and jar_in_cache 123 | else: 124 | return pom_in_cache 125 | 126 | 127 | __all__ = ['CacheManager'] 128 | -------------------------------------------------------------------------------- /koalanlp/jip/index.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env jython 2 | # Copyright (C) 2011 Sun Ning 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 
class IndexManager(object):
    """Keep track of the artifacts installed under ``<basepath>/.java``.

    The index is rebuilt on construction by scanning for ``*.jar`` files; the
    group and artifact ids are taken from the two directory levels above each
    JAR, and the version from the file name.
    """

    def __init__(self, basepath):
        """Scan ``<basepath>/.java`` (if it exists) and index every JAR found.

        :param basepath: directory that holds the ``.java`` folder.
        """
        self.installed = set()
        self.committed = False
        self.persist_lock = threading.Lock()

        java_dir = Path(basepath, '.java')
        if java_dir.exists():
            for jarname in java_dir.glob('**/*.jar'):
                group_id = jarname.parent.parent.name
                artifact_id = jarname.parent.name

                # Path.stem drops only the final ".jar" suffix, whereas
                # str.replace('.jar', '') would also eat a ".jar" occurring
                # in the middle of the file name.
                # The file name is "<artifact_id>-<version>"; skip the id and the dash.
                version_string = jarname.stem[len(artifact_id) + 1:]

                self.installed.add(Artifact(group_id, artifact_id, version_string))

    def add_artifact(self, artifact):
        """Add ``artifact`` to the index and mark the index as modified."""
        self.committed = True
        self.installed.add(artifact)

    def get_artifact(self, artifact_eq):
        """Return the indexed artifact equal to ``artifact_eq``, or ``None``."""
        for artifact in self.installed:
            if artifact == artifact_eq:
                return artifact
        return None

    def remove_artifact(self, artifact):
        """Remove the indexed artifact equal to ``artifact``, if there is one."""
        self.committed = True
        a = self.get_artifact(artifact)

        if a is not None:
            self.installed.remove(a)

    def remove_all(self):
        """Forget every indexed artifact."""
        self.committed = True
        self.installed = set()

    def is_installed(self, artifact_test):
        """Return True when an artifact equal to ``artifact_test`` is indexed."""
        return self.get_artifact(artifact_test) is not None

    def is_same_installed(self, artifact):
        """Return True when any indexed artifact reports ``is_same_artifact(artifact)``."""
        return any(a.is_same_artifact(artifact) for a in self.installed)
class RepositoryManager(object):
    """Ordered collection of the Maven repositories used to look up artifacts.

    Each repository is described by a ``(name, uri, type)`` triple, where the
    type is ``'local'`` (file system) or ``'remote'`` (HTTP).
    """

    MAVEN_LOCAL_REPOS = ('local', os.path.expanduser(os.path.join('~', '.m2', 'repository')), 'local')
    MAVEN_PUBLIC_REPOS = ('public', "https://repo1.maven.org/maven2/", 'remote')

    def __init__(self):
        """Register the local ``~/.m2`` repository first, then the public Maven repository."""
        self.repos = []

        # Unpack the loop variable itself. The previous code unpacked
        # MAVEN_LOCAL_REPOS on every iteration, so the public repository was
        # never registered (the second add was dropped as a duplicate).
        for name, uri, rtype in (self.MAVEN_LOCAL_REPOS, self.MAVEN_PUBLIC_REPOS):
            # create repos in order
            self.add_repos(name, uri, rtype, order=len(self.repos))

    def add_repos(self, name, uri, repos_type, order=None):
        """Add a repository unless an equal one is already registered.

        :param name: display name of the repository.
        :param uri: directory path (local) or base URL (remote).
        :param repos_type: ``'local'`` or ``'remote'``; any other value logs a
            warning and terminates the process with exit status 1.
        :param order: index at which to insert the repository; appended when ``None``.
        """
        if repos_type == 'local':
            repo = MavenFileSystemRepos(name, uri)
        elif repos_type == 'remote':
            repo = MavenHttpRemoteRepos(name, uri)
        else:
            logger.warning('[Error] Unknown repository type.')
            sys.exit(1)

        if repo not in self.repos:
            if order is not None:
                self.repos.insert(order, repo)
            else:
                self.repos.append(repo)
            logger.debug('[Repository] Added: %s' % repo.name)
class MavenFileSystemRepos(MavenRepos):
    """Maven repository that lives in a directory of the local file system."""

    def __init__(self, name, uri):
        MavenRepos.__init__(self, name, os.path.expanduser(uri))

    def get_artifact_uri(self, artifact, ext):
        """Return the file path of ``artifact``'s file with extension ``ext``."""
        relative_name = artifact.to_maven_name(ext)
        if sys.platform in ('dos', 'win16', 'win32'):
            # Maven names use '/', so convert them to the native separator.
            relative_name = relative_name.replace('/', os.sep)

        return os.path.join(self.uri, relative_name)

    def download_jar(self, artifact, local_path):
        """Copy the JAR of ``artifact`` into the directory ``local_path``.

        Raises ``IOError`` when the JAR does not exist in this repository.
        """
        source_path = self.get_artifact_uri(artifact, 'jar')
        logger.info("[Checking] jar package from %s" % self.name)

        if not os.path.exists(source_path):
            logger.error("[Error] File not found %s" % source_path)
            raise IOError('File not found:' + source_path)

        target_path = os.path.join(local_path, artifact.to_jip_name())
        logger.info("[Downloading] %s" % source_path)
        shutil.copy(source_path, target_path)
        logger.info("[Finished] %s completed" % target_path)

    def download_pom(self, artifact):
        """Return the POM text of ``artifact``, or ``None`` when the file is missing."""
        pom_path = self.get_artifact_uri(artifact, 'pom')
        logger.info('[Checking] pom file %s' % pom_path)

        if not os.path.exists(pom_path):
            logger.info('[Skipped] pom file not found at %s' % pom_path)
            return None

        with open(pom_path, 'r') as pom_file:
            return pom_file.read()

    def last_modified(self, artifact):
        """Return the modification time of ``artifact``'s POM, or ``None`` when missing."""
        pom_path = self.get_artifact_uri(artifact, 'pom')
        if not os.path.exists(pom_path):
            return None

        return os.stat(pom_path)[stat.ST_MTIME]
name, uri): 145 | MavenRepos.__init__(self, name, uri) 146 | self.pom_cache = {} 147 | self.pom_not_found_cache = [] 148 | 149 | def download_jar(self, artifact, local_path): 150 | maven_path = self.get_artifact_uri(artifact, 'jar') 151 | logger.info('[Downloading] jar from %s' % maven_path) 152 | local_jip_path = os.path.join(local_path, artifact.to_jip_name()) 153 | local_f = open(local_jip_path, 'wb') 154 | # download jar asyncly 155 | download(maven_path, local_f, True) 156 | # self.logger.info('[Finished] %s downloaded ' % maven_path) 157 | 158 | def download_pom(self, artifact): 159 | if artifact in self.pom_not_found_cache: 160 | return None 161 | 162 | if artifact in self.pom_cache: 163 | return self.pom_cache[artifact] 164 | 165 | if artifact.is_snapshot(): 166 | snapshot_info = self.get_snapshot_info(artifact) 167 | if snapshot_info is not None: 168 | ts, bn = snapshot_info 169 | artifact.timestamp = ts 170 | artifact.build_number = bn 171 | 172 | maven_path = self.get_artifact_uri(artifact, 'pom') 173 | try: 174 | logger.info('[Checking] pom file %s' % maven_path) 175 | data = download_string(maven_path) 176 | # cache 177 | self.pom_cache[artifact] = data 178 | 179 | return data 180 | except DownloadException: 181 | self.pom_not_found_cache.append(artifact) 182 | logger.info('[Skipped] Pom file not found at %s' % maven_path) 183 | return None 184 | 185 | def get_artifact_uri(self, artifact, ext): 186 | if not artifact.is_snapshot(): 187 | maven_name = artifact.to_maven_name(ext) 188 | else: 189 | maven_name = artifact.to_maven_snapshot_name(ext) 190 | if self.uri.endswith('/'): 191 | maven_path = self.uri + maven_name 192 | else: 193 | maven_path = self.uri + '/' + maven_name 194 | return maven_path 195 | 196 | def get_snapshot_info(self, artifact): 197 | metadata_path = self.get_metadata_path(artifact) 198 | 199 | try: 200 | data = download_string(metadata_path) 201 | 202 | eletree = ElementTree.fromstring(data) 203 | timestamp = 
eletree.findtext('versioning/snapshot/timestamp') 204 | build_number = eletree.findtext('versioning/snapshot/buildNumber') 205 | 206 | return timestamp, build_number 207 | except DownloadException: 208 | return None 209 | 210 | def get_metadata_path(self, artifact): 211 | group = artifact.group.replace('.', '/') 212 | metadata_path = "%s/%s/%s/%s/maven-metadata.xml" % (self.uri, group, 213 | artifact.artifact, artifact.version) 214 | return metadata_path 215 | 216 | def last_modified(self, artifact): 217 | metadata_path = self.get_metadata_path(artifact) 218 | try: 219 | fd = urlrequest.urlopen(metadata_path) 220 | if 'last-modified' in fd.headers: 221 | ts = fd.headers['last-modified'] 222 | fd.close() 223 | locale.setlocale(locale.LC_TIME, 'en_US') 224 | last_modified = time.strptime(ts, '%a, %d %b %Y %H:%M:%S %Z') 225 | return time.mktime(last_modified) 226 | else: 227 | fd.close() 228 | return 0 229 | except urlerror.HTTPError: 230 | return None 231 | 232 | def download_check_sum(self, checksum_type, origin_file_name): 233 | """ return pre calculated checksum value, only avaiable for remote repos """ 234 | checksum_url = origin_file_name + "." 
+ checksum_type 235 | try: 236 | return download_string(checksum_url) 237 | except DownloadException: 238 | return None 239 | 240 | @staticmethod 241 | def checksum(filepath, checksum_type): 242 | if checksum_type == 'md5': 243 | hasher = hashlib.md5() 244 | elif checksum_type == 'sha1': 245 | hasher = hashlib.sha1() 246 | else: 247 | raise ValueError() 248 | 249 | buf_size = 1024 * 8 250 | file_to_check = open(filepath, 'r') 251 | buf = file_to_check.read(buf_size) 252 | while len(buf) > 0: 253 | hasher.update(buf) 254 | buf = file_to_check.read(buf_size) 255 | 256 | file_to_check.close() 257 | return hasher.hexdigest() 258 | 259 | 260 | __all__ = ['RepositoryManager'] 261 | -------------------------------------------------------------------------------- /koalanlp/jip/util.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env jython 2 | # Copyright (C) 2011 Sun Ning 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
def download(url, target, asynchronous=False, close_target=False, quiet=True):
    """Download ``url`` into ``target``.

    :param url: address to fetch.
    :param target: writable binary file-like object receiving the content.
    :param asynchronous: when true, only queue the download on the shared
        thread pool and return immediately.
    :param close_target: when true, close ``target`` once the synchronous
        download has finished, whether it succeeded or failed.
    :param quiet: when false, log the start and the end of the download.
    :raises DownloadException: when the HTTP request fails.
    """
    import requests

    if asynchronous:
        pool.submit(url, target)
        return

    try:
        t0 = time.time()
        source = requests.get(url, headers={'User-Agent': JIP_USER_AGENT})
        try:
            source.raise_for_status()
            # Content-Length is optional (e.g. chunked responses); indexing the
            # header directly raised KeyError, which is not a
            # RequestException and therefore escaped the handler below.
            size = source.headers.get('Content-Length', 'unknown')
            if not quiet:
                logger.info('[Downloading] %s %s bytes to download' % (url, size))
            for buf in source.iter_content(BUF_SIZE):
                target.write(buf)
        finally:
            # Release the connection even when writing or the status check fails.
            source.close()

        t1 = time.time()
        if not quiet:
            logger.info('[Downloading] Download %s completed in %f secs' % (url, (t1 - t0)))
    except requests.exceptions.RequestException as e:
        raise DownloadException(url, e) from e
    finally:
        # Close on failure as well, so a failed download does not leak the
        # handle of a partially written file.
        if close_target:
            target.close()
class DownloadThreadPool(object):
    """Small pool of daemon threads that download queued ``(url, target)`` pairs."""

    def __init__(self, size=3):
        """Create ``size`` worker threads; they are started lazily on first submit."""
        self.queue = queue.Queue()
        # daemon=True replaces the deprecated Thread.setDaemon() call.
        self.workers = [threading.Thread(target=self._do_work, daemon=True) for _ in range(size)]
        self.initialized = False

    def init_threads(self):
        """Start all worker threads."""
        for worker in self.workers:
            worker.start()
        self.initialized = True

    def _do_work(self):
        """Worker loop: take one queued download at a time and run it."""
        while True:
            url, target = self.queue.get()
            try:
                download(url, target, close_target=True, quiet=False)
            except Exception:
                # Thread boundary: log and keep the worker alive, otherwise
                # one failed download permanently removes a worker.
                logger.exception('[Error] Download failed: %s' % url)
            finally:
                # Always acknowledge the item. Without this, a failed download
                # left the queue's unfinished count above zero and join()
                # blocked forever.
                self.queue.task_done()

    def join(self):
        """Block until every submitted download has been processed."""
        self.queue.join()

    def submit(self, url, target):
        """Queue a download of ``url`` into ``target``, starting the workers if needed."""
        if not self.initialized:
            self.init_threads()
        self.queue.put((url, target))
def class_of(*path):
    """Return the JVM class or package reached by following ``path``.

    The segments are joined with ``.`` to form a cache key; results are cached
    in ``_CLASS_DIC`` so each dotted path is resolved through the gateway once.
    """
    dotted_name = '.'.join(path)
    node = GATEWAY.jvm

    cached = _CLASS_DIC.get(dotted_name)
    if dotted_name in _CLASS_DIC:
        return cached

    # Walk down from the JVM view one path segment at a time.
    for segment in path:
        node = node.__getattr__(segment)

    _CLASS_DIC[dotted_name] = node
    return node
def py_dict(result, key_converter=None, value_converter=None) -> Dict:
    """Convert a Java map-like object into a Python ``dict``.

    Keys are read from ``result.keySet().toArray()`` and values from
    ``result.get(key)``.

    :param result: object exposing ``keySet()`` and ``get(key)``.
    :param key_converter: optional callable applied to every key.
    :param value_converter: optional callable applied to every value.
    :return: dictionary of the (converted) keys and values.
    """
    def _unchanged(item):
        return item

    convert_key = _unchanged if key_converter is None else key_converter
    convert_value = _unchanged if value_converter is None else value_converter

    converted = {}
    for java_key in result.keySet().toArray():
        python_key = convert_key(java_key)
        converted[python_key] = convert_value(result.get(java_key))

    return converted
def error_handler(e: JavaError):
    """Log a Java-side error with a hint for the user, then re-raise it.

    When the error text mentions a missing class or method, the logged message
    recommends re-downloading the JAR files with ``force_download=True``;
    otherwise a generic message is logged.

    :param JavaError e: the error raised while talking to the JVM.
    :raises JavaError: always re-raises ``e`` after logging.
    """
    error_text = str(e)
    missing_class_markers = ('NoClassDefFoundError', 'ClassNotFoundException', 'NoSuchMethodException')

    if any(marker in error_text for marker in missing_class_markers):
        message = ('Java와 통신 중에 필요한 클래스가 없다는 것을 확인했습니다. '
                   '자바 Jar 파일을 강제로 다시 다운로드하는 것을 추천합니다. '
                   '처음 initialize() 함수를 호출하실 때, force_download=True를 추가해주세요.\n'
                   '(예) initialize(KKMA="LATEST", force_download=True).\n'
                   '이렇게 해도 문제가 계속된다면, Issue를 등록해주세요.')
    else:
        message = ('Java에서 처리하던 중에 문제가 발생했습니다. '
                   '문제가 계속된다면, Issue를 등록해주세요.')

    logging.exception(message, exc_info=e)
    raise e
class Tagger:
    """Morphological tagger that runs the module-level Kiwi instance."""

    def tag(self, paragraph: str, no_split=False) -> List[Sentence]:
        """
        Analyze a paragraph and return one ``Sentence`` per input sentence.

        :param str paragraph: text to analyze.
        :param bool no_split: when true, treat the paragraph as a single
            sentence instead of splitting it with KSS.
        :rtype: List[Sentence]
        :return: the analyzed sentences.
        """
        # Split the paragraph into sentences with KSS unless the caller opted out.
        if not no_split:
            sentences = split_sentences(paragraph)
        else:
            sentences = [paragraph]

        # Run Kiwi under the lock. The context manager releases the lock even
        # when analyze() raises; a bare acquire()/release() pair left it held
        # forever in that case and blocked every later call.
        with _KiwiLock:
            kiwi_result = _KiwiInstance.analyze(sentences)

        # Convert Kiwi's output into KoalaNLP data objects.
        result = []
        for sentence, analyzed in zip(sentences, kiwi_result):
            word_begin = 0
            word_end = 0
            words = []
            curr_morphs = []

            # NOTE(review): analyzed[0][0] is presumably the morpheme list of
            # the top-ranked analysis; confirm against kiwipiepy's API.
            for morph, raw_tag, begin, length in analyzed[0][0]:
                if begin > word_end > word_begin:
                    # There is a gap (whitespace) before this morpheme, so the
                    # morphemes collected so far form one word.
                    words.append(Word(sentence[word_begin:word_end], curr_morphs))
                    curr_morphs = []
                    word_begin = begin

                tag = _convert_tag(raw_tag)
                curr_morphs.append(Morpheme(morph, tag, raw_tag))
                word_end = begin + length

            # Flush the morphemes that remain after the last one as the final word.
            if curr_morphs:
                words.append(Word(sentence[word_begin:word_end], curr_morphs))

            result.append(Sentence(words))

        return result

    def tagSentence(self, sentence: str) -> Sentence:
        """
        Analyze a single sentence without sentence splitting.

        :param str sentence: the sentence to analyze.
        :rtype: Sentence
        :return: the analyzed sentence.
        """
        return self.tag(sentence, no_split=True)[0]
80 | 81 | :param Tuple[str,POS] pairs: (표면형, 품사)의 가변형 인자 82 | """ 83 | _KiwiLock.acquire() 84 | for word, tag in pairs: 85 | _KiwiInstance.add_user_word(word, pos=tag.name) 86 | 87 | _KiwiInstance.prepare() 88 | _KiwiLock.release() 89 | 90 | def contains(self, word: str, *pos_tags: POS) -> bool: 91 | raise NotImplementedError() 92 | 93 | def importFrom(self, other, fastAppend=False, filter=lambda t: t.isNoun()): 94 | raise NotImplementedError() 95 | 96 | def getBaseEntries(self, filter=lambda t: t.isNoun()): 97 | raise NotImplementedError() 98 | 99 | def getItems(self) -> List[Tuple[str, POS]]: 100 | raise NotImplementedError() 101 | 102 | def getNotExists(self, onlySystemDic: bool, *word: Tuple[str, POS]) -> List[Tuple[str, POS]]: 103 | raise NotImplementedError() 104 | -------------------------------------------------------------------------------- /koalanlp/kss/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from importlib import import_module 3 | 4 | 5 | class SentenceSplitter: 6 | def __init__(self): 7 | try: 8 | module = import_module('kss') 9 | self._call = getattr(module, 'split_sentences') 10 | except ModuleNotFoundError: 11 | raise Exception('KSS Python package를 설치해야 합니다. 
def _enum_value_dict(cls, item_converter):
    """
    Build a ``{name: value}`` mapping for all constants of a Java enum.

    :param str cls: Class name passed to ``koala_class_of`` to find the enum.
    :param item_converter: Callable applied (through ``py_list``) to each Java
        constant to wrap it in its Python counterpart.
    :return: Mapping from each wrapped value's ``name`` to the value. A
        ``JavaError`` is forwarded to ``error_handler``.
    """
    try:
        return {value.name: value for value in py_list(koala_class_of(cls).values(), item_converter)}
    except JavaError as e:
        error_handler(e)


class _JavaEnum(object):
    """Python-side wrapper around a single Java enum constant."""

    name = ''  #: Name of the enum constant
    ordinal = -1  #: Ordinal (position) of the constant

    def __init__(self, reference):
        """
        Wrap a Java enum constant.

        :param reference: Java object exposing ``name()``, ``ordinal()`` and
            ``getClass().getName()``.
        """
        self.reference = reference
        try:
            self.name = reference.name()
            self.ordinal = reference.ordinal()
            self.classType = reference.getClass().getName()
        except JavaError as e:
            error_handler(e)

    def __repr__(self):
        return self.name

    def __eq__(self, other):
        return isinstance(other, _JavaEnum) and other.classType == self.classType and other.ordinal == self.ordinal

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None, which would make enum
        # values unusable in sets and as dict keys. Hash the same fields that
        # __eq__ compares so that equal values hash equally.
        return hash((self.classType, self.ordinal))


class POS(_JavaEnum):
    """ Sejong part-of-speech tags """

    __VALUES__ = {}

    def __init__(self, reference):
        """
        Enum class holding the Sejong part-of-speech tag set.
        """
        super().__init__(reference)

    @staticmethod
    def values():
        """
        Return all POS values.

        On the first call the values are loaded through ``_enum_value_dict``
        and each one is also attached to the class as an attribute
        (e.g. ``POS.NNG``).

        :rtype: Iterable[POS]
        :return: View over every part-of-speech tag
        """
        if len(POS.__VALUES__) == 0:
            POS.__VALUES__ = _enum_value_dict(__class__.__name__, lambda x: POS(x))
            for name, value in POS.__VALUES__.items():
                setattr(POS, name, value)

        return POS.__VALUES__.values()

    @staticmethod
    def valueOf(name: str):
        """
        Return the POS value whose name matches ``name``.

        The lookup reads the class attribute of that name, i.e. the attributes
        that :py:meth:`values` installs when it loads the tags.

        :param str name: Name of the POS value to look up.

        :rtype: POS
        :return: The matching POS value
        """
        return getattr(POS, name)

    def isNoun(self) -> bool:
        """
        Check whether this value is a noun-class tag (체언).

        :rtype: bool
        :return: True for a noun-class tag
        """
        try:
            return self.reference.isNoun()
        except JavaError as e:
            error_handler(e)

    def isPredicate(self) -> bool:
        """
        Check whether this value is a predicate (용언).

        :rtype: bool
        :return: True for a predicate
        """
        try:
            return self.reference.isPredicate()
        except JavaError as e:
            error_handler(e)

    def isModifier(self) -> bool:
        """
        Check whether this value is a modifier (수식언).

        :rtype: bool
        :return: True for a modifier
        """
        try:
            return self.reference.isModifier()
        except JavaError as e:
            error_handler(e)

    def isPostPosition(self) -> bool:
        """
        Check whether this value is a postposition / particle (관계언, 조사).

        :rtype: bool
        :return: True for a postposition
        """
        try:
            return self.reference.isPostPosition()
        except JavaError as e:
            error_handler(e)

    def isEnding(self) -> bool:
        """
        Check whether this value is an ending (어미).

        :rtype: bool
        :return: True for an ending
        """
        try:
            return self.reference.isEnding()
        except JavaError as e:
            error_handler(e)

    def isAffix(self) -> bool:
        """
        Check whether this value is an affix (접사).

        :rtype: bool
        :return: True for an affix
        """
        try:
            return self.reference.isAffix()
        except JavaError as e:
            error_handler(e)

    def isSuffix(self) -> bool:
        """
        Check whether this value is a suffix (접미사).

        :rtype: bool
        :return: True for a suffix
        """
        try:
            return self.reference.isSuffix()
        except JavaError as e:
            error_handler(e)

    def isSymbol(self) -> bool:
        """
        Check whether this value is a symbol (기호).

        :rtype: bool
        :return: True for a symbol
        """
        try:
            return self.reference.isSymbol()
        except JavaError as e:
            error_handler(e)

    def isUnknown(self) -> bool:
        """
        Check whether this value marks an unidentified word (미확인 단어).

        :rtype: bool
        :return: True for an unidentified word
        """
        try:
            return self.reference.isUnknown()
        except JavaError as e:
            error_handler(e)

    def startsWith(self, tag: str) -> bool:
        """
        Check whether this value starts with the given [tag].

        :param str tag: Part-of-speech category prefix to test against.

        :return: True when this value belongs to (starts with) the category
        """
        try:
            return self.reference.startsWith(string(tag))
        except JavaError as e:
            error_handler(e)
class RoleType(_JavaEnum):
    """ ETRI semantic role labels """

    __VALUES__ = {}

    def __init__(self, reference):
        """
        Enum class holding the ETRI semantic-role label set.
        """
        super().__init__(reference)

    @staticmethod
    def values():
        """
        Return all RoleType values, loading them on first use.

        :rtype: Iterable[RoleType]
        :return: View over every semantic-role tag
        """
        # Already loaded: hand back the cached values.
        if len(RoleType.__VALUES__) != 0:
            return RoleType.__VALUES__.values()

        RoleType.__VALUES__ = _enum_value_dict(RoleType.__name__, RoleType)
        # Expose each constant as a class attribute (RoleType.<NAME>).
        for key, item in RoleType.__VALUES__.items():
            setattr(RoleType, key, item)

        return RoleType.__VALUES__.values()

    @staticmethod
    def valueOf(name: str):
        """
        Return the RoleType value whose name matches ``name``.

        :param str name: Name of the value to look up.

        :rtype: RoleType
        :return: The matching RoleType value
        """
        return getattr(RoleType, name)
#!/usr/bin/env bash
#
# Clone (or update) Khaiii under $HOME/khaiii-orig, check out its newest tag
# and build the shared library and resources. Intended for Travis CI.

# Print a status message in blue. Bash's builtin `echo` does not interpret
# "\033" without -e, so printf is used to actually emit the colour codes.
info() {
    printf '\033[34m%s\033[0m\n' "$1"
}

# Variables are quoted so the tests stay valid when TRAVIS_OS_NAME is unset.
if [ "$TRAVIS_OS_NAME" == 'windows' ]
then
    echo "Khaiii does not support Windows architecture."
    exit 0
fi

### Clone the Khaiii repository
if [ ! -d "$HOME/khaiii-orig/.git" ]
then
    ### Clone to ~/khaiii-orig/
    cd "$HOME" || exit 1
    git clone https://github.com/kakao/khaiii.git khaiii-orig
    cd khaiii-orig || exit 1
    info "Clone finished!"
else
    ### Travis CI already has cached data: just pull
    cd "$HOME/khaiii-orig" || exit 1
    git pull origin master
    info "Pull finished!"
fi

# Sort tags as versions so that e.g. v0.10 is ranked after v0.9.
KHAIII_LATEST=$(git tag -l --sort=version:refname | tail -n 1)
git checkout -f "tags/$KHAIII_LATEST"

### Make build files
mkdir -p build

# OS Release check (khaiii/khaiii#103)
if [ "$TRAVIS_OS_NAME" == 'linux' ]
then
    RELEASE=$(lsb_release -cs)
else
    RELEASE=none
fi

cd build || exit 1
if [ "$RELEASE" == 'focal' ]
then
    cmake -E env CXXFLAGS="-w" cmake ..
else
    cmake ..
fi

### Make shared object file
# NOTE(review): KHAIII_LATEST is the raw tag name; confirm the library
# suffix really matches it, otherwise this always rebuilds.
if [ ! -f "lib/libkhaiii.so.${KHAIII_LATEST}" ]
then
    make clean
    make all
    info "Build finished!"

    ### Make resources
    if [ ! -f "share/khaiii/restore.key" ]
    then
        ### Build resource
        make resource

        info "Resource build finished!"
    else
        echo resource files exist.
    fi
else
    echo "libkhaiii.so.${KHAIII_LATEST} already exists."
fi


if [ "$TRAVIS_OS_NAME" != "linux" ]
then
    make install
fi
from setuptools import setup, find_packages

# Prefer a reST long description converted by pypandoc; fall back to the raw
# Markdown README when pypandoc (or pandoc itself) is unavailable. The content
# type is tracked alongside the text so the package index renders it correctly.
try:
    import pypandoc
    long_description = pypandoc.convert_file('README.md', 'rst', extra_args=['--wrap=none'])
    long_description_content_type = 'text/x-rst'
except (IOError, ImportError):
    # Close the README explicitly instead of leaking the file handle.
    with open('README.md', encoding='UTF-8') as readme:
        long_description = readme.read()
    long_description_content_type = 'text/markdown'


setup(
    name='koalanlp',
    version='2.1.8-SNAPSHOT',
    description='Python wrapper for KoalaNLP',
    long_description=long_description,
    long_description_content_type=long_description_content_type,
    author='koalanlp',
    url='https://koalanlp.github.io/python-support',
    install_requires=["py4j~=0.10", "requests~=2.22", "kss~=2.5.1"],
    packages=find_packages(exclude=["docs", "tests", "doc_source", "scripts"]),
    keywords=['korean', 'natural language processing', 'koalanlp', '한국어 처리', '한국어 분석',
              '형태소', '의존구문', '구문구조', '개체명', '의미역'],
    python_requires='>=3.5',
    package_data={},
    zip_safe=False,
    license="MIT",
    project_urls={
        "Issue Tracker": "https://github.com/koalanlp/python-support/issues",
        "Source Code": "https://github.com/koalanlp/python-support",
    },
    classifiers=[
        'Development Status :: 4 - Beta',
        'License :: OSI Approved :: MIT License',
        'Natural Language :: Korean',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Topic :: Text Processing',
        'Topic :: Text Processing :: General',
        'Topic :: Text Processing :: Linguistic'
    ]
)
def test_init_finalize_tagger():
    """
    Tagging must give the same output before and after a full JVM restart.

    The JVM is started, one sentence is tagged, and the JVM is shut down;
    the cycle is then repeated and the two renderings are compared.
    """
    sample = "하나의 예시 문장입니다."

    def render(active_tagger):
        # Java references change across restarts, so compare rendered text.
        return ''.join(str(sent) for sent in active_tagger(sample))

    # First cycle
    Util.initialize(EUNJEON="LATEST")
    assert is_jvm_running()
    tagger = Tagger(api=API.EUNJEON)
    tagged_before = render(tagger)

    del tagger
    assert Util.finalize()
    assert not is_jvm_running()

    # Second cycle: start again from a clean state
    Util.initialize(EUNJEON="LATEST")
    assert is_jvm_running()

    tagger = Tagger(api=API.EUNJEON)
    tagged_after = render(tagger)

    del tagger
    assert tagged_before == tagged_after
    assert Util.finalize()
def init_and_finalize(port):
    """
    Start a JVM on ``port``, tag one sentence, then shut the JVM down.

    Runs inside a worker process, so failures are reported through the return
    value rather than through assertions.

    :param int port: Port passed to ``Util.initialize``.
    :rtype: int
    :return: 1 when every step succeeded, -1 otherwise.
    """
    Util.initialize(EUNJEON="LATEST", port=port)
    if not is_jvm_running():
        return -1

    tagger = Tagger(api=API.EUNJEON)
    tagger("하나의 예시 문장입니다.")

    del tagger
    if not Util.finalize():
        return -1
    if is_jvm_running():
        return -1

    return 1


def test_init_finalize_tagger():
    """Run the init/finalize cycle in four worker processes over six ports."""
    ports = [51111, 51112, 51113, 51114, 51115, 51116]

    # The context manager terminates the pool on exit, so the workers are not
    # leaked. The lazy iterator must be drained while the pool is still alive.
    with multiprocessing.Pool(4) as pool:
        results = list(pool.imap_unordered(init_and_finalize, ports))

    assert all(res > 0 for res in results)
def test_kiwi(environ):
    """The wrapped tagger must yield the same (surface, tag) pairs as raw Kiwi."""
    _splitter, tagger = environ
    reference = Kiwi()
    reference.prepare()

    for _, line in EXAMPLES:
        wrapped = tagger.tagSentence(line)[0]
        raw = reference.analyze(line)

        # Flatten the wrapped sentence: every morpheme of every word.
        ours = [(morph.surface, morph.originalTag) for word in wrapped for morph in word]
        # First candidate of the raw result; keep the first two fields of each token.
        theirs = [token[:2] for token in raw[0][0]]

        assert ours == theirs
def test_POS_startsWith(jvm):
    """
    ``POS.startsWith`` must agree with a plain, case-insensitive prefix check.

    The only exception: tags reported as unknown must not match the bare
    prefix ``N``.
    """
    # Collect every prefix of every tag name, in upper and lower case.
    prefixes = set()
    for pos in POS.values():
        if pos == POS.TEMP:
            continue

        tag_name = pos.name
        for end in range(1, len(tag_name) + 1):
            prefix = tag_name[:end]
            prefixes.add(prefix)
            prefixes.add(prefix.lower())

    for pos in POS.values():
        if pos == POS.TEMP:
            continue

        unknown = pos.isUnknown()
        for code in prefixes:
            upper = code.upper()
            if unknown and upper == 'N':
                assert not pos.startsWith(code)
            else:
                assert pos.startsWith(code) == pos.name.startswith(upper)
@pytest.fixture(scope="session")
def tagger():
    """
    Session fixture yielding a UTagger-backed ``Tagger``.

    Points UTagger at the library and config under ``$HOME/utagger`` and
    rewrites the relative ``HLX_DIR ../`` entry of the config to that
    directory before the tagger is created. The JVM is finalized afterwards.
    """
    Util.initialize(UTAGGER="LATEST")

    travis_os = os.environ.get('TRAVIS_OS_NAME', 'linux')

    utagger_path = Path(os.environ['HOME'], 'utagger').absolute()
    bin_path = os.path.join(utagger_path, 'bin')
    lib_path = "utagger-win64.dll" if travis_os == 'windows' else 'utagger-ubuntu1804.so'  # No CentOS in travis CI.

    lib_path = os.path.join(bin_path, lib_path)
    config_path = os.path.join(utagger_path, "Hlxcfg.txt")
    UTagger.setPath(lib_path, config_path)

    # Read and rewrite the config inside `with` blocks so that the file is
    # closed (and the new content flushed) before the tagger is created.
    with Path(config_path).open(encoding='euc-kr') as config_file:
        lines = config_file.readlines()
    lines = [it.replace("HLX_DIR ../", "HLX_DIR %s/" % utagger_path) if it.startswith('HLX_DIR') else it
             for it in lines]
    with Path(config_path).open('w+t', encoding='euc-kr') as config_file:
        config_file.writelines(lines)

    tagger = Tagger(API.UTAGGER)
    yield tagger
    del tagger
    Util.finalize()


def test_utagger(tagger):
    """Check paragraph and single-sentence tagging on every example line."""
    for cnt, line in EXAMPLES:
        para = tagger(line)
        assert type(para) is list
        for sent in para:
            compare_sentence(sent, {'WSD': True})

        single = tagger.tagSentence(line)
        assert type(single) is list
        assert len(single) == 1

        compare_sentence(single[0], {'WSD': True})

        if cnt == 1 and len(para) == 1:
            assert len(para) == len(single)
        else:
            # Re-tag each sentence separately; the counts must match.
            singles = tagger.tagSentence(*[sent.surfaceString() for sent in para])
            assert len(para) == len(singles)