├── .coveragerc ├── .editorconfig ├── .flake8 ├── .gitignore ├── .style.yapf ├── .travis.yml ├── AUTHORS.md ├── CONTRIBUTING.md ├── HISTORY.md ├── LICENSE ├── MANIFEST.in ├── Pipfile ├── Pipfile.lock ├── README.md ├── arpa ├── __init__.py ├── api.py ├── exceptions.py ├── models │ ├── __init__.py │ ├── base.py │ └── simple.py └── parsers │ ├── __init__.py │ ├── base.py │ └── quick.py ├── docs ├── Makefile ├── arpa.models.rst ├── arpa.parsers.rst ├── arpa.rst ├── conf.py ├── examples.md ├── index.rst ├── make.bat └── setup.md ├── requirements.txt ├── requirements_dev.txt ├── setup.cfg ├── setup.py └── tests ├── .gitignore ├── data ├── download.py └── download.sh ├── test_arpa.py ├── test_arpa_kenlm.py ├── test_model_base.py ├── test_model_simple.py └── test_parser_base.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | raise NotImplementedError 5 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.md] 12 | trim_trailing_whitespace = false 13 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.tox,.eggs,*.egg,build,docs/conf.py 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / 
packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = pep8 3 | column_limit = 88 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | 
sudo: required 3 | 4 | language: python 5 | 6 | cache: pip 7 | 8 | python: 9 | - 3.4 10 | - 3.5 11 | - 3.6 12 | - 3.7 13 | - nightly 14 | 15 | matrix: 16 | fast_finish: true 17 | allow_failures: 18 | - python: nightly 19 | 20 | install: 21 | - pip install . 22 | - if [[ $TRAVIS_PYTHON_VERSION != 3.7 ]]; then pip install -r requirements.txt && pip install -r requirements_dev.txt; fi 23 | - if [[ $TRAVIS_PYTHON_VERSION == 3.7 ]]; then pip install -U pipenv==2018.11.26 && pipenv sync --dev; fi 24 | 25 | before_script: 26 | - cd tests/data && ./download.sh && cd ../.. 27 | - cd tests/data && python -m download && cd ../.. 28 | 29 | script: 30 | - if [[ $TRAVIS_PYTHON_VERSION == 3.4 ]]; then py.test; fi 31 | - if [[ $TRAVIS_PYTHON_VERSION == 3.5 || $TRAVIS_PYTHON_VERSION == 3.6 || $TRAVIS_PYTHON_VERSION == 3.7 || $TRAVIS_PYTHON_VERSION == nightly ]]; then pytest; fi 32 | - if [[ $TRAVIS_PYTHON_VERSION == 3.7 ]]; then mkdir docs/_static && sphinx-build -nWT -b html -d docs/_build/doctrees docs docs/_build/html; fi 33 | - if [[ $TRAVIS_PYTHON_VERSION == 3.7 ]]; then coverage run --source=arpa setup.py test; fi 34 | 35 | after_success: 36 | - if [[ $TRAVIS_PYTHON_VERSION == 3.7 ]]; then coveralls; fi 37 | 38 | notifications: 39 | on_failure: always 40 | on_success: change 41 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | Credits 2 | ======= 3 | 4 | Development Lead 5 | ---------------- 6 | 7 | - Stefan Fischer <[sfischer13@ymail.com](mailto:sfischer13@ymail.com)> 8 | 9 | Contributors 10 | ------------ 11 | 12 | None yet. Contributions are welcome! 13 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | Contributions are welcome! 
5 | 6 | Dependencies 7 | ------------ 8 | 9 | ```sh 10 | pipenv --rm 11 | pipenv sync 12 | pipenv sync --dev 13 | 14 | pipenv check 15 | pipenv update 16 | pipenv update --dev 17 | ``` 18 | 19 | ```sh 20 | vim requirements.txt 21 | vim requirements_dev.txt 22 | ``` 23 | 24 | Version 25 | ------- 26 | 27 | - `arpa/__init__.py` 28 | - `__date__` 29 | - `__version__` 30 | - `docs/conf.py` 31 | - `release` 32 | - `version` 33 | - `setup.py` 34 | - `version` 35 | - `AUTHORS.md` 36 | - `HISTORY.md` 37 | 38 | Format 39 | ------ 40 | 41 | ```sh 42 | pipenv run yapf -e docs/conf.py -i -r . 43 | ``` 44 | 45 | ```sh 46 | pipenv run flake8 . 47 | ``` 48 | 49 | Documentation 50 | ------------- 51 | 52 | ```sh 53 | cd docs 54 | pipenv run sphinx-apidoc -f -o . ../arpa 55 | ``` 56 | 57 | ```sh 58 | cd docs 59 | pipenv run make html 60 | ``` 61 | 62 | ```sh 63 | cd docs 64 | pipenv run sphinx-build -nWT -b html -d _build/doctrees . _build/html 65 | ``` 66 | 67 | Packaging 68 | --------- 69 | 70 | ```sh 71 | git clean -dxn 72 | git clean -dxf 73 | ``` 74 | 75 | ```sh 76 | pipenv run check-manifest -v 77 | ``` 78 | 79 | ```sh 80 | pipenv run pyroma . 81 | ``` 82 | 83 | Tests 84 | ----- 85 | 86 | ```sh 87 | pipenv run python setup.py test 88 | ``` 89 | 90 | Release 91 | ------- 92 | 93 | ```sh 94 | pipenv run python setup.py check 95 | 96 | pipenv run python setup.py sdist 97 | pipenv run python setup.py bdist_wheel 98 | 99 | pipenv run twine check dist/* 100 | 101 | pipenv run twine upload dist/* 102 | ``` 103 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | History 2 | ======= 3 | 4 | All notable changes to this project will be documented in this file. 5 | This project adheres to [Semantic Versioning](https://semver.org/). 6 | You should [Keep a CHANGELOG](https://keepachangelog.com/), too! 
7 | 8 | [Next Release](https://github.com/sfischer13/python-arpa/compare/0.1.0b4...HEAD) 9 | -------------------------------------------------------------------------------- 10 | 11 | ### Added 12 | 13 | ### Changed 14 | 15 | ### Deprecated 16 | 17 | ### Fixed 18 | 19 | ### Removed 20 | 21 | ### Security 22 | 23 | [0.1.0b4](https://github.com/sfischer13/python-arpa/compare/0.1.0b3...0.1.0b4) - 2018-12-12 24 | ------------------------------------------------------------------------------------------- 25 | 26 | [0.1.0b3](https://github.com/sfischer13/python-arpa/compare/0.1.0b2...0.1.0b3) - 2018-12-06 27 | ------------------------------------------------------------------------------------------- 28 | 29 | [0.1.0b2](https://github.com/sfischer13/python-arpa/compare/0.1.0b1...0.1.0b2) - 2018-04-28 30 | ------------------------------------------------------------------------------------------- 31 | 32 | [0.1.0b1](https://github.com/sfischer13/python-arpa/compare/0.1.0a6...0.1.0b1) - 2015-09-13 33 | ------------------------------------------------------------------------------------------- 34 | 35 | [0.1.0a6](https://github.com/sfischer13/python-arpa/compare/0.1.0a5...0.1.0a6) - 2015-07-24 36 | ------------------------------------------------------------------------------------------- 37 | 38 | [0.1.0a5](https://github.com/sfischer13/python-arpa/compare/0.1.0a4...0.1.0a5) - 2015-07-19 39 | ------------------------------------------------------------------------------------------- 40 | 41 | [0.1.0a4](https://github.com/sfischer13/python-arpa/compare/0.1.0a3...0.1.0a4) - 2015-07-14 42 | ------------------------------------------------------------------------------------------- 43 | 44 | [0.1.0a3](https://github.com/sfischer13/python-arpa/compare/0.1.0a2...0.1.0a3) - 2015-07-10 45 | ------------------------------------------------------------------------------------------- 46 | 47 | [0.1.0a2](https://github.com/sfischer13/python-arpa/compare/0.1.0a1...0.1.0a2) - 2015-07-05 
48 | ------------------------------------------------------------------------------------------- 49 | 50 | 0.1.0a1 - 2015-07-05 51 | -------------------- 52 | 53 | First release on PyPI. 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2015-2018 Stefan Fischer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.md 2 | include CONTRIBUTING.md 3 | include HISTORY.md 4 | include LICENSE 5 | include README.md 6 | include requirements.txt 7 | include requirements_dev.txt 8 | include .coveragerc 9 | include .editorconfig 10 | include .flake8 11 | include .style.yapf 12 | include Pipfile 13 | include Pipfile.lock 14 | 15 | recursive-include arpa *.py 16 | 17 | include docs/conf.py 18 | include docs/Makefile 19 | include docs/make.bat 20 | recursive-include docs *.md 21 | recursive-include docs *.rst 22 | 23 | recursive-include tests * 24 | recursive-exclude tests *.arpa 25 | recursive-exclude tests *.arpa.* 26 | 27 | recursive-exclude * __pycache__ 28 | recursive-exclude * *.py[co] 29 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | check-manifest = "*" 8 | coveralls = "*" 9 | flake8 = "*" 10 | kenlm = {git = "git://github.com/kpu/kenlm.git"} 11 | nltk = "*" 12 | pyroma = "*" 13 | pytest = "*" 14 | pytest-runner = "*" 15 | readme_renderer = {extras = ["md"],version = "*"} 16 | recommonmark = "*" 17 | sphinx = "*" 18 | twine = "*" 19 | yapf = "*" 20 | 21 | [packages] 22 | 23 | [requires] 24 | python_version = "3.8" 25 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "b6a3ba6f07e75428ad0d4bc53597b0973f8dda46caa049c0eb18f30fb72bb14c" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.8" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | 
"url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": {}, 19 | "develop": { 20 | "alabaster": { 21 | "hashes": [ 22 | "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359", 23 | "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02" 24 | ], 25 | "version": "==0.7.12" 26 | }, 27 | "attrs": { 28 | "hashes": [ 29 | "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6", 30 | "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700" 31 | ], 32 | "version": "==20.3.0" 33 | }, 34 | "babel": { 35 | "hashes": [ 36 | "sha256:9d35c22fcc79893c3ecc85ac4a56cde1ecf3f19c540bba0922308a6c06ca6fa5", 37 | "sha256:da031ab54472314f210b0adcff1588ee5d1d1d0ba4dbd07b94dba82bde791e05" 38 | ], 39 | "version": "==2.9.0" 40 | }, 41 | "bleach": { 42 | "hashes": [ 43 | "sha256:6123ddc1052673e52bab52cdc955bcb57a015264a1c57d37bea2f6b817af0125", 44 | "sha256:98b3170739e5e83dd9dc19633f074727ad848cbedb6026708c8ac2d3b697a433" 45 | ], 46 | "version": "==3.3.0" 47 | }, 48 | "build": { 49 | "hashes": [ 50 | "sha256:85123bf327404e68142b1eb2a8298b052e984ad5b12738549688371e6337c73a", 51 | "sha256:88bc8ff6cb948247bebd5b3bf6b8b71d10fd93bce848f9d2fd9b28cbdd40ae8b" 52 | ], 53 | "version": "==0.3.1.post1" 54 | }, 55 | "certifi": { 56 | "hashes": [ 57 | "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", 58 | "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" 59 | ], 60 | "version": "==2020.12.5" 61 | }, 62 | "cffi": { 63 | "hashes": [ 64 | "sha256:005a36f41773e148deac64b08f233873a4d0c18b053d37da83f6af4d9087b813", 65 | "sha256:0857f0ae312d855239a55c81ef453ee8fd24136eaba8e87a2eceba644c0d4c06", 66 | "sha256:1071534bbbf8cbb31b498d5d9db0f274f2f7a865adca4ae429e147ba40f73dea", 67 | "sha256:158d0d15119b4b7ff6b926536763dc0714313aa59e320ddf787502c70c4d4bee", 68 | "sha256:1f436816fc868b098b0d63b8920de7d208c90a67212546d02f84fe78a9c26396", 69 | 
"sha256:2894f2df484ff56d717bead0a5c2abb6b9d2bf26d6960c4604d5c48bbc30ee73", 70 | "sha256:29314480e958fd8aab22e4a58b355b629c59bf5f2ac2492b61e3dc06d8c7a315", 71 | "sha256:34eff4b97f3d982fb93e2831e6750127d1355a923ebaeeb565407b3d2f8d41a1", 72 | "sha256:35f27e6eb43380fa080dccf676dece30bef72e4a67617ffda586641cd4508d49", 73 | "sha256:3d3dd4c9e559eb172ecf00a2a7517e97d1e96de2a5e610bd9b68cea3925b4892", 74 | "sha256:43e0b9d9e2c9e5d152946b9c5fe062c151614b262fda2e7b201204de0b99e482", 75 | "sha256:48e1c69bbacfc3d932221851b39d49e81567a4d4aac3b21258d9c24578280058", 76 | "sha256:51182f8927c5af975fece87b1b369f722c570fe169f9880764b1ee3bca8347b5", 77 | "sha256:58e3f59d583d413809d60779492342801d6e82fefb89c86a38e040c16883be53", 78 | "sha256:5de7970188bb46b7bf9858eb6890aad302577a5f6f75091fd7cdd3ef13ef3045", 79 | "sha256:65fa59693c62cf06e45ddbb822165394a288edce9e276647f0046e1ec26920f3", 80 | "sha256:69e395c24fc60aad6bb4fa7e583698ea6cc684648e1ffb7fe85e3c1ca131a7d5", 81 | "sha256:6c97d7350133666fbb5cf4abdc1178c812cb205dc6f41d174a7b0f18fb93337e", 82 | "sha256:6e4714cc64f474e4d6e37cfff31a814b509a35cb17de4fb1999907575684479c", 83 | "sha256:72d8d3ef52c208ee1c7b2e341f7d71c6fd3157138abf1a95166e6165dd5d4369", 84 | "sha256:8ae6299f6c68de06f136f1f9e69458eae58f1dacf10af5c17353eae03aa0d827", 85 | "sha256:8b198cec6c72df5289c05b05b8b0969819783f9418e0409865dac47288d2a053", 86 | "sha256:99cd03ae7988a93dd00bcd9d0b75e1f6c426063d6f03d2f90b89e29b25b82dfa", 87 | "sha256:9cf8022fb8d07a97c178b02327b284521c7708d7c71a9c9c355c178ac4bbd3d4", 88 | "sha256:9de2e279153a443c656f2defd67769e6d1e4163952b3c622dcea5b08a6405322", 89 | "sha256:9e93e79c2551ff263400e1e4be085a1210e12073a31c2011dbbda14bda0c6132", 90 | "sha256:9ff227395193126d82e60319a673a037d5de84633f11279e336f9c0f189ecc62", 91 | "sha256:a465da611f6fa124963b91bf432d960a555563efe4ed1cc403ba5077b15370aa", 92 | "sha256:ad17025d226ee5beec591b52800c11680fca3df50b8b29fe51d882576e039ee0", 93 | "sha256:afb29c1ba2e5a3736f1c301d9d0abe3ec8b86957d04ddfa9d7a6a42b9367e396", 94 | 
"sha256:b85eb46a81787c50650f2392b9b4ef23e1f126313b9e0e9013b35c15e4288e2e", 95 | "sha256:bb89f306e5da99f4d922728ddcd6f7fcebb3241fc40edebcb7284d7514741991", 96 | "sha256:cbde590d4faaa07c72bf979734738f328d239913ba3e043b1e98fe9a39f8b2b6", 97 | "sha256:cd2868886d547469123fadc46eac7ea5253ea7fcb139f12e1dfc2bbd406427d1", 98 | "sha256:d42b11d692e11b6634f7613ad8df5d6d5f8875f5d48939520d351007b3c13406", 99 | "sha256:f2d45f97ab6bb54753eab54fffe75aaf3de4ff2341c9daee1987ee1837636f1d", 100 | "sha256:fd78e5fee591709f32ef6edb9a015b4aa1a5022598e36227500c8f4e02328d9c" 101 | ], 102 | "version": "==1.14.5" 103 | }, 104 | "chardet": { 105 | "hashes": [ 106 | "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", 107 | "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" 108 | ], 109 | "version": "==4.0.0" 110 | }, 111 | "check-manifest": { 112 | "hashes": [ 113 | "sha256:5895e42a012989bdc51854a02c82c8d6898112a4ab11f2d7878200520b49d428", 114 | "sha256:b59b0e7c7ed3946537677c9ab9b2c2cb7be9b1807fd40bc4dfc1eef31d42cff5" 115 | ], 116 | "index": "pypi", 117 | "version": "==0.46" 118 | }, 119 | "click": { 120 | "hashes": [ 121 | "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", 122 | "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc" 123 | ], 124 | "version": "==7.1.2" 125 | }, 126 | "cmarkgfm": { 127 | "hashes": [ 128 | "sha256:00c58c885742519fccdc495b583c875c2db1d3dd26ca805779d6ad476a774df1", 129 | "sha256:045ce291b1ee31616ee05665bea065d63d28516c91404d687d7df04a1722012c", 130 | "sha256:0854dc5719b7dfb67455a5a24d461f86c654fd611f43d693ffb74fe1f18ec113", 131 | "sha256:08e3ccf04dcae7443e6bc253605df0ffc719256f04a700de7b99cd0c126bd819", 132 | "sha256:0b5c3a5fdca50151995f56f9559aef718e274c43948823acebabd66e113e7b3f", 133 | "sha256:0c3e8a62c5879019d6ccd24c158eda5c0ee18b215c02a6093cf1780ad127b239", 134 | "sha256:0d44b102a5cb4dd135165d7e681fd9131aeae020dd20b75b68038f26bf41df59", 135 | 
"sha256:0ec879872e2b33c19cc15bdbfd1fdad52be13a27b7e2586fcc4423307a4e834c", 136 | "sha256:18ac6415a362b56348cec151e21c73639096bda173757595a2f416bc71081780", 137 | "sha256:21ec7c0933abd40cf949c783f184cb81856f0a3e593393f9702883ecba45f345", 138 | "sha256:3f6f4603837efcc46051c084915658f5cb8761d0655cfdefe5b010f750fb7400", 139 | "sha256:41cecbe887e9ed5fdc5bbba9ef47f2da4a07ce42513c282a137d538e160fb7ad", 140 | "sha256:43ffcbdc06c3c87d5bf208b21c4ae2dea07d3a62bcd7c5b023ed7cf25474c6e0", 141 | "sha256:447da4b88a84e847e29f4e87276b1e3b397dfcc22245c2f3448be6ef34d5c07e", 142 | "sha256:4642b424af8a9da2d9e4eddc3a3fbf1f2e5980cbf28c250f008a51c31aab6cdf", 143 | "sha256:4897c194740c4839a735d701f17756c015f56fbfe2c39bd9954374ffad87c88f", 144 | "sha256:4b43eb944de4952b45ecd39fa76e097573c643dee21541a9e24561d3aafe4694", 145 | "sha256:4d3b78b6ddc1d97d9eb3769e3585207a85d4c37fc9a62cc0248655fe79e281d2", 146 | "sha256:560627a30275f97c3e805686e4523d6298cae8b18ec5f4ff9046679289d8421a", 147 | "sha256:57457bf42c567c832b7dd99d6c6ff2a0cf99d7cddb0ec4667939a02cfad7da02", 148 | "sha256:5f6911a093dd8d89f482230adf30c004394eb612a85245beb74def05731c29c5", 149 | "sha256:676b17a1f9997900540181efabff11c00b92964bc93f1e0113beec69b805b5c6", 150 | "sha256:6b01d232f0577ec0727cc4cad9d66fec9a67389274cd4136ff3198ca1e40d251", 151 | "sha256:6ef2fe5b0f3a243b7b6edb8b5416f3fce3d853b103b5a970dc9a8c8b6ced79fb", 152 | "sha256:7c58046589f5bfb038ed8441519935dc2ceb36a25c9324eaa6a63433bed4482a", 153 | "sha256:811321cc377ae085309943f04a6dcc93ebe0d07ebf9ca0e91bb916ad3a253429", 154 | "sha256:8aaca2028746e526af1072d15c4ef0e7433fdc73ff2d1efb6df64ad9e91c0aa2", 155 | "sha256:8d496dccaeb950774bf1bf3e3164efef9804dc431c7bb87f977239931b71c239", 156 | "sha256:8d731933dec20240d697ba36a1fdae5e77158d0736fb972dab40030e97a1afa5", 157 | "sha256:9ecf43d6845da0349f5563f3587208b8afbbd5438178600e1a51a572bd3afc14", 158 | "sha256:a220ffba6181cb9343dbdd7275d61eea6828e4b6790f942d7c96fe444cf19027", 159 | 
"sha256:b4ea342a4e25750cd71245c4f9a39097a8c320dfc98d2482fe486aa9b091a629", 160 | "sha256:b6a549aba327abd986d6748cf21c8637d741c76850e7fefbdef8d28bfe138e32", 161 | "sha256:b817e5e269449307c6db16d8afbfd0273960bc119d3289559ca8909a45a68069", 162 | "sha256:b903508505f27d5721908fc2801a5cc3e72f561902106d6921440c83338be66c", 163 | "sha256:bc9a487a1dcfa8987c557dc056c308d32512c1ac271c788b49db130952558118", 164 | "sha256:bf95571ab17ba7369e64749bef48e394a479f8f5d512480845f8855d1673dbbb", 165 | "sha256:c1ba7c7b03a4afd0cc6cb64378775ca17b8c8a90ecd9f0a98224bce763864452", 166 | "sha256:c4e78bec8b0010f5d4e844466a9ecdd2401b8fad1b646ecda63697d47607c824", 167 | "sha256:c71ca515e938beb4fd441499c07d972032ef187db11f2374c060198b6fcafe07", 168 | "sha256:d025fd97e457a26c0d6008bb46b02d0e593975d9f5af10b1f686eeeb969440f5", 169 | "sha256:d17ec96c0e986d8818b94a97b13745d59ef55ac03d632cd3310a7e887ef07815", 170 | "sha256:d81bd5a5a2ea2df15bceb2b8b16ec759ac27bcaf512b11226e467f395c50f71e", 171 | "sha256:db874490723a66be40c1c86d3bf510011753eeab6c6e07a8131730c9beffa5fa", 172 | "sha256:e476706926a5a1f1f71a25287d9004126d8a177e2ca39f7a1c8e1ca7f7c75c4b", 173 | "sha256:e716f068538d4fa5a28cf9320cd7211bf6622229e57b04b54a2775ef129d2d45", 174 | "sha256:e9f382fc10ca5748c619ebb1d249e215e5479eb2c474030d254d32db18769ac9", 175 | "sha256:f7521afa0e1daf65a30c1830c35eeeb56605dc778a22b3d23236863634fa8b0b", 176 | "sha256:fa91bd1f01fa40bea9b418a66b15d54d330f2c58c78f0e03ae767922d6e6dab0", 177 | "sha256:ffc4e244ea2e752b03f2d82eb40ad87b2c0faac28db21223e91b29f61c8580c3" 178 | ], 179 | "version": "==0.5.3" 180 | }, 181 | "colorama": { 182 | "hashes": [ 183 | "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b", 184 | "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2" 185 | ], 186 | "version": "==0.4.4" 187 | }, 188 | "commonmark": { 189 | "hashes": [ 190 | "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60", 191 | 
"sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9" 192 | ], 193 | "version": "==0.9.1" 194 | }, 195 | "coverage": { 196 | "hashes": [ 197 | "sha256:004d1880bed2d97151facef49f08e255a20ceb6f9432df75f4eef018fdd5a78c", 198 | "sha256:01d84219b5cdbfc8122223b39a954820929497a1cb1422824bb86b07b74594b6", 199 | "sha256:040af6c32813fa3eae5305d53f18875bedd079960822ef8ec067a66dd8afcd45", 200 | "sha256:06191eb60f8d8a5bc046f3799f8a07a2d7aefb9504b0209aff0b47298333302a", 201 | "sha256:13034c4409db851670bc9acd836243aeee299949bd5673e11844befcb0149f03", 202 | "sha256:13c4ee887eca0f4c5a247b75398d4114c37882658300e153113dafb1d76de529", 203 | "sha256:184a47bbe0aa6400ed2d41d8e9ed868b8205046518c52464fde713ea06e3a74a", 204 | "sha256:18ba8bbede96a2c3dde7b868de9dcbd55670690af0988713f0603f037848418a", 205 | "sha256:1aa846f56c3d49205c952d8318e76ccc2ae23303351d9270ab220004c580cfe2", 206 | "sha256:217658ec7187497e3f3ebd901afdca1af062b42cfe3e0dafea4cced3983739f6", 207 | "sha256:24d4a7de75446be83244eabbff746d66b9240ae020ced65d060815fac3423759", 208 | "sha256:2910f4d36a6a9b4214bb7038d537f015346f413a975d57ca6b43bf23d6563b53", 209 | "sha256:2949cad1c5208b8298d5686d5a85b66aae46d73eec2c3e08c817dd3513e5848a", 210 | "sha256:2a3859cb82dcbda1cfd3e6f71c27081d18aa251d20a17d87d26d4cd216fb0af4", 211 | "sha256:2cafbbb3af0733db200c9b5f798d18953b1a304d3f86a938367de1567f4b5bff", 212 | "sha256:2e0d881ad471768bf6e6c2bf905d183543f10098e3b3640fc029509530091502", 213 | "sha256:30c77c1dc9f253283e34c27935fded5015f7d1abe83bc7821680ac444eaf7793", 214 | "sha256:3487286bc29a5aa4b93a072e9592f22254291ce96a9fbc5251f566b6b7343cdb", 215 | "sha256:372da284cfd642d8e08ef606917846fa2ee350f64994bebfbd3afb0040436905", 216 | "sha256:41179b8a845742d1eb60449bdb2992196e211341818565abded11cfa90efb821", 217 | "sha256:44d654437b8ddd9eee7d1eaee28b7219bec228520ff809af170488fd2fed3e2b", 218 | "sha256:4a7697d8cb0f27399b0e393c0b90f0f1e40c82023ea4d45d22bce7032a5d7b81", 219 | 
"sha256:51cb9476a3987c8967ebab3f0fe144819781fca264f57f89760037a2ea191cb0", 220 | "sha256:52596d3d0e8bdf3af43db3e9ba8dcdaac724ba7b5ca3f6358529d56f7a166f8b", 221 | "sha256:53194af30d5bad77fcba80e23a1441c71abfb3e01192034f8246e0d8f99528f3", 222 | "sha256:5fec2d43a2cc6965edc0bb9e83e1e4b557f76f843a77a2496cbe719583ce8184", 223 | "sha256:6c90e11318f0d3c436a42409f2749ee1a115cd8b067d7f14c148f1ce5574d701", 224 | "sha256:74d881fc777ebb11c63736622b60cb9e4aee5cace591ce274fb69e582a12a61a", 225 | "sha256:7501140f755b725495941b43347ba8a2777407fc7f250d4f5a7d2a1050ba8e82", 226 | "sha256:796c9c3c79747146ebd278dbe1e5c5c05dd6b10cc3bcb8389dfdf844f3ead638", 227 | "sha256:869a64f53488f40fa5b5b9dcb9e9b2962a66a87dab37790f3fcfb5144b996ef5", 228 | "sha256:8963a499849a1fc54b35b1c9f162f4108017b2e6db2c46c1bed93a72262ed083", 229 | "sha256:8d0a0725ad7c1a0bcd8d1b437e191107d457e2ec1084b9f190630a4fb1af78e6", 230 | "sha256:900fbf7759501bc7807fd6638c947d7a831fc9fdf742dc10f02956ff7220fa90", 231 | "sha256:92b017ce34b68a7d67bd6d117e6d443a9bf63a2ecf8567bb3d8c6c7bc5014465", 232 | "sha256:970284a88b99673ccb2e4e334cfb38a10aab7cd44f7457564d11898a74b62d0a", 233 | "sha256:972c85d205b51e30e59525694670de6a8a89691186012535f9d7dbaa230e42c3", 234 | "sha256:9a1ef3b66e38ef8618ce5fdc7bea3d9f45f3624e2a66295eea5e57966c85909e", 235 | "sha256:af0e781009aaf59e25c5a678122391cb0f345ac0ec272c7961dc5455e1c40066", 236 | "sha256:b6d534e4b2ab35c9f93f46229363e17f63c53ad01330df9f2d6bd1187e5eaacf", 237 | "sha256:b7895207b4c843c76a25ab8c1e866261bcfe27bfaa20c192de5190121770672b", 238 | "sha256:c0891a6a97b09c1f3e073a890514d5012eb256845c451bd48f7968ef939bf4ae", 239 | "sha256:c2723d347ab06e7ddad1a58b2a821218239249a9e4365eaff6649d31180c1669", 240 | "sha256:d1f8bf7b90ba55699b3a5e44930e93ff0189aa27186e96071fac7dd0d06a1873", 241 | "sha256:d1f9ce122f83b2305592c11d64f181b87153fc2c2bbd3bb4a3dde8303cfb1a6b", 242 | "sha256:d314ed732c25d29775e84a960c3c60808b682c08d86602ec2c3008e1202e3bb6", 243 | 
"sha256:d636598c8305e1f90b439dbf4f66437de4a5e3c31fdf47ad29542478c8508bbb", 244 | "sha256:deee1077aae10d8fa88cb02c845cfba9b62c55e1183f52f6ae6a2df6a2187160", 245 | "sha256:ebe78fe9a0e874362175b02371bdfbee64d8edc42a044253ddf4ee7d3c15212c", 246 | "sha256:f030f8873312a16414c0d8e1a1ddff2d3235655a2174e3648b4fa66b3f2f1079", 247 | "sha256:f0b278ce10936db1a37e6954e15a3730bea96a0997c26d7fee88e6c396c2086d", 248 | "sha256:f11642dddbb0253cc8853254301b51390ba0081750a8ac03f20ea8103f0c56b6" 249 | ], 250 | "version": "==5.5" 251 | }, 252 | "coveralls": { 253 | "hashes": [ 254 | "sha256:7bd173b3425733661ba3063c88f180127cc2b20e9740686f86d2622b31b41385", 255 | "sha256:cbb942ae5ef3d2b55388cb5b43e93a269544911535f1e750e1c656aef019ce60" 256 | ], 257 | "index": "pypi", 258 | "version": "==3.0.1" 259 | }, 260 | "cryptography": { 261 | "hashes": [ 262 | "sha256:0f1212a66329c80d68aeeb39b8a16d54ef57071bf22ff4e521657b27372e327d", 263 | "sha256:1e056c28420c072c5e3cb36e2b23ee55e260cb04eee08f702e0edfec3fb51959", 264 | "sha256:240f5c21aef0b73f40bb9f78d2caff73186700bf1bc6b94285699aff98cc16c6", 265 | "sha256:26965837447f9c82f1855e0bc8bc4fb910240b6e0d16a664bb722df3b5b06873", 266 | "sha256:37340614f8a5d2fb9aeea67fd159bfe4f5f4ed535b1090ce8ec428b2f15a11f2", 267 | "sha256:3d10de8116d25649631977cb37da6cbdd2d6fa0e0281d014a5b7d337255ca713", 268 | "sha256:3d8427734c781ea5f1b41d6589c293089704d4759e34597dce91014ac125aad1", 269 | "sha256:7ec5d3b029f5fa2b179325908b9cd93db28ab7b85bb6c1db56b10e0b54235177", 270 | "sha256:8e56e16617872b0957d1c9742a3f94b43533447fd78321514abbe7db216aa250", 271 | "sha256:de4e5f7f68220d92b7637fc99847475b59154b7a1b3868fb7385337af54ac9ca", 272 | "sha256:eb8cc2afe8b05acbd84a43905832ec78e7b3873fb124ca190f574dca7389a87d", 273 | "sha256:ee77aa129f481be46f8d92a1a7db57269a2f23052d5f2433b4621bb457081cc9" 274 | ], 275 | "version": "==3.4.7" 276 | }, 277 | "docopt": { 278 | "hashes": [ 279 | "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" 280 | ], 281 | "version": "==0.6.2" 
282 | }, 283 | "docutils": { 284 | "hashes": [ 285 | "sha256:a71042bb7207c03d5647f280427f14bfbd1a65c9eb84f4b341d85fafb6bb4bdf", 286 | "sha256:e2ffeea817964356ba4470efba7c2f42b6b0de0b04e66378507e3e2504bbff4c" 287 | ], 288 | "version": "==0.17" 289 | }, 290 | "flake8": { 291 | "hashes": [ 292 | "sha256:12d05ab02614b6aee8df7c36b97d1a3b2372761222b19b58621355e82acddcff", 293 | "sha256:78873e372b12b093da7b5e5ed302e8ad9e988b38b063b61ad937f26ca58fc5f0" 294 | ], 295 | "index": "pypi", 296 | "version": "==3.9.0" 297 | }, 298 | "idna": { 299 | "hashes": [ 300 | "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", 301 | "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" 302 | ], 303 | "version": "==2.10" 304 | }, 305 | "imagesize": { 306 | "hashes": [ 307 | "sha256:6965f19a6a2039c7d48bca7dba2473069ff854c36ae6f19d2cde309d998228a1", 308 | "sha256:b1f6b5a4eab1f73479a50fb79fcf729514a900c341d8503d62a62dbc4127a2b1" 309 | ], 310 | "version": "==1.2.0" 311 | }, 312 | "importlib-metadata": { 313 | "hashes": [ 314 | "sha256:c9db46394197244adf2f0b08ec5bc3cf16757e9590b02af1fca085c16c0d600a", 315 | "sha256:d2d46ef77ffc85cbf7dac7e81dd663fde71c45326131bea8033b9bad42268ebe" 316 | ], 317 | "version": "==3.10.0" 318 | }, 319 | "iniconfig": { 320 | "hashes": [ 321 | "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", 322 | "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" 323 | ], 324 | "version": "==1.1.1" 325 | }, 326 | "jeepney": { 327 | "hashes": [ 328 | "sha256:7d59b6622675ca9e993a6bd38de845051d315f8b0c72cca3aef733a20b648657", 329 | "sha256:aec56c0eb1691a841795111e184e13cad504f7703b9a64f63020816afa79a8ae" 330 | ], 331 | "version": "==0.6.0" 332 | }, 333 | "jinja2": { 334 | "hashes": [ 335 | "sha256:03e47ad063331dd6a3f04a43eddca8a966a26ba0c5b7207a9a9e4e08f1b29419", 336 | "sha256:a6d58433de0ae800347cab1fa3043cebbabe8baa9d29e668f1c768cb87a333c6" 337 | ], 338 | "version": "==2.11.3" 339 | }, 340 | 
"joblib": { 341 | "hashes": [ 342 | "sha256:9c17567692206d2f3fb9ecf5e991084254fe631665c450b443761c4186a613f7", 343 | "sha256:feeb1ec69c4d45129954f1b7034954241eedfd6ba39b5e9e4b6883be3332d5e5" 344 | ], 345 | "version": "==1.0.1" 346 | }, 347 | "kenlm": { 348 | "git": "git://github.com/kpu/kenlm.git" 349 | }, 350 | "keyring": { 351 | "hashes": [ 352 | "sha256:045703609dd3fccfcdb27da201684278823b72af515aedec1a8515719a038cb8", 353 | "sha256:8f607d7d1cc502c43a932a275a56fe47db50271904513a379d39df1af277ac48" 354 | ], 355 | "version": "==23.0.1" 356 | }, 357 | "markupsafe": { 358 | "hashes": [ 359 | "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", 360 | "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", 361 | "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", 362 | "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", 363 | "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42", 364 | "sha256:195d7d2c4fbb0ee8139a6cf67194f3973a6b3042d742ebe0a9ed36d8b6f0c07f", 365 | "sha256:22c178a091fc6630d0d045bdb5992d2dfe14e3259760e713c490da5323866c39", 366 | "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", 367 | "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", 368 | "sha256:2beec1e0de6924ea551859edb9e7679da6e4870d32cb766240ce17e0a0ba2014", 369 | "sha256:3b8a6499709d29c2e2399569d96719a1b21dcd94410a586a18526b143ec8470f", 370 | "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", 371 | "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", 372 | "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", 373 | "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", 374 | "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b", 375 | "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", 376 | 
"sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15", 377 | "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", 378 | "sha256:6f1e273a344928347c1290119b493a1f0303c52f5a5eae5f16d74f48c15d4a85", 379 | "sha256:6fffc775d90dcc9aed1b89219549b329a9250d918fd0b8fa8d93d154918422e1", 380 | "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", 381 | "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", 382 | "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", 383 | "sha256:7fed13866cf14bba33e7176717346713881f56d9d2bcebab207f7a036f41b850", 384 | "sha256:84dee80c15f1b560d55bcfe6d47b27d070b4681c699c572af2e3c7cc90a3b8e0", 385 | "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", 386 | "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", 387 | "sha256:98bae9582248d6cf62321dcb52aaf5d9adf0bad3b40582925ef7c7f0ed85fceb", 388 | "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", 389 | "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", 390 | "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", 391 | "sha256:a6a744282b7718a2a62d2ed9d993cad6f5f585605ad352c11de459f4108df0a1", 392 | "sha256:acf08ac40292838b3cbbb06cfe9b2cb9ec78fce8baca31ddb87aaac2e2dc3bc2", 393 | "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", 394 | "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", 395 | "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", 396 | "sha256:b1dba4527182c95a0db8b6060cc98ac49b9e2f5e64320e2b56e47cb2831978c7", 397 | "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", 398 | "sha256:b7d644ddb4dbd407d31ffb699f1d140bc35478da613b441c582aeb7c43838dd8", 399 | "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", 400 | 
"sha256:bf5aa3cbcfdf57fa2ee9cd1822c862ef23037f5c832ad09cfea57fa846dec193", 401 | "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", 402 | "sha256:caabedc8323f1e93231b52fc32bdcde6db817623d33e100708d9a68e1f53b26b", 403 | "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", 404 | "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2", 405 | "sha256:d53bc011414228441014aa71dbec320c66468c1030aae3a6e29778a3382d96e5", 406 | "sha256:d73a845f227b0bfe8a7455ee623525ee656a9e2e749e4742706d80a6065d5e2c", 407 | "sha256:d9be0ba6c527163cbed5e0857c451fcd092ce83947944d6c14bc95441203f032", 408 | "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7", 409 | "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be", 410 | "sha256:feb7b34d6325451ef96bc0e36e1a6c0c1c64bc1fbec4b854f4529e51887b1621" 411 | ], 412 | "version": "==1.1.1" 413 | }, 414 | "mccabe": { 415 | "hashes": [ 416 | "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", 417 | "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" 418 | ], 419 | "version": "==0.6.1" 420 | }, 421 | "nltk": { 422 | "hashes": [ 423 | "sha256:1235660f52ab10fda34d5277096724747f767b2903e1c0c4e14bde013552c9ba", 424 | "sha256:cbc2ed576998fcf7cd181eeb3ca029e5f0025b264074b4beb57ce780673f8b86" 425 | ], 426 | "index": "pypi", 427 | "version": "==3.6.1" 428 | }, 429 | "packaging": { 430 | "hashes": [ 431 | "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5", 432 | "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a" 433 | ], 434 | "version": "==20.9" 435 | }, 436 | "pep517": { 437 | "hashes": [ 438 | "sha256:ac59f3f6b9726a49e15a649474539442cf76e0697e39df4869d25e68e880931b", 439 | "sha256:eba39d201ef937584ad3343df3581069085bacc95454c80188291d5b3ac7a249" 440 | ], 441 | "version": "==0.10.0" 442 | }, 443 | "pkginfo": { 444 | "hashes": [ 445 | 
"sha256:029a70cb45c6171c329dfc890cde0879f8c52d6f3922794796e06f577bb03db4", 446 | "sha256:9fdbea6495622e022cc72c2e5e1b735218e4ffb2a2a69cde2694a6c1f16afb75" 447 | ], 448 | "version": "==1.7.0" 449 | }, 450 | "pluggy": { 451 | "hashes": [ 452 | "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0", 453 | "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d" 454 | ], 455 | "version": "==0.13.1" 456 | }, 457 | "py": { 458 | "hashes": [ 459 | "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3", 460 | "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a" 461 | ], 462 | "version": "==1.10.0" 463 | }, 464 | "pycodestyle": { 465 | "hashes": [ 466 | "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068", 467 | "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef" 468 | ], 469 | "version": "==2.7.0" 470 | }, 471 | "pycparser": { 472 | "hashes": [ 473 | "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0", 474 | "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705" 475 | ], 476 | "version": "==2.20" 477 | }, 478 | "pyflakes": { 479 | "hashes": [ 480 | "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3", 481 | "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db" 482 | ], 483 | "version": "==2.3.1" 484 | }, 485 | "pygments": { 486 | "hashes": [ 487 | "sha256:2656e1a6edcdabf4275f9a3640db59fd5de107d88e8663c5d4e9a0fa62f77f94", 488 | "sha256:534ef71d539ae97d4c3a4cf7d6f110f214b0e687e92f9cb9d2a3b0d3101289c8" 489 | ], 490 | "version": "==2.8.1" 491 | }, 492 | "pyparsing": { 493 | "hashes": [ 494 | "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", 495 | "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" 496 | ], 497 | "version": "==2.4.7" 498 | }, 499 | "pyroma": { 500 | "hashes": [ 501 | 
"sha256:45ad8201da9a813b5597bb85c80bbece93af9ec89170fc2be5ad85fa9463cef1", 502 | "sha256:a97e116b6e9f4ca4b66bdd530c9a18c1db99d4400f6eead8d9297b9205640bef" 503 | ], 504 | "index": "pypi", 505 | "version": "==3.1" 506 | }, 507 | "pytest": { 508 | "hashes": [ 509 | "sha256:671238a46e4df0f3498d1c3270e5deb9b32d25134c99b7d75370a68cfbe9b634", 510 | "sha256:6ad9c7bdf517a808242b998ac20063c41532a570d088d77eec1ee12b0b5574bc" 511 | ], 512 | "index": "pypi", 513 | "version": "==6.2.3" 514 | }, 515 | "pytest-runner": { 516 | "hashes": [ 517 | "sha256:448959d9ada752de2b369cf05c1c0f9e6d2027e7d32441187c16c24c1d4d6e77", 518 | "sha256:ca3f58ff4957e8be6c54c55d575b235725cbbcf4dc0d5091c29c6444cfc8a5fe" 519 | ], 520 | "index": "pypi", 521 | "version": "==5.3.0" 522 | }, 523 | "pytz": { 524 | "hashes": [ 525 | "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da", 526 | "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798" 527 | ], 528 | "version": "==2021.1" 529 | }, 530 | "readme-renderer": { 531 | "hashes": [ 532 | "sha256:63b4075c6698fcfa78e584930f07f39e05d46f3ec97f65006e430b595ca6348c", 533 | "sha256:92fd5ac2bf8677f310f3303aa4bce5b9d5f9f2094ab98c29f13791d7b805a3db" 534 | ], 535 | "index": "pypi", 536 | "version": "==29.0" 537 | }, 538 | "recommonmark": { 539 | "hashes": [ 540 | "sha256:1b1db69af0231efce3fa21b94ff627ea33dee7079a01dd0a7f8482c3da148b3f", 541 | "sha256:bdb4db649f2222dcd8d2d844f0006b958d627f732415d399791ee436a3686d67" 542 | ], 543 | "index": "pypi", 544 | "version": "==0.7.1" 545 | }, 546 | "regex": { 547 | "hashes": [ 548 | "sha256:01afaf2ec48e196ba91b37451aa353cb7eda77efe518e481707e0515025f0cd5", 549 | "sha256:11d773d75fa650cd36f68d7ca936e3c7afaae41b863b8c387a22aaa78d3c5c79", 550 | "sha256:18c071c3eb09c30a264879f0d310d37fe5d3a3111662438889ae2eb6fc570c31", 551 | "sha256:1e1c20e29358165242928c2de1482fb2cf4ea54a6a6dea2bd7a0e0d8ee321500", 552 | "sha256:281d2fd05555079448537fe108d79eb031b403dac622621c78944c235f3fcf11", 553 | 
"sha256:314d66636c494ed9c148a42731b3834496cc9a2c4251b1661e40936814542b14", 554 | "sha256:32e65442138b7b76dd8173ffa2cf67356b7bc1768851dded39a7a13bf9223da3", 555 | "sha256:339456e7d8c06dd36a22e451d58ef72cef293112b559010db3d054d5560ef439", 556 | "sha256:3916d08be28a1149fb97f7728fca1f7c15d309a9f9682d89d79db75d5e52091c", 557 | "sha256:3a9cd17e6e5c7eb328517969e0cb0c3d31fd329298dd0c04af99ebf42e904f82", 558 | "sha256:47bf5bf60cf04d72bf6055ae5927a0bd9016096bf3d742fa50d9bf9f45aa0711", 559 | "sha256:4c46e22a0933dd783467cf32b3516299fb98cfebd895817d685130cc50cd1093", 560 | "sha256:4c557a7b470908b1712fe27fb1ef20772b78079808c87d20a90d051660b1d69a", 561 | "sha256:52ba3d3f9b942c49d7e4bc105bb28551c44065f139a65062ab7912bef10c9afb", 562 | "sha256:563085e55b0d4fb8f746f6a335893bda5c2cef43b2f0258fe1020ab1dd874df8", 563 | "sha256:598585c9f0af8374c28edd609eb291b5726d7cbce16be6a8b95aa074d252ee17", 564 | "sha256:619d71c59a78b84d7f18891fe914446d07edd48dc8328c8e149cbe0929b4e000", 565 | "sha256:67bdb9702427ceddc6ef3dc382455e90f785af4c13d495f9626861763ee13f9d", 566 | "sha256:6d1b01031dedf2503631d0903cb563743f397ccaf6607a5e3b19a3d76fc10480", 567 | "sha256:741a9647fcf2e45f3a1cf0e24f5e17febf3efe8d4ba1281dcc3aa0459ef424dc", 568 | "sha256:7c2a1af393fcc09e898beba5dd59196edaa3116191cc7257f9224beaed3e1aa0", 569 | "sha256:7d9884d86dd4dd489e981d94a65cd30d6f07203d90e98f6f657f05170f6324c9", 570 | "sha256:90f11ff637fe8798933fb29f5ae1148c978cccb0452005bf4c69e13db951e765", 571 | "sha256:919859aa909429fb5aa9cf8807f6045592c85ef56fdd30a9a3747e513db2536e", 572 | "sha256:96fcd1888ab4d03adfc9303a7b3c0bd78c5412b2bfbe76db5b56d9eae004907a", 573 | "sha256:97f29f57d5b84e73fbaf99ab3e26134e6687348e95ef6b48cfd2c06807005a07", 574 | "sha256:980d7be47c84979d9136328d882f67ec5e50008681d94ecc8afa8a65ed1f4a6f", 575 | "sha256:a91aa8619b23b79bcbeb37abe286f2f408d2f2d6f29a17237afda55bb54e7aac", 576 | "sha256:ade17eb5d643b7fead300a1641e9f45401c98eee23763e9ed66a43f92f20b4a7", 577 | 
"sha256:b9c3db21af35e3b3c05764461b262d6f05bbca08a71a7849fd79d47ba7bc33ed", 578 | "sha256:bd28bc2e3a772acbb07787c6308e00d9626ff89e3bfcdebe87fa5afbfdedf968", 579 | "sha256:bf5824bfac591ddb2c1f0a5f4ab72da28994548c708d2191e3b87dd207eb3ad7", 580 | "sha256:c0502c0fadef0d23b128605d69b58edb2c681c25d44574fc673b0e52dce71ee2", 581 | "sha256:c38c71df845e2aabb7fb0b920d11a1b5ac8526005e533a8920aea97efb8ec6a4", 582 | "sha256:ce15b6d103daff8e9fee13cf7f0add05245a05d866e73926c358e871221eae87", 583 | "sha256:d3029c340cfbb3ac0a71798100ccc13b97dddf373a4ae56b6a72cf70dfd53bc8", 584 | "sha256:e512d8ef5ad7b898cdb2d8ee1cb09a8339e4f8be706d27eaa180c2f177248a10", 585 | "sha256:e8e5b509d5c2ff12f8418006d5a90e9436766133b564db0abaec92fd27fcee29", 586 | "sha256:ee54ff27bf0afaf4c3b3a62bcd016c12c3fdb4ec4f413391a90bd38bc3624605", 587 | "sha256:fa4537fb4a98fe8fde99626e4681cc644bdcf2a795038533f9f711513a862ae6", 588 | "sha256:fd45ff9293d9274c5008a2054ecef86a9bfe819a67c7be1afb65e69b405b3042" 589 | ], 590 | "version": "==2021.4.4" 591 | }, 592 | "requests": { 593 | "hashes": [ 594 | "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", 595 | "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" 596 | ], 597 | "version": "==2.25.1" 598 | }, 599 | "requests-toolbelt": { 600 | "hashes": [ 601 | "sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f", 602 | "sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0" 603 | ], 604 | "version": "==0.9.1" 605 | }, 606 | "rfc3986": { 607 | "hashes": [ 608 | "sha256:112398da31a3344dc25dbf477d8df6cb34f9278a94fee2625d89e4514be8bb9d", 609 | "sha256:af9147e9aceda37c91a05f4deb128d4b4b49d6b199775fd2d2927768abdc8f50" 610 | ], 611 | "version": "==1.4.0" 612 | }, 613 | "secretstorage": { 614 | "hashes": [ 615 | "sha256:422d82c36172d88d6a0ed5afdec956514b189ddbfb72fefab0c8a1cee4eaf71f", 616 | "sha256:fd666c51a6bf200643495a04abb261f83229dcb6fd8472ec393df7ffc8b6f195" 617 | ], 618 | "markers": "sys_platform 
== 'linux'", 619 | "version": "==3.3.1" 620 | }, 621 | "six": { 622 | "hashes": [ 623 | "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", 624 | "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" 625 | ], 626 | "version": "==1.15.0" 627 | }, 628 | "snowballstemmer": { 629 | "hashes": [ 630 | "sha256:b51b447bea85f9968c13b650126a888aabd4cb4463fca868ec596826325dedc2", 631 | "sha256:e997baa4f2e9139951b6f4c631bad912dfd3c792467e2f03d7239464af90e914" 632 | ], 633 | "version": "==2.1.0" 634 | }, 635 | "sphinx": { 636 | "hashes": [ 637 | "sha256:3f01732296465648da43dec8fb40dc451ba79eb3e2cc5c6d79005fd98197107d", 638 | "sha256:ce9c228456131bab09a3d7d10ae58474de562a6f79abb3dc811ae401cf8c1abc" 639 | ], 640 | "index": "pypi", 641 | "version": "==3.5.3" 642 | }, 643 | "sphinxcontrib-applehelp": { 644 | "hashes": [ 645 | "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a", 646 | "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58" 647 | ], 648 | "version": "==1.0.2" 649 | }, 650 | "sphinxcontrib-devhelp": { 651 | "hashes": [ 652 | "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e", 653 | "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4" 654 | ], 655 | "version": "==1.0.2" 656 | }, 657 | "sphinxcontrib-htmlhelp": { 658 | "hashes": [ 659 | "sha256:3c0bc24a2c41e340ac37c85ced6dafc879ab485c095b1d65d2461ac2f7cca86f", 660 | "sha256:e8f5bb7e31b2dbb25b9cc435c8ab7a79787ebf7f906155729338f3156d93659b" 661 | ], 662 | "version": "==1.0.3" 663 | }, 664 | "sphinxcontrib-jsmath": { 665 | "hashes": [ 666 | "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", 667 | "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8" 668 | ], 669 | "version": "==1.0.1" 670 | }, 671 | "sphinxcontrib-qthelp": { 672 | "hashes": [ 673 | "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72", 674 | 
"sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6" 675 | ], 676 | "version": "==1.0.3" 677 | }, 678 | "sphinxcontrib-serializinghtml": { 679 | "hashes": [ 680 | "sha256:eaa0eccc86e982a9b939b2b82d12cc5d013385ba5eadcc7e4fed23f4405f77bc", 681 | "sha256:f242a81d423f59617a8e5cf16f5d4d74e28ee9a66f9e5b637a18082991db5a9a" 682 | ], 683 | "version": "==1.1.4" 684 | }, 685 | "toml": { 686 | "hashes": [ 687 | "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", 688 | "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" 689 | ], 690 | "version": "==0.10.2" 691 | }, 692 | "tqdm": { 693 | "hashes": [ 694 | "sha256:daec693491c52e9498632dfbe9ccfc4882a557f5fa08982db1b4d3adbe0887c3", 695 | "sha256:ebdebdb95e3477ceea267decfc0784859aa3df3e27e22d23b83e9b272bf157ae" 696 | ], 697 | "version": "==4.60.0" 698 | }, 699 | "twine": { 700 | "hashes": [ 701 | "sha256:16f706f2f1687d7ce30e7effceee40ed0a09b7c33b9abb5ef6434e5551565d83", 702 | "sha256:a56c985264b991dc8a8f4234eb80c5af87fa8080d0c224ad8f2cd05a2c22e83b" 703 | ], 704 | "index": "pypi", 705 | "version": "==3.4.1" 706 | }, 707 | "urllib3": { 708 | "hashes": [ 709 | "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df", 710 | "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937" 711 | ], 712 | "version": "==1.26.4" 713 | }, 714 | "webencodings": { 715 | "hashes": [ 716 | "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", 717 | "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" 718 | ], 719 | "version": "==0.5.1" 720 | }, 721 | "yapf": { 722 | "hashes": [ 723 | "sha256:408fb9a2b254c302f49db83c59f9aa0b4b0fd0ec25be3a5c51181327922ff63d", 724 | "sha256:e3a234ba8455fe201eaa649cdac872d590089a18b661e39bbac7020978dd9c2e" 725 | ], 726 | "index": "pypi", 727 | "version": "==0.31.0" 728 | }, 729 | "zipp": { 730 | "hashes": [ 731 | 
"sha256:3607921face881ba3e026887d8150cca609d517579abe052ac81fc5aeffdbd76", 732 | "sha256:51cb66cc54621609dd593d1787f286ee42a5c0adbb4b29abea5a63edc3e03098" 733 | ], 734 | "version": "==3.4.1" 735 | } 736 | } 737 | } 738 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Python ARPA Package 2 | =================== 3 | 4 | Python library for reading ARPA n-gram models. 5 | 6 | - [Documentation](https://arpa.readthedocs.io/en/latest/) is available. 7 | - [Changes](https://github.com/sfischer13/python-arpa/blob/master/HISTORY.md) between releases are documented. 8 | - [Bugs](https://github.com/sfischer13/python-arpa/issues) can be reported on the issue tracker. 9 | - [Questions](mailto:sfischer13@ymail.com) can be asked via e-mail. 10 | - [Source code](https://github.com/sfischer13/python-arpa) is tracked on GitHub. 11 | 12 | Setup 13 | ----- 14 | 15 | ### Python 3.4+ 16 | 17 | [![PyPI Python Versions](https://img.shields.io/pypi/pyversions/arpa.svg)](https://pypi.python.org/pypi/arpa) [![PyPI Version](https://img.shields.io/pypi/v/arpa.svg)](https://pypi.python.org/pypi/arpa) 18 | 19 | In order to install the Python 3 version: 20 | 21 | $ pip install --user -U arpa 22 | 23 | ### Python 2.7 24 | 25 | [![PyPI Python Versions](https://img.shields.io/pypi/pyversions/arpa-backport.svg)](https://pypi.python.org/pypi/arpa-backport) [![PyPI Version](https://img.shields.io/pypi/v/arpa-backport.svg)](https://pypi.python.org/pypi/arpa-backport) 26 | 27 | In order to install the Python 2.7 version: 28 | 29 | $ pip install --user -U arpa-backport 30 | 31 | Usage 32 | ----- 33 | 34 | The package may be imported directly: 35 | 36 | import arpa # Python 3.4+ 37 | # OR 38 | import arpa_backport as arpa # Python 2.7 39 | 40 | models = arpa.loadf("foo.arpa") 41 | lm = models[0] # ARPA files may contain several models. 
42 | 43 | # probability p(end|in, the) 44 | lm.p("in the end") 45 | lm.log_p("in the end") 46 | 47 | # sentence score w/ sentence markers 48 | lm.s("This is the end .") 49 | lm.log_s("This is the end .") 50 | 51 | # sentence score w/o sentence markers 52 | lm.s("This is the end .", sos=False, eos=False) 53 | lm.log_s("This is the end .", sos=False, eos=False) 54 | 55 | Development 56 | ----------- 57 | 58 | [![Travis](https://img.shields.io/travis/sfischer13/python-arpa.svg)](https://travis-ci.org/sfischer13/python-arpa) [![Documentation Status](https://readthedocs.org/projects/arpa/badge/?version=latest)](https://arpa.readthedocs.io/en/latest/?badge=latest) [![Coverage Status](https://coveralls.io/repos/sfischer13/python-arpa/badge.svg?branch=master&service=github)](https://coveralls.io/github/sfischer13/python-arpa?branch=master) 59 | 60 | *Contributions are welcome!* 61 | Write a bug report or send a pull request. 62 | Other [contributors](https://github.com/sfischer13/python-arpa/graphs/contributors) have done so before. 63 | 64 | License 65 | ------- 66 | 67 | Copyright (c) 2015-2018 Stefan Fischer 68 | The source code is available under the **MIT License**. 69 | See [LICENSE](https://github.com/sfischer13/python-arpa/blob/master/LICENSE) for further details. 
70 | -------------------------------------------------------------------------------- /arpa/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2015-2018 Stefan Fischer 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | """ 23 | Library for reading ARPA n-gram models. 24 | 25 | The package may be imported directly:: 26 | 27 | import arpa 28 | 29 | Details about the ARPA n-gram format can be found here: 30 | 31 | - `SRILM `_ 32 | - `ICSI Speech `_ 33 | 34 | The library was initiated by Stefan Fischer and is developed and maintained by many others. 
def dump(obj, fp):
    """Serialize obj to fp (a file-like object) in ARPA format."""
    # The model object knows its own layout; it only needs a writable text stream.
    obj.write(fp)


def dumpf(obj, path, encoding=None):
    """Serialize obj to path in ARPA format (.arpa, .gz).

    A path ending in ``.gz`` is written through ``gzip``; any other path is
    written as plain text.  Always returns ``None``.
    """
    path = str(path)
    # gzip.open and the builtin open accept the same text-mode arguments here,
    # so a single code path serves both (and both now consistently return None).
    opener = gzip.open if path.endswith('.gz') else open
    with opener(path, mode='wt', encoding=encoding) as f:
        dump(obj, f)


def dumps(obj):
    """Serialize obj to an ARPA formatted str."""
    with StringIO() as f:
        dump(obj, f)
        return f.getvalue()


def load(fp, model=None, parser=None):
    """Deserialize fp (a file-like object) to a Python object.

    ``model`` defaults to ``'simple'`` and ``parser`` to ``'quick'``; these are
    the only supported values.  Raises ``ValueError`` naming the offending
    argument when an unsupported value is given.
    """
    # Any falsy value (None, '') selects the default, as before.
    model = model or 'simple'
    parser = parser or 'quick'

    if model != 'simple':
        raise ValueError("unsupported model {!r}; expected 'simple'".format(model))
    if parser != 'quick':
        raise ValueError("unsupported parser {!r}; expected 'quick'".format(parser))

    return ARPAParserQuick(ARPAModelSimple).parse(fp)
# Conventional ARPA marker tokens.  They must be non-empty: an empty string is
# falsy and would silently switch off unknown-word mapping and sentence markers.
UNK = '<unk>'
SOS = '<s>'
EOS = '</s>'


class ARPAModel(metaclass=ABCMeta):
    """Abstract n-gram language model holding base-10 log probabilities.

    Subclasses provide the storage (``add_count``, ``add_entry``, ``counts``,
    ``order``, ``vocabulary``, ``_entries``, ``_log_bo``, ``_log_p``); this
    class implements scoring with back-off and ARPA serialization on top.
    """

    def __init__(self, unk=UNK):
        """Create a model; ``unk`` is the token substituted for unknown words.

        Pass a falsy ``unk`` to disable the substitution.
        """
        self._base = 10
        self._unk = unk

    def __contains__(self, word):
        """Return whether ``word`` (a str without spaces) is in the vocabulary."""
        self._check_word(word)
        return word in self.vocabulary()

    def __len__(self):
        """Return the vocabulary size."""
        return len(self.vocabulary())

    @abstractmethod
    def add_count(self, order, count):  # pragma: no cover
        pass

    @abstractmethod
    def add_entry(self, ngram, p, bo=None, order=None):  # pragma: no cover
        pass

    def log_p(self, ngram):
        """Return the log probability of the last word of ``ngram`` given the rest.

        ``ngram`` may be a tuple, a list or a space-separated str.  Words not
        in the vocabulary are replaced by the unknown token when one is set.
        """
        words = self._check_input(ngram)
        if self._unk:
            words = self._replace_unks(words)
        return self.log_p_raw(words)

    def log_p_raw(self, ngram):
        """Return the log probability of ``ngram`` (a tuple), backing off if needed.

        Raises ``KeyError`` when even the final unigram is unknown.
        """
        try:
            return self._log_p(ngram)
        except KeyError:
            # Nothing shorter to back off to.  ``<= 1`` (rather than ``== 1``)
            # also stops an empty tuple from recursing without bound.
            if len(ngram) <= 1:
                raise KeyError(ngram)
            try:
                log_bo = self._log_bo(ngram[:-1])
            except KeyError:
                # A context without a stored back-off weight contributes 0.
                log_bo = 0
            return log_bo + self.log_p_raw(ngram[1:])

    def log_s(self, sentence, sos=SOS, eos=EOS):
        """Return the log score of ``sentence``.

        ``sos`` / ``eos`` are the sentence markers added around the words;
        pass a falsy value to omit either one.
        """
        words = self._check_input(sentence)
        if self._unk:
            words = self._replace_unks(words)
        if sos:
            words = (sos, ) + words
        if eos:
            words = words + (eos, )
        # Score every prefix: each term is the last word given all before it.
        result = sum(self.log_p_raw(words[:i]) for i in range(1, len(words) + 1))
        if sos:
            # The first prefix is the start marker on its own; it is not scored.
            result = result - self.log_p_raw(words[:1])
        return result

    def p(self, ngram):
        """Return the probability corresponding to ``log_p(ngram)``."""
        return self._base**self.log_p(ngram)

    def s(self, sentence, sos=SOS, eos=EOS):
        """Return the score corresponding to ``log_s(sentence, sos, eos)``."""
        return self._base**self.log_s(sentence, sos=sos, eos=eos)

    @abstractmethod
    def counts(self):  # pragma: no cover
        pass

    @abstractmethod
    def order(self):  # pragma: no cover
        pass

    @abstractmethod
    def vocabulary(self, sort=True):  # pragma: no cover
        pass

    def write(self, fp):
        """Write the model to ``fp`` (a text file-like object) in ARPA format.

        Raises ``ValueError`` if an entry is neither a ``(p, ngram)`` pair nor a
        ``(p, ngram, backoff)`` triple.
        """
        counts = self.counts()
        fp.write('\n\\data\\\n')
        for order, count in counts:
            fp.write('ngram {}={}\n'.format(order, count))
        fp.write('\n')
        for order, _ in counts:
            fp.write('\\{}-grams:\n'.format(order))
            for e in self._entries(order):
                prob = e[0]
                ngram = ' '.join(e[1])
                if len(e) == 2:
                    fp.write('{}\t{}\n'.format(prob, ngram))
                elif len(e) == 3:
                    backoff = e[2]
                    fp.write('{}\t{}\t{}\n'.format(prob, ngram, backoff))
                else:
                    raise ValueError
            fp.write('\n')
        fp.write('\\end\\\n')

    @abstractmethod
    def _entries(self, order):  # pragma: no cover
        pass

    @abstractmethod
    def _log_bo(self, ngram):  # pragma: no cover
        pass

    @abstractmethod
    def _log_p(self, ngram):  # pragma: no cover
        pass

    @staticmethod
    def _check_input(input):
        """Normalize a tuple, list or space-separated str into a tuple of words.

        Raises ``ValueError`` for empty input or any other type.
        """
        if not input:
            raise ValueError
        elif isinstance(input, tuple):
            return input
        elif isinstance(input, list):
            return tuple(input)
        elif isinstance(input, str):
            return tuple(input.strip().split(' '))
        else:
            raise ValueError

    @staticmethod
    def _check_word(input):
        """Raise ``ValueError`` unless ``input`` is a str containing no space."""
        if not isinstance(input, str):
            raise ValueError
        if ' ' in input:
            raise ValueError

    def _replace_unks(self, words):
        """Return ``words`` with out-of-vocabulary items replaced by the unknown token."""
        return tuple((w if w in self else self._unk) for w in words)
class ARPAParserQuick(ARPAParser):
    r"""Single-pass, line-based state-machine parser for ARPA files.

    A file may hold several models; each one is delimited by a ``\data\``
    line and an ``\end\`` line. ``parse`` returns one model instance per
    such section, built by calling the model class given to ``__init__``.
    """

    @unique
    class State(Enum):
        """Parser states; each value names what the next line should be."""
        DATA = 1  # outside a model, waiting for ``\data\``
        COUNT = 2  # reading ``ngram N=COUNT`` lines
        HEADER = 3  # waiting for ``\N-grams:`` or ``\end\``
        ENTRY = 4  # reading the entries of the current order

    re_count = re.compile(r'^ngram (\d+)=(\d+)$')
    re_header = re.compile(r'^\\(\d+)-grams:$')
    # Entry line: probability, TAB, space-separated words and an optional
    # TAB + back-off weight. The back-off group (group 7) includes the
    # exponent, so a weight such as ``-1.5e-05`` is captured completely
    # (previously the exponent was left outside the captured group and lost).
    # Exponents may carry an explicit sign of either kind.
    re_entry = re.compile(r'^(-?\d+(\.\d+)?([eE][-+]?\d+)?)'
                          r'\t'
                          r'(\S+( \S+)*)'
                          r'(\t(-?\d+(\.\d+)?([eE][-+]?\d+)?))?$')

    def __init__(self, model):
        """Remember the model class that is instantiated for every section.

        Args:
            model: callable taking no arguments that returns an object with
                ``add_count(order, count)`` and
                ``add_entry(ngram, p, bo, order)`` methods.
        """
        self.ModelClass = model

    def parse(self, fp):
        """Parse all models found in ``fp``.

        Args:
            fp: iterable of text lines (for example an open text file).

        Returns:
            list: the parsed models, in file order.

        Raises:
            ParseException: on a malformed line, or when the input ends
                inside a model (the last line read is attached).
        """
        self._result = []
        self._state = self.State.DATA
        self._tmp_model = None
        self._tmp_order = None
        handlers = {
            self.State.DATA: self._data,
            self.State.COUNT: self._count,
            self.State.HEADER: self._header,
            self.State.ENTRY: self._entry,
        }
        line = ''
        for line in fp:
            line = line.strip()
            # The handler is looked up per line because every handler may
            # switch the state for the following line.
            handlers[self._state](line)
        if self._state != self.State.DATA:
            # Input ended before the closing ``\end\`` marker.
            raise ParseException(line)
        return self._result

    def _data(self, line):
        """Start a new model on ``\\data\\``; ignore any other line."""
        if line == '\\data\\':
            self._state = self.State.COUNT
            self._tmp_model = self.ModelClass()
        # anything else outside a model is treated as a comment line

    def _count(self, line):
        """Record an ``ngram N=COUNT`` line; a blank line ends the counts."""
        match = self.re_count.match(line)
        if match:
            order = int(match.group(1))
            count = int(match.group(2))
            self._tmp_model.add_count(order, count)
        elif not line:
            self._state = self.State.HEADER  # there are no more counts
        else:
            raise ParseException(line)

    def _header(self, line):
        """Handle a section header, the end marker or a blank line."""
        match = self.re_header.match(line)
        if match:
            self._state = self.State.ENTRY
            self._tmp_order = int(match.group(1))
        elif line == '\\end\\':
            # Model complete: publish it and reset the per-model state.
            self._result.append(self._tmp_model)
            self._state = self.State.DATA
            self._tmp_model = None
            self._tmp_order = None
        elif line:
            raise ParseException(line)
        # blank lines between sections are skipped

    def _entry(self, line):
        """Add one n-gram entry to the model; a blank line ends the section."""
        match = self.re_entry.match(line)
        if match:
            p = self._float_or_int(match.group(1))
            ngram = tuple(match.group(4).split(' '))
            bo_text = match.group(7)
            bo = self._float_or_int(bo_text) if bo_text is not None else None
            self._tmp_model.add_entry(ngram, p, bo, self._tmp_order)
        elif not line:
            self._state = self.State.HEADER  # last entry of this order
        else:
            raise ParseException(line)

    @staticmethod
    def _float_or_int(s):
        """Convert numeric text to ``int`` or ``float``.

        An ``int`` is returned only when ``s`` is exactly the decimal text of
        that integer. Text such as ``'5.0'`` or ``'1e3'`` stays a ``float``,
        so a trailing ``.0`` is not dropped.
        """
        f = float(s)
        # is_integer() is False for inf/nan, which int() cannot convert.
        if f.is_integer():
            i = int(f)
            if str(i) == s:
                return i
        return f
automodule:: arpa.models 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/arpa.parsers.rst: -------------------------------------------------------------------------------- 1 | arpa.parsers package 2 | ==================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | arpa.parsers.base module 8 | ------------------------ 9 | 10 | .. automodule:: arpa.parsers.base 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | arpa.parsers.quick module 16 | ------------------------- 17 | 18 | .. automodule:: arpa.parsers.quick 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: arpa.parsers 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/arpa.rst: -------------------------------------------------------------------------------- 1 | arpa package 2 | ============ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | arpa.models 10 | arpa.parsers 11 | 12 | Submodules 13 | ---------- 14 | 15 | arpa.api module 16 | --------------- 17 | 18 | .. automodule:: arpa.api 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | arpa.exceptions module 24 | ---------------------- 25 | 26 | .. automodule:: arpa.exceptions 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: arpa 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('..'))

# -- Project information -----------------------------------------------------

project = 'arpa'
copyright = '2015-2018, Stefan Fischer'
author = 'Stefan Fischer'

# The short X.Y version
version = '0.1'
# The full version, including alpha/beta/rc tags
release = '0.1.0b4'


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
#
# 'recommonmark' registers the Markdown parser for '.md' sources. It replaces
# the former ``source_parsers`` setting, which Sphinx deprecated in 1.8 and
# removed in 3.0.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.coverage',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'recommonmark',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = ['.rst', '.md']

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'arpadoc'


# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'arpa.tex', 'arpa Documentation',
     'Stefan Fischer', 'manual'),
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'arpa', 'arpa Documentation',
     [author], 1)
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'arpa', 'arpa Documentation',
     author, 'arpa', 'One line description of project.',
     'Miscellaneous'),
]


# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
177 | # 178 | # epub_uid = '' 179 | 180 | # A list of files that should not be packed into the epub file. 181 | epub_exclude_files = ['search.html'] 182 | 183 | 184 | # -- Extension configuration ------------------------------------------------- 185 | 186 | nitpick_ignore = [ 187 | ('py:class', 'Exception'), 188 | ('py:class', 'enum.Enum'), 189 | ('py:class', 'object'), 190 | ] 191 | -------------------------------------------------------------------------------- /docs/examples.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | ```python 4 | import arpa 5 | models = arpa.loadf("foo.arpa") 6 | lm = models[0] # ARPA files may contain several models. 7 | 8 | # probability p(end|in, the) 9 | lm.p("in the end") 10 | lm.log_p("in the end") 11 | 12 | # sentence score w/ sentence markers 13 | lm.s("This is the end .") 14 | lm.log_s("This is the end .") 15 | 16 | # sentence score w/o sentence markers 17 | lm.s("This is the end .", sos=False, eos=False) 18 | lm.log_s("This is the end .", sos=False, eos=False) 19 | ``` 20 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. arpa documentation master file, created by 2 | sphinx-quickstart on Sun Dec 2 14:16:09 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Python :mod:`arpa` package 7 | ========================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | 12 | setup 13 | examples 14 | arpa 15 | 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/setup.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | 3 | Global installation: 4 | 5 | ```sh 6 | pip install -U arpa 7 | ``` 8 | 9 | Local installation: 10 | 11 | ```sh 12 | pip install --user -U arpa 13 | ``` 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /requirements_dev.txt: 
-------------------------------------------------------------------------------- 1 | alabaster==0.7.12 2 | attrs==20.3.0 3 | babel==2.9.0 4 | bleach==3.3.0 5 | build==0.3.1.post1 6 | certifi==2020.12.5 7 | cffi==1.14.5 8 | chardet==4.0.0 9 | check-manifest==0.46 10 | click==7.1.2 11 | cmarkgfm==0.5.3 12 | colorama==0.4.4 13 | commonmark==0.9.1 14 | coverage==5.5 15 | coveralls==3.0.1 16 | cryptography==3.4.7 17 | docopt==0.6.2 18 | docutils==0.17 19 | flake8==3.9.0 20 | git+git://github.com/kpu/kenlm.git#egg=kenlm 21 | idna==2.10 22 | imagesize==1.2.0 23 | importlib-metadata==3.10.0 24 | iniconfig==1.1.1 25 | jeepney==0.6.0 26 | jinja2==2.11.3 27 | joblib==1.0.1 28 | keyring==23.0.1 29 | markupsafe==1.1.1 30 | mccabe==0.6.1 31 | nltk==3.6.1 32 | packaging==20.9 33 | pep517==0.10.0 34 | pkginfo==1.7.0 35 | pluggy==0.13.1 36 | py==1.10.0 37 | pycodestyle==2.7.0 38 | pycparser==2.20 39 | pyflakes==2.3.1 40 | pygments==2.8.1 41 | pyparsing==2.4.7 42 | pyroma==3.1 43 | pytest-runner==5.3.0 44 | pytest==6.2.3 45 | pytz==2021.1 46 | readme-renderer==29.0 47 | recommonmark==0.7.1 48 | regex==2021.4.4 49 | requests-toolbelt==0.9.1 50 | requests==2.25.1 51 | rfc3986==1.4.0 52 | secretstorage==3.3.1; sys_platform == 'linux' 53 | six==1.15.0 54 | snowballstemmer==2.1.0 55 | sphinx==3.5.3 56 | sphinxcontrib-applehelp==1.0.2 57 | sphinxcontrib-devhelp==1.0.2 58 | sphinxcontrib-htmlhelp==1.0.3 59 | sphinxcontrib-jsmath==1.0.1 60 | sphinxcontrib-qthelp==1.0.3 61 | sphinxcontrib-serializinghtml==1.1.4 62 | toml==0.10.2 63 | tqdm==4.60.0 64 | twine==3.4.1 65 | urllib3==1.26.4 66 | webencodings==0.5.1 67 | yapf==0.31.0 68 | zipp==3.4.1 69 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test = pytest 3 | 4 | [bdist_wheel] 5 | universal = 0 6 | -------------------------------------------------------------------------------- /setup.py: 
#!/usr/bin/env python3
"""Packaging script for the ``arpa`` distribution."""

import sys

try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

if not ((3, 4) <= sys.version_info < (4, 0)):
    print('ERROR: Python 3.4+ is required!')
    sys.exit(1)

# Read the Markdown files as UTF-8 explicitly so the build does not depend on
# the locale's default encoding.
with open('README.md', encoding='utf-8') as readme_file:
    readme = readme_file.read()

with open('HISTORY.md', encoding='utf-8') as history_file:
    history = history_file.read()

setup(
    author='Stefan Fischer',
    author_email='sfischer13@ymail.com',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'Intended Audience :: Information Technology',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3 :: Only',
        'Programming Language :: Python :: Implementation',
        'Programming Language :: Python :: Implementation :: CPython',
        'Topic :: Scientific/Engineering',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Topic :: Software Development :: Libraries',
        'Topic :: Software Development :: Libraries :: Python Modules',
        'Topic :: Text Processing',
        'Topic :: Text Processing :: Linguistic',
    ],
    description='Library for reading ARPA n-gram models.',
    include_package_data=True,
    install_requires=[],
    keywords='ARPA,n-gram,ngram,language model,LM,language technology,LT,'
    'computational linguistics,CL,natural language processing,NLP,unigram,bigram,trigram',
    license='MIT',
    long_description=readme + '\n\n' + history,
    long_description_content_type='text/markdown',
    name='arpa',
    package_dir={'arpa': 'arpa'},
    # Sub-packages are not picked up implicitly: each one must be listed,
    # otherwise an installed ``arpa`` lacks ``arpa.models`` / ``arpa.parsers``.
    packages=['arpa', 'arpa.models', 'arpa.parsers'],
    project_urls={
        'bug tracker': 'https://github.com/sfischer13/python-arpa/issues/',
        'documentation': 'https://arpa.readthedocs.io/',
        'source code': 'https://github.com/sfischer13/python-arpa/',
    },
    python_requires='~=3.4',
    setup_requires=['pytest-runner'],
    tests_require=['pytest'],
    url='https://github.com/sfischer13/python-arpa',
    version='0.1.0b4',
    zip_safe=True,
)
def test_loadf_dumpf_write():
    """Dump a model (plain and gzip suffix), reload it and dump it again.

    The second dump must be byte-identical to the reference file. Every
    temporary file is removed even when a step fails.
    """
    for p in PARSERS:
        for suf in ['.arpa', '.gz']:
            paths = []
            try:
                # read
                lm1 = arpa.loadf(TEST_ARPA, parser=p)[0]
                # write; the handle only reserves a unique name and is
                # closed before dumpf() opens the path itself
                out1 = tempfile.NamedTemporaryFile(mode='w+t', suffix=suf, delete=False)
                paths.append(out1.name)
                out1.close()
                arpa.dumpf(lm1, out1.name)
                # read again
                lm2 = arpa.loadf(out1.name, parser=p)[0]
                # write again
                out2 = tempfile.NamedTemporaryFile(mode='w+t', suffix='.arpa', delete=False)
                paths.append(out2.name)
                out2.close()
                arpa.dumpf(lm2, out2.name)
                # compare
                assert filecmp.cmp(TEST_ARPA, out2.name, shallow=False)
            finally:
                for path in paths:
                    os.unlink(path)
def _random_queries():
    """Yield N_QUERIES random n-grams for each order from 1 to MAX_ORDER."""
    orders = range(1, MAX_ORDER + 1)
    yield from (
        _random_ngram(length)
        for length in orders
        for _repeat in range(N_QUERIES)
    )
def test_manual_contains():
    """A single word can be tested for membership; other inputs raise."""
    model = arpa.loadf(TEST_ARPA)[0]
    assert 'foo' in model
    # Tuples and strings containing a space are rejected with ValueError.
    rejected = [('foo', ), 'a little', ('a', 'little')]
    for candidate in rejected:
        with pytest.raises(ValueError):
            assert candidate in model
def test_input_equality():
    """String and tuple spellings of the same n-gram behave identically."""
    spellings = [
        ('foo', ('foo', )),
        ('xxx', ('xxx', )),
        ('a little', ('a', 'little')),
        ('xxx little', ('xxx', 'little')),
    ]

    # A model without any entries raises KeyError for every query.
    empty = ARPAModelSimple()
    for text, words in spellings:
        with pytest.raises(KeyError):
            assert empty.p(text) == empty.p(words)

    # A loaded model gives the same probability for both spellings.
    loaded = arpa.loadf(TEST_ARPA)[0]
    for text, words in spellings:
        assert loaded.p(text) == loaded.p(words)
def test_parse():
    """The quick parser returns ARPAModel instances for the test file."""
    parser = ARPAParserQuick(ARPAModelSimple)
    with open(TEST_ARPA, 'rt') as fp:
        models = parser.parse(fp)
    first = models[0]
    assert isinstance(first, ARPAModel)