├── .coveragerc ├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── .gitignore ├── .pylintrc ├── LICENSE ├── Makefile ├── README.rst ├── anorack ├── data └── overrides ├── doc ├── LICENSE ├── Makefile ├── README ├── changelog └── manpage.rst ├── lib ├── __init__.py ├── articles.py ├── cli.py ├── espeak.py ├── io.py ├── misc.py ├── parser.py ├── phonetics.py └── version.py ├── private ├── build-source-tarball ├── run-pylint ├── update-coverage └── update-version └── tests ├── __init__.py ├── coverage ├── test_choose_art.py ├── test_cli.py ├── test_coerce_case.py ├── test_open_file.py ├── test_parse_file.py ├── test_phonetics.py ├── test_vcmp.py ├── test_version.py ├── test_warn.py └── tools.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = true 3 | source = lib 4 | 5 | [report] 6 | show_missing = true 7 | exclude_lines = # (no coverage|Python >= [0-9.]+ is required)\b 8 | 9 | # vim:ft=dosini 10 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | custom: https://paypal.me/ijklw 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | - push 4 | - pull_request 5 | permissions: {} 6 | jobs: 7 | main: 8 | strategy: 9 | matrix: 10 | include: 11 | - python-version: '3.8' 12 | os: ubuntu-22.04 13 | - python-version: '3.9' 14 | os: ubuntu-22.04 15 | ng: ng 16 | - python-version: '3.10' 17 | os: ubuntu-22.04 18 | ng: ng 19 | - python-version: '3.11' 20 | os: ubuntu-22.04 21 | - python-version: '3.12' 22 | os: ubuntu-22.04 23 | - python-version: '3.13' 24 | os: ubuntu-24.04 25 | runs-on: ${{matrix.os}} 26 | steps: 27 | - uses: actions/checkout@v4 28 | - name: set up Python ${{matrix.python-version}} 29 | 
uses: actions/setup-python@v5 30 | with: 31 | python-version: ${{matrix.python-version}} 32 | - name: set up APT 33 | run: | 34 | printf 'Apt::Install-Recommends "false";\n' | sudo tee -a /etc/apt/apt.conf 35 | sudo apt-get update 36 | - name: install eSpeak 37 | run: | 38 | sudo apt-get install libespeak${{matrix.ng && '-ng'}}1 39 | - name: run tests 40 | run: | 41 | make test 42 | # may time out because of 43 | timeout-minutes: 1 44 | - name: run pydiatra 45 | run: | 46 | python3 -m pip install pydiatra 47 | python3 -m pydiatra -v . 48 | - name: run pyflakes 49 | run: | 50 | python3 -m pip install pyflakes 51 | python3 -m pyflakes . 52 | - name: run pylint 53 | run: | 54 | python3 -m pip install pylint 55 | private/run-pylint 56 | - name: build docs 57 | run: | 58 | python3 -m pip install docutils 59 | make -C doc 60 | - name: check docs 61 | run: | 62 | python3 -m pip install pygments 63 | make -C doc check 64 | - name: install 65 | run: 66 | make install PREFIX=~/.local 67 | - name: check if the executable was installed correctly 68 | run: | 69 | cd / 70 | anorack --version 71 | anorack < /dev/null 72 | - name: check if the man page was installed correctly 73 | env: 74 | MANPATH: /home/runner/.local/share/man 75 | MANWIDTH: 80 76 | run: | 77 | cd / 78 | man 1 anorack | grep -A 10 -w ANORACK 79 | 80 | # vim:ts=2 sts=2 sw=2 et 81 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.coverage 2 | /dist 3 | /doc/*.1 4 | __pycache__ 5 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | load-plugins = pylint.extensions.check_elif 3 | 4 | [MESSAGES CONTROL] 5 | disable = 6 | bad-continuation, 7 | consider-using-f-string, 8 | duplicate-code, 9 | invalid-name, 10 | locally-disabled, 11 | no-else-return, 
12 | too-few-public-methods, 13 | too-many-arguments, 14 | useless-option-value, 15 | 16 | [REPORTS] 17 | reports = no 18 | score = no 19 | msg-template = {path}:{line}: {C}: {symbol} [{obj}] {msg} 20 | 21 | [FORMAT] 22 | max-line-length = 120 23 | expected-line-ending-format = LF 24 | 25 | # vim:ft=dosini ts=4 sts=4 sw=4 et 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2012-2025 Jakub Wilk 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the “Software”), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright © 2012-2021 Jakub Wilk 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the “Software”), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 
20 | 21 | PYTHON = python3 22 | 23 | PREFIX = /usr/local 24 | DESTDIR = 25 | 26 | bindir = $(PREFIX)/bin 27 | basedir = $(PREFIX)/share/anorack 28 | mandir = $(PREFIX)/share/man 29 | 30 | .PHONY: all 31 | all: ; 32 | 33 | .PHONY: install 34 | install: anorack 35 | $(PYTHON) - < lib/__init__.py # Python version check 36 | # executable: 37 | install -d $(DESTDIR)$(bindir) 38 | python_exe=$$($(PYTHON) -c 'import sys; print(sys.executable)') && \ 39 | sed \ 40 | -e "1 s@^#!.*@#!$$python_exe@" \ 41 | -e "s#^basedir = .*#basedir = '$(basedir)/'#" \ 42 | $(<) > $(<).tmp 43 | install $(<).tmp $(DESTDIR)$(bindir)/$(<) 44 | rm $(<).tmp 45 | # data: 46 | install -d $(DESTDIR)$(basedir)/data 47 | install -p -m644 data/* $(DESTDIR)$(basedir)/data/ 48 | # library: 49 | install -d $(DESTDIR)$(basedir)/lib 50 | install -p -m644 lib/*.py $(DESTDIR)$(basedir)/lib/ 51 | ifeq "$(DESTDIR)" "" 52 | umask 022 && $(PYTHON) -m compileall -q $(basedir)/lib/ 53 | endif 54 | ifeq "$(wildcard doc/*.1)" "" 55 | # run "$(MAKE) -C doc" to build the manpage 56 | else 57 | # manual page: 58 | install -d $(DESTDIR)$(mandir)/man1 59 | install -p -m644 doc/$(<).1 $(DESTDIR)$(mandir)/man1/ 60 | endif 61 | 62 | .PHONY: test 63 | test: 64 | $(PYTHON) -bb -m unittest discover --verbose 65 | 66 | .PHONY: clean 67 | clean: 68 | find . -type f -name '*.py[co]' -delete 69 | find . 
-type d -name '__pycache__' -delete 70 | rm -f .coverage 71 | rm -f *.tmp 72 | 73 | .error = GNU make is required 74 | 75 | # vim:ts=4 sts=4 sw=4 noet 76 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | doc/README -------------------------------------------------------------------------------- /anorack: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # encoding=UTF-8 3 | 4 | # Copyright © 2016-2024 Jakub Wilk 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the “Software”), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # SOFTWARE. 
23 | 24 | import sys 25 | 26 | basedir = None 27 | if basedir is not None: 28 | sys.path[:0] = [basedir] 29 | 30 | import lib.cli # pylint: disable=wrong-import-position 31 | 32 | if __name__ == '__main__': 33 | lib.cli.main() 34 | 35 | # vim:ts=4 sts=4 sw=4 et 36 | -------------------------------------------------------------------------------- /data/overrides: -------------------------------------------------------------------------------- 1 | EWMH E.W.M.H 2 | UCS U.C.S 3 | UDP U.D.P 4 | UPS U.P.S 5 | UTF U.T.F 6 | UTS U.T.S 7 | UUID U.U.I.D 8 | UUIDv U.U.I.D_v 9 | src source 10 | unary [[j'un@ri]] [[jˈunəɹi]] 11 | usr U.S.R 12 | -------------------------------------------------------------------------------- /doc/LICENSE: -------------------------------------------------------------------------------- 1 | ../LICENSE -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright © 2014-2022 Jakub Wilk 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the “Software”), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | 21 | export LC_ALL=C 22 | 23 | rst2man = $(notdir $(shell command -v rst2man || echo rst2man.py)) 24 | rst2xml = $(notdir $(shell command -v rst2xml || echo rst2xml.py)) 25 | 26 | exe = anorack 27 | 28 | .PHONY: all 29 | all: $(exe).1 30 | 31 | $(exe).1: manpage.rst 32 | $(rst2man) --input-encoding=UTF-8 < $(<) > $(@).tmp 33 | perl -pi -e '/^[.]BI\b/ and s/\\fP/\\fR/g' $(@).tmp # work-around for https://bugs.debian.org/806601 34 | perl -ni -e 'print unless /^[.]\\" vim:/' $(@).tmp 35 | mv $(@).tmp $(@) 36 | 37 | .PHONY: check 38 | check: check-changelog check-rst 39 | 40 | .PHONY: check-changelog 41 | check-changelog: changelog 42 | dpkg-parsechangelog -l$(<) --all 2>&1 >/dev/null | { ! grep .; } 43 | 44 | .PHONY: check-rst 45 | check-rst: 46 | ls README *.rst | xargs -t -I{} $(rst2xml) --input-encoding=UTF-8 --strict {} > /dev/null 47 | 48 | .PHONY: clean 49 | clean: 50 | rm -f $(exe).1 *.tmp 51 | 52 | .error = GNU make is required 53 | 54 | # vim:ts=4 sts=4 sw=4 noet 55 | -------------------------------------------------------------------------------- /doc/README: -------------------------------------------------------------------------------- 1 | Overview 2 | ======== 3 | 4 | The English language has two indefinite articles: 5 | 6 | + *a*: used before words that begin with a consonant sound (e.g., *a program*, *a host*, *a user*); 7 | + *an*: used before words that begin with a vowel sound (e.g., *an example*, *an hour*, *an undefined variable*). 8 | 9 | **anorack** is a specialized spell-checker 10 | that finds incorrect indefinite articles: 11 | 12 | .. 
code:: console 13 | 14 | $ cat test 15 | a Ubuntu user 16 | a 8-byte word 17 | an username 18 | 19 | $ anorack test 20 | test:1: a Ubuntu -> an Ubuntu /u:b'u:ntu:/ 21 | test:2: a 8 -> an 8 /'eIt/ 22 | test:3: an username -> a username /j'u:z3n,eIm/ 23 | 24 | Prerequisites 25 | ============= 26 | 27 | * Python ≥ 3.7 28 | 29 | * `eSpeak NG`_ or eSpeak_ ≥ 1.47.08 (shared library only) 30 | 31 | .. _eSpeak NG: 32 | https://github.com/espeak-ng/espeak-ng 33 | 34 | .. _eSpeak: 35 | https://espeak.sourceforge.net/ 36 | 37 | Installation 38 | ============ 39 | 40 | You can use anorack without installing it, 41 | straight out of unpacked source tarball or a VCS checkout. 42 | 43 | It's also possible to install it system-wide with:: 44 | 45 | # make install 46 | 47 | By default, ``make install`` installs the package to ``/usr/local``. 48 | You can specify a different installation prefix 49 | by setting the ``PREFIX`` variable, e.g.:: 50 | 51 | $ make install PREFIX="$HOME/.local" 52 | 53 | .. vim:ft=rst ts=3 sts=3 sw=3 et 54 | -------------------------------------------------------------------------------- /doc/changelog: -------------------------------------------------------------------------------- 1 | anorack (0.3.1) UNRELEASED; urgency=low 2 | 3 | * 4 | 5 | -- Jakub Wilk Mon, 12 May 2025 11:09:16 +0200 6 | 7 | anorack (0.3) unstable; urgency=low 8 | 9 | * Add the -e option (to exit with non-zero status if any incorrect articles 10 | were found). 11 | https://github.com/jwilk/anorack/issues/9 12 | * Add override for “UUIDv” (as in “UUIDv4”). 13 | * Clarify help message for --ipa. 14 | * Improve README: 15 | + Use HTTPS for espeak.sourceforge.net. 16 | + Clarify that only eSpeak (NG) shared library is needed. 17 | * Fix minor bugs in IO: 18 | + Fix stderr buffering. 19 | + Don't reset encoding error handler to “strict”. 20 | * Exit with status 1 (not 2) on argument parsing error. 21 | * Drop support for Python < 3.7. 
22 | 23 | -- Jakub Wilk Wed, 02 Apr 2025 19:04:39 +0200 24 | 25 | anorack (0.2.8) unstable; urgency=low 26 | 27 | * Drop support for Python < 3.6. 28 | * Stop using the distutils module. 29 | (It's deprecated and scheduled for removal in Python 3.12.) 30 | * Improve the test suite: 31 | + Stop using nose for the test suite. 32 | + Disallow dubious operations on bytes. 33 | 34 | -- Jakub Wilk Wed, 24 Aug 2022 20:49:50 +0200 35 | 36 | anorack (0.2.7) unstable; urgency=low 37 | 38 | * Add override for “UPS”. 39 | https://github.com/jwilk/anorack/issues/6 40 | Thanks to Martin Michlmayr for the bug report. 41 | * Fix parsing words with intra-word apostrophes. 42 | https://github.com/jwilk/anorack/issues/7 43 | Thanks to Martin Michlmayr for the bug report. 44 | 45 | -- Jakub Wilk Mon, 01 Jun 2020 20:12:29 +0200 46 | 47 | anorack (0.2.6) unstable; urgency=low 48 | 49 | * Add override for “src”. 50 | https://github.com/jwilk/anorack/issues/5 51 | Thanks to Paul Wise and Emmanuel Arias for the bug report. 52 | 53 | -- Jakub Wilk Mon, 25 May 2020 14:49:32 +0200 54 | 55 | anorack (0.2.5) unstable; urgency=low 56 | 57 | * Drop support for Python 3.2. 58 | * Don't die with exception when a file cannot be opened. 59 | (If there are many input files, it's helpful to continue when one of them 60 | cannot be opened.) 61 | * Improve the build system: 62 | + Check Python version on install. 63 | + Byte-compile Python code on install (if DESTDIR is not set). 64 | + Add checks against BSD make. (Only GNU make is supported.) 65 | + Don't require GNU install(1). 66 | + Remove the test coverage file in the clean target. 67 | * Rephrase description of --version in help messages. 68 | * Improve the test suite. 69 | 70 | -- Jakub Wilk Mon, 21 Oct 2019 18:08:22 +0200 71 | 72 | anorack (0.2.4) unstable; urgency=low 73 | 74 | * Reset the SIGPIPE signal disposition. 75 | * Improve the build system. 
76 | 77 | -- Jakub Wilk Fri, 18 May 2018 21:05:06 +0200 78 | 79 | anorack (0.2.3) unstable; urgency=low 80 | 81 | * Rewrite shebang at install time. 82 | * Make the doc makefile more portable. 83 | * Add installation instructions to README. 84 | * Improve the test suite. 85 | 86 | -- Jakub Wilk Wed, 22 Mar 2017 18:46:51 +0100 87 | 88 | anorack (0.2.2) unstable; urgency=low 89 | 90 | * Fix compatibility with eSpeak >= 1.48.11. 91 | * Add support for eSpeak NG. 92 | * Put license into a separate file. 93 | 94 | -- Jakub Wilk Wed, 19 Oct 2016 20:11:33 +0200 95 | 96 | anorack (0.2.1) unstable; urgency=low 97 | 98 | * Explain the grammar rules in README and in the manual page. 99 | * Don't disable stdout/stderr line buffering. 100 | 101 | -- Jakub Wilk Sun, 21 Aug 2016 21:31:00 +0200 102 | 103 | anorack (0.2) unstable; urgency=low 104 | 105 | * Fix word-splitting for compounds that include numbers or underscores. 106 | This fixes, among others, false positives involving acronyms such as 107 | “a UTF16”. 108 | * Retain original article's case in correction. 109 | * Add option for printing phonemes using IPA (--ipa). 110 | * Add Makefile. 111 | * Use /usr/bin/env in shebang. 112 | 113 | -- Jakub Wilk Mon, 18 Jul 2016 12:12:40 +0200 114 | 115 | anorack (0.1.1) unstable; urgency=low 116 | 117 | * Add the manual page. 118 | * Add N (ŋ) to the consonants set. 119 | * Allow quotation character between the article and the other word. 120 | * Fix false positives for the following phrases: 121 | + an EWMH 122 | + a UCS 123 | + a UDP 124 | + a UTF 125 | + a UTS 126 | + a UUID 127 | + a unary 128 | + a usr 129 | * Improve the test suite. 130 | 131 | -- Jakub Wilk Mon, 11 Jul 2016 21:42:44 +0200 132 | 133 | anorack (0.1) unstable; urgency=low 134 | 135 | * Initial release. 
136 | 137 | -- Jakub Wilk Mon, 04 Jul 2016 21:04:36 +0200 138 | -------------------------------------------------------------------------------- /doc/manpage.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | anorack 3 | ======= 4 | 5 | ------------------- 6 | “a” vs “an” checker 7 | ------------------- 8 | 9 | :manual section: 1 10 | :version: anorack 0.3.1 11 | :date: 2025-05-12 12 | 13 | Synopsis 14 | -------- 15 | **anorack** [*option*...] [*file*...] 16 | 17 | Description 18 | ----------- 19 | 20 | The English language has two indefinite articles: 21 | 22 | + *a*: used before words that begin with a consonant sound (e.g., *a program*, *a host*, *a user*); 23 | + *an*: used before words that begin with a vowel sound (e.g., *an example*, *an hour*, *an undefined variable*). 24 | 25 | **anorack** is a specialized spell-checker 26 | that finds incorrect indefinite articles. 27 | 28 | Options 29 | ------- 30 | 31 | -e 32 | Exit with non-zero status if any incorrect articles were found. 33 | --ipa 34 | Print phonemes using IPA (International Phonetic Alphabet) 35 | instead of ASCII phoneme mnemonics. 36 | -h, --help 37 | Show help message and exit. 38 | --version 39 | Show version information and exit. 40 | 41 | Example 42 | ------- 43 | 44 | :: 45 | 46 | $ cat test 47 | a Ubuntu user 48 | a 8-byte word 49 | an username 50 | 51 | $ anorack test 52 | test:1: a Ubuntu -> an Ubuntu /u:b'u:ntu:/ 53 | test:2: a 8 -> an 8 /'eIt/ 54 | test:3: an username -> a username /j'u:z3n,eIm/ 55 | 56 | See also 57 | -------- 58 | 59 | **espeak**\ (1) 60 | 61 | .. 
'''
anorack's private modules
'''

# Interpreter version probe.  The coroutine function below is deleted right
# after it is defined and is never called: it only has to be *compiled*.
# The install target of the top-level Makefile feeds this file to $(PYTHON)
# on stdin as its "Python version check".
# NOTE(review): presumably an interpreter that is too old rejects the
# "await" inside the f-string, so that the quoted requirement text is shown
# as part of the SyntaxError -- confirm against the oldest supported Python.
# pylint: disable=multiple-statements
async def _(): return f'{await "# Python >= 3.7 is required #"}'
del _  # the probe is not part of the package namespace

# vim:ts=4 sts=4 sw=4 et
'''
English articles
'''

from lib import phonetics

accents = str.join('', phonetics.accents)

def choose_art(phonemes):
    '''
    pick the indefinite article for a phoneme string:
    return "a" if the first phoneme is a consonant,
    "an" if it is a vowel,
    NotImplemented if there is no first phoneme or it is in neither set
    '''
    # the characters from phonetics.accents are skipped at the edges
    sounds = phonemes.strip(accents)
    if not sounds:
        return NotImplemented
    first = sounds[0]
    # consonants are tested first, exactly like the vowels/consonants
    # lookup order this function has always used
    if first in phonetics.consonants:
        return 'a'
    if first in phonetics.vowels:
        return 'an'
    return NotImplemented

__all__ = ['choose_art']

# vim:ts=4 sts=4 sw=4 et
'''
anorack CLI
'''

import argparse
import signal
import sys

from lib.articles import choose_art
from lib.io import open_file
from lib.misc import coerce_case, warn
from lib.parser import parse_file
from lib.phonetics import init as init_phonetics, text_to_phonemes
from lib.version import __version__

class ArgumentParser(argparse.ArgumentParser):
    '''
    argparse.ArgumentParser that turns every failure status into 1
    '''

    def exit(self, status=0, message=None):
        # a truthy status (argparse uses 2 for usage errors) becomes 1;
        # a falsy one is passed through untouched
        super().exit(status=(1 if status else status), message=message)

class VersionAction(argparse.Action):
    '''
    argparse action for --version:
    print program, Python and eSpeak versions, then exit
    '''

    def __init__(self, option_strings, dest=argparse.SUPPRESS):
        super().__init__(
            option_strings,
            dest,
            nargs=0,
            help='show version information and exit'
        )

    def __call__(self, parser, namespace, values, option_string=None):
        from lib import espeak  # pylint: disable=import-outside-toplevel
        major, minor, micro = sys.version_info[:3]
        print(f'{parser.prog} {__version__}')
        print(f'+ Python {major}.{minor}.{micro}')
        flavour = 'eSpeak NG' if espeak.ng else 'eSpeak'
        print(f'+ {flavour} {espeak.version}')
        parser.exit()

def check_word(loc, art, word, *, ipa=False):
    '''
    check the article used before the word;
    print a correction and return False if the article is wrong,
    return True otherwise (also when the right article is unknown,
    in which case a warning is emitted)
    '''
    phon = text_to_phonemes(word, ipa=ipa)
    expected = choose_art(phon)
    if expected is NotImplemented:
        warn(f"can't determine correct article for {word!r} /{phon}/")
        return True
    if art.lower() == expected:
        return True
    # the suggestion follows the letter case of the original article
    suggestion = coerce_case(art, expected)
    print(f'{loc}: {art} {word} -> {suggestion} {word} /{phon}/')
    return False

def main():
    '''
    command-line entry point;
    exit status: 1 if any file could not be opened,
    otherwise 2 if -e was given and a wrong article was found,
    otherwise 0
    '''
    # Python ignores SIGPIPE by default; go back to the system default
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    ap = ArgumentParser(description='"a" vs "an" checker')
    ap.add_argument('--version', action=VersionAction)
    ap.add_argument('-e', action='store_true', help='exit with non-zero status if issues were found')
    ap.add_argument('--ipa', action='store_true', help='use IPA instead of ASCII phoneme mnemonics')
    ap.add_argument('--traceback', action='store_true', help=argparse.SUPPRESS)
    ap.add_argument('files', metavar='FILE', nargs='*', default=['-'],
        help='file to check (default: stdin)')
    options = ap.parse_args()
    init_phonetics()
    all_correct = True
    open_failed = False
    for path in options.files:
        try:
            file = open_file(path, encoding=sys.stdout.encoding, errors='replace')
        except OSError as exc:
            if options.traceback:
                raise
            print(f'{ap.prog}: {path}: {exc.strerror}', file=sys.stderr)
            # keep going: the remaining files are still worth checking
            open_failed = True
            continue
        with file:
            for loc, art, word in parse_file(file):
                if not check_word(loc, art, word, ipa=options.ipa):
                    all_correct = False
    if open_failed:
        sys.exit(1)
    if options.e and not all_correct:
        sys.exit(2)
    sys.exit(0)

__all__ = ['main']

# vim:ts=4 sts=4 sw=4 et
'''
interface to eSpeak (NG)
'''

import ctypes
import itertools

# eSpeak NG is tried first; the original eSpeak library is the fallback.
# If neither can be loaded, the OSError from the second attempt propagates.
try:
    _shlib = ctypes.CDLL('libespeak-ng.so.1')
    ng = True
except OSError:  # no coverage
    _shlib = ctypes.CDLL('libespeak.so.1')
    ng = False

# const char *espeak_Info(const char **path_data)
_info = _shlib.espeak_Info
_info.argtypes = [ctypes.POINTER(ctypes.c_char_p)]
_info.restype = ctypes.c_char_p

def info():
    '''
    return eSpeak version information
    '''
    # espeak_Info() wants somewhere to store the data path; it is not used here
    dummy = ctypes.c_char_p(b'')
    res = _info(ctypes.byref(dummy))
    return res.decode('ASCII')
# only the first whitespace-separated word of the info string is kept
version = info().split()[0]
del info

def _version_component(text):
    '''
    convert one dot-separated version component to int;
    a non-numeric suffix (as in "52-dev") is ignored;
    raise ValueError if the component does not start with a digit
    '''
    try:
        return int(text)
    except ValueError:
        digits = str.join('', itertools.takewhile(str.isdecimal, text))
        if not digits:
            raise
        return int(digits)

def vcmp(v1, v2):
    '''
    cmp()-style version comparison:
    return 1 if v1 > v2, -1 if v1 < v2, 0 if they are equal;
    components are compared numerically, missing ones count as 0,
    and a non-numeric suffix of a component (e.g. "1.52-dev") is ignored
    '''
    parts1 = v1.split('.')
    parts2 = v2.split('.')
    for c1, c2 in itertools.zip_longest(parts1, parts2, fillvalue=0):
        # This function is called on the library's version at import time,
        # so a version string with a suffix must not blow up here.
        c1 = _version_component(c1)
        c2 = _version_component(c2)
        if c1 > c2:
            return 1
        elif c1 < c2:
            return -1
    return 0

# int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options)
_initialize = _shlib.espeak_Initialize
_initialize.argtypes = [ctypes.c_int, ctypes.c_int, ctypes.c_char_p, ctypes.c_int]
_initialize.restype = ctypes.c_int

def init():
    '''
    initialize eSpeak;
    raise RuntimeError if espeak_Initialize() returns a non-positive value
    '''
    rc = _initialize(0, 0, None, 0)
    if rc <= 0:
        raise RuntimeError('espeak_Initialize(): internal error')  # no coverage
# espeak_ERROR espeak_SetVoiceByName(const char *name)
_set_voice_by_name = _shlib.espeak_SetVoiceByName
_set_voice_by_name.argtypes = [ctypes.c_char_p]
_set_voice_by_name.restype = ctypes.c_int

def set_voice_by_name(s):
    '''
    use this voice for synthesis;
    s is the voice name (ASCII-only str);
    raise RuntimeError if eSpeak reports an error
    '''
    s = s.encode('ASCII')
    rc = _set_voice_by_name(s)
    if rc == 0:
        return
    else:  # no coverage
        if rc == -1:
            msg = 'internal error'
        elif rc == 1:
            msg = 'the command could not be buffered'
        else:
            msg = f'unknown error {rc}'
        raise RuntimeError('espeak_SetVoiceByName(): ' + msg)

# espeak_TextToPhonemes() changed its signature in 1.48.1,
# so the wrapper is chosen once, at import time, from the library version.
if vcmp(version, '1.48.1') >= 0:

    # const char *espeak_TextToPhonemes(const void **textptr, int textmode, int phonememode)
    _text_to_phonemes = _shlib.espeak_TextToPhonemes
    _text_to_phonemes.restype = ctypes.c_char_p
    _text_to_phonemes.argtypes = [ctypes.POINTER(ctypes.c_char_p), ctypes.c_int, ctypes.c_int]

    def text_to_phonemes(s, *, ipa=False):
        '''
        translate text to phonemes;
        return them as str, in IPA if ipa is true,
        in ASCII phoneme mnemonics otherwise
        '''
        s = s.encode('UTF-8')
        z = ctypes.c_char_p(s)
        zptr = ctypes.pointer(z)
        assert zptr.contents is not None
        # the IPA flag is bit 1 of phonememode from 1.48.11 on, bit 4 before
        if vcmp(version, '1.48.11') >= 0:
            ipa = ipa << 1
        else:
            ipa = ipa << 4  # no coverage
        res = _text_to_phonemes(zptr, 1, ipa)
        # NOTE(review): eSpeak presumably resets *textptr to NULL once the
        # whole text has been translated; anything else is treated as
        # a failure -- confirm against speak_lib.h
        if zptr.contents.value is not None:
            raise RuntimeError  # no coverage
        return res.decode('UTF-8').strip()

elif vcmp(version, '1.47.08') >= 0:  # no coverage

    # void espeak_TextToPhonemes(const void *text, char *buffer, int size, int textmode, int phonememode)
    _text_to_phonemes = _shlib.espeak_TextToPhonemes
    # The function returns void: with a c_char_p restype, ctypes would
    # interpret an undefined return value as a pointer to a C string.
    _text_to_phonemes.restype = None
    _text_to_phonemes.argtypes = [
        ctypes.c_char_p,
        ctypes.POINTER(ctypes.c_char), ctypes.c_int,
        ctypes.c_int, ctypes.c_int
    ]

    def text_to_phonemes(s, *, ipa=False):
        '''
        translate text to phonemes;
        return them as str, in IPA if ipa is true,
        in ASCII phoneme mnemonics otherwise
        '''
        s = s.encode('UTF-8')
        # eSpeak writes the result into a caller-provided buffer of this size
        bufsize = 250
        buf = ctypes.create_string_buffer(bufsize)
        _text_to_phonemes(s, buf, bufsize, 1, ipa << 4)
        return buf.value.decode('UTF-8').strip()

else:  # no coverage

    raise RuntimeError('eSpeak >= 1.47.08 is required')

__all__ = [
    'init',
    'ng',
    'set_voice_by_name',
    'text_to_phonemes',
    'version',
]

# vim:ts=4 sts=4 sw=4 et
'''
I/O and encodings
'''

import io
import sys

def open_file(path, *, encoding, errors):
    '''
    open a text file for reading, treating "-" as standard input

    For "-", the binary buffer of the current sys.stdin is wrapped in a new
    text layer; any other path is passed to open().  In both cases the
    requested encoding and error handler are applied.
    '''
    text_opts = {'encoding': encoding, 'errors': errors}
    if path != '-':
        # the caller is responsible for closing the returned file
        return open(path, 'rt', **text_opts)  # pylint: disable=consider-using-with
    return io.TextIOWrapper(sys.stdin.buffer, **text_opts)

__all__ = [
    'open_file',
]

# vim:ts=4 sts=4 sw=4 et

# --------------------------------------------------------------------------------
# /lib/misc.py:
# --------------------------------------------------------------------------------
# Copyright © 2016-2022 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
'''
miscellanea
'''

import os
import sys

def warn(msg):
    '''
    write "PROG: warning: MSG" to standard error,
    where PROG is the base name of sys.argv[0]
    '''
    script = os.path.basename(sys.argv[0])
    message = f'{script}: warning: {msg}'
    print(message, file=sys.stderr)

def _coerce_case(src, word):
    '''
    return word upper-cased if src is upper-case, lower-cased otherwise
    (this simple helper has no notion of title-case)
    '''
    return word.upper() if src.isupper() else word.lower()

def coerce_case(src, word):
    '''
    make word follow the letter case of src

    The first character and the remainder are coerced separately,
    which is what makes title-case ("An") work.
    '''
    head = _coerce_case(src[:1], word[:1])
    tail = _coerce_case(src[1:], word[1:])
    return head + tail

__all__ = [
    'warn',
    'coerce_case',
]

# vim:ts=4 sts=4 sw=4 et

# --------------------------------------------------------------------------------
# /lib/parser.py:
# --------------------------------------------------------------------------------
# Copyright © 2016-2024 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | 21 | ''' 22 | English parser 23 | ''' 24 | 25 | import re 26 | 27 | class Location: 28 | ''' 29 | location in a file 30 | ''' 31 | def __init__(self, file, lineno): 32 | self.file = file 33 | self.lineno = lineno 34 | 35 | def __str__(self): 36 | return f'{self.file.name}:{self.lineno}' 37 | 38 | find_articles = re.compile( 39 | r'''(?,
, ) tuples 47 | ''' 48 | carry = '' 49 | for i, line in enumerate(file, start=1): 50 | cline = carry + line 51 | carry = '' 52 | for match in find_articles(cline): 53 | art, word, eol_art = match.groups() 54 | if art is not None: 55 | assert word is not None 56 | yield (Location(file, i), art, word) 57 | else: 58 | assert eol_art is not None 59 | carry = eol_art + ' ' 60 | 61 | __all__ = ['parse_file'] 62 | 63 | # vim:ts=4 sts=4 sw=4 et 64 | -------------------------------------------------------------------------------- /lib/phonetics.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2016-2022 Jakub Wilk 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the “Software”), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 
'''
English phonetics
'''

import functools
import os

# Phoneme symbols; in each set the first string holds ASCII symbols
# and the second one holds IPA symbols.
consonants = frozenset(
    'DNSTZbdfghjklmnprstvwz'
    'ðŋʃθʒbdfɡhjkɬmnpɹstvwz'
)
vowels = frozenset(
    '03@AEIOUVaeiou'
    'ɒɜəɑɛɪɔʊʌɐeiɔu'
)
accents = frozenset(
    ",'"
    "ˌˈ"
)

# speech engine module; bound by init()
espeak = None
# lower-cased word -> replacement text, filled in by init()
overrides = {}

def init():
    '''
    set up the speech engine and load the pronunciation overrides
    '''
    global espeak  # pylint: disable=global-statement,global-variable-not-assigned
    from lib import espeak  # pylint: disable=redefined-outer-name,import-outside-toplevel
    espeak.init()
    espeak.set_voice_by_name('en')
    here = os.path.dirname(__file__)
    # Ideally false positives should be fixed in eSpeak,
    # but as a stop-gap measure, we carry data file to correct some of them.
    path = f'{here}/../data/overrides'
    with open(path, 'rt', encoding='UTF-8') as table:
        for entry in table:
            # each line is: WORD <tab> REPLACEMENT
            key, replacement = entry.strip().split('\t', 1)
            overrides[key.lower()] = replacement

@functools.lru_cache(maxsize=9999)
def text_to_phonemes(s, *, ipa=False):
    '''
    return the phonemic transcription of s

    The text is first replaced by its override, if there is one.
    Text of the form "[[...]]<tab>[[...]]" is taken as a ready-made
    transcription: the first field is used by default, the second one
    when ipa is true.  Anything else is handed to the speech engine.
    '''
    s = overrides.get(s.lower(), s)
    if not (s.startswith('[[') and s.endswith(']]')):
        return espeak.text_to_phonemes(s, ipa=ipa)
    variants = s.split('\t')
    # strip the surrounding "[[" and "]]"
    return variants[ipa][2:-2]

__all__ = [
    'consonants',
    'vowels',
    'accents',
    'text_to_phonemes',
]

# vim:ts=4 sts=4 sw=4 et

# --------------------------------------------------------------------------------
# /lib/version.py:
# --------------------------------------------------------------------------------
'''anorack's version'''

__version__ = '0.3.1'

# --------------------------------------------------------------------------------
# /private/build-source-tarball:
# --------------------------------------------------------------------------------
#!/bin/sh

| # Copyright © 2014-2022 Jakub Wilk 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the “Software”), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | set -e -u 24 | cd "${0%/*}/.." 25 | if [ $# -ge 2 ] 26 | then 27 | printf '%s [commit]\n' "$0" >&2 28 | exit 1 29 | fi 30 | if ! [ -d .git ] 31 | then 32 | printf '%s requires git checkout\n' "$0" >&2 33 | exit 1 34 | fi 35 | pwd="$PWD" 36 | commit=${1:-HEAD} 37 | commit=$(git describe "$commit") 38 | name=$(git cat-file blob "$commit:doc/changelog" | head -n1 | cut -d ' ' -f 1) 39 | date=$(git rev-list -1 --format=%cI "$commit" | grep -o '^....-..-..' 
| tr -d -) 40 | version=$(git cat-file blob "$commit:doc/changelog" | head -n1 | cut -d ' ' -f2 | tr -d '()') 41 | released=$(git cat-file blob "$commit:doc/changelog" | head -n1 | grep -v -w UNRELEASED || true) 42 | [ -n "$released" ] || version="${version}rc${date}" 43 | printf 'Commit: %s\nVersion: %s\n' "$commit" "$version" >&2 44 | set -x 45 | sourceroot=$(mktemp -d -t "$name-source-XXXXXX") 46 | tar_opts='--owner root --group root --mode u=rwX,go=rX --format ustar --sort name' 47 | gzip='gzip -9 -n' 48 | git archive "$commit" --prefix="$name-$version/" | tar -C "$sourceroot" -xf - 49 | cd "$sourceroot"/* 50 | rm -r .git* 51 | rm private/build-source-tarball 52 | rm README.rst # shipped as doc/README 53 | mv LICENSE doc/ 54 | make -C doc/ 55 | cd .. 56 | mkdir -p "$pwd/dist" 57 | tar $tar_opts -I "$gzip" -cf "$pwd/dist/$name-$version.tar.gz" */ 58 | rm -r "$sourceroot" 59 | set +x 60 | cd "$pwd" 61 | ls -d "dist/$name-$version.tar.gz" 62 | 63 | # vim:ts=4 sts=4 sw=4 et 64 | -------------------------------------------------------------------------------- /private/run-pylint: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright © 2015-2018 Jakub Wilk 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the “Software”), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 
14 | # 15 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | set -e -u 24 | 25 | PYTHON=${PYTHON:-python3} 26 | "$PYTHON" -m pylint --version >/dev/null || exit 1 27 | if [ $# -eq 0 ] 28 | then 29 | pyscripts=$(grep -l -r '^#!.*python' .) 30 | set -- lib tests $pyscripts 31 | fi 32 | if [ -n "${VIRTUAL_ENV:-}" ] 33 | then 34 | # https://github.com/PyCQA/pylint/issues/73 35 | set -- --ignored-modules=distutils "$@" 36 | fi 37 | log=$(mktemp -t pylint.XXXXXX) 38 | "$PYTHON" -m pylint "$@" > "$log" || [ $? != 1 ] 39 | ! grep -P '^\S+:' "$log" \ 40 | | grep -v -P '^(?!lib/).*: missing-(\w+-)?docstring ' \ 41 | | grep '.' 
|| exit 1 42 | rm "$log" 43 | 44 | # vim:ts=4 sts=4 sw=4 et 45 | -------------------------------------------------------------------------------- /private/update-coverage: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright © 2021 Jakub Wilk 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the “Software”), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | set -e -u 24 | cd "${0%/*}/.." 
25 | true ${PYTHON:=python3} 26 | "$PYTHON" -m coverage run -m unittest discover 27 | cat > tests/coverage.tmp <> tests/coverage.tmp 32 | mv tests/coverage.tmp tests/coverage 33 | 34 | # vim:ts=4 sts=4 sw=4 et 35 | -------------------------------------------------------------------------------- /private/update-version: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e -u 3 | version=${1:?"no version number provided"} 4 | date="$(date -u --rfc-3339=date)" 5 | PS4='$ ' 6 | set -x 7 | dch -m -v "$version" -u low -c doc/changelog 8 | export version date 9 | perl -pi -e 's/^__version__ = '"'"'\K[\w.]+/$ENV{version}/' lib/version.py 10 | perl -pi -e 's/^:version: \S+ \K[\w.]+/$ENV{version}/; s/^(:date:) \K[0-9-]+/$ENV{date}/' doc/manpage.rst 11 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=multiple-statements 2 | async def _(): return f'{await "# Python >= 3.7 is required #"}' 3 | del _ 4 | -------------------------------------------------------------------------------- /tests/coverage: -------------------------------------------------------------------------------- 1 | Generated automatically by private/update-coverage. Do not edit. 
2 | 3 | Name Stmts Miss Branch BrPart Cover Missing 4 | -------------------------------------------------------------- 5 | lib/__init__.py 1 0 0 0 100% 6 | lib/articles.py 14 0 4 0 100% 7 | lib/cli.py 64 0 16 0 100% 8 | lib/espeak.py 55 0 6 0 100% 9 | lib/io.py 8 0 2 0 100% 10 | lib/misc.py 13 0 2 0 100% 11 | lib/parser.py 22 0 6 0 100% 12 | lib/phonetics.py 27 0 8 0 100% 13 | lib/version.py 1 0 0 0 100% 14 | -------------------------------------------------------------- 15 | TOTAL 205 0 44 0 100% 16 | -------------------------------------------------------------------------------- /tests/test_choose_art.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2016-2021 Jakub Wilk 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the “Software”), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 
from tests.tools import (
    assert_equal,
    testcase,
)

import lib.articles as M

# Each test feeds a phonemic transcription (ASCII or IPA notation)
# to choose_art() and checks which article is picked.

@testcase
def test_choose_a():
    assert_equal(M.choose_art("sp'am"), 'a')

@testcase
def test_choose_a_ipa():
    assert_equal(M.choose_art("θˈɜːmɪdˌɔː"), 'a')

@testcase
def test_choose_an():
    assert_equal(M.choose_art("'Eg"), 'an')

@testcase
def test_choose_an_ipa():
    assert_equal(M.choose_art("ˈɛɡ"), 'an')

@testcase
def test_choose_other():
    # neither an unknown symbol nor empty input yields an article
    for phon in ('%', ''):
        assert_equal(M.choose_art(phon), NotImplemented)

del testcase

# vim:ts=4 sts=4 sw=4 et

# --------------------------------------------------------------------------------
# /tests/test_cli.py:
# --------------------------------------------------------------------------------
# Copyright © 2016-2024 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import contextlib
import errno
import io
import os
import sys
import tempfile
import unittest.mock

from tests.tools import (
    assert_equal,
    assert_is_instance,
    assert_not_equal,
    isolation,
    testcase,
)

@contextlib.contextmanager
def tmpcwd():
    # run the enclosed code inside a fresh temporary directory,
    # restoring the previous working directory afterwards
    with tempfile.TemporaryDirectory(prefix='anorack.tests.') as tmpdir:
        orig_cwd = os.getcwd()
        os.chdir(tmpdir)
        try:
            yield
        finally:
            os.chdir(orig_cwd)

def TextIO(s=None, *, name):
    # in-memory UTF-8 text stream whose underlying buffer carries the given name
    fp = io.BytesIO(s)
    fp.name = name
    return io.TextIOWrapper(fp, encoding='UTF-8')

class CompletedProcess:
    # outcome of a single main() run: exit status (or the OSError that
    # escaped) plus captured standard output and standard error
    def __init__(self, rc, stdout, stderr):
        self.rc = rc
        self.stdout = stdout
        self.stderr = stderr

def __run_main(argv, stdin):
    # Generator yielding, in order: return code, stdout text, stderr text.
    # It replaces sys.argv and the standard streams; the caller is
    # responsible for restoring them.
    sys.argv = argv
    if stdin is not None:
        if isinstance(stdin, str):
            stdin = stdin.encode('UTF-8')
        sys.stdin = mock_stdin = TextIO(stdin, name=sys.__stdin__.name)
    else:
        mock_stdin = None
    sys.stdout = mock_stdout = TextIO(name=sys.__stdout__.name)
    sys.stderr = mock_stderr = TextIO(name=sys.__stderr__.name)
    import lib.cli  # pylint: disable=bad-option-value,import-outside-toplevel
    rc = 0
    try:
        lib.cli.main()
    except SystemExit as exc:
        rc = exc.code
    except OSError as exc:
        rc = exc
    yield rc
    for fp in (sys.stdout, sys.stderr):
        fp.flush()
        s = fp.buffer.getvalue()  # pylint: disable=no-member
        yield s.decode('UTF-8')
    del mock_stdin, mock_stdout, mock_stderr

def _run_main(argv, stdin):
    # abuse mock to save&restore sys.argv, sys.stdin, etc.:
    with unittest.mock.patch.multiple(sys, argv=None, stdin=None, stdout=None, stderr=None):
        return CompletedProcess(*__run_main(argv, stdin))

# NOTE(review): isolation() comes from tests.tools; presumably it runs the
# wrapped function in a separate process -- confirm there.
run_main = isolation(_run_main)

def t(*, stdin=None, files=None, stdout, stdout_ipa=None, stderr='', stderr_ipa=None):
    # Run anorack twice -- without and with --ipa -- and compare the output
    # against the expectations.  The IPA expectations default to the
    # non-IPA ones.  The given files are created in the current directory
    # and passed as arguments.
    if stdout_ipa is None:
        stdout_ipa = stdout
    if stderr_ipa is None:
        stderr_ipa = stderr
    argv = ['anorack']
    if files is not None:
        for (name, content) in files:
            with open(name, 'wt', encoding='UTF-8') as file:
                file.write(content)
            argv += [name]
    actual = run_main(argv, stdin)
    # "-@" and "@" are treated as equivalent in the ASCII transcription
    if '-@' in stdout:
        stdout = stdout.replace('-@', '@')
        actual.stdout = actual.stdout.replace('-@', '@')
    assert_equal(stdout, actual.stdout)
    assert_equal(stderr, actual.stderr)
    assert_equal(actual.rc, 0)
    argv += ['--ipa']
    actual = run_main(argv, stdin)
    # the tie bar in the affricate is ignored
    actual.stderr = actual.stderr.replace('t͡ʃ', 'tʃ')
    assert_equal(stdout_ipa, actual.stdout)
    assert_equal(stderr_ipa, actual.stderr)
    assert_equal(actual.rc, 0)

@testcase
def test_stdin():
    # locations are prefixed with the name of the input stream, "<stdin>"
    t(
        stdin=(
            'It could be carried by an African swallow!\n'
            'Oh, yeah, a African swallow maybe, but not an\n'
            'European swallow.\n'
        ),
        stdout=(
            "<stdin>:2: a African -> an African /'afrIk@n/\n"
            "<stdin>:3: an European -> a European /j,U@r-@p'i@n/\n"
        ),
        stdout_ipa=(
            "<stdin>:2: a African -> an African /ˈafɹɪkən/\n"
            "<stdin>:3: an European -> a European /jˌʊəɹəpˈiən/\n"
        ),
    )

@testcase
@tmpcwd()
def test_files():
    t(
        files=(
            ('holy', 'It could be carried by a African swallow!'),
            ('grail', 'Oh, yeah, an African swallow maybe, but not an European swallow.'),
        ),
        stdout=(
            "holy:1: a African -> an African /'afrIk@n/\n"
            "grail:1: an European -> a European /j,U@r-@p'i@n/\n"
        ),
        stdout_ipa=(
            "holy:1: a African -> an African /ˈafɹɪkən/\n"
            "grail:1: an European -> a European /jˌʊəɹəpˈiən/\n"
        ),
    )

@testcase
def test_warning():
    def dummy_choose_art(phon):
        del phon
        return NotImplemented
    with unittest.mock.patch('lib.cli.choose_art', dummy_choose_art):
        t(
            stdin='A scratch?!',
            stdout='',
            stderr="anorack: warning: can't determine correct article for 'scratch' /skr'atS/\n",
            stderr_ipa="anorack: warning: can't determine correct article for 'scratch' /skɹˈatʃ/\n",
        )

@testcase
def test_bad_io():
    argv = ['anorack', '/nonexistent', '-']
    actual = run_main(argv, 'a African')
    # standard input is still processed despite the unreadable file;
    # '<stdin>:' is exactly the 8 characters being compared
    assert_equal('<stdin>:', actual.stdout[:8])
    err = os.strerror(errno.ENOENT)
    stderr = f'{argv[0]}: {argv[1]}: {err}\n'
    assert_equal(stderr, actual.stderr)
    assert_equal(actual.rc, 1)
    argv[1:1] = ['--traceback']
    actual = run_main(argv, 'a African')
    assert_equal('', actual.stdout)
    assert_equal('', actual.stderr)
    assert_is_instance(actual.rc, OSError)
    assert_equal(actual.rc.errno, errno.ENOENT)

@testcase
def test_e():
    argv = ['anorack', '-e']
    actual = run_main(argv, 'a African')
    assert_equal('<stdin>:', actual.stdout[:8])
    assert_equal('', actual.stderr)
    assert_equal(actual.rc, 2)

@testcase
def test_changelog():
    argv = ['anorack', 'doc/changelog']
    actual = run_main(argv, None)
    assert_equal('', actual.stdout)
    assert_equal('', actual.stderr)
    assert_equal(actual.rc, 0)

@testcase
def test_version():
    argv = ['anorack', '--version']
    actual = run_main(argv, None)
    assert_not_equal('', actual.stdout)
    assert_equal('', actual.stderr)
    assert_equal(actual.rc, 0)

@testcase
def test_bad_arg():
    argv = ['anorack', '---']
    actual = run_main(argv, None)
    assert_equal('', actual.stdout)
    assert_not_equal('', actual.stderr)
    assert_equal(actual.rc, 1)

del testcase

# vim:ts=4 sts=4 sw=4 et

# --------------------------------------------------------------------------------
# /tests/test_coerce_case.py:
# --------------------------------------------------------------------------------
# Copyright © 2016-2021 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from tests.tools import (
    assert_equal,
    testcase,
)

import lib.misc as M

def t(src, word, exp):
    # coerce word to the case of src and compare with the expected result
    assert_equal(exp, M.coerce_case(src, word))

@testcase
def test_a():
    # (source article, expected spelling of "a")
    cases = (
        ('a', 'a'),
        ('A', 'A'),
        ('an', 'a'),
        ('An', 'A'),
        ('AN', 'A'),
    )
    for src, exp in cases:
        t(src, 'a', exp)

@testcase
def test_an():
    # (source article, expected spelling of "an")
    cases = (
        ('a', 'an'),
        ('A', 'An'),
        ('an', 'an'),
        ('An', 'An'),
        ('AN', 'AN'),
    )
    for src, exp in cases:
        t(src, 'an', exp)

del testcase

# vim:ts=4 sts=4 sw=4 et

# --------------------------------------------------------------------------------
# /tests/test_open_file.py:
# --------------------------------------------------------------------------------
# Copyright © 2016-2021 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import os
import sys
# "import unittest" alone does not load the unittest.mock submodule;
# import it explicitly so that this module also works when run on its own.
import unittest.mock

from tests.tools import (
    assert_equal,
    testcase,
)

import lib.io as M

def t(path):
    # open path with a non-default encoding and error handler,
    # and check that both were applied to the resulting file object
    encoding = 'ISO-8859-2'
    errors = 'xmlcharrefreplace'
    with M.open_file(path, encoding=encoding, errors=errors) as file:
        assert_equal(file.encoding, encoding)
        assert_equal(file.errors, errors)

@testcase
def test_open_real_file():
    t(os.devnull)

@testcase
def test_open_stdin():
    # make sure sys.stdin is the real standard input while "-" is opened
    with unittest.mock.patch('sys.stdin', sys.__stdin__):
        t('-')

del testcase

# vim:ts=4 sts=4 sw=4 et

# --------------------------------------------------------------------------------
# /tests/test_parse_file.py:
# --------------------------------------------------------------------------------
# Copyright © 2016-2021 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import io

from tests.tools import (
    assert_equal,
    assert_is,
    assert_is_instance,
    testcase,
)

import lib.parser as M

def t(s, exp):
    """Parse the text s and compare the findings with exp.

    exp is either a single (lineno, article, word) tuple or a list of such
    tuples.  Every item produced by M.parse_file must be a
    (location, article, word) triple whose location is an M.Location bound
    to the parsed file object.
    """
    expected = [exp] if isinstance(exp, tuple) else exp
    stream = io.StringIO(s)
    found = list(M.parse_file(stream))
    assert_equal(len(found), len(expected))
    for got, want in zip(found, expected):
        loc, art, word = got
        xi, xart, xword = want
        assert_is_instance(loc, M.Location)
        assert_is(loc.file, stream)
        assert_equal(loc.lineno, xi)
        assert_equal(xart, art)
        assert_equal(xword, word)

@testcase
def test_mid_line():
    """Articles in the middle of two consecutive lines."""
    text = 'Oh, yeah, an African swallow maybe,\nbut not a European swallow.\n'
    findings = [
        (1, 'an', 'African'),
        (2, 'a', 'European'),
    ]
    t(text, findings)

@testcase
def test_wrapped():
    """Article at the end of one line, word at the start of the next."""
    text = 'I thought we were an\nautonomous collective.'
    t(text, (2, 'an', 'autonomous'))

@testcase
def test_quotes():
    """The word is wrapped in four different kinds of quotation marks."""
    text = ''.join([
        "a 'scratch'\n",
        'a ‘scratch’\n',
        'a "scratch"\n',
        'a “scratch”\n',
    ])
    findings = [(lineno, 'a', 'scratch') for lineno in range(1, 5)]
    t(text, findings)

@testcase
def test_underscore():
    """Only the part before the underscore is expected as the word."""
    t('a European_swallow', (1, 'a', 'European'))

@testcase
def test_numbers():
    """Digits and letters are expected to be reported separately."""
    for text in ('an 8', 'an 8bit', 'an 8-bit', 'an 8 bit'):
        t(text, (1, 'an', '8'))
    for text in ('a UTF16', 'a UTF-16', 'a UTF 16'):
        t(text, (1, 'a', 'UTF'))

@testcase
def test_inner_apostrophe():
    """An 'a' after a word-internal apostrophe yields no findings."""
    for text in ("Esra'a Al Shafei", 'Esra’a Al Shafei'):
        t(text, [])

# Remove the decorator from the module namespace once it has been applied.
del testcase

# vim:ts=4 sts=4 sw=4 et
-------------------------------------------------------------------------------- /tests/test_phonetics.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2016-2021 Jakub Wilk 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the “Software”), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 
from tests.tools import (
    assert_equal,
    isolation,
    testcase,
    TestCase,
)

import lib.phonetics as M

def __test(word, xphon, xipa):
    """Check both phoneme renderings of word against the expected strings.

    xphon is compared with the default output of M.text_to_phonemes and
    xipa with its ipa=True output.
    """
    M.init()
    assert_equal(xphon, M.text_to_phonemes(word))
    raw_ipa = M.text_to_phonemes(word, ipa=True)
    # The tied spelling 't͡ʃ' is folded into plain 'tʃ' before comparing.
    # NOTE(review): presumably the two spellings come from different
    # engine versions; confirm.
    assert_equal(xipa, raw_ipa.replace('t͡ʃ', 'tʃ'))
# Wrapped variant of the checker (see tests.tools.isolation).
_test = isolation(__test)

class test_overrides(TestCase):
    """Container class; its test methods are attached below."""

def _init_test_overrides():
    """Attach one static test method per table row to test_overrides."""
    def add(word, xphon, xipa):
        # A dedicated closure per row keeps word/xphon/xipa bound to the
        # values of that row.
        def test():
            _test(word, xphon, xipa)
        setattr(test_overrides, f'test_{word}', staticmethod(test))
    table = (
        ('EWMH', ",i:d,Vb@Lj,u:,Em'eItS", 'ˌiːdˌʌbəljˌuːˌɛmˈeɪtʃ'),
        ('UCS', "j,u:s,i:;'Es", 'jˌuːsˌiːˈɛs'),
        ('UDP', "j,u:d,i:p'i:", 'jˌuːdˌiːpˈiː'),
        ('UPS', "j,u:p,i:;'Es", 'jˌuːpˌiːˈɛs'),
        ('UTF', "j,u:t,i:;'Ef", 'jˌuːtˌiːˈɛf'),
        ('UTS', "j,u:t,i:;'Es", 'jˌuːtˌiːˈɛs'),
        ('UUID', "j,u:j,u:,aId'i:", 'jˌuːjˌuːˌaɪdˈiː'),
        ('UUIDv', "j,u:j,u:,aId'i: v'i:", 'jˌuːjˌuːˌaɪdˈiː vˈiː'),
        ('src', "s'o@s", 'sˈɔːs'),
        ('unary', "j'un@ri", "jˈunəɹi"),
        ('usr', "j,u:,Es'A@", "jˌuːˌɛsˈɑː"),
    )
    for row in table:
        add(*row)
_init_test_overrides()
del _init_test_overrides

# Remove the unused decorator from the module namespace.
del testcase

# vim:ts=4 sts=4 sw=4 et

# --------------------------------------------------------------------------------
# /tests/test_vcmp.py:
# --------------------------------------------------------------------------------
# Copyright © 2021 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from tests.tools import (
    assert_equal,
    testcase,
)

import lib.espeak as M

def cmp(x, y):
    """Three-way comparison: 1 if x > y, -1 if x < y, otherwise 0."""
    return int(x > y) - int(x < y)

@testcase
def test():
    """M.vcmp must order every pair of versions like their list positions."""
    # Listed in ascending order; the index of an entry is its rank.
    versions = [
        '1',
        '1.04.71',
        '1.47.08',
        '1.48.1',
        '1.48.11',
        '1.48.15',
        '3',
        '3.14',
    ]
    ranked = list(enumerate(versions))
    for i1, v1 in ranked:
        for i2, v2 in ranked:
            assert_equal(M.vcmp(v1, v2), cmp(i1, i2))

# Remove the decorator from the module namespace once it has been applied.
del testcase

# vim:ts=4 sts=4 sw=4 et

# --------------------------------------------------------------------------------
# /tests/test_version.py:
# --------------------------------------------------------------------------------
# Copyright © 2012-2021 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies
# or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import os

from tests.tools import (
    assert_equal,
    testcase,
)

from lib.version import __version__

# Directory of this test module and the sibling doc/ directory.
here = os.path.dirname(__file__)
docdir = os.path.join(here, os.pardir, 'doc')

@testcase
def test_changelog():
    """The first changelog line must carry the package version.

    The version is taken from the second whitespace-separated field of that
    line, with surrounding parentheses stripped.
    """
    with open(os.path.join(docdir, 'changelog'), 'rt', encoding='UTF-8') as file:
        first_line = file.readline()
    version_field = first_line.split()[1]
    assert_equal(version_field.strip('()'), __version__)

@testcase
def test_manpage():
    """The ':version:' field of the manual page must match the package version.

    The last whitespace-separated token of the first line starting with
    ':version:' is used; None is compared if no such line exists.
    """
    with open(os.path.join(docdir, 'manpage.rst'), 'rt', encoding='UTF-8') as file:
        manpage_version = next(
            (line.split()[-1] for line in file if line.startswith(':version:')),
            None,
        )
    assert_equal(manpage_version, __version__)

# Remove the decorator from the module namespace once it has been applied.
del testcase

# vim:ts=4 sts=4 sw=4 et

# --------------------------------------------------------------------------------
# /tests/test_warn.py:
# --------------------------------------------------------------------------------
# Copyright © 2016-2021 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import io
import unittest.mock

from tests.tools import (
    assert_equal,
    testcase,
)

import lib.misc as M

@testcase
def test_warn():
    """M.warn must write one 'anorack: warning: ...' line to sys.stderr.

    sys.stderr is replaced by an in-memory buffer and sys.argv by a fixed
    program path while the warning is issued.
    """
    captured = io.StringIO()
    patch_stderr = unittest.mock.patch('sys.stderr', captured)
    patch_argv = unittest.mock.patch('sys.argv', ['/usr/bin/anorack'])
    with patch_stderr, patch_argv:
        M.warn('NOBODY expects the Spanish Inquisition!')
    expected = 'anorack: warning: NOBODY expects the Spanish Inquisition!\n'
    assert_equal(captured.getvalue(), expected)

# Remove the decorator from the module namespace once it has been applied.
del testcase

# vim:ts=4 sts=4 sw=4 et

# --------------------------------------------------------------------------------
# /tests/tools.py:
# --------------------------------------------------------------------------------
# Copyright © 2016-2022 Jakub Wilk
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software,
and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | # SOFTWARE. 20 | 21 | import concurrent.futures 22 | import functools 23 | import sys 24 | import unittest 25 | 26 | def isolation(f): 27 | if 'coverage' in sys.modules: 28 | # Process isolation would break coverage measurements. 29 | # Oh well. FIXME. 
30 | return f 31 | else: 32 | @functools.wraps(f) 33 | def wrapper(*args, **kwargs): 34 | with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor: 35 | ftr = executor.submit(f, *args, **kwargs) 36 | return ftr.result() 37 | return wrapper 38 | 39 | def testcase(f): 40 | class TestCase(unittest.TestCase): # pylint: disable=redefined-outer-name 41 | @staticmethod 42 | def test(): 43 | return f() 44 | def __str__(self): 45 | return f'{f.__module__}.{f.__name__}' 46 | return TestCase 47 | 48 | tc = unittest.TestCase('__hash__') 49 | 50 | assert_equal = tc.assertEqual 51 | assert_is = tc.assertIs 52 | assert_is_instance = tc.assertIsInstance 53 | assert_not_equal = tc.assertNotEqual 54 | 55 | del tc 56 | 57 | class TestCase(unittest.TestCase): 58 | def __str__(self): 59 | cls = unittest.util.strclass(self.__class__) 60 | name = self._testMethodName 61 | return f'{cls}.{name}' 62 | 63 | __all__ = [ 64 | 'isolation', 65 | 'testcase', 66 | 'TestCase', 67 | # nose-compatible: 68 | 'assert_equal', 69 | 'assert_is', 70 | 'assert_is_instance', 71 | 'assert_not_equal', 72 | ] 73 | 74 | # vim:ts=4 sts=4 sw=4 et 75 | --------------------------------------------------------------------------------