├── .editorconfig ├── abbr ├── exitstatus.py ├── __init__.py ├── clients.py ├── __main__.py ├── scrapers.py └── core.py ├── LICENSE ├── tests ├── test_client.py ├── test_errors.py ├── __init__.py └── test_cli.py ├── .gitignore ├── setup.py └── README.md /.editorconfig: -------------------------------------------------------------------------------- 1 | # https://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = space 6 | indent_size = 4 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | -------------------------------------------------------------------------------- /abbr/exitstatus.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum, unique 2 | 3 | 4 | @unique 5 | class ExitStatus(IntEnum): 6 | """Program exit status code constants.""" 7 | SUCCESS = 0 8 | ERROR = 1 9 | ERROR_TIMEOUT = 2 10 | ERROR_CTRL_C = 130 11 | -------------------------------------------------------------------------------- /abbr/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # ___. ___. .__ .__ 3 | # _____ \_ |__\_ |_________ ____ | | |__| 4 | # \__ \ | __ \| __ \_ __ \ ______ _/ ___\| | | | 5 | # / __ \| \_\ \ \_\ \ | \/ /_____/ \ \___| |_| | 6 | # (____ /___ /___ /__| \___ >____/__| 7 | # \/ \/ \/ \/ 8 | 9 | 10 | __project_name__ = 'abbr-cli' 11 | __description__ = 'A command-line tool to look up abbreviations for terms (and the reverse).' 
12 | __version__ = '1.0.3' 13 | __author__ = 'Mustapha Hadid' 14 | __author_email__ = 'mustapha.hadeed@gmail.com' 15 | __repo_url__ = 'https://github.com/mhadidg/abbr-cli' 16 | __licence__ = 'MIT' 17 | -------------------------------------------------------------------------------- /abbr/clients.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | class BackendClient(object): 5 | 6 | def __init__(self, query: str, reversed_flag: bool = False, order=0): 7 | self.query = query 8 | self.reversed = reversed_flag 9 | self.order = order 10 | 11 | def execute(self) -> tuple[int, str]: 12 | pass 13 | 14 | 15 | class AbbreviationsClient(BackendClient): 16 | base_url = "https://www.abbreviations.com" 17 | 18 | def __init__(self, query: str, reversed_flag: bool = False, order=0): 19 | super().__init__(query, reversed_flag, order) 20 | 21 | def execute(self): 22 | query_type = 1 if self.reversed else 2 # allowed values are 1 (abbr->term) or 2 (term->abbr) 23 | order = self.order # allowed values are 0 (popularity), 1 (alphabetically), and 2 (category) 24 | page = 99999 # page number; special number 99999 means retrieve all pages 25 | params = {'st': self.query, 'o': order, 'qtype': query_type, 'p': page} 26 | response = requests.get(f"{self.base_url}/serp.php", params=params, allow_redirects=False) 27 | return response.status_code, response.text 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Mustapha Hadid 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the 
Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests/test_client.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from io import StringIO 3 | from unittest.mock import patch 4 | 5 | from tests import abbr_cli 6 | 7 | 8 | class TestBackend(unittest.TestCase): 9 | 10 | def setUp(self): 11 | self.stdout_patch = patch('sys.stdout', new_callable=StringIO) 12 | self.stderr_patch = patch('sys.stderr', new_callable=StringIO) 13 | 14 | self.stdout: StringIO = self.stdout_patch.start() 15 | self.stderr: StringIO = self.stderr_patch.start() 16 | 17 | def test_default(self): 18 | abbr_cli("example --only-words") 19 | self.assertTrue(len(self.stdout.getvalue().split()) > 0) 20 | 21 | def test_reversed(self): 22 | abbr_cli("-r ex --only-words") 23 | self.assertTrue(len(self.stdout.getvalue().split()) > 0) 24 | 25 | def test_wrong_flag(self): 26 | abbr_cli("-r example") 27 | self.assertIn("don't use -r flag", self.stdout.getvalue()) 28 | abbr_cli("ex") 29 | self.assertIn("use with -r flag", self.stdout.getvalue()) 30 | 31 | def tearDown(self): 32 | self.stdout_patch.stop() 33 | self.stderr_patch.stop() 34 | 35 | 36 | if __name__ == 
'__main__': 37 | unittest.main() 38 | -------------------------------------------------------------------------------- /abbr/__main__.py: -------------------------------------------------------------------------------- 1 | """Look up abbreviations for terms. 2 | 3 | Usage: 4 | abbr [options] 5 | abbr [options] -r 6 | abbr (-h | --help) 7 | abbr --version 8 | 9 | Options: 10 | --version Show version. 11 | -h --help Show this screen. 12 | -r --reverse Reverse the look up. Find terms for an abbreviation. 13 | -n --limit Limit the number of result [default: 25]. 14 | -m --min-stars Include only items with number of stars equal or above . 15 | Allowed values are 0-5 (inclusive) [default: 0]. 16 | -w --only-words List only the words (terms or abbreviations) without category 17 | and rating. Helpful when used in a bash script. 18 | -c --with-category Include categories along with the word. 19 | 20 | """ 21 | 22 | import sys 23 | 24 | from docopt import docopt 25 | 26 | import abbr 27 | from abbr import core 28 | from abbr.exitstatus import ExitStatus 29 | 30 | 31 | def main(): 32 | try: 33 | exit_status = core.main(docopt(__doc__, version=f'abbr {abbr.__version__}')) 34 | except KeyboardInterrupt: 35 | exit_status = ExitStatus.ERROR_CTRL_C 36 | 37 | sys.exit(exit_status.value) 38 | 39 | 40 | if __name__ == '__main__': 41 | main() 42 | -------------------------------------------------------------------------------- /tests/test_errors.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from io import StringIO 3 | from unittest.mock import patch, MagicMock 4 | 5 | import requests 6 | 7 | from tests import abbr_cli 8 | 9 | 10 | class TestErrors(unittest.TestCase): 11 | 12 | def setUp(self): 13 | self.stdout_patch = patch('sys.stdout', new_callable=StringIO) 14 | self.stderr_patch = patch('sys.stderr', new_callable=StringIO) 15 | self.requests_patch = patch('requests.get') 16 | 17 | self.stdout: StringIO = 
self.stdout_patch.start() 18 | self.stderr: StringIO = self.stderr_patch.start() 19 | self.requests: MagicMock = self.requests_patch.start() 20 | 21 | def test_connection_error(self): 22 | self.requests.side_effect = requests.exceptions.ConnectionError 23 | abbr_cli("example") 24 | self.assertIn("connection failed", self.stderr.getvalue().lower()) 25 | 26 | def test_connection_timeout(self): 27 | self.requests.side_effect = requests.exceptions.ConnectTimeout 28 | abbr_cli("example") 29 | self.assertIn("connection timed out", self.stderr.getvalue().lower()) 30 | 31 | def test_unexpected_error(self): 32 | self.requests.side_effect = RuntimeError 33 | abbr_cli("example") 34 | self.assertIn("unexpected error", self.stderr.getvalue().lower()) 35 | 36 | def tearDown(self) -> None: 37 | self.stdout_patch.stop() 38 | self.stderr_patch.stop() 39 | self.requests_patch.stop() 40 | 41 | 42 | if __name__ == '__main__': 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | from docopt import docopt 2 | 3 | from abbr import __main__ 4 | from abbr.core import main 5 | 6 | _mocked_html = """ 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |

term1

16 | 19 |

20 | category1 21 |

22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 |

TERM2

30 | 33 |

34 | category2 35 |

36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 |

Term3

45 | 48 |

49 | category3 50 |

51 | 52 | 53 |
17 | abbr1 18 |
31 | ABBR2 32 |
46 | Abbr3 47 |
54 | 55 | """ 56 | 57 | 58 | def abbr_cli(args: str): 59 | args = docopt(__main__.__doc__, args.split()) 60 | return main(args) 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | 3 | # Created by https://www.toptal.com/developers/gitignore/api/python 4 | # Edit at https://www.toptal.com/developers/gitignore?templates=python 5 | 6 | ### Python ### 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | cover/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | .pybuilder/ 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | # For a library or package, you might want to ignore these files since the code is 93 | # intended to run in multiple environments; otherwise, check them in: 94 | # .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 104 | __pypackages__/ 105 | 106 | # Celery stuff 107 | celerybeat-schedule 108 | celerybeat.pid 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | .dmypy.json 135 | dmypy.json 136 | 137 | # Pyre type checker 138 | .pyre/ 139 | 140 | # pytype static type analyzer 141 | .pytype/ 142 | 143 | # Cython debug symbols 144 | cython_debug/ 145 | 146 | # End of https://www.toptal.com/developers/gitignore/api/python 147 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Adapted from httpie project (https://github.com/httpie/httpie) 4 | 5 | import codecs 6 | import os 7 | import sys 8 | from distutils.dir_util import remove_tree 9 | 10 | from setuptools import setup, find_packages 11 | 12 | import abbr 13 | 14 | # 'setup.py publish' shortcut. 15 | if sys.argv[-1] == 'publish': 16 | os.system('./setup.py clean --all') 17 | os.system('./setup.py sdist bdist_wheel') 18 | os.system('twine upload dist/*') 19 | sys.exit() 20 | # 'setup.py clean-all' shortcut. 
21 | elif sys.argv[-1] == 'clean-all': 22 | os.system('./setup.py clean --all') 23 | remove_tree('./dist') 24 | remove_tree('./abbr_cli.egg-info') 25 | sys.exit() 26 | 27 | tests_require = [] 28 | 29 | dev_require = [ 30 | *tests_require, 31 | 'twine', 32 | 'wheel', 33 | ] 34 | 35 | install_requires = [ 36 | 'requests>=2.22.0', 37 | 'lxml>=4.5.0', 38 | 'docopt>=0.6.2', 39 | 'colorful>=0.5.4', 40 | 'setuptools', 41 | ] 42 | 43 | install_requires_win_only = [ 44 | 'colorama>=0.2.4', 45 | ] 46 | 47 | # sdist 48 | if 'bdist_wheel' not in sys.argv: 49 | if 'win32' in str(sys.platform).lower(): 50 | # Terminal colors for Windows 51 | install_requires.extend(install_requires_win_only) 52 | 53 | # bdist_wheel 54 | extras_require = { 55 | 'dev': dev_require, 56 | 'test': tests_require, 57 | # https://wheel.readthedocs.io/en/latest/#defining-conditional-dependencies 58 | ':sys_platform == "win32"': install_requires_win_only, 59 | } 60 | 61 | 62 | def long_description(): 63 | with codecs.open('README.md', 'r', encoding='utf8') as file: 64 | return file.read() 65 | 66 | 67 | setup( 68 | name=abbr.__project_name__, 69 | version=abbr.__version__, 70 | description=abbr.__description__, 71 | long_description=long_description(), 72 | long_description_content_type='text/markdown', 73 | author=abbr.__author__, 74 | author_email=abbr.__author_email__, 75 | license=abbr.__licence__, 76 | package_dir={'abbr': 'abbr'}, 77 | packages=find_packages(include=['abbr']), 78 | entry_points={ 79 | 'console_scripts': [ 80 | 'abbr = abbr.__main__:main', 81 | ], 82 | }, 83 | python_requires='>=3.6', 84 | extras_require=extras_require, 85 | install_requires=install_requires, 86 | classifiers=[ 87 | 'Environment :: Console', 88 | 'Development Status :: 5 - Production/Stable', 89 | 'License :: OSI Approved :: Apache Software License', 90 | 'Programming Language :: Python', 91 | 'Programming Language :: Python :: 3 :: Only', 92 | 'Intended Audience :: Education', 93 | 'Intended Audience :: Developers', 
94 | 'Topic :: Software Development', 95 | 'Topic :: Terminals', 96 | 'Topic :: Utilities' 97 | ], 98 | project_urls={ 99 | 'GitHub': 'https://github.com/mhadidg/abbr-cli', 100 | }, 101 | ) 102 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from io import StringIO 3 | from unittest.mock import patch 4 | 5 | from abbr import __main__ 6 | from abbr.clients import AbbreviationsClient 7 | from tests import abbr_cli, _mocked_html 8 | 9 | 10 | class TestCommandLine(unittest.TestCase): 11 | 12 | def setUp(self): 13 | self.stdout_patch = patch('sys.stdout', new_callable=StringIO) 14 | self.stderr_patch = patch('sys.stderr', new_callable=StringIO) 15 | self.client_patch = patch.object(AbbreviationsClient, 'execute', return_value=(200, _mocked_html)) 16 | 17 | self.stdout: StringIO = self.stdout_patch.start() 18 | self.stderr: StringIO = self.stderr_patch.start() 19 | self.client_patch.start() 20 | 21 | def test_help(self): 22 | with self.assertRaises(SystemExit): 23 | abbr_cli("--help") 24 | self.assertEqual(self.stdout.getvalue().strip(), __main__.__doc__.strip()) 25 | 26 | def test_term_to_abbr_is_default(self): 27 | abbr_cli("term --only-words") 28 | self.assertCountEqual(self.stdout.getvalue().split(), ['abbr1', 'abbr2', 'abbr3']) 29 | 30 | def test_reversed(self): 31 | abbr_cli("-r abbr --only-words") 32 | self.assertCountEqual(self.stdout.getvalue().split(), ['Term1', 'Term2', 'Term3']) 33 | 34 | def test_limit(self): 35 | abbr_cli("example --only-words --limit 1") 36 | self.assertEqual(len(self.stdout.getvalue().split()), 1) 37 | 38 | def test_zero_limit_means_unlimited(self): 39 | abbr_cli("example --only-words --limit 0") 40 | self.assertGreater(len(self.stdout.getvalue().split()), 0) 41 | 42 | def test_min_stars(self): 43 | abbr_cli("example --only-words --min-stars 4") 44 | 
self.assertCountEqual(self.stdout.getvalue().split(), ['abbr3']) 45 | 46 | def test_zero_min_stars_means_all(self): 47 | abbr_cli("example --only-words --min-stars 0") 48 | self.assertEqual(len(self.stdout.getvalue().split()), 3) 49 | 50 | def test_result_order_in_html_respected(self): 51 | abbr_cli("example --only-words") 52 | self.assertListEqual(self.stdout.getvalue().split(), ['abbr1', 'abbr2', 'abbr3']) 53 | 54 | def test_fancy_output(self): 55 | abbr_cli("example") 56 | for line in self.stdout.getvalue().strip().split(sep='\n'): 57 | with self.subTest(line=line): 58 | self.assertRegex(line, r'\(\d/\d\)') 59 | self.assertIn("abbr", line) 60 | 61 | def test_fancy_output_with_categories(self): 62 | abbr_cli("example --with-category") 63 | for line in self.stdout.getvalue().strip().split(sep='\n'): 64 | with self.subTest(line=line): 65 | self.assertRegex(line, r'\(\d/\d\)') 66 | self.assertIn("abbr", line) 67 | self.assertIn("category", line) 68 | 69 | def tearDown(self): 70 | self.stdout_patch.stop() 71 | self.stderr_patch.stop() 72 | self.client_patch.stop() 73 | 74 | 75 | if __name__ == '__main__': 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # abbr-cli 2 | 3 | A command-line tool to look up abbreviations for terms (and the reverse). The abbreviations, or the terms in reverse 4 | lookup, are extracted from [abbreviations.com](https://www.abbreviations.com). 
5 | 6 | ``` 7 | $ abbr configuration 8 | (5/5) cfg 9 | (4/5) config 10 | (-/5) conf 11 | (-/5) cnf 12 | 13 | $ abbr --reverse alloc 14 | (3/5) Allocation 15 | (-/5) Allocate 16 | ``` 17 | 18 | ## Table of contents 19 | 20 | - [Installation](#installation) 21 | - [Exploring the arguments](#exploring-the-arguments) 22 | - [The documentation](#the-documentation) 23 | - [...In action](#in-action) 24 | - [A little better than abbreviations.com](#a-little-better-than-abbreviationscom) 25 | 26 | ## Installation 27 | 28 | - Python version 3.6 or greater is required. 29 | - Install via `pip` command: 30 | 31 | ``` 32 | $ pip install abbr-cli 33 | ``` 34 | 35 | ## Exploring the arguments 36 | 37 | ### The documentation 38 | 39 | ``` 40 | $ abbr -h 41 | Look up abbreviations for terms. 42 | 43 | Usage: 44 | abbr [options] 45 | abbr [options] -r 46 | abbr (-h | --help) 47 | abbr --version 48 | 49 | Options: 50 | --version Show version. 51 | -h --help Show this screen. 52 | -r --reverse Reverse the look up. Find terms for an abbreviation. 53 | -n --limit Limit the number of result [default: 25]. 54 | -m --min-stars Include only items with number of stars equal or above . 55 | Allowed values are 0-5 (inclusive) [default: 0]. 56 | -w --only-words List only the words (terms or abbreviations) without category 57 | and rating. Helpful when used in a bash script. 58 | -c --with-category Include categories along with the word. 
59 | ``` 60 | 61 | ### ...In action 62 | 63 | ``` 64 | $ abbr configuration 65 | (5/5) cfg 66 | (4/5) config 67 | (-/5) conf 68 | (-/5) cnf 69 | 70 | $ abbr configuration --with-category 71 | (5/5) cfg ~ Miscellaneous, Computing 72 | (4/5) config ~ Governmental 73 | (-/5) conf ~ Computing 74 | (-/5) cnf ~ Computing 75 | 76 | $ abbr configuration --only-words 77 | cfg 78 | config 79 | conf 80 | cnf 81 | 82 | $ abbr configuration --min-stars 4 83 | (5/5) cfg 84 | (4/5) config 85 | 86 | $ abbr configuration --limit 1 87 | (5/5) cfg 88 | ``` 89 | 90 | ## A little better than abbreviations.com 91 | 92 | - No duplicates. 93 | - Single-character abbreviations are excluded. 94 | - A single abbreviation (or term in case of `--reverse` flag) with multiple categories (and sometimes subcategories) 95 | is merged into a single line. Subcategories are removed to avoid clutter. The rating will be the average rating. 96 | 97 | ``` 98 | # instead of getting 99 | $ abbr command --with-category 100 | (5/5) cmd ~ Governmental/NASA 101 | (4/5) cmd ~ Governmental/Military 102 | (4/5) cmd ~ Computing/DOSCommands 103 | (5/5) cmd ~ Computing 104 | (-/5) cmd ~ Miscellaneous/Aircraft 105 | ... 106 | 107 | # you will get 108 | $ abbr command --with-category 109 | (4/5) cmd ~ Governmental, Computing, Miscellaneous 110 | ... 111 | ``` 112 | 113 | - Abbreviations are normalized to lowercase, while terms are normalized to title case. 
114 | -------------------------------------------------------------------------------- /abbr/scrapers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from lxml import etree 4 | 5 | 6 | class BaseScraper(object): 7 | 8 | def __init__(self, html: str, limit: int, min_stars: int, reversed_flag: bool): 9 | super().__init__() 10 | self.html = html 11 | self.limit = limit if limit > 0 else sys.maxsize 12 | self.min_stars = min_stars 13 | self.reversed = reversed_flag 14 | self._words: list[str] = list() 15 | self._words_stars: dict[int] = dict() 16 | self._words_categories: dict[set] = dict() 17 | self.parse() 18 | 19 | def parse(self): 20 | pass 21 | 22 | # noinspection PyMethodMayBeStatic 23 | def is_valid_word(self, word: str) -> bool: 24 | return len(word) > 1 25 | 26 | def normalize(self, word): 27 | if self.reversed: 28 | normalized = word.title() 29 | else: 30 | normalized = word.lower() 31 | return normalized 32 | 33 | def add_word(self, word: str): 34 | word = self.normalize(word) 35 | if word not in self._words: 36 | self._words.append(word) 37 | 38 | def add_word_stars(self, word: str, star_count: int): 39 | word = self.normalize(word) 40 | current_value = self._words_stars.get(word, 0) 41 | if current_value == 0: 42 | self._words_stars[word] = star_count 43 | elif star_count == 0: 44 | self._words_stars[word] = current_value 45 | else: 46 | self._words_stars[word] = round((current_value + star_count) / 2) 47 | 48 | def add_word_category(self, word: str, category: str): 49 | word = self.normalize(word) 50 | self._words_categories[word] = self._words_categories.get(word, set()) | {category} 51 | 52 | def words(self) -> list[str]: 53 | return self._words 54 | 55 | def words_stars(self) -> dict[int]: 56 | return self._words_stars 57 | 58 | def words_categories(self) -> dict[set]: 59 | return self._words_categories 60 | 61 | 62 | class XPathScraper(BaseScraper): 63 | 64 | def __init__(self, html: str, 
limit: int = 0, min_stars=0, reversed_flag: bool = False): 65 | super().__init__(html, limit, min_stars, reversed_flag) 66 | 67 | def parse(self): 68 | elements = \ 69 | etree.HTML(self.html).xpath( 70 | "//table[contains(@class,'no-margin')]/tbody/tr") # type: list[etree._Element] 71 | 72 | if not elements: 73 | return 74 | 75 | total = 0 76 | for e in elements: 77 | star_count = int(e.xpath("count(.//span[contains(@class,'sf')])")) 78 | if star_count < self.min_stars: 79 | continue 80 | 81 | if self.reversed: 82 | word = e.xpath(".//p[contains(@class,'desc')]")[0].text 83 | else: 84 | word = e.xpath("./td[1]/a[1]")[0].text 85 | 86 | if not self.is_valid_word(word): 87 | continue 88 | 89 | self.add_word(word) 90 | self.add_word_stars(word, star_count) 91 | 92 | category = e.xpath(".//p[contains(@class,'path')]/a[1]")[0].text 93 | self.add_word_category(word, category) 94 | 95 | total += 1 96 | if total >= self.limit: 97 | break 98 | -------------------------------------------------------------------------------- /abbr/core.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import colorful as cf 4 | from requests.exceptions import ConnectionError, ConnectTimeout 5 | 6 | import abbr 7 | from abbr.clients import AbbreviationsClient 8 | from abbr.exitstatus import ExitStatus 9 | from abbr.scrapers import XPathScraper 10 | 11 | _color_palette = { 12 | 'red': '#EF5350', 13 | 'category': '#757575', 14 | 'categorySeparator': '#757575', 15 | } 16 | 17 | 18 | # noinspection PyBroadException 19 | def main(args) -> ExitStatus: 20 | cf.use_palette(_color_palette) 21 | 22 | term = args[''] 23 | abbrv = args[''] 24 | reversed_flag = args['--reverse'] 25 | min_stars = int(args['--min-stars']) 26 | fancy_output = not args['--only-words'] 27 | include_category = args['--with-category'] 28 | limit = int(args['--limit']) 29 | 30 | query = abbrv if reversed_flag else term 31 | 32 | try: 33 | status_code, html = 
AbbreviationsClient(query, reversed_flag).execute() 34 | except ConnectTimeout: 35 | eprint(f"Connection timed out!") 36 | return ExitStatus.ERROR 37 | except ConnectionError: 38 | eprint(f"Connection failed! Make sure you're connected to the internet.") 39 | return ExitStatus.ERROR 40 | except Exception: 41 | eprint(f"Unexpected error! Please report the issue on {abbr.__repo_url__}.") 42 | return ExitStatus.ERROR 43 | 44 | if status_code == 302: 45 | if reversed_flag and fancy_output: 46 | print("Zero terms. Is {} a term? Then don't use -r flag.".format(cf.bold_red(abbrv))) 47 | elif fancy_output: 48 | print("Zero abbreviations. Is {} an abbreviation? Then use with -r flag.".format(cf.bold_red(term))) 49 | return ExitStatus.SUCCESS 50 | 51 | try: 52 | scraper = XPathScraper(html, limit, min_stars, reversed_flag) 53 | except Exception: 54 | eprint(f"Unexpected Error! Please report the issue on {abbr.__repo_url__}.") 55 | return ExitStatus.ERROR 56 | 57 | # The 'words' here could be either abbreviations or terms 58 | # depending on whether or not the --reverse flag is present. 
59 | words = scraper.words() 60 | 61 | if not words: 62 | if reversed_flag and fancy_output: 63 | print("Zero terms.") 64 | elif fancy_output: 65 | print("Zero abbreviations.") 66 | return ExitStatus.SUCCESS 67 | 68 | if fancy_output: 69 | words_stars = scraper.words_stars() 70 | words_categories = scraper.words_categories() 71 | fancy_print(words, words_categories, words_stars, include_category) 72 | else: 73 | simple_print(words) 74 | 75 | return ExitStatus.SUCCESS 76 | 77 | 78 | def fancy_print(words: list[str], category_dict: dict[set], star_dict: dict[int], include_category: bool): 79 | for abbrv in words: 80 | star_count = star_dict[abbrv] 81 | category = ', '.join(category_dict[abbrv]) 82 | print("({}) ".format('{}/5'.format('-' if star_count == 0 else star_count)) + abbrv, end='') 83 | if include_category: 84 | print(cf.categorySeparator(' ~ ') + cf.category(category)) 85 | else: 86 | print() 87 | 88 | 89 | def simple_print(words: list[str]): 90 | for abbrv in words: 91 | print(abbrv) 92 | 93 | 94 | def eprint(*args, **kwargs): 95 | print(*args, file=sys.stderr, **kwargs) 96 | --------------------------------------------------------------------------------