├── mediawiki
│   ├── py.typed
│   ├── __init__.py
│   ├── utilities.py
│   ├── exceptions.py
│   ├── configuraton.py
│   ├── mediawikipage.py
│   └── mediawiki.py
├── docs
│   ├── source
│   │   ├── _static
│   │   │   ├── .gitkeep
│   │   │   └── custom.css
│   │   ├── index.rst
│   │   ├── code.rst
│   │   ├── quickstart.rst
│   │   └── conf.py
│   ├── requirements.txt
│   └── Makefile
├── tests
│   ├── __init__.py
│   └── utilities.py
├── setup.py
├── pyproject.toml
├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── publish.yml
│       └── python-package.yml
├── .readthedocs.yaml
├── codecov.yml
├── LICENSE
├── setup.cfg
├── .gitignore
├── README.rst
├── CONTRIBUTING.md
├── CHANGELOG.md
├── scripts
│   └── generate_test_data.py
└── .pylintrc

/mediawiki/py.typed:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/source/_static/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Testing Module
3 | """
4 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup  # type: ignore
2 |
3 | setup()
4 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx>=3.0
2 | sphinx-rtd-theme
3 | requests>=2.0.0,<3.0.0
4 | beautifulsoup4
5 |
--------------------------------------------------------------------------------
/docs/source/_static/custom.css:
--------------------------------------------------------------------------------
1 | /* Set the properties to be full width */
2 | dl.py.property {
3 |     display: block !important;
4 | }
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. _home:
 2 | .. include:: ../../README.rst
 3 |
 4 | .. toctree::
 5 |
 6 |    code
 7 |    quickstart
 8 |
 9 |
10 | Read More
11 | ==================
12 |
13 | * :ref:`api`
14 | * :ref:`quickstart`
15 | * :ref:`genindex`
16 | * :ref:`modindex`
17 | * :ref:`search`
18 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = [
 3 |     "setuptools>=42",
 4 |     "wheel",
 5 |     "setuptools_scm>=6.2",
 6 |     "requests>=2.0.0,<3.0.0",
 7 |     "beautifulsoup4",
 8 | ]
 9 | build-backend = "setuptools.build_meta"
10 |
11 | [tool.isort]
12 | profile = "black"
13 |
14 | [tool.black]
15 | line-length = 120
16 | target-version = ['py38']
17 | include = '\.pyi?$'
18 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
 5 |
 6 | version: 2
 7 | updates:
 8 |
 9 |   - package-ecosystem: "github-actions"
10 |     directory: "/"
11 |     schedule:
12 |       # Check for updates to GitHub Actions every week
13 |       interval: "weekly"
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yaml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 |
 5 | # Required
 6 | version: 2
 7 |
 8 | # Set the version of Python and other tools you might need
 9 | build:
10 |   os: ubuntu-22.04
11 |   tools:
12 |     python: "3.11"
13 |
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 |   configuration: docs/source/conf.py
17 |
18 | # We recommend specifying your dependencies to enable reproducible builds:
19 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
20 | python:
21 |   install:
22 |     - requirements: docs/requirements.txt
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | codecov:
 2 |   require_ci_to_pass: yes
 3 |
 4 | coverage:
 5 |   precision: 2
 6 |   round: down
 7 |   range: "70...100"
 8 |
 9 |   status:
10 |     project:
11 |       default:
12 |         # basic settings
13 |         target: "85%"
14 |         base: auto
15 |         threshold: 15
16 |     patch:
17 |       default:
18 |         target: "50%"
19 |         changes: no
20 |
21 | parsers:
22 |   gcov:
23 |     branch_detection:
24 |       conditional: yes
25 |       loop: yes
26 |       method: no
27 |       macro: no
28 |
29 | comment:
30 |   layout: "reach,diff,flags,tree"
31 |   behavior: default
32 |   require_changes: no
33 |
34 | ignore:
35 |   - "./tests/"
36 |   - "setup.py"
37 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 |
 4 | name: Upload Python Package
 5 |
 6 | on:
 7 |   release:
 8 |     types: [created]
 9 |
10 | jobs:
11 |   deploy:
12 |
13 |     runs-on: ubuntu-latest
14 |
15 |     steps:
16 |       - uses: actions/checkout@v6
17 |       - name: Set up Python
18 |         uses: actions/setup-python@v6
19 |         with:
20 |           python-version: '3.x'
21 |       - name: Install dependencies
22 |         run: |
23 |           python -m pip install --upgrade pip
24 |           pip install --upgrade twine build
25 |       - name: Build and publish
26 |         env:
27 |           TWINE_USERNAME: __token__
28 |           TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 |         run: |
30 |           python -m build
31 |           twine check dist/*
32 |           twine upload dist/*
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 |
 3 | Copyright (c) 2016 Tyler Barrus
 4 |
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /mediawiki/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | mediawiki module initialization 3 | """ 4 | 5 | from mediawiki.configuraton import URL, VERSION 6 | from mediawiki.exceptions import ( 7 | DisambiguationError, 8 | HTTPTimeoutError, 9 | MediaWikiAPIURLError, 10 | MediaWikiCategoryTreeError, 11 | MediaWikiException, 12 | MediaWikiForbidden, 13 | MediaWikiGeoCoordError, 14 | MediaWikiLoginError, 15 | PageError, 16 | RedirectError, 17 | ) 18 | from mediawiki.mediawiki import MediaWiki 19 | from mediawiki.mediawikipage import MediaWikiPage 20 | 21 | __author__ = "Tyler Barrus" 22 | __maintainer__ = "Tyler Barrus" 23 | __email__ = "barrust@gmail.com" 24 | __license__ = "MIT" 25 | __version__ = VERSION 26 | __credits__ = ["Jonathan Goldsmith"] 27 | __url__ = URL 28 | __bugtrack_url__ = f"{__url__}/issues" 29 | __download_url__ = f"{__url__}/tarball/v{__version__}" 30 | 31 | __all__ = [ 32 | "MediaWiki", 33 | "MediaWikiPage", 34 | "PageError", 35 | "RedirectError", 36 | "MediaWikiException", 37 | "DisambiguationError", 38 | "MediaWikiAPIURLError", 39 | "HTTPTimeoutError", 40 | "MediaWikiGeoCoordError", 41 | "MediaWikiCategoryTreeError", 42 | "MediaWikiLoginError", 43 | "MediaWikiForbidden", 44 | ] 45 | -------------------------------------------------------------------------------- /tests/utilities.py: -------------------------------------------------------------------------------- 1 | """ random functions that will be needed for the tests """ 2 | 3 | 4 | class FunctionUseCounter(object): 5 | """decorator to keep a running count of how many 6 | times function has been called; stop at 50""" 7 | 8 | def __init__(self, func): 9 | """init decorator""" 10 | self.func = func 11 | self.count = 0 12 | 13 | def __call__(self, *args, **kwargs): 14 | """what to do when called""" 15 | self.count += 1 16 | if self.count > 50: # arbitrary large 17 | return dict() 18 | return self.func(*args, **kwargs) 19 | 20 | 21 | def find_depth(node): 22 | """find depth of tree""" 23 | 24 | def walk(next_node, depth): 25 | """walk down tree finding depth""" 26 | if next_node is None: 27 | return depth 28 | if "sub-categories" not in next_node: 29 | return depth 30 | if next_node["sub-categories"] is None: 31 | return depth 32 | 33 | if len(next_node["sub-categories"].keys()) == 0: 34 | return next_node["depth"] 35 | 36 | for key in next_node["sub-categories"].keys(): 37 | path_depth = walk(next_node["sub-categories"][key], depth) 38 | if path_depth and path_depth > depth: 39 | depth = path_depth 40 | return depth 41 | 42 | return walk(node, 0) 43 
| -------------------------------------------------------------------------------- /docs/source/code.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | MediaWiki Documentation 4 | *********************** 5 | 6 | Here you can find the full developer API for the mediawiki project. 7 | 8 | 9 | Functions and Classes 10 | =============================== 11 | 12 | MediaWiki 13 | +++++++++++++++++++++++++++++++ 14 | 15 | .. autoclass:: mediawiki.MediaWiki 16 | :members: version, api_version, extensions, rate_limit, 17 | rate_limit_min_wait, timeout, language, user_agent, api_url, 18 | memoized, clear_memoized, refresh_interval, set_api_url, 19 | supported_languages, random, categorytree, page, wiki_request 20 | 21 | .. automethod:: mediawiki.MediaWiki.login(username, password) 22 | .. automethod:: mediawiki.MediaWiki.suggest(query) 23 | .. automethod:: mediawiki.MediaWiki.search(query, results=10, suggestion=False) 24 | .. automethod:: mediawiki.MediaWiki.allpages(query='', results=10) 25 | .. automethod:: mediawiki.MediaWiki.summary(title, sentences=0, chars=0, auto_suggest=True, redirect=True) 26 | .. automethod:: mediawiki.MediaWiki.geosearch(latitude=None, longitude=None, radius=1000, title=None, auto_suggest=True, results=10) 27 | .. automethod:: mediawiki.MediaWiki.prefixsearch(prefix, results=10) 28 | .. automethod:: mediawiki.MediaWiki.opensearch(query, results=10, redirect=True) 29 | .. automethod:: mediawiki.MediaWiki.categorymembers(category, results=10, subcategories=True) 30 | 31 | 32 | MediaWikiPage 33 | +++++++++++++++++++++++++++++++ 34 | 35 | .. autoclass:: mediawiki.MediaWikiPage 36 | :members: 37 | 38 | Exceptions 39 | =============================== 40 | 41 | .. automodule:: mediawiki.exceptions 42 | :members: 43 | 44 | Indices and tables 45 | ================== 46 | 47 | * :ref:`home` 48 | * :ref:`quickstart` 49 | * :ref:`genindex` 50 | * :ref:`modindex` 51 | * :ref:`search` 52 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = pymediawiki 3 | version = attr: mediawiki.__version__ 4 | author = Tyler Barrus 5 | author_email = barrust@gmail.com 6 | url = https://github.com/barrust/mediawiki 7 | description = Wikipedia and MediaWiki API wrapper for Python 8 | long_description = file: README.rst 9 | long_description_content_type = text/x-rst 10 | keywords = python, mediawiki, wikipedia, API, wiki, parser, natural language processing, nlp 11 | license = MIT 12 | license_files = LICENSE 13 | bugtrack_url = https://github.com/barrust/mediawiki/issues 14 | classifiers = 15 | Development Status :: 5 - Production/Stable 16 | Intended Audience :: Developers 17 | Intended Audience :: Information Technology 18 | Intended Audience :: Science/Research 19 | Topic :: Software Development :: Libraries 20 | Topic :: Utilities 21 | Topic :: Internet 22 | Topic :: Internet :: WWW/HTTP :: Dynamic Content :: Wiki 23 | License :: OSI Approved 24 | License :: OSI Approved :: MIT License 25 | Operating System :: OS Independent 26 | Programming Language :: Python 27 | Programming Language :: Python :: 3 28 | Programming Language :: Python :: 3.7 29 | Programming Language :: Python :: 3.8 30 | Programming Language :: Python :: 3.9 31 | Programming Language :: Python :: 3.10 32 | Programming Language :: Python :: 3.11 33 | Programming Language :: Python :: 3.12 34 | 35 | [options] 36 | zip_safe = 
False 37 | include_package_data = True 38 | packages = find: 39 | install_requires = 40 | beautifulsoup4 41 | requests>=2.0.0,<3.0.0 42 | python_requires = >=3.6 43 | 44 | [options.packages.find] 45 | exclude = tests 46 | 47 | [tool.setuptools_scm] 48 | 49 | [bdist_wheel] 50 | universal=0 51 | 52 | [pep8] 53 | max-line-length=120 54 | 55 | [pycodestyle] 56 | max-line-length = 120 57 | ignore = E203,W503 58 | 59 | [flake8] 60 | max-line-length = 120 61 | ignore = E203,W503 -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: [push, pull_request] 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] 15 | 16 | steps: 17 | - uses: actions/checkout@v6 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v6 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip 25 | python -m pip install flake8 pytest pytest-cov 26 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 27 | python -m pip install -e . 28 | - name: Lint with flake8 29 | run: | 30 | # stop the build if there are Python syntax errors or undefined names 31 | flake8 mediawiki/ --count --select=E9,F63,F7,F82 --show-source --statistics 32 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 33 | flake8 mediawiki/ --count --exit-zero --max-complexity=11 --max-line-length=127 --statistics 34 | - name: Test with pytest 35 | run: | 36 | # Run tests while also generating coverage statistics 37 | pytest --cov . 
--cov-report xml:/home/runner/coverage.xml 38 | - name: Upload coverage to Codecov 39 | uses: codecov/codecov-action@v5 40 | with: 41 | files: /home/runner/coverage.xml 42 | fail_ci_if_error: true 43 | verbose: true 44 | token: ${{ secrets.CODECOV_TOKEN }} # required 45 | 46 | build-verification: 47 | 48 | runs-on: ubuntu-latest 49 | steps: 50 | - uses: actions/checkout@v6 51 | - uses: actions/setup-python@v6 52 | with: 53 | python-version: '3.x' 54 | - name: Build and check twine 55 | run: | 56 | python -m pip install --upgrade pip 57 | python -m pip install build twine 58 | python -m build 59 | twine check dist/* 60 | 61 | Lint-black: 62 | runs-on: ubuntu-latest 63 | steps: 64 | - uses: actions/checkout@v6 65 | - uses: psf/black@stable 66 | with: 67 | # src: "./mediawiki/*" 68 | version: "22.8.0" 69 | -------------------------------------------------------------------------------- /mediawiki/utilities.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions 3 | """ 4 | import functools 5 | import inspect 6 | import sys 7 | import time 8 | from typing import Any, Callable, Dict, Optional 9 | 10 | 11 | def parse_all_arguments(func: Callable) -> Dict[str, Any]: 12 | """determine all positional and named arguments as a dict""" 13 | args = {} 14 | 15 | func_args = inspect.signature(func) 16 | for itm in list(func_args.parameters)[1:]: 17 | param = func_args.parameters[itm] 18 | if param.default is not param.empty: 19 | args[param.name] = param.default 20 | return args 21 | 22 | 23 | def memoize(func: Callable) -> Callable: 24 | """quick memoize decorator for class instance methods 25 | NOTE: this assumes that the class that the functions to be 26 | memoized already has a memoized and refresh_interval 27 | property""" 28 | 29 | @functools.wraps(func) 30 | def wrapper(*args, **kwargs): 31 | """wrap it up and store info in a cache""" 32 | cache = args[0].memoized 33 | refresh = args[0]._config.refresh_interval 34 | use_cache = args[0]._config.use_cache 35 | 36 | # short circuit if not using cache 37 | if use_cache is False: 38 | return func(*args, **kwargs) 39 | 40 | if func.__name__ not in cache: 41 | cache[func.__name__] = {} 42 | if "defaults" not in cache: 43 | cache["defaults"] = {} 44 | cache["defaults"][func.__name__] = parse_all_arguments(func) 45 | # build a key; should also consist of the default values 46 | defaults = cache["defaults"][func.__name__].copy() 47 | for key, val in kwargs.items(): 48 | defaults[key] = val 49 | tmp = [] 50 | tmp.extend(args[1:]) 51 | for k in sorted(defaults.keys()): 52 | tmp.append(f"({k}: {defaults[k]})") 53 | 54 | tmp = [str(x) for x in tmp] 55 | key = " - ".join(tmp) 56 | 57 | # set the value in the cache if missing or needs to be refreshed 58 | if key not in cache[func.__name__]: 59 | cache[func.__name__][key] = (time.time(), func(*args, **kwargs)) 60 | else: 61 | tmp = cache[func.__name__][key] 62 | # determine if we need to refresh the data... 
63 | if refresh is not None and time.time() - tmp[0] > refresh: 64 | cache[func.__name__][key] = (time.time(), func(*args, **kwargs)) 65 | return cache[func.__name__][key][1] 66 | 67 | return wrapper 68 | 69 | 70 | def str_or_unicode(text: str) -> str: 71 | """handle python 3 unicode""" 72 | encoding = sys.stdout.encoding 73 | return text.encode(encoding).decode(encoding) 74 | 75 | 76 | def is_relative_url(url: str) -> Optional[bool]: 77 | """simple method to determine if a url is relative or absolute""" 78 | return url.find("://") <= 0 and not url.startswith("//") if not url.startswith("#") else None 79 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ############################################### 2 | # Project Specific 3 | ############################################### 4 | 5 | 6 | ############################################### 7 | # Python 8 | ############################################### 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | env/ 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # celery beat schedule file 87 | celerybeat-schedule 88 | 89 | # SageMath parsed files 90 | *.sage.py 91 | 92 | # dotenv 93 | .env 94 | 95 | # virtualenv 96 | .venv 97 | venv/ 98 | ENV/ 99 | Pipfile* 100 | 101 | # Spyder project settings 102 | .spyderproject 103 | .spyproject 104 | 105 | # Rope project settings 106 | .ropeproject 107 | 108 | # mkdocs documentation 109 | /site 110 | 111 | # mypy 112 | .mypy_cache/ 113 | 114 | 115 | ############################################### 116 | # Operating Systems 117 | ############################################### 118 | # Windows thumbnail cache files 119 | Thumbs.db 120 | Thumbs.db:encryptable 121 | ehthumbs.db 122 | ehthumbs_vista.db 123 | 124 | # Dump file 125 | *.stackdump 126 | 127 | # Folder config file 128 | [Dd]esktop.ini 129 | 130 | # Recycle Bin used on file shares 131 | $RECYCLE.BIN/ 132 | 133 | # Windows Installer files 134 | *.cab 135 | *.msi 136 | *.msix 137 | *.msm 138 | *.msp 139 | 140 | # Windows shortcuts 141 | *.lnk 142 | 143 | # 144 | # MacOS 145 | # 146 | 147 | # General 148 | .DS_Store 149 | .AppleDouble 150 | .LSOverride 151 | 152 | # Icon must end with two \r 153 | Icon 154 | 155 | 156 | # Thumbnails 157 | ._* 158 | 159 | # Files that might appear in the 
root of a volume
160 | .DocumentRevisions-V100
161 | .fseventsd
162 | .Spotlight-V100
163 | .TemporaryItems
164 | .Trashes
165 | .VolumeIcon.icns
166 | .com.apple.timemachine.donotpresent
167 |
168 | # Directories potentially created on remote AFP share
169 | .AppleDB
170 | .AppleDesktop
171 | Network Trash Folder
172 | Temporary Items
173 | .apdisk
174 |
175 | #
176 | # Linux
177 | #
178 | *~
179 |
180 | # temporary files which can be created if a process still has a handle open of a deleted file
181 | .fuse_hidden*
182 |
183 | # KDE directory preferences
184 | .directory
185 |
186 | # Linux trash folder which might appear on any partition or disk
187 | .Trash-*
188 |
189 | # .nfs files are created when an open file is removed but is still being accessed
190 | .nfs*
191 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | MediaWiki
 2 | =========
 3 |
 4 | .. image:: https://img.shields.io/badge/license-MIT-blue.svg
 5 |     :target: https://opensource.org/licenses/MIT/
 6 |     :alt: License
 7 | .. image:: https://img.shields.io/github/release/barrust/mediawiki.svg
 8 |     :target: https://github.com/barrust/mediawiki/releases
 9 |     :alt: GitHub release
10 | .. image:: https://github.com/barrust/mediawiki/workflows/Python%20package/badge.svg
11 |     :target: https://github.com/barrust/mediawiki/actions?query=workflow%3A%22Python+package%22
12 |     :alt: Build Status
13 | .. image:: https://codecov.io/gh/barrust/mediawiki/branch/master/graph/badge.svg?token=OdETiNgz9k
14 |     :target: https://codecov.io/gh/barrust/mediawiki
15 |     :alt: Test Coverage
16 | .. image:: https://app.codacy.com/project/badge/Grade/34162198611f4aa0bde630d1dab72ce8
17 |     :target: https://www.codacy.com/gh/barrust/mediawiki/dashboard?utm_source=github.com&utm_medium=referral&utm_content=barrust/mediawiki&utm_campaign=Badge_Grade
18 |     :alt: Codacy Review
19 | .. image:: https://badge.fury.io/py/pymediawiki.svg
20 |     :target: https://badge.fury.io/py/pymediawiki
21 |     :alt: PyPi Release
22 | .. image:: http://pepy.tech/badge/pymediawiki
23 |     :target: https://pepy.tech/project/pymediawiki
24 |     :alt: Downloads
25 |
26 | **mediawiki** is a python wrapper and parser for the MediaWiki API. The goal
27 | is to allow users to quickly and efficiently pull data from the MediaWiki site
28 | of their choice instead of worrying about dealing directly with the API. As
29 | such, it does not force the use of a particular MediaWiki site. It defaults to
30 | `Wikipedia <https://www.wikipedia.org>`__ but other MediaWiki sites can
31 | also be used.
32 |
33 | MediaWiki wraps the `MediaWiki API <https://www.mediawiki.org/wiki/API>`_
34 | so you can focus on *leveraging* your favorite MediaWiki site's data,
35 | not getting it. Please check out the code on
36 | `github <https://github.com/barrust/mediawiki>`_!
37 |
38 | **Note:** this library was designed for ease of use and simplicity. If you plan
39 | on doing serious scraping, automated requests, or editing, please look into
40 | `Pywikibot <https://www.mediawiki.org/wiki/Manual:Pywikibot>`__
41 | which has a larger API, advanced rate limiting, and other features so we may
42 | be considerate of the MediaWiki infrastructure. Pywikibot also has other
43 | features, such as support for Wikibase (which runs Wikidata). 
44 |
45 |
46 | Installation
47 | ------------------
48 |
49 | Pip Installation:
50 |
51 | ::
52 |
53 |     $ pip install pymediawiki
54 |
55 | To install from source:
56 |
57 | Clone the `repository on GitHub
58 | <https://github.com/barrust/mediawiki>`__, then run the following from the folder:
59 |
60 | ::
61 |
62 |     $ python setup.py install
63 |
64 | ``mediawiki`` supports python versions 3.7 - 3.13
65 |
66 | For *python 2.7* support, install `release 0.6.7 <https://github.com/barrust/mediawiki/releases/tag/v0.6.7>`__
67 |
68 | ::
69 |
70 |     $ pip install pymediawiki==0.6.7
71 |
72 | Documentation
73 | -------------
74 |
75 | Documentation of the latest release is hosted on
76 | `readthedocs.io <https://pymediawiki.readthedocs.io/en/latest/>`__
77 |
78 | To build the documentation yourself run:
79 |
80 | ::
81 |
82 |     $ pip install sphinx
83 |     $ cd docs/
84 |     $ make html
85 |
86 | Automated Tests
87 | ------------------
88 |
89 | To run automated tests, simply run the following command from the
90 | downloaded folder:
91 |
92 | ::
93 |
94 |     $ python setup.py test
95 |
96 |
97 | Quickstart
98 | ------------------
99 |
100 | Import mediawiki and run a standard search against Wikipedia:
101 |
102 | .. code:: python
103 |
104 |     >>> from mediawiki import MediaWiki
105 |     >>> wikipedia = MediaWiki()
106 |     >>> wikipedia.search('washington')
107 |
108 | Run more advanced searches:
109 |
110 | .. code:: python
111 |
112 |     >>> wikipedia.opensearch('washington')
113 |     >>> wikipedia.allpages('a')
114 |     >>> wikipedia.geosearch(title='washington, d.c.')
115 |     >>> wikipedia.geosearch(latitude='0.0', longitude='0.0')
116 |     >>> wikipedia.prefixsearch('arm')
117 |     >>> wikipedia.random(pages=10)
118 |
119 | Pull a MediaWiki page and some of the page properties:
120 |
121 | .. code:: python
122 |
123 |     >>> p = wikipedia.page('Chess')
124 |     >>> p.title
125 |     >>> p.summary
126 |     >>> p.categories
127 |     >>> p.images
128 |     >>> p.links
129 |     >>> p.langlinks
130 |
131 | See the `documentation for more examples!
132 | <https://pymediawiki.readthedocs.io/en/latest/>`_
133 |
134 |
135 |
136 | Changelog
137 | ------------------
138 |
139 | Please see the `changelog
140 | <https://github.com/barrust/mediawiki/blob/master/CHANGELOG.md>`__ for a list
141 | of all changes.
142 |
143 |
144 | License
145 | -------
146 |
147 | MIT licensed. See the `LICENSE file
148 | <https://github.com/barrust/mediawiki/blob/master/LICENSE>`__
149 | for full details.
150 |
--------------------------------------------------------------------------------
/docs/source/quickstart.rst:
--------------------------------------------------------------------------------
 1 | .. _quickstart:
 2 |
 3 | MediaWiki Quickstart
 4 | ====================
 5 |
 6 | Quickly get started using the `mediawiki` python library. This page is designed
 7 | to help users understand the basics of using the `mediawiki` library.
 8 |
 9 | To understand all possible parameters for each function and property,
10 | please see :ref:`api`.
11 |
12 |
13 | Install
14 | ^^^^^^^
15 |
16 | Using pip
17 | """""""""
18 |
19 | ::
20 |
21 |     $ pip install pymediawiki
22 |
23 | From source
24 | """""""""""
25 |
26 | Begin by installing pymediawiki: simply clone the
27 | `repository on GitHub <https://github.com/barrust/mediawiki>`__,
28 | then run the following command from the extracted folder:
29 |
30 | ::
31 |
32 |     $ python setup.py install
33 |
34 | Setup
35 | ^^^^^
36 |
37 | Setting up the library is as easy as:
38 |
39 | .. code:: python
40 |
41 |     >>> from mediawiki import MediaWiki
42 |     >>> wikipedia = MediaWiki()
43 |
44 |
45 | Change API URL
46 | ^^^^^^^^^^^^^^
47 |
48 | To change the API URL, one can either set the url parameter:
49 |
50 | .. code:: python
51 |
52 |     >>> from mediawiki import MediaWiki
53 |     >>> asoiaf = MediaWiki(url='http://awoiaf.westeros.org/api.php')
54 |
55 | Or one can update an already setup MediaWiki object:
56 |
57 | .. code:: python
58 |
59 |     >>> wikipedia.set_api_url('http://awoiaf.westeros.org/api.php')
60 |
61 | Set the User-Agent String
62 | ^^^^^^^^^^^^^^^^^^^^^^^^^
63 | Per the `MediaWiki API Etiquette <https://www.mediawiki.org/wiki/API:Etiquette>`__
64 | it is recommended to not use a library's default user-agent string. Therefore,
65 | it is easy to change the `user-agent` string either during initialization or by
66 | setting the user_agent property:
67 |
68 | .. code:: python
69 |
70 |     >>> from mediawiki import MediaWiki
71 |     >>> wikipedia = MediaWiki(user_agent='pyMediaWiki-User-Agent-String')
72 |     >>>
73 |     >>> # Or reset it!
74 |     >>> wikipedia.user_agent = 'my-new-user-agent-string'
75 |
76 | Searching
77 | ^^^^^^^^^
78 |
79 | To search the MediaWiki site, it is as easy as calling one of the search
80 | functions: `random`, `search`, `geosearch`, `opensearch`, or `prefixsearch`
81 |
82 | random
83 | """"""
84 |
85 | Get a random page:
86 |
87 | .. code:: python
88 |
89 |     >>> wikipedia.random(pages=3)
90 |     # ['Sutton House, London', 'Iolaus violacea', 'Epigenetics & Chromatin']
91 |
92 |
93 | search
94 | """"""
95 |
96 | Search for the provided title:
97 |
98 | .. code:: python
99 |
100 |     >>> wikipedia.search('washington', results=3)
101 |     # ['Washington', 'Washington, D.C.', 'List of Governors of Washington']
102 |
103 | allpages
104 | """"""""
105 |
106 | List all pages, starting from the provided query:
107 |
108 | .. code:: python
109 |
110 |     >>> wikipedia.allpages('a', results=3)
111 |     # ['A', 'A!', 'A! (Alexa Feser album)']
112 |
113 | geosearch
114 | """""""""
115 |
116 | Search based on geocoords (latitude/longitude):
117 |
118 | .. code:: python
119 |
120 |     >>> wikipedia.geosearch(latitude=0.0, longitude=0.0)
121 |     # ['Null Island', 'Mirdif 35']
122 |
123 | opensearch
124 | """"""""""
125 |
126 | Search using the OpenSearch specification:
127 |
128 | .. code:: python
129 |
130 |     >>> wikipedia.opensearch('new york', results=1)
131 |     # [('New York', 'New York is a state in the Northeastern United States
132 |     and is the 27th-most extensive, fourth-most populous, and seventh-most
133 |     densely populated U.S.', 'https://en.wikipedia.org/wiki/New_York')]
134 |
135 | prefixsearch
136 | """"""""""""
137 |
138 | Search for pages whose title has the defined prefix:
139 |
140 | .. code:: python
141 |
142 |     >>> wikipedia.prefixsearch('ba', results=5)
143 |     # ['Ba', 'Barack Obama', 'Baseball', "Bahá'í Faith", 'Basketball']
144 |
145 |
146 | Page
147 | ^^^^
148 |
149 | Load and access information from full MediaWiki pages. Load the page using
150 | a title or page id and then access individual properties:
151 |
152 | Initialize Page
153 | """""""""""""""
154 |
155 | Initializing a page is easily accomplished in one line of code:
156 |
157 | .. code:: python
158 |
159 |     >>> p = wikipedia.page('grid compass')
160 |
161 | title
162 | """""""""""
163 |
164 | The page title
165 |
166 | .. code:: python
167 |
168 |     >>> p.title
169 |     # 'Grid Compass'
170 |
171 |
172 | pageid
173 | """""""""""
174 |
175 | The page id of the page
176 |
177 | .. code:: python
178 |
179 |     >>> p.pageid
180 |     # 3498511
181 |
182 |
183 | revision_id
184 | """""""""""
185 |
186 | The revision id of the page
187 |
188 | .. code:: python
189 |
190 |     >>> p.revision_id
191 |     # 740685101
192 |
193 | parent_id
194 | """""""""""
195 |
196 | The parent id of the page
197 |
198 | .. code:: python
199 |
200 |     >>> p.parent_id
201 |     # 740682666
202 |
203 | links
204 | """""
205 |
206 | Links to other MediaWiki pages
207 |
208 | .. code:: python
209 |
210 |     >>> p.links
211 |     # ['Astronaut', 'Bill Moggridge', 'CP/M', 'Central processing unit',
212 |     'Dynabook', 'Electroluminescent display', 'FTP', 'Flip (form)',
213 |     'GRiD Systems Corporation', 'GRiD-OS', 'Gavilan SC', 'Grid compass',
214 |     'Hard drive', 'IEEE-488', 'Industrial design', 'Intel 8086',
215 |     'John Oliver Creighton', 'Kilobyte', 'Laptop computer',
216 |     'Magnetic bubble memory', 'Modem', 'NASA', 'Operating system',
217 |     'Osborne 1', 'Paratrooper', 'Patent rights', 'Perfect (film)',
218 |     'Portable computer', 'RadioShack', 'Riptide (American TV series)',
219 |     'STS-51-G', 'Sharp PC-5000', 'Space Shuttle Discovery',
220 |     'Tandy Corporation', 'U.S. government', 'United Kingdom',
221 |     'United States Army Special Forces', 'Xerox PARC']
222 |
223 | Other Properties
224 | """"""""""""""""
225 |
226 | Other properties for a page include:
227 |
228 | - `content`
229 | - `html`
230 | - `images`
231 | - `references`
232 | - `categories`
233 | - `coordinates`
234 | - `redirects`
235 | - `backlinks`
236 | - `langlinks`
237 | - `summary`
238 | - `sections`
239 | - `logos`
240 | - `hatnotes`
241 |
242 | Summarize
243 | """"""""""""""""
244 |
245 | Summarize a page using additional parameters:
246 |
247 | .. code:: python
248 |
249 |     >>> p.summarize(chars=50)
250 |     # The Grid Compass (written GRiD by its manufacturer...
251 |
252 |
253 |
254 | Indices and tables
255 | ==================
256 |
257 | * :ref:`home`
258 | * :ref:`api`
259 | * :ref:`genindex`
260 | * :ref:`modindex`
261 | * :ref:`search`
262 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 |
 2 | ## Welcome
 3 |
 4 | Welcome to pymediawiki: a python MediaWiki API wrapper project. I hope that
 5 | you have found the project to be useful. If you are here, you must want to help
 6 | out in some way! I am very grateful for any help and support.
 7 |
 8 | ### Table Of Contents
 9 | * [Contributing](#contributing)
10 | * [Issues and Bug Reports](#issues-and-bug-reports)
11 | * [Enhancement Requests](#enhancements)
12 | * [Submitting Pull Requests](#pull-requests)
13 | * [Testing](#testing)
14 | * [Coding Style](#coding-style)
15 | * [Code Contributors](#code-contributors)
16 |
17 | ### Contributing
18 |
19 | Contributing to open-source software comes in many forms: adding additional
20 | functionality, reporting and/or fixing bugs and defects, and helping maintain
21 | documentation. Any and all forms are welcome!
22 |
23 | Below you will find ways to help the project along with notes on how to report
24 | bugs and issues, request enhancements, and issue pull requests.
25 |
26 | #### Issues and Bug Reports
27 |
28 | If you have found an issue with `pymediawiki`, please do not hesitate to let us
29 | know! Before submitting an issue or bug report, we ask that you complete a few
30 | cursory items:
31 |
32 | * **Review** current bugs to see if your issue has already been reported. If it
33 | has been previously reported, please comment on the original report with any
34 | additional details. This will help the maintainers triage the issue more
35 | quickly.
36 |
37 | * **Ensure** that the issue is **not** related to the MediaWiki site to which
38 | you are trying to connect. There are times when the MediaWiki
39 | site may refuse connections or throw an error. There are times when trying
40 | again is all that is needed! If the error is with the MediaWiki site, please do not
41 | report an issue, as there is nothing we can do to help. If, however, it is
42 | something within the library, please do not hesitate to report the issue!
43 |
44 | * **Determine** that the issue is reproducible - a code sample of the issue
45 | will help narrow down the search for the cause of the issue and may lead to a
46 | quicker fix!
47 |
48 | A **great bug report** will consist of the following:
49 |
50 | * A descriptive title
51 |
52 | * A brief description of the issue
53 |
54 | * Description of the expected results
55 |
56 | * A code example to reproduce the error. Please use
57 | [Markdown code blocks](https://help.github.com/articles/creating-and-highlighting-code-blocks/)
58 | with syntax highlighting
59 |
60 | * The link to the API URL if not the default:
61 | [Wikipedia API](http://en.wikipedia.org/w/api.php)
62 |
63 | * The affected version(s) of `pymediawiki`
64 |
65 | #### Enhancements
66 |
67 | Enhancements are additional functionality not currently supported by the
68 | `pymediawiki` library. Unfortunately, not all enhancements make sense for the
69 | goal of the project. If you have a desired feature, there are a few things you
70 | can do to possibly help get the feature into the `pymediawiki` library:
71 |
72 | * **Review** to see if the feature has been requested in the past.
73 |
74 |     * If it is requested and still open, add your comment as to why you would
75 |     like it.
76 |
77 |     * If it was previously requested but closed, you may be interested in why
78 |     it was closed and not implemented. I will try to explain my reasoning for
79 |     not supporting actions as much as possible.
80 |
81 | * Add an issue to the
82 | [issue tracker](https://github.com/barrust/mediawiki/issues) and mark it as an
83 | enhancement. A ***great enhancement*** request will have the following
84 | information:
85 |
86 |     * A descriptive title
87 |
88 |     * A description of the desired functionality: use cases, added benefit to
89 |     the library, etc.
90 |
91 |     * A code example, if necessary, to explain how the code would be used
92 |
93 |     * A description of the desired results
94 |
95 | #### Pull Requests
96 |
97 | Pull requests are how you will be able to add new features, fix bugs, or update
98 | documentation in the pymediawiki library. To create a pull request, you will
99 | first need to fork the repository, make all necessary changes and then create
100 | a pull request. There are a few guidelines for creating pull requests:
101 |
102 | * All pull requests must be based off of the latest development branch and not
103 | master (unless there is not a development branch!)
104 |
105 | * If the PR only changes documentation, please add `[ci skip]` to the commit
106 | message. To learn more, you can [read about skipping integration testing](https://docs.travis-ci.com/user/customizing-the-build#Skipping-a-build)
107 |
108 | * Reference ***any and all*** [issues](https://github.com/barrust/mediawiki/issues)
109 | related to the pull request
110 |
111 | #### Testing
112 |
113 | Each pull request should add or modify the appropriate tests. pymediawiki uses
114 | the unittest module to support tests and most are currently found in the
115 | `./tests/mediawiki_test.py` file.
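As a rough, illustrative sketch only (the class name, page title, and expected
value below are made up for this example and are not taken from the existing
suite), a new test might look like:

```python
import unittest

from mediawiki import MediaWiki


class TestExampleRegression(unittest.TestCase):
    """illustrative sketch of a pymediawiki regression test"""

    def test_page_title(self):
        """a page pulled by title should report that same title"""
        site = MediaWiki()  # defaults to Wikipedia
        page = site.page("Chess")
        self.assertEqual(page.title, "Chess")
```

Note that this sketch hits the live API; the actual test suite instead relies
on the captured request/response data described next, so tests can run without
internet access.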
116 |
117 | The `./scripts/generate_test_data.py` file is used to help capture request and
118 | response data in different json files for running tests without internet
119 | access.
120 |
121 | * ###### New Feature:
122 |     * Add tests for each variation of the new feature
123 |
124 | * ###### Bug Fix
125 |     * Add at least one regression test of an instance that is working to help
126 |     ensure that the bug fix does not cause a new bug
127 |
128 |     * Add at least one test to show the corrected outcome from the updated code
129 |     to help ensure that the code works as intended
130 |
131 | #### Coding Style
132 |
133 | The MediaWiki API wrapper project follows the
134 | [PEP8](https://www.python.org/dev/peps/pep-0008/) coding style for consistency
135 | and readability. Code that does not comply with PEP8 will not be accepted into
136 | the project as-is. All code should adhere to the PEP8 coding style standard
137 | where possible.
138 |
139 | The MediaWiki API wrapper project also uses [pylint](https://www.pylint.org/)
140 | to help identify potential errors, code duplication, and non-pythonic syntax.
141 | Adhering to pylint's results is not strictly required.
142 |
143 | To install the [PEP8 compliance checker](https://pypi.org/project/pycodestyle/),
144 | you can simply run the following:
145 |
146 | ```
147 | pip install pycodestyle
148 | ```
149 |
150 | To test for PEP8 compliance, run the following from the root directory:
151 |
152 | ```
153 | pycodestyle mediawiki
154 | ```
155 |
156 | ### Code Contributors:
157 |
158 | A special thanks to all the code contributors to `pymediawiki`!
159 |
160 | * [@barrust](https://github.com/barrust) (Maintainer)
161 | * [@dan-blanchard](https://github.com/dan-blanchard) - Default URL conforms to passed in language [#26](https://github.com/barrust/mediawiki/pull/26)
162 | * [@nagash91](https://github.com/nagash91) - Pull section titles without additional markup [#42](https://github.com/barrust/mediawiki/issues/42)
163 | * [@flamableconcrete](https://github.com/flamableconcrete) - Added `allpages` functionality [#75](https://github.com/barrust/mediawiki/pull/75)
164 | * [@shnela](https://github.com/shnela) - Added `langlinks` property [#65](https://github.com/barrust/mediawiki/issues/65)
165 | * [@rubabredwan](https://github.com/rubabredwan) - Fix for `suggest` [#85](https://github.com/barrust/mediawiki/pull/85)
166 | * [@ldorigo](https://github.com/ldorigo) - Pulling links for header sections [#90](https://github.com/barrust/mediawiki/pull/90)
167 | * [@tbm](https://github.com/tbm) - `categorymember` improvements [PR #100](https://github.com/barrust/mediawiki/pull/100)
168 | * [@dnanto](https://github.com/dnanto) - Determining `available_languages` [PR #116](https://github.com/barrust/mediawiki/pull/116)
169 | * [gbenson](https://github.com/gbenson) - HTTPAuthenticator [PR #141](https://github.com/barrust/mediawiki/pull/141)
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # MediaWiki Changelog
 2 |
 3 | ## Version 0.7.5
 4 |
 5 | * Move configuration items to a configuration data class
 6 |     * Will allow for the deprecation of some top-level properties in favor of changing them on the `Configuration` class
 7 | * Added HTTPAuthenticator for web server authentication; [PR #141](https://github.com/barrust/mediawiki/pull/141) Thanks [gbenson](https://github.com/gbenson)
 8 |
 9 | ## Version 0.7.4
10 |
11 | * Add typing support
12 |
13
| ## Version 0.7.3 14 | 15 | * Add `unordered_options` to the `DisambiguationError` to attempt to get options in the order presented on the page; [issue #124](https://github.com/barrust/mediawiki/issues/124) 16 | * Add [verify SSL support](https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification) by passing info directly to the requests library. 17 | 18 | ## Version 0.7.2 19 | 20 | * Add `page_preview` property to simulate the page preview hover [PR #114](https://github.com/barrust/mediawiki/pull/114) 21 | * Add `available_languages` property [PR #116](https://github.com/barrust/mediawiki/pull/116) 22 | 23 | ## Version 0.7.1 24 | 25 | * Add `page.wikitext` support for pulling the page contents as [wikitext](https://en.wikipedia.org/wiki/Help:Wikitext) 26 | * Add [proxy support](https://requests.readthedocs.io/en/master/user/advanced/#proxies) by passing info directly to the requests library. 27 | 28 | ## Version 0.7.0 29 | 30 | * Remove support for ***python 2.7***! 31 | * Add files to `categorymembers()` pull [PR #100](https://github.com/barrust/mediawiki/pull/100) Thanks [tbm](https://github.com/tbm) 32 | * Better support calls to limit results to the maximum 500 results per query (per API documentation) [PR #99](https://github.com/barrust/mediawiki/pull/99) Thanks [tbm](https://github.com/tbm) 33 | 34 | ## Version 0.6.7 35 | 36 | * ***NOTE:*** Last planned support for **Python 2.7** 37 | * Cache results of `BeautifulSoup` parsing of `page.html` [PR #90](https://github.com/barrust/mediawiki/pull/90) Thanks [ldorigo](https://github.com/ldorigo) 38 | * Add ability to pull links from the header section; Thanks to [ldorigo](https://github.com/ldorigo) for example code! 39 | * Add ability to pull the header section text 40 | * Move to GitHub Actions and CodeCov for testing 41 | 42 | ## Version 0.6.6 43 | 44 | * Fix a bug using `find_all()` on newer versions of BeautifulSoup4 45 | 46 | ## Version 0.6.5 47 | 48 | * Fix for `suggest` [PR #85](https://github.com/barrust/mediawiki/pull/85) Thanks [rubabredwan](https://github.com/rubabredwan) 49 | * `__slots__` usage 50 | 51 | ## Version 0.6.4 52 | 53 | * Add ability to login during initialization [issue #79](https://github.com/barrust/mediawiki/issues/79) 54 | 55 | ## Version 0.6.3 56 | 57 | * Capture timeout exception 58 | * bs4 does not support `hasattr` but uses `*.has_attr()` 59 | 60 | ## Version 0.6.2 61 | 62 | * Add `allpages` functionality [PR #75](https://github.com/barrust/mediawiki/pull/75) 63 | * Add `langlinks` page property [PR #76](https://github.com/barrust/mediawiki/pull/76) 64 | 65 | ## Version 0.6.1 66 | 67 | * Fix DisambiguationError title property [issue #72](https://github.com/barrust/mediawiki/issues/72) 68 | * Change to using [black](https://github.com/ambv/black) formatting 69 | 70 | ## Version 0.6.0 71 | 72 | * Fix for the table of contents for all subsections [issue #64](https://github.com/barrust/mediawiki/issues/64) 73 | * Combined properties into a single set of pulling to reduce the load on the MediaWiki infrastructure [issue #55](https://github.com/barrust/mediawiki/issues/55) 74 | 75 | ## Version 0.5.1 76 | 77 | * Added Table of Contents parsing based on sections: result is an OrderedDict 78 | * Fix issue where some sections are not pulled correctly 79 | 80 | ## Version 0.5.0 81 | 82 | * Add support for logging into the MediaWiki site [issue #59](https://github.com/barrust/mediawiki/issues/59) 83 | 84 | ## Version 0.4.1 85 | 86 | * Default to `https` 87 | * Add `category_prefix` property to 
properly support categories in non-English 88 | MediaWiki sites [issue #48](https://github.com/barrust/mediawiki/issues/48) 89 | * Add `user_agent` as an initialization parameter and added information to the 90 | documentation about why one should set the user-agent string [issue #50](https://github.com/barrust/mediawiki/issues/50) 91 | 92 | ### Version 0.4.0 93 | 94 | * Add fix to use the `query-continue` parameter to continue to pull category 95 | members [issue #39](https://github.com/barrust/mediawiki/issues/39) 96 | * Better handle large categorymember selections 97 | * Add better handling of exception attributes including adding them to the 98 | documentation 99 | * Correct the pulling of the section titles without additional markup [issue #42](https://github.com/barrust/mediawiki/issues/42) 100 | * Handle memoization of unicode parameters in python 2.7 101 | * ***Change default timeout*** for HTTP requests to 15 seconds 102 | 103 | ### Version 0.3.16 104 | 105 | * Add ability to turn off caching completely 106 | * Fix bug when disambiguation link does not have a title [issue #35](https://github.com/barrust/mediawiki/issues/35) 107 | 108 | ### Version 0.3.15 109 | 110 | * Add parse all links within a section [issue #33](https://github.com/barrust/mediawiki/issues/33) 111 | * Add base url property to mediawiki site 112 | 113 | ### Version 0.3.14 114 | 115 | * Add refresh interval to cached responses (Defaults to not refresh) 116 | [issue #30](https://github.com/barrust/mediawiki/issues/30) 117 | * Fix minor documentation issues 118 | 119 | ### Version 0.3.13 120 | 121 | * Add pulling hatnotes [issue #6](https://github.com/barrust/mediawiki/issues/6) 122 | * Add pulling list of main images or logos [issue #28](https://github.com/barrust/mediawiki/issues/28) 123 | 124 | ### Version 0.3.12 125 | 126 | * Default API URL is now language specific: [PR #26](https://github.com/barrust/mediawiki/pull/26) 127 | 128 | ### Version 0.3.11 129 | 130 | * Re-factor MediaWikiPage into its own file 131 | * Remove setting properties outside of **init**() 132 | * Better Unicode support 133 | * Add CONTRIBUTING.md file 134 | 135 | ### Version 0.3.10 136 | 137 | * Add categorytree support 138 | * Remove adding 'http:' to references if missing 139 | 140 | ### Version 0.3.9 141 | 142 | * Fix infinite loop on continued queries: [issue #15](https://github.com/barrust/mediawiki/issues/15) 143 | * Check by looking at the continue variable over time; if it is the same, exit 144 | * Fix image with no url: [issue #14](https://github.com/barrust/mediawiki/issues/14) 145 | 146 | ### Version 0.3.8 147 | 148 | * Fix empty disambiguation list items 149 | 150 | ### Version 0.3.7 151 | 152 | * Memoize support default parameters 153 | * Add support test for Python 3.6 154 | 155 | ### Version 0.3.6 156 | 157 | * Updated Exception documentation 158 | * Fix badges in Readme file 159 | * Additional test coverage 160 | 161 | ### Version 0.3.5 162 | 163 | * Add documentation to README 164 | * Quickstart information 165 | * pip install instructions [pypi - pymediawiki](https://pypi.python.org/pypi/pymediawiki/) 166 | * Additional testing 167 | 168 | ### Version 0.3.4 169 | 170 | * Update documentation 171 | * Better continuous integration 172 | * Better test data: [issue #4](https://github.com/barrust/mediawiki/issues/4) 173 | * First version on PyPi: [issue #8](https://github.com/barrust/mediawiki/issues/8) 174 | 175 | ### Version 0.3.3 176 | 177 | * Improve testing strategy 178 | * Move tests to json from pickle 179 | * Improve 
parameter checking for geosearch 180 | * Code standardization 181 | * Pep8 182 | * Pylint 183 | * Single quote strings 184 | 185 | ### Version 0.3.2 186 | 187 | * OpenSearch functionality 188 | * PrefixSearch functionality 189 | 190 | ### Version 0.3.1 191 | 192 | * Page Summary 193 | * Page Sections 194 | * Enforce sorting of page properties 195 | 196 | ### Pre-Version 0.3.1 197 | 198 | * Add MediaWiki class 199 | * Add MediaWikiPage class 200 | * Stubbed out functionality 201 | * Add page properties 202 | -------------------------------------------------------------------------------- /mediawiki/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | MediaWiki Exceptions 3 | """ 4 | 5 | from typing import Dict, List, Optional 6 | 7 | from mediawiki.utilities import str_or_unicode 8 | 9 | ODD_ERROR_MESSAGE = ( 10 | "This should not happen. If the MediaWiki site you are " 11 | "querying is available, then please report this issue on " 12 | "GitHub: github.com/barrust/mediawiki" 13 | ) 14 | 15 | 16 | class MediaWikiBaseException(Exception): 17 | """Base MediaWikiException 18 | 19 | Args: 20 | message: The message of the exception""" 21 | 22 | def __init__(self, message: str): 23 | self._message = message 24 | super().__init__(self.message) 25 | 26 | def __unicode__(self): 27 | return self.message 28 | 29 | def __str__(self): 30 | return str_or_unicode(self.__unicode__()) 31 | 32 | @property 33 | def message(self) -> str: 34 | """str: The MediaWiki exception message""" 35 | return self._message 36 | 37 | 38 | class MediaWikiException(MediaWikiBaseException): 39 | """MediaWiki Exception Class 40 | 41 | Args: 42 | error (str): The error message that the MediaWiki site returned""" 43 | 44 | def __init__(self, error: str): 45 | self._error = error 46 | msg = f'An unknown error occurred: "{self.error}". Please report it on GitHub!' 47 | super().__init__(msg) 48 | 49 | @property 50 | def error(self) -> str: 51 | """str: The error message that the MediaWiki site returned""" 52 | return self._error 53 | 54 | 55 | class PageError(MediaWikiBaseException): 56 | """Exception raised when no MediaWiki page matched a query 57 | 58 | Args: 59 | title (str): Title of the page 60 | pageid (int): MediaWiki page id of the page""" 61 | 62 | def __init__(self, title: Optional[str] = None, pageid: Optional[int] = None): 63 | if title: 64 | self._title = title 65 | msg = f'"{self.title}" does not match any pages. Try another query!' 66 | elif pageid: 67 | self._pageid = pageid 68 | msg = f'Page id "{self.pageid}" does not match any pages. Try another id!' 69 | else: 70 | self._title = "" 71 | msg = f'"{self.title}" does not match any pages. Try another query!' 72 | super().__init__(msg) 73 | 74 | @property 75 | def title(self) -> str: 76 | """str: The title that caused the page error""" 77 | return self._title 78 | 79 | @property 80 | def pageid(self) -> int: 81 | """int: The page id that caused the page error""" 82 | return self._pageid 83 | 84 | 85 | class RedirectError(MediaWikiBaseException): 86 | """ Exception raised when a page title unexpectedly resolves to 87 | a redirect 88 | 89 | Args: 90 | title (str): Title of the page that redirected 91 | Note: 92 | This should only occur if both auto_suggest and redirect \ 93 | are set to **False** """ 94 | 95 | def __init__(self, title: str): 96 | self._title = title 97 | msg = ( 98 | f'"{self.title}" resulted in a redirect. Set the redirect property to True ' "to allow automatic redirects." 
 99 |         )
100 |
101 |         super().__init__(msg)
102 |
103 |     @property
104 |     def title(self) -> str:
105 |         """str: The title that was redirected"""
106 |         return self._title
107 |
108 |
109 | class DisambiguationError(MediaWikiBaseException):
110 |     """ Exception raised when a page resolves to a Disambiguation page
111 |
112 |     Args:
113 |         title (str): Title that resulted in a disambiguation page
114 |         may_refer_to (list): List of possible titles
115 |         url (str): Full URL to the disambiguation page
116 |         details (dict): A list of dictionaries with more information of \
117 |             possible results
118 |     Note:
119 |         `options` only includes titles that link to valid \
120 |         MediaWiki pages """
121 |
122 |     def __init__(self, title: str, may_refer_to: List[str], url: str, details: Optional[List[Dict]] = None):
123 |         self._title = title
124 |         self._unordered_options = may_refer_to
125 |         self._options = sorted(may_refer_to)
126 |         self._details = details
127 |         self._url = url
128 |         options_str = "\n ".join(self.options)
129 |         msg = f'\n"{self.title}" may refer to: \n {options_str}'
130 |         super().__init__(msg)
131 |
132 |     @property
133 |     def url(self) -> str:
134 |         """str: The url, if possible, of the disambiguation page"""
135 |         return self._url
136 |
137 |     @property
138 |     def title(self) -> str:
139 |         """str: The title of the page"""
140 |         return self._title
141 |
142 |     @property
143 |     def options(self) -> List[str]:
144 |         """list: The list of possible page titles"""
145 |         return self._options
146 |
147 |     @property
148 |     def unordered_options(self) -> List[str]:
149 |         """list: The list of possible page titles, un-sorted in an attempt to get them as they show up on the page"""
150 |         return self._unordered_options
151 |
152 |     @property
153 |     def details(self) -> Optional[List[Dict]]:
154 |         """list: The details of the proposed non-disambiguous pages"""
155 |         return self._details
156 |
157 |
158 | class HTTPTimeoutError(MediaWikiBaseException):
159 |     """Exception raised when a request to the MediaWiki site times out.
160 |
161 |     Args:
162 |         query (str): The query that timed out"""
163 |
164 |     def __init__(self, query: str):
165 |         self._query = query
166 |         msg = (
167 |             f'Searching for "{self.query}" resulted in a timeout. '
168 |             "Try again in a few seconds, and ensure you have rate limiting "
169 |             "set to True."
170 |         )
171 |         super().__init__(msg)
172 |
173 |     @property
174 |     def query(self) -> str:
175 |         """str: The query that timed out"""
176 |         return self._query
177 |
178 |
179 | class MediaWikiAPIURLError(MediaWikiBaseException):
180 |     """Exception raised when the MediaWiki server does not support the API
181 |
182 |     Args:
183 |         api_url (str): The API URL that was not recognized"""
184 |
185 |     def __init__(self, api_url: str):
186 |         self._api_url = api_url
187 |         msg = f"{self.api_url} is not a valid MediaWiki API URL"
188 |         super().__init__(msg)
189 |
190 |     @property
191 |     def api_url(self) -> str:
192 |         """str: The api url that raised the exception"""
193 |         return self._api_url
194 |
195 |
196 | class MediaWikiGeoCoordError(MediaWikiBaseException):
197 |     """ Exception raised to handle GeoData errors
198 |
199 |     Args:
200 |         error (str): Error message from the MediaWiki site related to \
201 |             GeoCoordinates """
202 |
203 |     def __init__(self, error: str):
204 |         self._error = error
205 |         msg = (
206 |             f"GeoData search resulted in the following error: {self.error}"
207 |             " - Please use valid coordinates or a proper page title." 
217 | class MediaWikiCategoryTreeError(MediaWikiBaseException):
218 | """Exception raised when the category tree fails to complete for an unknown
219 | reason
220 |
221 | Args:
222 | category (str): The category that threw an exception"""
223 |
224 | def __init__(self, category: str):
225 | self._category = category
226 | msg = (
227 | f"Categorytree threw an exception after requesting the same category '{self._category}' "
228 | "too many times. Please try again later, and consider enabling the rate limiting option."
229 | )
230 | super().__init__(msg)
231 |
232 | @property
233 | def category(self) -> str:
234 | """ str: The category that threw an exception during category tree \
235 | generation """
236 | return self._category
237 |
238 |
239 | class MediaWikiLoginError(MediaWikiBaseException):
240 | """Exception raised when unable to login to the MediaWiki site
241 |
242 | Args:
243 | error (str): The error message that the MediaWiki site returned"""
244 |
245 | def __init__(self, error: str):
246 | self._error = error
247 | super().__init__(error)
248 |
249 | @property
250 | def error(self) -> str:
251 | """str: The error message that the MediaWiki site returned"""
252 | return self._error
253 |
254 |
255 | class MediaWikiForbidden(MediaWikiBaseException):
256 | """Exception raised when a forbidden (HTTP 403) status code is returned"""
257 |
258 | def __init__(self, error: str):
259 | self._error = error
260 | super().__init__(self._error)
261 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
21 |
22 | .PHONY: help
23 | help:
24 | @echo "Please use \`make <target>' where <target> is one of"
25 | @echo " html to make standalone HTML files"
26 | @echo " dirhtml to make HTML files named index.html in directories"
27 | @echo " singlehtml to make a single large HTML file"
28 | @echo " pickle to make pickle files"
29 | @echo " json to make JSON files"
30 | @echo " htmlhelp to make HTML files and a HTML help project"
31 | @echo " qthelp to make HTML files and a qthelp project"
32 | @echo " applehelp to make an Apple Help Book"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " epub3 to make an epub3"
36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
37 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
39 | @echo " text to make text files"
40 | @echo " man to make manual pages"
41 | @echo " texinfo to make Texinfo files"
42 | @echo " info to make Texinfo files and run them through makeinfo"
43 | @echo " gettext to make PO message catalogs"
44 | @echo " changes to make an overview of all changed/added/deprecated items"
45 | @echo " xml to make Docutils-native XML files"
46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
47 | @echo " linkcheck to check all external links for integrity"
48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
49 | @echo " coverage to run coverage check of the documentation (if enabled)"
50 | @echo " dummy to check syntax errors of document sources"
51 |
52 | .PHONY: clean
53 | clean:
54 | rm -rf $(BUILDDIR)/*
55 |
56 | .PHONY: html
57 | html:
58 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
59 | @echo
60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
61 |
62 | .PHONY: dirhtml
63 | dirhtml:
64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
65 | @echo
66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
67 |
68 | .PHONY: singlehtml
69 | singlehtml:
70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
71 | @echo
72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
73 |
74 | .PHONY: pickle
75 | pickle:
76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
77 | @echo
78 | @echo "Build finished; now you can process the pickle files."
79 |
80 | .PHONY: json
81 | json:
82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
83 | @echo
84 | @echo "Build finished; now you can process the JSON files."
85 |
86 | .PHONY: htmlhelp
87 | htmlhelp:
88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
89 | @echo
90 | @echo "Build finished; now you can run HTML Help Workshop with the" \
91 | ".hhp project file in $(BUILDDIR)/htmlhelp."
92 | 93 | .PHONY: qthelp 94 | qthelp: 95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 96 | @echo 97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/mediawiki.qhcp" 100 | @echo "To view the help file:" 101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/mediawiki.qhc" 102 | 103 | .PHONY: applehelp 104 | applehelp: 105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 106 | @echo 107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 108 | @echo "N.B. You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/mediawiki" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/mediawiki" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 
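# Usage note (editorial): run these targets from the docs/ directory, e.g.
# `make html`; the variables declared at the top can be overridden on the
# command line, as in `make html SPHINXOPTS="-W"` or `make latex PAPER=a4`.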
182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 201 | 202 | .PHONY: doctest 203 | doctest: 204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 205 | @echo "Testing of doctests in the sources finished, look at the " \ 206 | "results in $(BUILDDIR)/doctest/output.txt." 207 | 208 | .PHONY: coverage 209 | coverage: 210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 211 | @echo "Testing of coverage in the sources finished, look at the " \ 212 | "results in $(BUILDDIR)/coverage/python.txt." 213 | 214 | .PHONY: xml 215 | xml: 216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 217 | @echo 218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 219 | 220 | .PHONY: pseudoxml 221 | pseudoxml: 222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 223 | @echo 224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 225 | 226 | .PHONY: dummy 227 | dummy: 228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 229 | @echo 230 | @echo "Build finished. Dummy builder generates no files." 231 | -------------------------------------------------------------------------------- /mediawiki/configuraton.py: -------------------------------------------------------------------------------- 1 | """Configuration module""" 2 | 3 | from dataclasses import asdict, dataclass, field 4 | from datetime import datetime, timedelta 5 | from typing import Any, Callable, Dict, Optional, Tuple, Union 6 | 7 | URL: str = "https://github.com/barrust/mediawiki" 8 | VERSION: str = "0.7.5" 9 | 10 | HTTPAuthenticator = Union[Tuple[str, str], Callable[[Any], Any]] 11 | 12 | 13 | @dataclass 14 | class Configuration: 15 | """Configuration class""" 16 | 17 | _lang: str = field(default="en", init=False, repr=False) 18 | _api_url: str = field(default="https://en.wikipedia.org/w/api.php", init=False, repr=False) 19 | _category_prefix: str = field(default="Category", init=False, repr=False) 20 | _timeout: Optional[float] = field(default=15.0, init=False, repr=False) 21 | _user_agent: str = field(default=f"python-mediawiki/VERSION-{VERSION}/({URL})/BOT", init=False, repr=False) 22 | _proxies: Optional[Dict] = field(default=None, init=False, repr=False) 23 | _verify_ssl: Union[bool, str] = field(default=True, init=False, repr=False) 24 | _rate_limit: bool = field(default=False, init=False, repr=False) 25 | _rate_limit_min_wait: timedelta = field(default=timedelta(milliseconds=50), init=False, repr=False) 26 | _username: Optional[str] = field(default=None, init=False, repr=False) 27 | _password: Optional[str] = field(default=None, init=False, repr=False) 28 | _refresh_interval: Optional[int] = field(default=None, init=False, repr=False) 29 | _use_cache: bool = field(default=True, init=False, repr=False) 30 | _http_auth: Optional[HTTPAuthenticator] = field(default=None, init=False, repr=False) 31 | 32 | # not in repr 33 | _reset_session: bool = 
field(default=True, init=False, repr=False)
34 | _clear_memoized: bool = field(default=False, init=False, repr=False)
35 | _rate_limit_last_call: Optional[datetime] = field(default=None, init=False, repr=False)
36 |
37 | def __init__(
38 | self,
39 | lang: Optional[str] = None,
40 | api_url: Optional[str] = None,
41 | category_prefix: Optional[str] = None,
42 | timeout: Optional[float] = None,
43 | user_agent: Optional[str] = None,
44 | proxies: Optional[Dict] = None,
45 | verify_ssl: Union[bool, str, None] = None,
46 | rate_limit: bool = False,
47 | rate_limit_wait: Optional[timedelta] = None,
48 | username: Optional[str] = None,
49 | password: Optional[str] = None,
50 | refresh_interval: Optional[int] = None,
51 | use_cache: bool = True,
52 | http_auth: Optional[HTTPAuthenticator] = None,
53 | ):
54 | if api_url:
55 | self._api_url = api_url
56 |
57 | if lang:
58 | self.lang = lang
59 |
60 | if category_prefix:
61 | self.category_prefix = category_prefix
62 |
63 | if user_agent:
64 | self._user_agent = user_agent
65 |
66 | if proxies:
67 | self.proxies = proxies
68 |
69 | if verify_ssl is not None:
70 | self.verify_ssl = verify_ssl
71 |
72 | if rate_limit:
73 | self.rate_limit = rate_limit
74 |
75 | if rate_limit_wait:
76 | self._rate_limit_min_wait = rate_limit_wait
77 |
78 | if username:
79 | self.username = username
80 |
81 | if password:
82 | self.password = password
83 |
84 | if refresh_interval:
85 | self.refresh_interval = refresh_interval
86 |
87 | # assign unconditionally; "if use_cache" would make it impossible to disable the cache
88 | self.use_cache = use_cache
89 |
90 | if timeout is not None:
91 | self.timeout = timeout
92 |
93 | if http_auth:
94 | self.http_auth = http_auth
95 |
96 | def __repr__(self):
97 | """repr"""
98 | keys = [
99 | x.replace("_", "", 1)
100 | for x in sorted(asdict(self).keys())
101 | if x not in ["_rate_limit_last_call", "_clear_memoized", "_reset_session"]
102 | ]
103 | full = [f"{x}={self.__getattribute__(x)}" for x in keys]
104 | return f"Configuration({', '.join(full)})"
105 |
106 | @property
107 | def lang(self) -> str:
108 | """str: The API URL language; if possible, setting this will update the API URL
109 |
110 | Note:
111 | Use correct language titles with the updated API URL
112 | Note:
113 | Some API URLs do not encode the language; the URL cannot be updated in that case"""
114 | return self._lang
115 |
116 | @lang.setter
117 | def lang(self, language: str):
118 | """Set the language to use; attempts to change the API URL"""
119 | if self._lang == language.lower():
120 | return
121 | url = self._api_url
122 | tmp = url.replace(f"/{self._lang}.", f"/{language.lower()}.")
123 |
124 | self.api_url = tmp
125 | self._lang = language.lower()
126 | self._clear_memoized = True
127 |
128 | @property
129 | def api_url(self) -> str:
130 | """str: API URL of the MediaWiki site
131 |
132 | Note:
133 | Prefer :py:func:`mediawiki.MediaWiki.set_api_url` to change the URL on a live MediaWiki instance"""
134 | return self._api_url
135 |
136 | @api_url.setter
137 | def api_url(self, api_url: str):
138 | self._lang = self.lang.lower()
139 | self._api_url = api_url.format(lang=self._lang)
140 |
141 | # reset session
142 | self._reset_session = True
143 |
144 | @property
145 | def category_prefix(self) -> str:
146 | """str: The category prefix to use when using category based functions
147 |
148 | Note:
149 | Use the correct category name for the language selected"""
150 | return self._category_prefix
151 |
152 | @category_prefix.setter
153 | def category_prefix(self, category_prefix: str):
154 | """Set the category prefix correctly"""
155 | self._category_prefix = category_prefix[:-1] if category_prefix[-1:] == ":" else category_prefix
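# Editor's note: the setter above strips a single trailing colon so that both
# spellings configure the same prefix, e.g. (hypothetical values):
#
#     config = Configuration(category_prefix="Catégorie:")
#     config.category_prefix  # -> "Catégorie"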
156 |
157 | @property
158 | def user_agent(self) -> str:
159 | """str: User agent string
160 |
161 | Note:
162 | If used as part of another project, this should be changed"""
163 | return self._user_agent
164 |
165 | @user_agent.setter
166 | def user_agent(self, user_agent: str):
167 | """Set the new user agent string
168 |
169 | Note:
170 | Will need to re-log into the MediaWiki site if the user agent string is changed"""
171 | self._user_agent = user_agent
172 |
173 | @property
174 | def proxies(self) -> Optional[Dict]:
175 | """dict: Turn on, off, or set proxy use with the Requests library"""
176 | return self._proxies
177 |
178 | @proxies.setter
179 | def proxies(self, proxies: Optional[Dict]):
180 | """Turn on, off, or set proxy use through the Requests library"""
181 | self._proxies = proxies if isinstance(proxies, dict) else None
182 |
183 | # reset session
184 | self._reset_session = True
185 |
186 | @property
187 | def verify_ssl(self) -> Union[bool, str]:
188 | """bool | str: Verify SSL when using requests, or path to cert file"""
189 | return self._verify_ssl
190 |
191 | @verify_ssl.setter
192 | def verify_ssl(self, verify_ssl: Union[bool, str, None]):
193 | """Set request verify SSL parameter; defaults to True on invalid input"""
194 | self._verify_ssl = verify_ssl if isinstance(verify_ssl, (bool, str)) else True
195 |
196 | # reset session
197 | self._reset_session = True
198 |
199 | @property
200 | def rate_limit(self) -> bool:
201 | """bool: Turn on or off Rate Limiting"""
202 | return self._rate_limit
203 |
204 | @rate_limit.setter
205 | def rate_limit(self, rate_limit: bool):
206 | """Turn on or off rate limiting"""
207 | self._rate_limit = bool(rate_limit)
208 | self._rate_limit_last_call = None
209 | self._clear_memoized = True
210 |
211 | @property
212 | def rate_limit_min_wait(self) -> timedelta:
213 | """timedelta: Time to wait between calls
214 |
215 | Note:
216 | Only used if rate_limit is **True**"""
217 | return self._rate_limit_min_wait
218 |
219 | @rate_limit_min_wait.setter
220 | def rate_limit_min_wait(self, min_wait: timedelta):
221 | """Set minimum wait to use for rate limiting"""
222 | self._rate_limit_min_wait = min_wait
223 | self._rate_limit_last_call = None
224 |
225 | @property
226 | def username(self) -> Optional[str]:
227 | """str | None: Username to use to log into the MediaWiki site"""
228 | return self._username
229 |
230 | @username.setter
231 | def username(self, username: Optional[str]):
232 | """Set the username, if needed, to log into the MediaWiki site"""
233 | self._username = username
234 |
235 | @property
236 | def password(self) -> Optional[str]:
237 | """str | None: Password to use to log into the MediaWiki site"""
238 | return self._password
239 |
240 | @password.setter
241 | def password(self, password: Optional[str]):
242 | """Set the password, if needed, to log into the MediaWiki site"""
243 | self._password = password
244 |
245 | @property
246 | def refresh_interval(self) -> Optional[int]:
247 | """int | None: The interval at which the memoized cache is to be refreshed"""
248 | return self._refresh_interval
249 |
250 | @refresh_interval.setter
251 | def refresh_interval(self, refresh_interval: Optional[int]):
252 | """Set the new cache refresh interval"""
253 | self._refresh_interval = (
254 | refresh_interval if isinstance(refresh_interval, int) and refresh_interval > 0 else None
255 | )
256 |
257 | @property
258 | def use_cache(self) -> bool:
259 | """bool: Whether caching should be used; on
(**True**) or off (**False**)""" 260 | return self._use_cache 261 | 262 | @use_cache.setter 263 | def use_cache(self, use_cache: bool): 264 | """toggle using the cache or not""" 265 | self._use_cache = bool(use_cache) 266 | 267 | @property 268 | def timeout(self) -> Optional[float]: 269 | """float: Response timeout for API requests 270 | 271 | Note: 272 | Use **None** for no response timeout""" 273 | return self._timeout 274 | 275 | @timeout.setter 276 | def timeout(self, timeout: Optional[float]): 277 | """Set request timeout in seconds (or fractions of a second)""" 278 | self._timeout = None if timeout is None else float(timeout) 279 | 280 | @property 281 | def http_auth(self) -> Optional[HTTPAuthenticator]: 282 | """tuple|callable: HTTP authenticator to use to access the mediawiki site""" 283 | return self._http_auth 284 | 285 | @http_auth.setter 286 | def http_auth(self, http_auth: Optional[HTTPAuthenticator]): 287 | """Set the HTTP authenticator, if needed, to use to access the mediawiki site""" 288 | self._http_auth = http_auth 289 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # mediawiki documentation build configuration file, created by 5 | # sphinx-quickstart on Sat Sep 24 19:03:06 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import os 17 | import sys 18 | from typing import Dict, List 19 | 20 | # If extensions (or modules to document with autodoc) are in another directory, 21 | # add these directories to sys.path here. If the directory is relative to the 22 | # documentation root, use os.path.abspath to make it absolute, like shown here. 23 | # sys.path.insert(0, os.path.abspath('.')) 24 | sys.path.insert(0, os.path.abspath("../../")) 25 | # sys.path.append(os.path.abspath("_themes")) 26 | import mediawiki 27 | 28 | # -- General configuration ------------------------------------------------ 29 | 30 | # If your documentation needs a minimal Sphinx version, state it here. 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [ 37 | "sphinx.ext.autodoc", 38 | "sphinx.ext.napoleon", 39 | "sphinx.ext.doctest", 40 | "sphinx.ext.coverage", 41 | "sphinx.ext.viewcode", 42 | "sphinx.ext.githubpages", 43 | "sphinx.ext.todo", 44 | ] 45 | 46 | napoleon_use_admonition_for_notes = True 47 | 48 | # Add any paths that contain templates here, relative to this directory. 49 | templates_path = ["_templates"] 50 | 51 | # The suffix(es) of source filenames. 52 | # You can specify multiple suffix as a list of string: 53 | # source_suffix = ['.rst', '.md'] 54 | source_suffix = ".rst" 55 | 56 | # The encoding of source files. 57 | # source_encoding = 'utf-8-sig' 58 | 59 | # The master toctree document. 60 | master_doc = "index" 61 | 62 | # General information about the project. 
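# (Editorial example for the Configuration dataclass defined in
# mediawiki/configuraton.py above; not part of conf.py. The keyword arguments
# mirror Configuration.__init__, and the user agent value is a made-up
# placeholder.)
#
#     from datetime import timedelta
#     from mediawiki.configuraton import Configuration
#
#     config = Configuration(
#         user_agent="my-wiki-bot/0.1 (example@example.com)",
#         rate_limit=True,
#         rate_limit_wait=timedelta(milliseconds=100),
#         timeout=30.0,
#     )
#     print(config.api_url)  # -> https://en.wikipedia.org/w/api.php (default)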
63 | project = "mediawiki"
64 | copyright = "2016, Tyler Barrus"
65 | author = mediawiki.__author__
66 |
67 | # The version info for the project you're documenting, acts as replacement for
68 | # |version| and |release|, also used in various other places throughout the
69 | # built documents.
70 | #
71 | # The short X.Y version.
72 | version = mediawiki.__version__
73 | # The full version, including alpha/beta/rc tags.
74 | release = mediawiki.__version__
75 |
76 | # The language for content autogenerated by Sphinx. Refer to documentation
77 | # for a list of supported languages.
78 | #
79 | # This is also used if you do content translation via gettext catalogs.
80 | # Usually you set "language" from the command line for these cases.
81 | language = "en"
82 |
83 | # There are two options for replacing |today|: either, you set today to some
84 | # non-false value, then it is used:
85 | # today = ''
86 | # Else, today_fmt is used as the format for a strftime call.
87 | # today_fmt = '%B %d, %Y'
88 |
89 | # List of patterns, relative to source directory, that match files and
90 | # directories to ignore when looking for source files.
91 | # These patterns also affect html_static_path and html_extra_path
92 | exclude_patterns: List[str] = []
93 |
94 | # The reST default role (used for this markup: `text`) to use for all
95 | # documents.
96 | # default_role = None
97 |
98 | # If true, '()' will be appended to :func: etc. cross-reference text.
99 | # add_function_parentheses = True
100 |
101 | # If true, the current module name will be prepended to all description
102 | # unit titles (such as .. function::).
103 | # add_module_names = True
104 |
105 | # If true, sectionauthor and moduleauthor directives will be shown in the
106 | # output. They are ignored by default.
107 | # show_authors = False
108 |
109 | # The name of the Pygments (syntax highlighting) style to use.
110 | pygments_style = "sphinx"
111 |
112 | # A list of ignored prefixes for module index sorting.
113 | # modindex_common_prefix = []
114 |
115 | # If true, keep warnings as "system message" paragraphs in the built documents.
116 | # keep_warnings = False
117 |
118 | # If true, `todo` and `todoList` produce output, else they produce nothing.
119 | todo_include_todos = True
120 |
121 |
122 | # -- Options for HTML output ----------------------------------------------
123 |
124 | # The theme to use for HTML and HTML Help pages. See the documentation for
125 | # a list of builtin themes.
126 | html_theme = "sphinx_rtd_theme"
127 | # html_theme = 'alabaster'
128 | # html_theme = "custom_theme"
129 |
130 |
131 | # Theme options are theme-specific and customize the look and feel of a theme
132 | # further. For a list of options available for each theme, see the
133 | # documentation.
134 | # html_theme_options = {}
135 |
136 | # Add any paths that contain custom themes here, relative to this directory.
137 | # html_theme_path = ["_themes"]
138 |
139 | # The name for this set of Sphinx documents.
140 | # "<project> v<release> documentation" by default.
141 | # html_title = 'mediawiki v0.3.4'
142 |
143 | # A shorter title for the navigation bar. Default is the same as html_title.
144 | # html_short_title = None
145 |
146 | # The name of an image file (relative to this directory) to place at the top
147 | # of the sidebar.
148 | # html_logo = None
149 |
150 | # The name of an image file (relative to this directory) to use as a favicon of
151 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
152 | # pixels large.
153 | # html_favicon = None
154 |
155 | # Add any paths that contain custom static files (such as style sheets) here,
156 | # relative to this directory. They are copied after the builtin static files,
157 | # so a file named "default.css" will overwrite the builtin "default.css".
158 | html_static_path = ["_static"]
159 | html_css_files = ["custom.css"]
160 |
161 | # Add any extra paths that contain custom files (such as robots.txt or
162 | # .htaccess) here, relative to this directory. These files are copied
163 | # directly to the root of the documentation.
164 | # html_extra_path = []
165 |
166 | # If not None, a 'Last updated on:' timestamp is inserted at every page
167 | # bottom, using the given strftime format.
168 | # The empty string is equivalent to '%b %d, %Y'.
169 | # html_last_updated_fmt = None
170 |
171 | # If true, SmartyPants will be used to convert quotes and dashes to
172 | # typographically correct entities.
173 | # html_use_smartypants = True
174 |
175 | # Custom sidebar templates, maps document names to template names.
176 | # html_sidebars = {}
177 |
178 | # Additional templates that should be rendered to pages, maps page names to
179 | # template names.
180 | # html_additional_pages = {}
181 |
182 | # If false, no module index is generated.
183 | # html_domain_indices = True
184 |
185 | # If false, no index is generated.
186 | # html_use_index = True
187 |
188 | # If true, the index is split into individual pages for each letter.
189 | # html_split_index = False
190 |
191 | # If true, links to the reST sources are added to the pages.
192 | # html_show_sourcelink = True
193 |
194 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
195 | # html_show_sphinx = True
196 |
197 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
198 | # html_show_copyright = True
199 |
200 | # If true, an OpenSearch description file will be output, and all pages will
201 | # contain a <link> tag referring to it. The value of this option must be the
202 | # base URL from which the finished HTML is served.
203 | # html_use_opensearch = ''
204 |
205 | # This is the file name suffix for HTML files (e.g. ".xhtml").
206 | # html_file_suffix = None
207 |
208 | # Language to be used for generating the HTML full-text search index.
209 | # Sphinx supports the following languages:
210 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
211 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
212 | # html_search_language = 'en'
213 |
214 | # A dictionary with options for the search language support, empty by default.
215 | # 'ja' uses this config value.
216 | # 'zh' users can customize the `jieba` dictionary path.
217 | # html_search_options = {'type': 'default'}
218 |
219 | # The name of a javascript file (relative to the configuration directory) that
220 | # implements a search results scorer. If empty, the default will be used.
221 | # html_search_scorer = 'scorer.js'
222 |
223 | # Output file base name for HTML help builder.
224 | htmlhelp_basename = "mediawikidoc"
225 |
226 | # -- Options for LaTeX output ---------------------------------------------
227 |
228 | latex_elements: Dict[str, str] = {
229 | # The paper size ('letterpaper' or 'a4paper').
230 | #'papersize': 'letterpaper',
231 | # The font size ('10pt', '11pt' or '12pt').
232 | #'pointsize': '10pt',
233 | # Additional stuff for the LaTeX preamble.
234 | #'preamble': '', 235 | # Latex figure (float) alignment 236 | #'figure_align': 'htbp', 237 | } 238 | 239 | # Grouping the document tree into LaTeX files. List of tuples 240 | # (source start file, target name, title, 241 | # author, documentclass [howto, manual, or own class]). 242 | latex_documents = [ 243 | (master_doc, "mediawiki.tex", "mediawiki Documentation", "Tyler Barrus", "manual"), 244 | ] 245 | 246 | # The name of an image file (relative to this directory) to place at the top of 247 | # the title page. 248 | # latex_logo = None 249 | 250 | # For "manual" documents, if this is true, then toplevel headings are parts, 251 | # not chapters. 252 | # latex_use_parts = False 253 | 254 | # If true, show page references after internal links. 255 | # latex_show_pagerefs = False 256 | 257 | # If true, show URL addresses after external links. 258 | # latex_show_urls = False 259 | 260 | # Documents to append as an appendix to all manuals. 261 | # latex_appendices = [] 262 | 263 | # If false, no module index is generated. 264 | # latex_domain_indices = True 265 | 266 | 267 | # -- Options for manual page output --------------------------------------- 268 | 269 | # One entry per manual page. List of tuples 270 | # (source start file, name, description, authors, manual section). 271 | man_pages = [(master_doc, "mediawiki", "mediawiki Documentation", [author], 1)] 272 | 273 | # If true, show URL addresses after external links. 274 | # man_show_urls = False 275 | 276 | 277 | # -- Options for Texinfo output ------------------------------------------- 278 | 279 | # Grouping the document tree into Texinfo files. List of tuples 280 | # (source start file, target name, title, author, 281 | # dir menu entry, description, category) 282 | texinfo_documents = [ 283 | ( 284 | master_doc, 285 | "mediawiki", 286 | "mediawiki Documentation", 287 | author, 288 | "mediawiki", 289 | "One line description of project.", 290 | "Miscellaneous", 291 | ), 292 | ] 293 | 294 | # Documents to append as an appendix to all manuals. 295 | # texinfo_appendices = [] 296 | 297 | # If false, no module index is generated. 298 | # texinfo_domain_indices = True 299 | 300 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 301 | # texinfo_show_urls = 'footnote' 302 | 303 | # If true, do not generate a @detailmenu in the "Top" node's menu. 
304 | # texinfo_no_detailmenu = False 305 | 306 | # Determine which way to group auto documented members 307 | autodoc_member_order = "groupwise" 308 | -------------------------------------------------------------------------------- /scripts/generate_test_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate data for tests 3 | """ 4 | import json 5 | import os 6 | import sys 7 | from datetime import timedelta 8 | from decimal import Decimal 9 | 10 | sys.path.insert(0, "../mediawiki") 11 | from mediawiki import ( 12 | DisambiguationError, 13 | MediaWiki, 14 | MediaWikiAPIURLError, 15 | MediaWikiGeoCoordError, 16 | PageError, 17 | RedirectError, 18 | ) 19 | 20 | # set up the json objects 21 | REQUESTS_FILE = "./tests/mock_requests.json" 22 | RESPONSES_FILE = "./tests/mock_responses.json" 23 | CATTREE_FILE = "./tests/mock_categorytree.json" 24 | 25 | 26 | def capture_response(func): 27 | """capture_response decorator to be used for tests""" 28 | 29 | def wrapper(*args, **kwargs): 30 | """define the actions""" 31 | file_path = os.path.abspath(REQUESTS_FILE) 32 | if os.path.isfile(file_path): 33 | with open(file_path, "r") as mock: 34 | mock_data = json.load(mock) 35 | else: 36 | mock_data = dict() 37 | 38 | new_params = json.dumps(tuple(sorted(args[1].items()))) 39 | # build out parts of the dictionary 40 | if args[0].api_url not in mock_data: 41 | mock_data[args[0].api_url] = dict() 42 | try: 43 | res = func(*args, **kwargs) 44 | except Exception: 45 | res = dict() 46 | mock_data[args[0].api_url][new_params] = res 47 | with open(file_path, "w") as mock: 48 | json.dump(mock_data, mock, ensure_ascii=False, indent=1, sort_keys=True) 49 | return res 50 | 51 | return wrapper 52 | 53 | 54 | class MediaWikiOverloaded(MediaWiki): 55 | """overloaded mediawiki class""" 56 | 57 | def __init__( 58 | self, 59 | url="https://{lang}.wikipedia.org/w/api.php", 60 | lang="en", 61 | timeout=None, 62 | rate_limit=False, 63 | rate_limit_wait=timedelta(milliseconds=50), 64 | ): 65 | """overloaded init""" 66 | MediaWiki.__init__( 67 | self, url=url, lang=lang, timeout=timeout, rate_limit=rate_limit, rate_limit_wait=rate_limit_wait 68 | ) 69 | 70 | @capture_response 71 | def _get_response(self, params): 72 | """overloaded response""" 73 | return MediaWiki._get_response(self, params) 74 | 75 | @capture_response 76 | def _post_response(self, params): 77 | """overloaded response""" 78 | return MediaWiki._post_response(self, params) 79 | 80 | 81 | PULL_ALL = False 82 | 83 | # Parameters to determine which tests to pull 84 | PULL_SEARCHES = False 85 | PULL_ALLPAGES = False 86 | PULL_RANDOM = False 87 | PULL_SUGGEST = False 88 | PULL_OPENSEARCH = False 89 | PULL_PREFIXSEARCH = False 90 | PULL_GEOSEARCH = False 91 | PULL_CATEGORYMEMBERS = False 92 | PULL_CATEGORYTREE = False 93 | PULL_SUMMARY = False 94 | PULL_PAGE_ERRORS = False 95 | PULL_DISAMBIGUATION_ERRORS = False 96 | PULL_API_URL_ERROR = False 97 | PULL_REDIRECT_ERROR = False 98 | PULL_PAGES = False 99 | PULL_LOGOS = False 100 | PULL_PREVIEWS = True 101 | PULL_HATNOTES = False 102 | PULL_SECTION_LINKS = False 103 | PULL_TABLE_OF_CONTENTS = False 104 | PULL_LOGIN = False 105 | 106 | # regression tests 107 | PULL_ISSUE_15 = False 108 | PULL_ISSUE_14 = False 109 | PULL_ISSUE_35 = False 110 | PULL_ISSUE_39 = False 111 | 112 | # make files if they don't exist 113 | if not os.path.isfile(REQUESTS_FILE): 114 | with open(REQUESTS_FILE, "w") as file_handle: 115 | json.dump(dict(), file_handle, ensure_ascii=False) 116 | 
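# Editor's sketch (not part of this script): replaying the captured JSON in a
# test. The {api_url: {sorted-params-json: response}} layout and the
# _get_response target mirror capture_response above; the unittest.mock wiring
# here is an assumption about how the test suite consumes these files.
#
#     import json
#     from unittest import mock
#     from mediawiki import MediaWiki
#
#     with open("./tests/mock_requests.json") as fp:
#         MOCK = json.load(fp)
#
#     def fake_get_response(self, params):
#         key = json.dumps(tuple(sorted(params.items())))
#         return MOCK[self.api_url][key]
#
#     with mock.patch.object(MediaWiki, "_get_response", fake_get_response):
#         ...  # exercise the library against canned responses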
117 | if os.path.isfile(RESPONSES_FILE): 118 | with open(RESPONSES_FILE, "r") as file_handle: 119 | responses = json.load(file_handle) 120 | else: 121 | responses = dict() 122 | 123 | 124 | # Begin building out new data objects 125 | site = MediaWikiOverloaded() 126 | french_site = MediaWikiOverloaded(url="https://fr.wikipedia.org/w/api.php", lang="fr") 127 | asoiaf = MediaWikiOverloaded(url="https://awoiaf.westeros.org/api.php", lang="fr") 128 | plants = MediaWikiOverloaded(url="https://practicalplants.org/w/api.php") 129 | wikipedia = MediaWikiOverloaded() 130 | 131 | 132 | # ensure these pieces of information do not throw errors 133 | if site.api_url not in responses: 134 | responses[site.api_url] = dict() 135 | if french_site.api_url not in responses: 136 | responses[french_site.api_url] = dict() 137 | if asoiaf.api_url not in responses: 138 | responses[asoiaf.api_url] = dict() 139 | 140 | # pull in standard information for all sites (every time) 141 | if site.api_url not in responses: 142 | responses[site.api_url] = dict() 143 | responses[site.api_url]["api"] = site.api_url 144 | responses[site.api_url]["lang"] = site.language 145 | responses[site.api_url]["languages"] = site.supported_languages 146 | responses[site.api_url]["api_version"] = site.api_version 147 | responses[site.api_url]["extensions"] = site.extensions 148 | 149 | if french_site.api_url not in responses: 150 | responses[french_site.api_url] = dict() 151 | responses[french_site.api_url]["api"] = french_site.api_url 152 | responses[french_site.api_url]["lang"] = french_site.language 153 | responses[french_site.api_url]["languages"] = french_site.supported_languages 154 | responses[french_site.api_url]["api_version"] = french_site.api_version 155 | responses[french_site.api_url]["extensions"] = french_site.extensions 156 | 157 | if asoiaf.api_url not in responses: 158 | responses[asoiaf.api_url] = dict() 159 | responses[asoiaf.api_url]["api"] = asoiaf.api_url 160 | responses[asoiaf.api_url]["lang"] = asoiaf.language 161 | responses[asoiaf.api_url]["languages"] = asoiaf.supported_languages 162 | responses[asoiaf.api_url]["api_version"] = asoiaf.api_version 163 | responses[asoiaf.api_url]["extensions"] = asoiaf.extensions 164 | 165 | # if plants.api_url not in responses: 166 | # responses[plants.api_url] = dict() 167 | 168 | print("Completed basic mediawiki information") 169 | 170 | if PULL_ALL is True or PULL_SEARCHES is True: 171 | res = site.search("chest set", suggestion=False) 172 | responses[site.api_url]["search_without_suggestion"] = res 173 | res = site.search("chest set", suggestion=True) 174 | responses[site.api_url]["search_with_suggestion_found"] = res 175 | res = site.search("chess set", suggestion=True) 176 | responses[site.api_url]["search_with_suggestion_not_found"] = res 177 | res = site.search("chess set", results=505, suggestion=False) 178 | responses[site.api_url]["search_with_suggestion_not_found_large"] = res 179 | res = site.search("chess set", results=3, suggestion=False) 180 | responses[site.api_url]["search_with_suggestion_not_found_small"] = res 181 | 182 | print("Completed pulling searches") 183 | 184 | if PULL_ALL is True or PULL_ALLPAGES is True: 185 | res = site.allpages("a") 186 | responses[site.api_url]["all_pages_query_a"] = res 187 | 188 | res = site.allpages("a", results=1) 189 | responses[site.api_url]["all_pages_query_a_1"] = res 190 | 191 | print("Completed pulling allpages") 192 | 193 | if PULL_ALL is True or PULL_RANDOM is True: 194 | responses[site.api_url]["random_1"] = 
site.random(pages=1) 195 | responses[site.api_url]["random_2"] = site.random(pages=2) 196 | responses[site.api_url]["random_10"] = site.random(pages=10) 197 | responses[site.api_url]["random_202"] = site.random(pages=202) 198 | 199 | print("Completed pulling random pages") 200 | 201 | if PULL_ALL is True or PULL_SUGGEST is True: 202 | responses[site.api_url]["suggest_chest_set"] = site.suggest("chest set") 203 | responses[site.api_url]["suggest_chess_set"] = site.suggest("chess set") 204 | responses[site.api_url]["suggest_new_york"] = site.suggest("new york") 205 | responses[site.api_url]["suggest_yonkers"] = site.suggest("yonkers") 206 | responses[site.api_url]["suggest_no_results"] = site.suggest("gobbilygook") 207 | 208 | print("Completed pulling suggestions") 209 | 210 | if PULL_ALL is True or PULL_OPENSEARCH is True: 211 | res = site.opensearch("new york") 212 | responses[site.api_url]["opensearch_new_york"] = res 213 | res = site.opensearch("new york", results=5) 214 | responses[site.api_url]["opensearch_new_york_result"] = res 215 | res = site.opensearch("new york", redirect=False) 216 | responses[site.api_url]["opensearch_new_york_redirect"] = res 217 | res = site.opensearch("new york", results=5, redirect=False) 218 | responses[site.api_url]["opensearch_new_york_result_redirect"] = res 219 | 220 | print("Completed pulling open searches") 221 | 222 | if PULL_ALL is True or PULL_PREFIXSEARCH is True: 223 | responses[site.api_url]["prefixsearch_ar"] = site.prefixsearch("ar") 224 | responses[site.api_url]["prefixsearch_ba"] = site.prefixsearch("ba") 225 | res = site.prefixsearch("ba", results=5) 226 | responses[site.api_url]["prefixsearch_ba_5"] = res 227 | res = site.prefixsearch("ba", results=30) 228 | responses[site.api_url]["prefixsearch_ba_30"] = res 229 | 230 | print("Completed pulling prefix searches") 231 | 232 | if PULL_ALL is True or PULL_GEOSEARCH is True: 233 | res = site.geosearch(latitude=Decimal("0.0"), longitude=Decimal("0.0")) 234 | responses[site.api_url]["geosearch_decimals"] = res 235 | res = site.geosearch(latitude=Decimal("0.0"), longitude=0.0) 236 | responses[site.api_url]["geosearch_mix_types"] = res 237 | res = site.geosearch( 238 | title="new york city", latitude=Decimal("-9999999999.999"), longitude=Decimal("0.0"), results=22, radius=10000 239 | ) 240 | responses[site.api_url]["geosearch_page_invalid_lat_long"] = res 241 | res = site.geosearch(title="new york city", results=22, radius=10000) 242 | responses[site.api_url]["geosearch_page_radius_results_set"] = res 243 | res = site.geosearch(title="new york city", radius=10000) 244 | responses[site.api_url]["geosearch_page_radius_results"] = res 245 | res = site.geosearch(title="new york city") 246 | responses[site.api_url]["geosearch_page"] = res 247 | try: 248 | site.geosearch(latitude=None, longitude=Decimal("0.0"), results=22, radius=10000) 249 | except ValueError as ex: 250 | responses[site.api_url]["invalid_lat_long_value_msg"] = str(ex) 251 | try: 252 | site.geosearch(latitude=Decimal("-9999999999.999"), longitude=Decimal("0.0"), results=22, radius=10000) 253 | except MediaWikiGeoCoordError as ex: 254 | responses[site.api_url]["invalid_lat_long_geo_msg"] = ex.message 255 | 256 | print("Completed pulling geo search") 257 | 258 | if PULL_ALL is True or PULL_CATEGORYMEMBERS is True: 259 | res = site.categorymembers("Chess", results=15, subcategories=True) 260 | responses[site.api_url]["category_members_with_subcategories"] = res 261 | res = site.categorymembers("Chess", results=15, subcategories=False) 262 
| responses[site.api_url]["category_members_without_subcategories"] = res 263 | res = site.categorymembers("Chess", results=5, subcategories=False) 264 | responses[site.api_url]["category_members_without_subcategories_5"] = res 265 | res = site.categorymembers("Disambiguation categories", results=None) 266 | responses[site.api_url]["category_members_very_large"] = res 267 | 268 | print("Completed pulling category members") 269 | 270 | if PULL_ALL is True or PULL_CATEGORYTREE is True: 271 | site.rate_limit = True 272 | ct = site.categorytree(["Chess", "Ebola"], depth=None) # type: ignore 273 | with open(CATTREE_FILE, "w") as fp: 274 | json.dump(ct, fp, ensure_ascii=False, sort_keys=True) 275 | 276 | try: 277 | site.categorytree("Chess Ebola", depth=None) # type: ignore 278 | except Exception as ex: 279 | responses[site.api_url]["missing_categorytree"] = str(ex) 280 | site.rate_limit = False 281 | 282 | print("Completed pulling category tree") 283 | 284 | if PULL_ALL is True or PULL_SUMMARY is True: 285 | res = site.summary("chess", chars=50) 286 | responses[site.api_url]["summarize_chars_50"] = res 287 | res = site.summary("chess", sentences=5) 288 | responses[site.api_url]["summarize_sent_5"] = res 289 | res = site.summary("chess") 290 | responses[site.api_url]["summarize_first_paragraph"] = res 291 | 292 | print("Completed pulling summaries") 293 | 294 | if PULL_ALL is True or PULL_PAGE_ERRORS is True: 295 | try: 296 | site.page("gobbilygook") 297 | except PageError as ex: 298 | responses[site.api_url]["page_error_msg"] = ex.message 299 | 300 | try: 301 | site.page("gobbilygook", auto_suggest=False) 302 | except PageError as ex: 303 | responses[site.api_url]["page_error_msg_title"] = ex.message 304 | 305 | try: 306 | site.page(pageid=-1) 307 | except PageError as ex: 308 | responses[site.api_url]["page_error_msg_pageid"] = ex.message 309 | 310 | print("Completed pulling page errors") 311 | 312 | if PULL_ALL is True or PULL_DISAMBIGUATION_ERRORS is True: 313 | try: 314 | site.page("bush") 315 | except DisambiguationError as ex: 316 | responses[site.api_url]["disambiguation_error_msg"] = ex.message 317 | 318 | try: 319 | site.page("Oasis") 320 | except DisambiguationError as ex: 321 | msg = ex.message 322 | responses[site.api_url]["disambiguation_error_msg_with_empty"] = msg 323 | 324 | print("Completed pulling disambiguation errors") 325 | 326 | if PULL_ALL is True or PULL_API_URL_ERROR is True: 327 | url = "https://french.wikipedia.org/w/api.php" 328 | try: 329 | site.set_api_url(api_url=url, lang="fr") 330 | except MediaWikiAPIURLError as ex: 331 | responses[site.api_url]["api_url_error_msg"] = ex.message 332 | 333 | # this shouldn't be necessary since it should go back to the original 334 | # values 335 | site.set_api_url(api_url="https://en.wikipedia.org/w/api.php", lang="en") 336 | print("Completed pulling api url errors") 337 | 338 | if PULL_ALL is True or PULL_REDIRECT_ERROR is True: 339 | # print('Start redirect error') 340 | try: 341 | asoiaf.page("arya", auto_suggest=False, redirect=False) 342 | except RedirectError as ex: 343 | responses[asoiaf.api_url]["redirect_error_msg"] = ex.message 344 | 345 | print("Completed pulling redirect errors") 346 | 347 | 348 | if PULL_ALL is True or PULL_PAGES is True: 349 | # unicode 350 | site.page("Jacques Léonard Muller") 351 | # page id and wikitext 352 | p = site.page(pageid=24337758, auto_suggest=False) 353 | responses["bpp-complexity_wikitext"] = p.wikitext 354 | 355 | # coordinates 356 | p = site.page("Washington Monument") 357 | coords 
= p.coordinates 358 | responses[site.api_url]["wash_mon"] = [str(coords[0]), str(coords[1])] 359 | 360 | # page properties 361 | 362 | # arya 363 | pg = asoiaf.page("arya") 364 | responses[asoiaf.api_url]["arya"] = dict() 365 | responses[asoiaf.api_url]["arya"]["title"] = pg.title 366 | responses[asoiaf.api_url]["arya"]["pageid"] = pg.pageid 367 | responses[asoiaf.api_url]["arya"]["revision_id"] = pg.revision_id 368 | responses[asoiaf.api_url]["arya"]["parent_id"] = pg.parent_id 369 | responses[asoiaf.api_url]["arya"]["content"] = pg.content 370 | responses[asoiaf.api_url]["arya"]["url"] = pg.url 371 | # other properties 372 | responses[asoiaf.api_url]["arya"]["backlinks"] = pg.backlinks 373 | responses[asoiaf.api_url]["arya"]["images"] = pg.images 374 | responses[asoiaf.api_url]["arya"]["redirects"] = pg.redirects 375 | responses[asoiaf.api_url]["arya"]["links"] = pg.links 376 | responses[asoiaf.api_url]["arya"]["categories"] = pg.categories 377 | responses[asoiaf.api_url]["arya"]["references"] = pg.references 378 | responses[asoiaf.api_url]["arya"]["content"] = pg.content 379 | responses[asoiaf.api_url]["arya"]["parent_id"] = pg.parent_id 380 | responses[asoiaf.api_url]["arya"]["revision_id"] = pg.revision_id 381 | responses[asoiaf.api_url]["arya"]["coordinates"] = pg.coordinates 382 | responses[asoiaf.api_url]["arya"]["summary"] = pg.summary 383 | responses[asoiaf.api_url]["arya"]["sections"] = pg.sections 384 | res = pg.section("A Game of Thrones") 385 | responses[asoiaf.api_url]["arya"]["section_a_game_of_thrones"] = res 386 | res = pg.section("External links") 387 | responses[asoiaf.api_url]["arya"]["last_section"] = res 388 | responses[asoiaf.api_url]["arya"]["html"] = pg.html 389 | 390 | # jon snow 391 | pg = asoiaf.page("jon snow") 392 | responses[asoiaf.api_url]["jon-snow"] = dict() 393 | responses[asoiaf.api_url]["jon-snow"]["title"] = pg.title 394 | responses[asoiaf.api_url]["jon-snow"]["pageid"] = pg.pageid 395 | responses[asoiaf.api_url]["jon-snow"]["revision_id"] = pg.revision_id 396 | responses[asoiaf.api_url]["jon-snow"]["parent_id"] = pg.parent_id 397 | responses[asoiaf.api_url]["jon-snow"]["content"] = pg.content 398 | responses[asoiaf.api_url]["jon-snow"]["url"] = pg.url 399 | 400 | # castos 401 | pg = asoiaf.page("Castos") 402 | responses[asoiaf.api_url]["castos"] = dict() 403 | res = pg.section("References and Notes") 404 | responses[asoiaf.api_url]["castos"]["section"] = res 405 | 406 | # other pages as they will be in the response object 407 | asoiaf.page("arya", auto_suggest=False) 408 | 409 | # lang links property (standard wikipedia) 410 | pg = site.page("Nobel Prize in Chemistry") 411 | responses[site.api_url]["nobel_chemistry"] = dict() 412 | responses[site.api_url]["nobel_chemistry"]["langlinks"] = pg.langlinks 413 | 414 | print("Completed pulling pages and properties") 415 | 416 | 417 | if PULL_ALL is True or PULL_LOGOS is True: 418 | # single logo 419 | res = wikipedia.page("Chess").logos 420 | responses[wikipedia.api_url]["chess_logos"] = res 421 | # multiple logos 422 | res = wikipedia.page("Sony Music").logos 423 | responses[wikipedia.api_url]["sony_music_logos"] = res 424 | # no infobox 425 | res = wikipedia.page("Antivirus Software").logos 426 | responses[wikipedia.api_url]["antivirus_software_logos"] = res 427 | 428 | print("Completed pulling logos") 429 | 430 | 431 | if PULL_ALL is True or PULL_PREVIEWS is True: 432 | res = wikipedia.page("Chess").preview 433 | responses[wikipedia.api_url]["chess_preview"] = res 434 | 435 | print("Completed pulling 
previews") 436 | 437 | 438 | if PULL_ALL is True or PULL_HATNOTES is True: 439 | # contains hatnotes 440 | res = wikipedia.page("Chess").hatnotes 441 | responses[wikipedia.api_url]["chess_hatnotes"] = res 442 | # no hatnotes 443 | page_name = "List of Battlestar Galactica (1978 TV series) and " "Galactica 1980 episodes" 444 | res = wikipedia.page(page_name).hatnotes 445 | responses[wikipedia.api_url]["page_no_hatnotes"] = res 446 | 447 | print("Completed pulling hat notes") 448 | 449 | if PULL_ALL is True or PULL_SECTION_LINKS is True: 450 | # contains external links 451 | pg = wikipedia.page("""McDonald's""") 452 | res = pg.parse_section_links("EXTERNAL LINKS") 453 | responses[wikipedia.api_url]["mcy_ds_external_links"] = res 454 | 455 | res = pg.parse_section_links(None) 456 | responses[wikipedia.api_url]["mcy_ds_external_links_none"] = res 457 | 458 | # doesn't contain external links 459 | pg = wikipedia.page("Tropical rainforest conservation") 460 | res = pg.parse_section_links("EXTERNAL LINKS") 461 | responses[wikipedia.api_url]["page_no_sec_links"] = res 462 | 463 | pg = asoiaf.page("arya") 464 | for section in pg.sections: 465 | links = pg.parse_section_links(section) 466 | responses[asoiaf.api_url]["arya_{}_links".format(section)] = links 467 | 468 | print("Completed pulling the section links") 469 | 470 | if PULL_ALL is True or PULL_TABLE_OF_CONTENTS is True: 471 | pg = wikipedia.page("New York City") 472 | res = pg.sections 473 | responses[wikipedia.api_url]["new_york_city_sections"] = res 474 | res = pg.table_of_contents 475 | responses[wikipedia.api_url]["new_york_city_toc"] = res 476 | responses[wikipedia.api_url]["new_york_city_air_quality"] = pg.section("Air quality") 477 | responses[wikipedia.api_url]["new_york_city_none"] = pg.section(None) 478 | responses[wikipedia.api_url]["new_york_city_last_sec"] = pg.section("External links") 479 | print("Completed pulling Table of Content data") 480 | 481 | if PULL_ALL is True or PULL_LOGIN is True: 482 | pg = wikipedia.login(username="badusername", password="fakepassword") 483 | print("Completed pulling login") 484 | 485 | 486 | if PULL_ALL is True or PULL_ISSUE_14 is True: 487 | res = site.page("One Two Three... Infinity").images 488 | responses[wikipedia.api_url]["hidden_images"] = res 489 | 490 | # missing http got lumped into this issue... 
491 | page = site.page("Minneapolis") 492 | responses[site.api_url]["references_without_http"] = page.references 493 | 494 | print("Completed pulling issue 14") 495 | 496 | if PULL_ALL is True or PULL_ISSUE_15 is True: 497 | res = site.page("Rober Eryol").images 498 | responses[wikipedia.api_url]["infinite_loop_images"] = res 499 | res = site.page("List of named minor planets (numerical)").links 500 | responses[wikipedia.api_url]["large_continued_query"] = res 501 | res = wikipedia.page("B8 polytope").images 502 | responses[wikipedia.api_url]["large_continued_query_images"] = res 503 | 504 | print("Completed pulling issue 15") 505 | 506 | if PULL_ALL is True or PULL_ISSUE_35 is True: 507 | try: 508 | site.page("Leaching") 509 | except DisambiguationError as ex: 510 | responses[wikipedia.api_url]["missing_title_disamb_dets"] = ex.details 511 | responses[wikipedia.api_url]["missing_title_disamb_msg"] = str(ex) 512 | 513 | print("Completed pulling issue 35") 514 | 515 | if PULL_ALL is True or PULL_ISSUE_39 is True: 516 | res = plants.categorymembers("Plant", results=None, subcategories=False) 517 | responses[plants.api_url]["query-continue-find"] = res 518 | 519 | print("Completed pulling issue 39") 520 | 521 | # dump data to file 522 | with open(RESPONSES_FILE, "w") as mock: 523 | json.dump(responses, mock, ensure_ascii=False, indent=1, sort_keys=True) 524 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MAIN] 2 | 3 | # Analyse import fallback blocks. This can be used to support both Python 2 and 4 | # 3 compatible code, which means that the block might have code that exists 5 | # only in one or another interpreter, leading to false positives when analysed. 6 | analyse-fallback-blocks=no 7 | 8 | # Load and enable all available extensions. Use --list-extensions to see a list 9 | # all available extensions. 10 | #enable-all-extensions= 11 | 12 | # In error mode, messages with a category besides ERROR or FATAL are 13 | # suppressed, and no reports are done by default. Error mode is compatible with 14 | # disabling specific errors. 15 | #errors-only= 16 | 17 | # Always return a 0 (non-error) status code, even if lint errors are found. 18 | # This is primarily useful in continuous integration scripts. 19 | #exit-zero= 20 | 21 | # A comma-separated list of package or module names from where C extensions may 22 | # be loaded. Extensions are loading into the active Python interpreter and may 23 | # run arbitrary code. 24 | extension-pkg-allow-list= 25 | 26 | # A comma-separated list of package or module names from where C extensions may 27 | # be loaded. Extensions are loading into the active Python interpreter and may 28 | # run arbitrary code. (This is an alternative name to extension-pkg-allow-list 29 | # for backward compatibility.) 30 | extension-pkg-whitelist= 31 | 32 | # Return non-zero exit code if any of these messages/categories are detected, 33 | # even if score is above --fail-under value. Syntax same as enable. Messages 34 | # specified are enabled, while categories only check already-enabled messages. 35 | fail-on= 36 | 37 | # Specify a score threshold to be exceeded before program exits with error. 38 | fail-under=10 39 | 40 | # Interpret the stdin as a python script, whose filename needs to be passed as 41 | # the module_or_package argument. 42 | #from-stdin= 43 | 44 | # Files or directories to be skipped. They should be base names, not paths. 
45 | ignore=CVS 46 | 47 | # Add files or directories matching the regex patterns to the ignore-list. The 48 | # regex matches against paths and can be in Posix or Windows format. 49 | ignore-paths= 50 | 51 | # Files or directories matching the regex patterns are skipped. The regex 52 | # matches against base names, not paths. The default value ignores Emacs file 53 | # locks 54 | ignore-patterns=^\.# 55 | 56 | # List of module names for which member attributes should not be checked 57 | # (useful for modules/projects where namespaces are manipulated during runtime 58 | # and thus existing member attributes cannot be deduced by static analysis). It 59 | # supports qualified module names, as well as Unix pattern matching. 60 | ignored-modules= 61 | 62 | # Python code to execute, usually for sys.path manipulation such as 63 | # pygtk.require(). 64 | #init-hook= 65 | 66 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 67 | # number of processors available to use, and will cap the count on Windows to 68 | # avoid hangs. 69 | jobs=1 70 | 71 | # Control the amount of potential inferred values when inferring a single 72 | # object. This can help the performance when dealing with large functions or 73 | # complex, nested conditions. 74 | limit-inference-results=100 75 | 76 | # List of plugins (as comma separated values of python module names) to load, 77 | # usually to register additional checkers. 78 | load-plugins= 79 | 80 | # Pickle collected data for later comparisons. 81 | persistent=yes 82 | 83 | # Minimum Python version to use for version dependent checks. Will default to 84 | # the version used to run pylint. 85 | py-version=3.10 86 | 87 | # Discover python modules and packages in the file system subtree. 88 | recursive=no 89 | 90 | # When enabled, pylint would attempt to guess common misconfiguration and emit 91 | # user-friendly hints instead of false-positive error messages. 92 | suggestion-mode=yes 93 | 94 | # Allow loading of arbitrary C extensions. Extensions are imported into the 95 | # active Python interpreter and may run arbitrary code. 96 | unsafe-load-any-extension=no 97 | 98 | # In verbose mode, extra non-checker-related info will be displayed. 99 | #verbose= 100 | 101 | 102 | [REPORTS] 103 | 104 | # Python expression which should return a score less than or equal to 10. You 105 | # have access to the variables 'fatal', 'error', 'warning', 'refactor', 106 | # 'convention', and 'info' which contain the number of messages in each 107 | # category, as well as 'statement' which is the total number of statements 108 | # analyzed. This score is used by the global evaluation report (RP0004). 109 | evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) 110 | 111 | # Template used to display messages. This is a python new-style format string 112 | # used to format the message information. See doc for all details. 113 | msg-template= 114 | 115 | # Set the output format. Available formats are text, parseable, colorized, json 116 | # and msvs (visual studio). You can also give a reporter class, e.g. 117 | # mypackage.mymodule.MyReporterClass. 118 | # output-format=text 119 | 120 | # Tells whether to display a full report or only the messages. 121 | reports=yes 122 | 123 | # Activate the evaluation score. 124 | score=yes 125 | 126 | 127 | [MESSAGES CONTROL] 128 | 129 | # Only show warnings with the listed confidence levels. Leave empty to show 130 | # all. 
130 | # all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
131 | # UNDEFINED.
132 | confidence=HIGH,
133 |            CONTROL_FLOW,
134 |            INFERENCE,
135 |            INFERENCE_FAILURE,
136 |            UNDEFINED
137 | 
138 | # Disable the message, report, category or checker with the given id(s). You
139 | # can either give multiple identifiers separated by comma (,) or put this
140 | # option multiple times (only on the command line, not in the configuration
141 | # file where it should appear only once). You can also use "--disable=all" to
142 | # disable everything first and then re-enable specific checks. For example, if
143 | # you want to run only the similarities checker, you can use "--disable=all
144 | # --enable=similarities". If you want to run only the classes checker, but have
145 | # no Warning level messages displayed, use "--disable=all --enable=classes
146 | # --disable=W".
147 | disable=raw-checker-failed,
148 |         bad-inline-option,
149 |         locally-disabled,
150 |         file-ignored,
151 |         suppressed-message,
152 |         useless-suppression,
153 |         deprecated-pragma,
154 |         use-symbolic-message-instead,
155 |         too-many-arguments,
156 |         protected-access
157 | 
158 | # Enable the message, report, category or checker with the given id(s). You can
159 | # either give multiple identifiers separated by comma (,) or put this option
160 | # multiple times (only on the command line, not in the configuration file where
161 | # it should appear only once). See also the "--disable" option for examples.
162 | enable=c-extension-no-member
163 | 
164 | 
165 | [DESIGN]
166 | 
167 | # List of regular expressions of class ancestor names to ignore when counting
168 | # public methods (see R0903)
169 | exclude-too-few-public-methods=
170 | 
171 | # List of qualified class names to ignore when counting class parents (see
172 | # R0901)
173 | ignored-parents=
174 | 
175 | # Maximum number of arguments for function / method.
176 | # Default = 5
177 | max-args=12
178 | 
179 | # Maximum number of attributes for a class (see R0902).
180 | max-attributes=35
181 | 
182 | # Maximum number of boolean expressions in an if statement (see R0916).
183 | max-bool-expr=5
184 | 
185 | # Maximum number of branches for function / method body (see R0912)
186 | max-branches=15
187 | 
188 | # Maximum number of locals for function / method body.
189 | max-locals=20
190 | 
191 | # Maximum number of parents for a class (see R0901).
192 | max-parents=7
193 | 
194 | # Maximum number of public methods for a class (see R0904).
195 | max-public-methods=40
196 | 
197 | # Maximum number of return / yield for function / method body.
198 | max-returns=6
199 | 
200 | # Maximum number of statements in function / method body.
201 | max-statements=50
202 | 
203 | # Minimum number of public methods for a class (see R0903).
204 | min-public-methods=2
205 | 
206 | 
207 | [MISCELLANEOUS]
208 | 
209 | # List of note tags to take in consideration, separated by a comma.
210 | notes=FIXME,
211 |       XXX,
212 |       TODO
213 | 
214 | # Regular expression of note tags to take in consideration.
215 | notes-rgx=
216 | 
217 | 
218 | [SPELLING]
219 | 
220 | # Limits count of emitted suggestions for spelling mistakes.
221 | max-spelling-suggestions=4
222 | 
223 | # Spelling dictionary name. Available dictionaries: none. To make it work,
224 | # install the 'python-enchant' package.
225 | spelling-dict=
226 | 
227 | # List of comma separated words that should be considered directives if they
228 | # appear at the beginning of a comment and should not be checked.
229 | spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
230 | 
231 | # List of comma separated words that should not be checked.
232 | spelling-ignore-words=
233 | 
234 | # A path to a file that contains the private dictionary; one word per line.
235 | spelling-private-dict-file=
236 | 
237 | # Tells whether to store unknown words to the private dictionary (see the
238 | # --spelling-private-dict-file option) instead of raising a message.
239 | spelling-store-unknown-words=no
240 | 
241 | 
242 | [FORMAT]
243 | 
244 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
245 | expected-line-ending-format=
246 | 
247 | # Regexp for a line that is allowed to be longer than the limit.
248 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$
249 | 
250 | # Number of spaces of indent required inside a hanging or continued line.
251 | indent-after-paren=4
252 | 
253 | # String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
254 | # tab).
255 | indent-string='    '
256 | 
257 | # Maximum number of characters on a single line.
258 | # Default = 100
259 | max-line-length=120
260 | 
261 | # Maximum number of lines in a module.
262 | # Default = 1000
263 | max-module-lines=1250
264 | 
265 | # Allow the body of a class to be on the same line as the declaration if body
266 | # contains single statement.
267 | single-line-class-stmt=no
268 | 
269 | # Allow the body of an if to be on the same line as the test if there is no
270 | # else.
271 | single-line-if-stmt=no
272 | 
273 | 
274 | [REFACTORING]
275 | 
276 | # Maximum number of nested blocks for function / method body
277 | max-nested-blocks=5
278 | 
279 | # Complete name of functions that never return. When checking for
280 | # inconsistent-return-statements, if a never returning function is called then
281 | # it will be considered as an explicit return statement and no message will be
282 | # printed.
283 | never-returning-functions=sys.exit,argparse.parse_error
284 | 
285 | 
286 | [STRING]
287 | 
288 | # This flag controls whether inconsistent-quotes generates a warning when the
289 | # character used as a quote delimiter is used inconsistently within a module.
290 | check-quote-consistency=no
291 | 
292 | # This flag controls whether the implicit-str-concat should generate a warning
293 | # on implicit string concatenation in sequences defined over several lines.
294 | check-str-concat-over-line-jumps=no
295 | 
296 | 
297 | [VARIABLES]
298 | 
299 | # List of additional names supposed to be defined in builtins. Remember that
300 | # you should avoid defining new builtins when possible.
301 | additional-builtins=
302 | 
303 | # Tells whether unused global variables should be treated as a violation.
304 | allow-global-unused-variables=yes
305 | 
306 | # List of names allowed to shadow builtins
307 | allowed-redefined-builtins=
308 | 
309 | # List of strings which can identify a callback function by name. A callback
310 | # name must start or end with one of those strings.
311 | callbacks=cb_,
312 |           _cb
313 | 
314 | # A regular expression matching the name of dummy variables (i.e. expected to
315 | # not be used).
316 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
317 | 
318 | # Argument names that match this expression will be ignored. Defaults to names
319 | # with a leading underscore.
320 | ignored-argument-names=_.*|^ignored_|^unused_
321 | 
322 | # Tells whether we should check for unused import in __init__ files.
323 | init-import=no
324 | 
325 | # List of qualified module names which can have objects that can redefine
326 | # builtins.
327 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
328 | 
329 | 
330 | [TYPECHECK]
331 | 
332 | # List of decorators that produce context managers, such as
333 | # contextlib.contextmanager. Add to this list to register other decorators that
334 | # produce valid context managers.
335 | contextmanager-decorators=contextlib.contextmanager
336 | 
337 | # List of members which are set dynamically and missed by pylint inference
338 | # system, and so shouldn't trigger E1101 when accessed. Python regular
339 | # expressions are accepted.
340 | generated-members=
341 | 
342 | # Tells whether to warn about missing members when the owner of the attribute
343 | # is inferred to be None.
344 | ignore-none=yes
345 | 
346 | # This flag controls whether pylint should warn about no-member and similar
347 | # checks whenever an opaque object is returned when inferring. The inference
348 | # can return multiple potential results while evaluating a Python object, but
349 | # some branches might not be evaluated, which results in partial inference. In
350 | # that case, it might be useful to still emit no-member and other checks for
351 | # the rest of the inferred objects.
352 | ignore-on-opaque-inference=yes
353 | 
354 | # List of symbolic message names to ignore for Mixin members.
355 | ignored-checks-for-mixins=no-member,
356 |                           not-async-context-manager,
357 |                           not-context-manager,
358 |                           attribute-defined-outside-init
359 | 
360 | # List of class names for which member attributes should not be checked (useful
361 | # for classes with dynamically set attributes). This supports the use of
362 | # qualified names.
363 | ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace
364 | 
365 | # Show a hint with possible names when a member name was not found. The aspect
366 | # of finding the hint is based on edit distance.
367 | missing-member-hint=yes
368 | 
369 | # The minimum edit distance a name should have in order to be considered a
370 | # similar match for a missing member name.
371 | missing-member-hint-distance=1
372 | 
373 | # The total number of similar names that should be taken in consideration when
374 | # showing a hint for a missing member.
375 | missing-member-max-choices=1
376 | 
377 | # Regex pattern to define which classes are considered mixins.
378 | mixin-class-rgx=.*[Mm]ixin
379 | 
380 | # List of decorators that change the signature of a decorated function.
381 | signature-mutators=
382 | 
383 | 
384 | [CLASSES]
385 | 
386 | # Warn about protected attribute access inside special methods
387 | check-protected-access-in-special-methods=no
388 | 
389 | # List of method names used to declare (i.e. assign) instance attributes.
390 | defining-attr-methods=__init__,
391 |                       __new__,
392 |                       setUp,
393 |                       __post_init__
394 | 
395 | # List of member names, which should be excluded from the protected access
396 | # warning.
397 | exclude-protected=_asdict,
398 |                   _fields,
399 |                   _replace,
400 |                   _source,
401 |                   _make
402 | 
403 | # List of valid names for the first argument in a class method.
404 | valid-classmethod-first-arg=cls
405 | 
406 | # List of valid names for the first argument in a metaclass class method.
407 | valid-metaclass-classmethod-first-arg=cls
408 | 
409 | 
410 | [IMPORTS]
411 | 
412 | # List of modules that can be imported at any level, not just the top level
413 | # one.
414 | allow-any-import-level=
415 | 
416 | # Allow wildcard imports from modules that define __all__.
417 | allow-wildcard-with-all=no
418 | 
419 | # Deprecated modules which should not be used, separated by a comma.
420 | deprecated-modules=
421 | 
422 | # Output a graph (.gv or any supported image format) of external dependencies
423 | # to the given file (report RP0402 must not be disabled).
424 | ext-import-graph=
425 | 
426 | # Output a graph (.gv or any supported image format) of all (i.e. internal and
427 | # external) dependencies to the given file (report RP0402 must not be
428 | # disabled).
429 | import-graph=
430 | 
431 | # Output a graph (.gv or any supported image format) of internal dependencies
432 | # to the given file (report RP0402 must not be disabled).
433 | int-import-graph=
434 | 
435 | # Force import order to recognize a module as part of the standard
436 | # compatibility libraries.
437 | known-standard-library=
438 | 
439 | # Force import order to recognize a module as part of a third party library.
440 | known-third-party=enchant
441 | 
442 | # Couples of modules and preferred modules, separated by a comma.
443 | preferred-modules=
444 | 
445 | 
446 | [SIMILARITIES]
447 | 
448 | # Comments are removed from the similarity computation
449 | ignore-comments=yes
450 | 
451 | # Docstrings are removed from the similarity computation
452 | ignore-docstrings=yes
453 | 
454 | # Imports are removed from the similarity computation
455 | ignore-imports=yes
456 | 
457 | # Signatures are removed from the similarity computation
458 | ignore-signatures=yes
459 | 
460 | # Minimum lines number of a similarity.
461 | min-similarity-lines=4
462 | 
463 | 
464 | [BASIC]
465 | 
466 | # Naming style matching correct argument names.
467 | argument-naming-style=snake_case
468 | 
469 | # Regular expression matching correct argument names. Overrides argument-
470 | # naming-style. If left empty, argument names will be checked with the set
471 | # naming style.
472 | #argument-rgx=
473 | 
474 | # Naming style matching correct attribute names.
475 | attr-naming-style=snake_case
476 | 
477 | # Regular expression matching correct attribute names. Overrides attr-naming-
478 | # style. If left empty, attribute names will be checked with the set naming
479 | # style.
480 | #attr-rgx=
481 | 
482 | # Bad variable names which should always be refused, separated by a comma.
483 | bad-names=foo,
484 |           bar,
485 |           baz,
486 |           toto,
487 |           tutu,
488 |           tata
489 | 
490 | # Bad variable names regexes, separated by a comma. If names match any regex,
491 | # they will always be refused
492 | bad-names-rgxs=
493 | 
494 | # Naming style matching correct class attribute names.
495 | class-attribute-naming-style=any
496 | 
497 | # Regular expression matching correct class attribute names. Overrides class-
498 | # attribute-naming-style. If left empty, class attribute names will be checked
499 | # with the set naming style.
500 | #class-attribute-rgx=
501 | 
502 | # Naming style matching correct class constant names.
503 | class-const-naming-style=UPPER_CASE
504 | 
505 | # Regular expression matching correct class constant names. Overrides class-
506 | # const-naming-style. If left empty, class constant names will be checked with
507 | # the set naming style.
508 | #class-const-rgx=
509 | 
510 | # Naming style matching correct class names.
511 | class-naming-style=PascalCase
512 | 
513 | # Regular expression matching correct class names. Overrides class-naming-
514 | # style. If left empty, class names will be checked with the set naming style.
515 | #class-rgx=
516 | 
517 | # Naming style matching correct constant names.
518 | const-naming-style=UPPER_CASE
519 | 
520 | # Regular expression matching correct constant names. Overrides const-naming-
521 | # style. If left empty, constant names will be checked with the set naming
522 | # style.
523 | #const-rgx=
524 | 
525 | # Minimum line length for functions/classes that require docstrings, shorter
526 | # ones are exempt.
527 | docstring-min-length=-1
528 | 
529 | # Naming style matching correct function names.
530 | function-naming-style=snake_case
531 | 
532 | # Regular expression matching correct function names. Overrides function-
533 | # naming-style. If left empty, function names will be checked with the set
534 | # naming style.
535 | #function-rgx=
536 | 
537 | # Good variable names which should always be accepted, separated by a comma.
538 | good-names=i,
539 |            j,
540 |            k,
541 |            ex,
542 |            Run,
543 |            _
544 | 
545 | # Good variable names regexes, separated by a comma. If names match any regex,
546 | # they will always be accepted
547 | good-names-rgxs=
548 | 
549 | # Include a hint for the correct naming format with invalid-name.
550 | include-naming-hint=no
551 | 
552 | # Naming style matching correct inline iteration names.
553 | inlinevar-naming-style=any
554 | 
555 | # Regular expression matching correct inline iteration names. Overrides
556 | # inlinevar-naming-style. If left empty, inline iteration names will be checked
557 | # with the set naming style.
558 | #inlinevar-rgx=
559 | 
560 | # Naming style matching correct method names.
561 | method-naming-style=snake_case
562 | 
563 | # Regular expression matching correct method names. Overrides method-naming-
564 | # style. If left empty, method names will be checked with the set naming style.
565 | #method-rgx=
566 | 
567 | # Naming style matching correct module names.
568 | module-naming-style=snake_case
569 | 
570 | # Regular expression matching correct module names. Overrides module-naming-
571 | # style. If left empty, module names will be checked with the set naming style.
572 | #module-rgx=
573 | 
574 | # Colon-delimited sets of names that determine each other's naming style when
575 | # the name regexes allow several styles.
576 | name-group=
577 | 
578 | # Regular expression which should only match function or class names that do
579 | # not require a docstring.
580 | no-docstring-rgx=^_
581 | 
582 | # List of decorators that produce properties, such as abc.abstractproperty. Add
583 | # to this list to register other decorators that produce valid properties.
584 | # These decorators are taken in consideration only for invalid-name.
585 | property-classes=abc.abstractproperty
586 | 
587 | # Regular expression matching correct type variable names. If left empty, type
588 | # variable names will be checked with the set naming style.
589 | #typevar-rgx=
590 | 
591 | # Naming style matching correct variable names.
592 | variable-naming-style=snake_case
593 | 
594 | # Regular expression matching correct variable names. Overrides variable-
595 | # naming-style. If left empty, variable names will be checked with the set
596 | # naming style.
597 | #variable-rgx=
598 | 
599 | 
600 | [EXCEPTIONS]
601 | 
602 | # Exceptions that will emit a warning when caught.
603 | overgeneral-exceptions=mediawiki.exceptions.BaseException,builtins.Exception
604 | 
605 | 
606 | [LOGGING]
607 | 
608 | # The type of string formatting that logging methods do. `old` means using %
609 | # formatting, `new` is for `{}` formatting.
610 | logging-format-style=old
611 | 
612 | # Logging modules to check that the string format arguments are in logging
613 | # function parameter format.
614 | logging-modules=logging
615 | 
--------------------------------------------------------------------------------
/mediawiki/mediawikipage.py:
--------------------------------------------------------------------------------
1 | """
2 | MediaWikiPage class module
3 | """
4 | 
5 | # MIT License
6 | # Author: Tyler Barrus (barrust@gmail.com)
7 | 
8 | import re
9 | from collections import OrderedDict
10 | from decimal import Decimal
11 | from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
12 | 
13 | from bs4 import BeautifulSoup, NavigableString, Tag
14 | 
15 | from mediawiki.exceptions import (
16 |     ODD_ERROR_MESSAGE,
17 |     DisambiguationError,
18 |     MediaWikiBaseException,
19 |     MediaWikiException,
20 |     PageError,
21 |     RedirectError,
22 | )
23 | from mediawiki.utilities import is_relative_url, str_or_unicode
24 | 
25 | 
26 | class MediaWikiPage:
27 |     """MediaWiki Page Instance
28 | 
29 |     Args:
30 |         mediawiki (MediaWiki): MediaWiki class object from which to pull
31 |         title (str): Title of page to retrieve
32 |         pageid (int): MediaWiki site pageid to retrieve
33 |         redirect (bool): **True:** Follow redirects
34 |         preload (bool): **True:** Load most properties after getting page
35 |         original_title (str): Not to be used from the caller; used to help follow redirects
36 |     Raises:
37 |         :py:func:`mediawiki.exceptions.PageError`: if page provided does not exist
38 |     Raises:
39 |         :py:func:`mediawiki.exceptions.DisambiguationError`: if page provided is a disambiguation page
40 |     Raises:
41 |         :py:func:`mediawiki.exceptions.RedirectError`: if redirect is **False** and the pageid or title \
42 |             provided redirects to another page
43 |     Warning:
44 |         This should never need to be used directly! Please use :func:`mediawiki.MediaWiki.page`"""
45 | 
46 |     __slots__ = [
47 |         "mediawiki",
48 |         "url",
49 |         "title",
50 |         "original_title",
51 |         "pageid",
52 |         "_content",
53 |         "_revision_id",
54 |         "_parent_id",
55 |         "_html",
56 |         "_soup",
57 |         "_images",
58 |         "_references",
59 |         "_categories",
60 |         "_coordinates",
61 |         "_links",
62 |         "_redirects",
63 |         "_backlinks",
64 |         "_langlinks",
65 |         "_summary",
66 |         "_sections",
67 |         "_table_of_contents",
68 |         "_logos",
69 |         "_hatnotes",
70 |         "_wikitext",
71 |         "_preview",
72 |     ]
73 | 
74 |     def __init__(
75 |         self,
76 |         mediawiki,
77 |         title: Optional[str] = None,
78 |         pageid: Optional[int] = None,
79 |         redirect: bool = True,
80 |         preload: bool = False,
81 |         original_title: str = "",
82 |     ):
83 |         self.mediawiki = mediawiki
84 |         self.url: Optional[str] = None
85 |         if title is not None:
86 |             self.title = title
87 |             self.original_title = original_title or title
88 |         elif pageid is not None:
89 |             self.pageid = pageid
90 |         else:
91 |             raise ValueError("Either a title or a pageid must be specified")
92 | 
93 |         self._content: Optional[str] = None
94 |         self._revision_id: Optional[int] = None
95 |         self._parent_id: Optional[int] = None
96 |         self._html: Union[bool, str] = False  # None signifies nothing returned...
97 |         self._images: Optional[List[str]] = None
98 |         self._references: Optional[List[str]] = None
99 |         self._categories: Optional[List[str]] = None
100 |         self._coordinates: Union[bool, None, Tuple[Decimal, Decimal]] = False  # None signifies nothing returned...
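        # The lazily-loaded attributes here use two different "not yet fetched"
        # sentinels: None for attributes where any API response is meaningful, and
        # False for _html and _coordinates, where None is itself a valid fetched
        # result (no HTML returned / page has no geocoordinates).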
101 |         self._links: Optional[List[str]] = None
102 |         self._redirects: Optional[List[str]] = None
103 |         self._backlinks: Optional[List[str]] = None
104 |         self._langlinks: Optional[Dict[str, str]] = None
105 |         self._summary: Optional[str] = None
106 |         self._sections: Optional[List[str]] = None
107 |         self._table_of_contents: Optional[Dict[str, Any]] = None
108 |         self._logos: Optional[List[str]] = None
109 |         self._hatnotes: Optional[List[str]] = None
110 |         self._soup: Optional[BeautifulSoup] = None
111 |         self._wikitext: Optional[str] = None
112 |         self._preview: Optional[Dict[str, str]] = None
113 | 
114 |         self.__load(redirect=redirect, preload=preload)
115 | 
116 |         preload_props = [
117 |             "content",
118 |             "summary",
119 |             "images",
120 |             "references",
121 |             "links",
122 |             "sections",
123 |             "redirects",
124 |             "coordinates",
125 |             "backlinks",
126 |             "categories",
127 |         ]
128 |         if preload:
129 |             for prop in preload_props:
130 |                 getattr(self, prop)
131 | 
132 |     # end __init__
133 | 
134 |     def __repr__(self):
135 |         """repr"""
136 |         return self.__str__()
137 | 
138 |     def __unicode__(self):
139 |         """python 2.7 unicode"""
140 |         return f"""<MediaWikiPage '{self.title}'>"""
141 | 
142 |     def __str__(self):
143 |         """python > 3 unicode python 2.7 byte str"""
144 |         return str_or_unicode(self.__unicode__())
145 | 
146 |     def __eq__(self, other):
147 |         """base eq function"""
148 |         try:
149 |             return self.pageid == other.pageid and self.title == other.title and self.url == other.url
150 |         except AttributeError:
151 |             return False
152 | 
153 |     # Properties
154 |     def _pull_content_revision_parent(self) -> Tuple[Optional[str], Optional[int], Optional[int]]:
155 |         """combine the pulling of these three properties"""
156 | 
157 |         if self._revision_id is None:
158 |             query_params = {
159 |                 "prop": "extracts|revisions",
160 |                 "explaintext": "",
161 |                 "rvprop": "ids",
162 |             }
163 |             query_params.update(self.__title_query_param())
164 |             request = self.mediawiki.wiki_request(query_params)
165 |             page_info = request["query"]["pages"][self.pageid]
166 |             self._content = page_info.get("extract", None)
167 |             self._revision_id = page_info["revisions"][0]["revid"]
168 |             self._parent_id = page_info["revisions"][0]["parentid"]
169 | 
170 |         if self._content is None and "TextExtracts" not in self.mediawiki.extensions:
171 |             msg = "Unable to extract page content; the TextExtracts extension must be installed!"
172 | raise MediaWikiBaseException(msg) 173 | return self._content, self._revision_id, self._parent_id 174 | 175 | @property 176 | def content(self) -> str: 177 | """str: The page content in text format 178 | 179 | Note: 180 | Not settable 181 | Note: 182 | Side effect is to also get revision_id and parent_id""" 183 | if self._content is None: 184 | self._pull_content_revision_parent() 185 | return self._content # type: ignore 186 | 187 | @property 188 | def revision_id(self) -> int: 189 | """int: The current revision id of the page 190 | 191 | Note: 192 | Not settable 193 | Note: 194 | Side effect is to also get content and parent_id""" 195 | if self._revision_id is None: 196 | self._pull_content_revision_parent() 197 | return self._revision_id # type: ignore 198 | 199 | @property 200 | def parent_id(self) -> int: 201 | """int: The parent id of the page 202 | 203 | Note: 204 | Not settable 205 | Note: 206 | Side effect is to also get content and revision_id""" 207 | if self._parent_id is None: 208 | self._pull_content_revision_parent() 209 | return self._parent_id # type: ignore 210 | 211 | @property 212 | def html(self) -> str: 213 | """str: HTML representation of the page 214 | 215 | Note: 216 | Not settable 217 | Warning: 218 | This can be slow for very large pages""" 219 | if self._html is False: 220 | self._html = "" 221 | query_params = { 222 | "prop": "revisions", 223 | "rvprop": "content", 224 | "rvlimit": 1, 225 | "rvparse": "", 226 | "titles": self.title, 227 | } 228 | request = self.mediawiki.wiki_request(query_params) 229 | page = request["query"]["pages"][self.pageid] 230 | self._html = page["revisions"][0]["*"] 231 | return self._html # type: ignore 232 | 233 | @property 234 | def wikitext(self) -> str: 235 | """str: Wikitext representation of the page 236 | 237 | Note: 238 | Not settable""" 239 | if self._wikitext is None: 240 | query_params = { 241 | "action": "parse", 242 | "pageid": self.pageid, 243 | "prop": "wikitext", 244 | "formatversion": "latest", 245 | } 246 | request = self.mediawiki.wiki_request(query_params) 247 | self._wikitext = request["parse"]["wikitext"] 248 | return self._wikitext 249 | 250 | @property 251 | def images(self) -> List[str]: 252 | """list: Images on the page 253 | 254 | Note: 255 | Not settable""" 256 | if self._images is None: 257 | params = { 258 | "generator": "images", 259 | "gimlimit": "max", 260 | "prop": "imageinfo", # this will be replaced by fileinfo 261 | "iiprop": "url", 262 | } 263 | self._images = [ 264 | page["imageinfo"][0]["url"] 265 | for page in self._continued_query(params) 266 | if "imageinfo" in page and "url" in page["imageinfo"][0] 267 | ] 268 | self._images = sorted(self._images) 269 | return self._images 270 | 271 | @property 272 | def logos(self) -> List[str]: 273 | """list: Parse images within the infobox signifying either the main image or logo 274 | 275 | Note: 276 | Not settable 277 | Note: 278 | Side effect is to also pull the html which can be slow 279 | Note: 280 | This is a parsing operation and not part of the standard API""" 281 | if self._logos is None: 282 | self._logos = [] 283 | # Cache the results of parsing the html, so that multiple calls happen much faster 284 | if not self._soup: 285 | self._soup = BeautifulSoup(self.html, "html.parser") 286 | info = self._soup.find("table", {"class": "infobox"}) 287 | if info is not None and isinstance(info, Tag): 288 | children = info.find_all("a", class_="image") 289 | self._logos.extend("https:" + child.img["src"] for child in children) 290 | return 
self._logos
291 | 
292 |     @property
293 |     def hatnotes(self) -> List[str]:
294 |         """list: Parse hatnotes from the HTML
295 | 
296 |         Note:
297 |             Not settable
298 |         Note:
299 |             Side effect is to also pull the html which can be slow
300 |         Note:
301 |             This is a parsing operation and not part of the standard API"""
302 |         if self._hatnotes is None:
303 |             self._hatnotes = []
304 |             # Cache the results of parsing the html, so that multiple calls happen much faster
305 |             if not self._soup:
306 |                 self._soup = BeautifulSoup(self.html, "html.parser")
307 |             notes = self._soup.find_all("div", class_="hatnote")
308 |             if notes is not None:
309 |                 for note in notes:
310 |                     tmp = []
311 |                     for child in note.children:
312 |                         if hasattr(child, "text"):
313 |                             tmp.append(child.text)
314 |                         else:
315 |                             tmp.append(child)
316 |                     self._hatnotes.append("".join(tmp))
317 |         return self._hatnotes
318 | 
319 |     @property
320 |     def references(self) -> List[str]:
321 |         """list: External links, or references, listed anywhere on the MediaWiki page
322 |         Note:
323 |             Not settable
324 |         Note:
325 |             May include external links within page that are not technically cited anywhere"""
326 |         if self._references is None:
327 |             self._references = []
328 |             self.__pull_combined_properties()
329 |         return self._references
330 | 
331 |     @property
332 |     def categories(self) -> List[str]:
333 |         """list: Non-hidden categories on the page
334 | 
335 |         Note:
336 |             Not settable"""
337 |         if self._categories is None:
338 |             self._categories = []
339 |             self.__pull_combined_properties()
340 |         return self._categories
341 | 
342 |     @property
343 |     def coordinates(self) -> Optional[Tuple[Decimal, Decimal]]:
344 |         """Tuple: GeoCoordinates of the place referenced; results in lat/long tuple or None if no geocoordinates present
345 | 
346 |         Note:
347 |             Not settable
348 |         Note:
349 |             Requires the GeoData extension to be installed"""
350 |         if self._coordinates is False:
351 |             self._coordinates = None
352 |             self.__pull_combined_properties()
353 |         return self._coordinates  # type: ignore
354 | 
355 |     @property
356 |     def links(self) -> List[str]:
357 |         """list: List of all MediaWiki page links on the page
358 | 
359 |         Note:
360 |             Not settable"""
361 |         if self._links is None:
362 |             self._links = []
363 |             self.__pull_combined_properties()
364 |         return self._links
365 | 
366 |     @property
367 |     def redirects(self) -> List[str]:
368 |         """list: List of all redirects to this page; **i.e.,** the titles listed here will redirect to this page title
369 | 
370 |         Note:
371 |             Not settable"""
372 |         if self._redirects is None:
373 |             self._redirects = []
374 |             self.__pull_combined_properties()
375 |         return self._redirects
376 | 
377 |     @property
378 |     def backlinks(self) -> List[str]:
379 |         """list: Pages that link to this page
380 | 
381 |         Note:
382 |             Not settable"""
383 |         if self._backlinks is None:
384 |             self._backlinks = []
385 |             params = {
386 |                 "action": "query",
387 |                 "list": "backlinks",
388 |                 "bltitle": self.title,
389 |                 "bllimit": "max",
390 |                 "blfilterredir": "nonredirects",
391 |                 "blnamespace": 0,
392 |             }
393 |             tmp = [link["title"] for link in self._continued_query(params, "backlinks")]
394 |             self._backlinks = sorted(tmp)
395 |         return self._backlinks
396 | 
397 |     @property
398 |     def langlinks(self) -> Dict[str, str]:
399 |         """dict: Names of the page in other languages, where each key is the language code
400 |         and each value is the name of the page in that language.
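        For example, a single (hypothetical) entry would be {"fr": "Titre de la page"}.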
401 | 
402 |         Note:
403 |             Not settable
404 |         Note:
405 |             list of all language links from the provided pages to other
406 |             languages according to: https://www.mediawiki.org/wiki/API:Langlinks"""
407 | 
408 |         if self._langlinks is None:
409 |             params = {"prop": "langlinks", "lllimit": "max"}  # lllimit is the langlinks limit parameter
410 |             query_result = self._continued_query(params)
411 | 
412 |             langlinks = {}
413 |             for lang_info in query_result:
414 |                 langlinks[lang_info["lang"]] = lang_info["*"]
415 |             self._langlinks = langlinks
416 |         return self._langlinks
417 | 
418 |     @property
419 |     def preview(self) -> Dict[str, str]:
420 |         """dict: Page preview information that builds the preview hover"""
421 |         if self._preview is None:
422 |             params = {
423 |                 "action": "query",
424 |                 "formatversion": "2",
425 |                 "prop": "info|extracts|pageimages|revisions|pageterms|coordinates|pageviews",
426 |                 "exsentences": "5",
427 |                 "explaintext": "true",
428 |                 "piprop": "thumbnail|original",
429 |                 "pithumbsize": "320",
430 |                 "pilicense": "any",
431 |                 "rvprop": "timestamp|ids",
432 |                 "wbptterms": "description",
433 |                 "titles": self.title,
434 |             }
435 |             raw = self.mediawiki.wiki_request(params)
436 |             self._preview = raw.get("query", {}).get("pages", [])[0]
437 |         return self._preview
438 | 
439 |     @property
440 |     def summary(self) -> Optional[str]:
441 |         """str: Default page summary
442 | 
443 |         Note:
444 |             Not settable"""
445 |         if self._summary is None:
446 |             self.__pull_combined_properties()
447 |         if self._summary is None:
448 |             self._summary = ""
449 |         return self._summary
450 | 
451 |     def summarize(self, sentences: int = 0, chars: int = 0) -> str:
452 |         """Summarize page either by number of sentences, chars, or first
453 |         section (**default**)
454 | 
455 |         Args:
456 |             sentences (int): Number of sentences to use in summary (first `x` sentences)
457 |             chars (int): Number of characters to use in summary (first `x` characters)
458 |         Returns:
459 |             str: The summary of the MediaWiki page
460 |         Note:
461 |             Precedence for parameters: sentences then chars; if both are 0 then the entire first section is returned"""
462 |         query_params: Dict[str, Any] = {"prop": "extracts", "explaintext": "", "titles": self.title}
463 |         if sentences:
464 |             query_params["exsentences"] = min(sentences, 10)
465 |         elif chars:
466 |             query_params["exchars"] = max(chars, 1)
467 |         else:
468 |             query_params["exintro"] = ""
469 | 
470 |         request = self.mediawiki.wiki_request(query_params)
471 |         return request["query"]["pages"][self.pageid].get("extract")
472 | 
473 |     @property
474 |     def sections(self) -> List[str]:
475 |         """list: Table of contents sections
476 | 
477 |         Note:
478 |             Not settable"""
479 |         # NOTE: Due to MediaWiki sites adding superscripts or italics or bold
480 |         #       information in the sections, moving to regex to get the
481 |         #       `non-decorated` name instead of using the query api!
482 | if self._sections is None: 483 | self._parse_sections() 484 | if self._sections is None: 485 | self._sections = [] 486 | return self._sections 487 | 488 | @property 489 | def table_of_contents(self) -> Dict[str, Any]: 490 | """OrderedDict: Dictionary of sections and sub-sections 491 | 492 | Note: 493 | Leaf nodes are empty OrderedDict objects 494 | Note: 495 | Not Settable""" 496 | 497 | if self._table_of_contents is None: 498 | self._parse_sections() 499 | if self._table_of_contents is None: 500 | self._table_of_contents = {} 501 | return self._table_of_contents 502 | 503 | def section(self, section_title: Optional[str]) -> Optional[str]: 504 | """Plain text section content 505 | 506 | Args: 507 | section_title (str): Name of the section to pull or None for the header section 508 | Returns: 509 | str: The content of the section 510 | Note: 511 | Use **None** if the header section is desired 512 | Note: 513 | Returns **None** if section title is not found; only text between title and next \ 514 | section or sub-section title is returned 515 | Note: 516 | Side effect is to also pull the content which can be slow 517 | Note: 518 | This is a parsing operation and not part of the standard API""" 519 | if not section_title: 520 | try: 521 | content = self.content 522 | index = 0 523 | except ValueError: 524 | return None 525 | except IndexError: 526 | pass 527 | else: 528 | section = f"== {section_title} ==" 529 | try: 530 | # TODO, move index to find to remove exceptions 531 | content = self.content 532 | index = content.index(section) + len(section) 533 | 534 | # ensure we have the full section header... 535 | while True: 536 | if content[index + 1] == "=": 537 | index += 1 538 | else: 539 | break 540 | except ValueError: 541 | return None 542 | except IndexError: 543 | pass 544 | 545 | try: 546 | next_index = self.content.index("==", index) 547 | except ValueError: 548 | next_index = len(self.content) 549 | 550 | val = self.content[index:next_index].lstrip("=").strip() 551 | if val == "": 552 | return None 553 | return val 554 | 555 | def parse_section_links(self, section_title: str) -> Optional[List[Tuple[str, str]]]: 556 | """Parse all links within a section 557 | 558 | Args: 559 | section_title (str): Name of the section to pull or, if None is provided, \ 560 | the links between the main heading and the first section 561 | Returns: 562 | list: List of (title, url) tuples 563 | Note: 564 | Use **None** to pull the links from the header section 565 | Note: 566 | Returns **None** if section title is not found 567 | Note: 568 | Side effect is to also pull the html which can be slow 569 | Note: 570 | This is a parsing operation and not part of the standard API""" 571 | # Cache the results of parsing the html, so that multiple calls happen much faster 572 | if not self.html: 573 | return None 574 | if not self._soup: 575 | self._soup = BeautifulSoup(self.html, "html.parser") 576 | 577 | if not section_title: 578 | return self._parse_section_links(None) 579 | 580 | headlines = self._soup.find_all("span", class_="mw-headline") 581 | tmp_soup = BeautifulSoup(section_title, "html.parser") 582 | tmp_sec_title = tmp_soup.get_text().lower() 583 | id_tag = None 584 | for headline in headlines: 585 | tmp_id = headline.text 586 | if tmp_id.lower() == tmp_sec_title: 587 | id_tag = headline.get("id") 588 | break 589 | 590 | return self._parse_section_links(id_tag) if id_tag is not None else None 591 | 592 | # Protected Methods 593 | def __load(self, redirect: bool = True, preload: bool = False): 594 
| """load the basic page information""" 595 | query_params = { 596 | "prop": "info|pageprops", 597 | "inprop": "url", 598 | "ppprop": "disambiguation", 599 | "redirects": "", 600 | } 601 | query_params.update(self.__title_query_param()) 602 | 603 | request = self.mediawiki.wiki_request(query_params) 604 | 605 | query = request["query"] 606 | pageid = list(query["pages"].keys())[0] 607 | page = query["pages"][pageid] 608 | 609 | # determine result of the request 610 | # missing is present if the page is missing 611 | if "missing" in page: 612 | self._raise_page_error() 613 | # redirects is present in query if page is a redirect 614 | elif "redirects" in query: 615 | self._handle_redirect(redirect, preload, query, page) 616 | # if pageprops is returned, it must be a disambiguation error 617 | elif "pageprops" in page: 618 | self._raise_disambiguation_error(page, pageid) 619 | else: 620 | self.pageid = pageid 621 | self.title = page["title"] 622 | self.url = page["fullurl"] 623 | 624 | def _raise_page_error(self): 625 | """raise the correct type of page error""" 626 | if hasattr(self, "title"): 627 | raise PageError(title=self.title) 628 | raise PageError(pageid=self.pageid) 629 | 630 | def _raise_disambiguation_error(self, page: Dict, pageid: int): 631 | """parse and throw a disambiguation error""" 632 | query_params = { 633 | "prop": "revisions", 634 | "rvprop": "content", 635 | "rvparse": "", 636 | "rvlimit": 1, 637 | } 638 | query_params.update(self.__title_query_param()) 639 | request = self.mediawiki.wiki_request(query_params) 640 | html = request["query"]["pages"][pageid]["revisions"][0]["*"] 641 | 642 | lis = BeautifulSoup(html, "html.parser").find_all("li") 643 | filtered_lis = [li for li in lis if "tocsection" not in "".join(li.get("class", []))] 644 | may_refer_to = [li.a.get_text() for li in filtered_lis if li.a] 645 | 646 | disambiguation = [] 647 | for lis_item in filtered_lis: 648 | item = lis_item.find_all("a") 649 | one_disambiguation = {} 650 | one_disambiguation["description"] = lis_item.text 651 | if item and item[0].has_attr("title"): 652 | one_disambiguation["title"] = item[0]["title"] 653 | else: 654 | # these are non-linked records so double up the text 655 | one_disambiguation["title"] = lis_item.text 656 | disambiguation.append(one_disambiguation) 657 | raise DisambiguationError( 658 | getattr(self, "title", page["title"]), 659 | may_refer_to, 660 | page["fullurl"], 661 | disambiguation, 662 | ) 663 | 664 | def _handle_redirect(self, redirect: bool, preload: bool, query: Dict, page: Dict[str, Any]): 665 | """handle redirect""" 666 | if not redirect: 667 | raise RedirectError(getattr(self, "title", page["title"])) 668 | 669 | redirects = query["redirects"][0] 670 | 671 | if "normalized" in query: 672 | normalized = query["normalized"][0] 673 | if normalized["from"] != self.title: 674 | raise MediaWikiException(ODD_ERROR_MESSAGE) 675 | from_title = normalized["to"] 676 | else: 677 | if not getattr(self, "title", None): 678 | self.title = redirects["from"] 679 | delattr(self, "pageid") 680 | from_title = self.title 681 | if redirects["from"] != from_title: 682 | raise MediaWikiException(ODD_ERROR_MESSAGE) 683 | 684 | # change the title and reload the whole object 685 | self.__init__( # type: ignore 686 | self.mediawiki, 687 | title=redirects["to"], 688 | redirect=redirect, 689 | preload=preload, 690 | ) 691 | 692 | def _continued_query(self, query_params: Dict[str, Any], key: str = "pages") -> Iterator[Dict[Any, Any]]: 693 | """Based on 694 | 
https://www.mediawiki.org/wiki/API:Query#Continuing_queries""" 695 | query_params.update(self.__title_query_param()) 696 | 697 | last_cont: Dict = {} 698 | prop = query_params.get("prop") 699 | 700 | while True: 701 | params = query_params.copy() 702 | params.update(last_cont) 703 | 704 | request = self.mediawiki.wiki_request(params) 705 | 706 | if "query" not in request: 707 | break 708 | 709 | pages = request["query"][key] 710 | if "generator" in query_params: 711 | yield from pages.values() 712 | elif isinstance(pages, list): 713 | yield from [v for x, v in enumerate(pages)] 714 | else: 715 | yield from pages[self.pageid].get(prop, []) 716 | 717 | if "continue" not in request or request["continue"] == last_cont: 718 | break 719 | 720 | last_cont = request["continue"] 721 | 722 | def _parse_section_links(self, id_tag: Optional[str]) -> List[Tuple[str, str]]: 723 | """given a section id, parse the links in the unordered list""" 724 | all_links: List[Tuple[str, str]] = [] 725 | 726 | if not self._soup: 727 | self._soup = BeautifulSoup(self.html, "html.parser") 728 | 729 | if id_tag is None: 730 | root = self._soup.find("div", {"class": "mw-parser-output"}) 731 | if root is None or isinstance(root, NavigableString): 732 | return all_links 733 | candidates = root.children 734 | else: 735 | root = self._soup.find("span", {"id": id_tag}) 736 | if root is None: 737 | return all_links 738 | candidates = self._soup.find(id=id_tag).parent.next_siblings # type: ignore 739 | 740 | for node in candidates: 741 | if not isinstance(node, Tag) or node.get("role", "") == "navigation": 742 | continue 743 | classes = node.get("class", []) 744 | if not isinstance(classes, list): 745 | classes = [classes if classes else ""] 746 | if "infobox" in classes: 747 | continue 748 | 749 | # If the classname contains "toc", the element is a table of contents. 750 | # The comprehension is necessary because there are several possible 751 | # types of tocs: "toclevel", "toc", ... 752 | toc_classnames = [cname for cname in classes if "toc" in cname] 753 | if toc_classnames: 754 | continue 755 | 756 | # this is actually the child node's class... 757 | is_headline = node.find("span", {"class": "mw-headline"}) 758 | if is_headline is not None: 759 | break 760 | if node.name == "a": 761 | all_links.append(self.__parse_link_info(node)) 762 | else: 763 | all_links.extend(self.__parse_link_info(link) for link in node.find_all("a")) 764 | return all_links 765 | 766 | def __parse_link_info(self, link: Tag) -> Tuple[str, str]: 767 | """parse the tag for the link""" 768 | href = link.get("href", "") 769 | if isinstance(href, list): 770 | href = href[0] 771 | href = "" if href is None else href 772 | txt = link.string or href 773 | is_rel = is_relative_url(href) 774 | if is_rel is True: 775 | tmp = f"{self.mediawiki.base_url}{href}" 776 | elif is_rel is None: 777 | tmp = f"{self.url}{href}" 778 | else: 779 | tmp = href 780 | return txt, tmp 781 | 782 | def _parse_sections(self): 783 | """parse sections and TOC""" 784 | 785 | def _list_to_dict(_dict, path, sec): 786 | tmp = _dict 787 | for elm in path[:-1]: 788 | tmp = tmp[elm] 789 | tmp[sec] = OrderedDict() 790 | 791 | self._sections = [] 792 | section_regexp = r"\n==* .* ==*\n" # '== {STUFF_NOT_\n} ==' 793 | found_obj = re.findall(section_regexp, self.content) 794 | 795 | res = OrderedDict() 796 | path = [] 797 | last_depth = 0 798 | for obj in found_obj: 799 | depth = obj.count("=") / 2 # this gets us to the single side... 
800 |             depth -= 2  # now, we can calculate depth
801 | 
802 |             sec = obj.lstrip("\n= ").rstrip(" =\n")
803 |             if depth == 0:
804 |                 last_depth = 0
805 |                 path = [sec]
806 |                 res[sec] = OrderedDict()
807 |             elif depth > last_depth:
808 |                 last_depth = depth
809 |                 path.append(sec)
810 |                 _list_to_dict(res, path, sec)
811 |             elif depth < last_depth:
812 |                 while last_depth > depth:
813 |                     path.pop()
814 |                     last_depth -= 1
815 |                 if path:
816 |                     path.pop()
817 |                 path.append(sec)
818 |                 _list_to_dict(res, path, sec)
819 |                 last_depth = depth
820 |             else:
821 |                 if path:
822 |                     path.pop()
823 |                 path.append(sec)
824 |                 _list_to_dict(res, path, sec)
825 |                 last_depth = depth
826 |             self._sections.append(sec)
827 | 
828 |         self._table_of_contents = res
829 | 
830 |     def __title_query_param(self) -> Dict[str, Any]:
831 |         """util function to determine which parameter method to use"""
832 |         if getattr(self, "title", None) is not None:
833 |             return {"titles": self.title}
834 |         return {"pageids": self.pageid}
835 | 
836 |     def __pull_combined_properties(self):
837 |         """pull summary, redirects, links, categories, coordinates, and references in a single combined query"""
838 | 
839 |         query_params = {
840 |             "titles": self.title,
841 |             "prop": "extracts|redirects|links|coordinates|categories|extlinks",
842 |             "continue": {},
843 |             # summary
844 |             "explaintext": "",
845 |             "exintro": "",  # full first section for the summary!
846 |             # redirects
847 |             "rdprop": "title",
848 |             "rdlimit": "max",
849 |             # links
850 |             "plnamespace": 0,
851 |             "pllimit": "max",
852 |             # coordinates
853 |             "colimit": "max",
854 |             # categories
855 |             "cllimit": "max",
856 |             "clshow": "!hidden",
857 |             # references
858 |             "ellimit": "max",
859 |         }
860 | 
861 |         last_cont = {}
862 |         results = {}
863 |         idx = 0
864 |         while True:
865 |             params = query_params.copy()
866 |             params.update(last_cont)
867 | 
868 |             request = self.mediawiki.wiki_request(params)
869 |             idx += 1
870 | 
871 |             if "query" not in request:
872 |                 break
873 | 
874 |             keys = [
875 |                 "extracts",
876 |                 "redirects",
877 |                 "links",
878 |                 "coordinates",
879 |                 "categories",
880 |                 "extlinks",
881 |             ]
882 |             new_cont = request.get("continue")
883 |             request = request["query"]["pages"][self.pageid]
884 |             if not results:
885 |                 results = request
886 |             else:
887 |                 for key in keys:
888 |                     if key in request and request.get(key) is not None:
889 |                         val = request.get(key)
890 |                         tmp = results.get(key)
891 |                         if isinstance(tmp, (list, tuple)):
892 |                             results[key] = results.get(key, []) + val
893 |             if new_cont is None or new_cont == last_cont:
894 |                 break
895 | 
896 |             last_cont = new_cont
897 | 
898 |         # redirects
899 |         tmp = [link["title"] for link in results.get("redirects", [])]
900 |         self._redirects = sorted(tmp)
901 | 
902 |         # summary
903 |         self._summary = results.get("extract")
904 | 
905 |         # links
906 |         tmp = [link["title"] for link in results.get("links", [])]
907 |         self._links = sorted(tmp)
908 | 
909 |         # categories
910 |         def _get_cat(val):
911 |             """parse the category correctly"""
912 |             tmp = val["title"]
913 |             if tmp.startswith(self.mediawiki.category_prefix):
914 |                 return tmp[len(self.mediawiki.category_prefix) + 1 :]
915 |             return tmp
916 | 
917 |         tmp = [_get_cat(link) for link in results.get("categories", [])]
918 |         self._categories = sorted(tmp)
919 | 
920 |         # coordinates
921 |         if "coordinates" in results:
922 |             self._coordinates = (
923 |                 Decimal(results["coordinates"][0]["lat"]),
924 |                 Decimal(results["coordinates"][0]["lon"]),
925 |             )
926 | 
927 |         # references
928 |         tmp = [link["*"] for link in results.get("extlinks", [])]
929 |         self._references = sorted(tmp)
930 | 
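# A minimal usage sketch for MediaWikiPage (illustrative only): pages should be
# obtained through MediaWiki.page() rather than constructed directly, and the
# title below is an arbitrary example. This assumes the package's top-level
# __init__ re-exports MediaWiki and DisambiguationError.
#
#   from mediawiki import MediaWiki, DisambiguationError
#
#   wikipedia = MediaWiki()  # defaults to the English Wikipedia API endpoint
#   try:
#       page = wikipedia.page("Python (programming language)")
#       print(page.title, page.url)
#       print(page.summarize(sentences=2))  # extract built server-side by TextExtracts
#       print(page.sections[:5])            # parsed lazily from the page content
#   except DisambiguationError as ex:
#       print("Ambiguous title; options include:", ex.options[:5])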
--------------------------------------------------------------------------------
/mediawiki/mediawiki.py:
--------------------------------------------------------------------------------
1 | """
2 | MediaWiki class module
3 | """
4 | 
5 | # MIT License
6 | # Author: Tyler Barrus (barrust@gmail.com)
7 | 
8 | import time
9 | from datetime import datetime, timedelta
10 | from decimal import Decimal, DecimalException
11 | from json import JSONDecodeError
12 | from typing import Any, Dict, List, Optional, Tuple, Union
13 | 
14 | import requests
15 | import requests.exceptions as rex
16 | 
17 | from mediawiki.configuraton import VERSION, Configuration, HTTPAuthenticator
18 | from mediawiki.exceptions import (
19 |     HTTPTimeoutError,
20 |     MediaWikiAPIURLError,
21 |     MediaWikiCategoryTreeError,
22 |     MediaWikiException,
23 |     MediaWikiForbidden,
24 |     MediaWikiGeoCoordError,
25 |     MediaWikiLoginError,
26 |     PageError,
27 | )
28 | from mediawiki.mediawikipage import MediaWikiPage
29 | from mediawiki.utilities import memoize
30 | 
31 | 
32 | class MediaWiki:
33 |     """MediaWiki API Wrapper Instance
34 | 
35 |     Args:
36 |         url (str): API URL of the MediaWiki site; defaults to Wikipedia
37 |         lang (str): Language of the MediaWiki site; used to help change API URL
38 |         timeout (float): HTTP timeout setting; None means no timeout
39 |         rate_limit (bool): Use rate limiting to limit calls to the site
40 |         rate_limit_wait (timedelta): Amount of time to wait between requests
41 |         cat_prefix (str): The prefix for categories used by the mediawiki site; defaults to Category (en)
42 |         user_agent (str): The user agent string to use when making requests; defaults to a library version \
43 |             string, but the MediaWiki API documentation recommends setting a unique user agent rather than \
44 |             using the library's default
45 |         username (str): The username to use to log into the MediaWiki
46 |         password (str): The password to use to log into the MediaWiki
47 |         proxies (dict): A dictionary of specific proxies to use in the Requests library
48 |         verify_ssl (bool|str): Verify SSL Certificates to be passed directly into the Requests library
49 |         http_auth (tuple|callable): HTTP authenticator to be passed directly into the Requests library"""
50 | 
51 |     __slots__ = [
52 |         "_version",
53 |         "_config",
54 |         "_session",
55 |         "_extensions",
56 |         "_api_version",
57 |         "_api_version_str",
58 |         "_base_url",
59 |         "__supported_languages",
60 |         "__available_languages",
61 |         "_is_logged_in",
62 |         "_cache",
63 |     ]
64 | 
65 |     def __init__(
66 |         self,
67 |         url: str = "https://{lang}.wikipedia.org/w/api.php",
68 |         lang: str = "en",
69 |         timeout: float = 15.0,
70 |         rate_limit: bool = False,
71 |         rate_limit_wait: timedelta = timedelta(milliseconds=50),
72 |         cat_prefix: str = "Category",
73 |         user_agent: Optional[str] = None,
74 |         username: Optional[str] = None,
75 |         password: Optional[str] = None,
76 |         proxies: Optional[Dict] = None,
77 |         verify_ssl: Union[bool, str] = True,
78 |         http_auth: Optional[HTTPAuthenticator] = None,
79 |     ):
80 |         """Init Function"""
81 |         self._version = VERSION
82 |         # the API URL template is formatted with the requested language below
83 |         self._config = Configuration(
84 |             lang=lang,
85 |             api_url=url.format(lang=lang.lower()),
86 |             category_prefix=cat_prefix,
87 |             timeout=timeout,
88 |             user_agent=user_agent,
89 |             proxies=proxies,
90 |             verify_ssl=verify_ssl,
91 |             rate_limit=rate_limit,
92 |             rate_limit_wait=rate_limit_wait,
93 |             username=username,
94 |             password=password,
95 |             refresh_interval=None,
96 |             use_cache=True,
97 |             http_auth=http_auth,
98 |         )
99 | 
100 |         # requests 
library parameters
101 |         self._session: requests.Session = requests.Session()
102 | 
103 |         # reset library parameters
104 |         self._extensions = None
105 |         self._api_version = None
106 |         self._api_version_str = None
107 |         self._base_url = None
108 |         self.__supported_languages: Optional[Dict[str, str]] = None
109 |         self.__available_languages: Optional[Dict[str, bool]] = None
110 | 
111 |         # for memoized results
112 |         self._cache: Dict = {}
113 | 
114 |         self._reset_session()
115 | 
116 |         # for login information
117 |         self._is_logged_in = False
118 |         if self._config.username is not None and self._config.password is not None:
119 |             self.login(self._config.username, self._config.password)
120 | 
121 |         try:
122 |             self._get_site_info()
123 |         except MediaWikiException as exc:
124 |             raise MediaWikiAPIURLError(self._config.api_url) from exc
125 | 
126 |     # non-settable properties
127 |     @property
128 |     def version(self) -> str:
129 |         """str: The version of the pymediawiki library
130 | 
131 |         Note:
132 |             Not settable"""
133 |         return self._version
134 | 
135 |     @property
136 |     def api_version(self) -> Optional[str]:
137 |         """str: API Version of the MediaWiki site
138 | 
139 |         Note:
140 |             Not settable"""
141 |         return self._api_version_str
142 | 
143 |     @property
144 |     def base_url(self) -> str:
145 |         """str: Base URL for the MediaWiki site
146 | 
147 |         Note:
148 |             Not settable"""
149 |         return self._base_url if self._base_url else ""
150 | 
151 |     @property
152 |     def extensions(self) -> List[str]:
153 |         """list: Extensions installed on the MediaWiki site
154 | 
155 |         Note:
156 |             Not settable"""
157 |         return self._extensions if self._extensions else []
158 | 
159 |     # settable properties
160 |     @property
161 |     def rate_limit(self) -> bool:
162 |         """bool: Turn on or off Rate Limiting"""
163 |         return self._config.rate_limit
164 | 
165 |     @rate_limit.setter
166 |     def rate_limit(self, rate_limit: bool):
167 |         """Turn on or off rate limiting"""
168 |         self._config.rate_limit = rate_limit
169 |         if self._config._clear_memoized:
170 |             self.clear_memoized()
171 | 
172 |     @property
173 |     def proxies(self) -> Optional[Dict]:
174 |         """dict: Turn on, off, or set proxy use with the Requests library"""
175 |         return self._config.proxies
176 | 
177 |     @proxies.setter
178 |     def proxies(self, proxies: Optional[Dict]):
179 |         """Turn on, off, or set proxy use through the Requests library"""
180 |         self._config.proxies = proxies
181 |         if self._config._reset_session:
182 |             self._reset_session()
183 | 
184 |     @property
185 |     def use_cache(self) -> bool:
186 |         """bool: Whether caching should be used; on (**True**) or off (**False**)"""
187 |         return self._config.use_cache
188 | 
189 |     @use_cache.setter
190 |     def use_cache(self, use_cache: bool):
191 |         """toggle using the cache or not"""
192 |         self._config.use_cache = use_cache
193 | 
194 |     @property
195 |     def rate_limit_min_wait(self) -> timedelta:
196 |         """timedelta: Time to wait between calls
197 | 
198 |         Note:
199 |             Only used if rate_limit is **True**"""
200 |         return self._config.rate_limit_min_wait
201 | 
202 |     @rate_limit_min_wait.setter
203 |     def rate_limit_min_wait(self, min_wait: timedelta):
204 |         """Set minimum wait to use for rate limiting"""
205 |         self._config.rate_limit_min_wait = min_wait
206 | 
207 |     @property
208 |     def timeout(self) -> Optional[float]:
209 |         """float: Response timeout for API requests
210 | 
211 |         Note:
212 |             Use **None** for no response timeout"""
213 |         return self._config.timeout
214 | 
215 |     @timeout.setter
216 |     def timeout(self, timeout: Optional[float]):
217 |         """Set request timeout 
in seconds (or fractions of a second)"""
218 |         self._config.timeout = timeout
219 | 
220 |     @property
221 |     def verify_ssl(self) -> Union[bool, str]:
222 |         """bool | str: Verify SSL when using requests or path to cert file"""
223 |         return self._config.verify_ssl
224 | 
225 |     @verify_ssl.setter
226 |     def verify_ssl(self, verify_ssl: Union[bool, str]):
227 |         """Set request verify SSL parameter; defaults to True if issue"""
228 |         self._config.verify_ssl = verify_ssl
229 |         if self._config._reset_session:
230 |             self._reset_session()
231 | 
232 |     @property
233 |     def language(self) -> str:
234 |         """str: The API URL language; if possible this will update the API URL
235 | 
236 |         Note:
237 |             Use correct language titles with the updated API URL
238 |         Note:
239 |             Some API URLs do not encode language; unable to update if this is the case"""
240 |         return self._config.lang
241 | 
242 |     @language.setter
243 |     def language(self, lang: str):
244 |         """Set the language to use; attempts to change the API URL"""
245 |         self._config.lang = lang
246 |         if self._config._clear_memoized:
247 |             self.clear_memoized()
248 | 
249 |     @property
250 |     def category_prefix(self) -> str:
251 |         """str: The category prefix to use when using category based functions
252 | 
253 |         Note:
254 |             Use the correct category name for the language selected"""
255 |         return self._config.category_prefix
256 | 
257 |     @category_prefix.setter
258 |     def category_prefix(self, prefix: str):
259 |         """Set the category prefix correctly"""
260 |         self._config.category_prefix = prefix
261 | 
262 |     @property
263 |     def user_agent(self) -> str:
264 |         """str: User agent string
265 | 
266 |         Note: If used as part of another project, this should be changed"""
267 |         return self._config.user_agent
268 | 
269 |     @user_agent.setter
270 |     def user_agent(self, user_agent: str):
271 |         """Set the new user agent string
272 | 
273 |         Note: Will need to re-log into the MediaWiki if user agent string is changed"""
274 |         self._config.user_agent = user_agent
275 |         if self._config._reset_session:
276 |             self._reset_session()
277 | 
278 |     @property
279 |     def api_url(self) -> str:
280 |         """str: API URL of the MediaWiki site
281 | 
282 |         Note:
283 |             Not settable; See :py:func:`mediawiki.MediaWiki.set_api_url`"""
284 |         return self._config.api_url
285 | 
286 |     @property
287 |     def memoized(self) -> Dict[Any, Any]:
288 |         """dict: Return the memoize cache
289 | 
290 |         Note:
291 |             Not settable; see
292 |             :py:func:`mediawiki.MediaWiki.clear_memoized`"""
293 |         return self._cache
294 | 
295 |     @property
296 |     def refresh_interval(self) -> Optional[int]:
297 |         """int: The interval at which the memoize cache is to be refreshed"""
298 |         return self._config.refresh_interval
299 | 
300 |     @refresh_interval.setter
301 |     def refresh_interval(self, refresh_interval: int):
302 |         """Set the new cache refresh interval"""
303 |         self._config.refresh_interval = refresh_interval
304 | 
305 |     @property
306 |     def http_auth(self) -> Optional[HTTPAuthenticator]:
307 |         """tuple|callable: HTTP authenticator to use to access the mediawiki site"""
308 |         return self._config.http_auth
309 | 
310 |     @http_auth.setter
311 |     def http_auth(self, http_auth: Optional[HTTPAuthenticator]):
312 |         """Set the HTTP authenticator, if needed, to use to access the mediawiki site"""
313 |         self._config.http_auth = http_auth
314 |         self._session.auth = http_auth
315 | 
316 |     def login(self, username: str, password: str, strict: bool = True) -> bool:
317 |         """Login as specified user
318 | 
319 |         Args:
320 |             username (str): The username to log in with
321 |             
password (str): The password for the user 322 | strict (bool): `True` to throw an error on failure 323 | Returns: 324 | bool: `True` if successfully logged in; `False` otherwise 325 | Raises: 326 | :py:func:`mediawiki.exceptions.MediaWikiLoginError`: if unable to login 327 | 328 | Note: 329 | Per the MediaWiki API, one should use the `bot password`; \ 330 | see https://www.mediawiki.org/wiki/API:Login for more information 331 | """ 332 | # get login token 333 | params = { 334 | "action": "query", 335 | "meta": "tokens", 336 | "type": "login", 337 | "format": "json", 338 | } 339 | token_res = self._get_response(params) 340 | if "query" in token_res and "tokens" in token_res["query"]: 341 | token = token_res["query"]["tokens"]["logintoken"] 342 | else: 343 | return False 344 | 345 | params = { 346 | "action": "login", 347 | "lgname": username, 348 | "lgpassword": password, 349 | "lgtoken": token, 350 | "format": "json", 351 | } 352 | 353 | res = self._post_response(params) 354 | if res["login"]["result"] == "Success": 355 | self._is_logged_in = True 356 | return True 357 | self._is_logged_in = False 358 | reason = res["login"]["reason"] 359 | if strict: 360 | raise MediaWikiLoginError(f"MediaWiki login failure: {reason}") 361 | return False 362 | 363 | # non-properties 364 | def set_api_url( 365 | self, 366 | api_url: str = "https://{lang}.wikipedia.org/w/api.php", 367 | lang: str = "en", 368 | username: Optional[str] = None, 369 | password: Optional[str] = None, 370 | ): 371 | """Set the API URL and language 372 | 373 | Args: 374 | api_url (str): API URL to use 375 | lang (str): Language of the API URL 376 | username (str): The username, if needed, to log into the MediaWiki site 377 | password (str): The password, if needed, to log into the MediaWiki site 378 | Raises: 379 | :py:func:`mediawiki.exceptions.MediaWikiAPIURLError`: if the \ 380 | url is not a valid MediaWiki site or login fails 381 | """ 382 | old_api_url = self._config.api_url 383 | old_lang = self._config.lang 384 | self._config.lang = lang.lower() 385 | self._config.api_url = api_url.format(lang=self._config.lang) 386 | self._config.username = username 387 | self._config.password = password 388 | self._is_logged_in = False 389 | try: 390 | if self._config.username is not None and self._config.password is not None: 391 | self.login(self._config.username, self._config.password) 392 | self._get_site_info() 393 | self.__supported_languages = None # reset this 394 | self.__available_languages = None # reset this 395 | except (rex.ConnectTimeout, MediaWikiException) as exc: 396 | # reset api url and lang in the event that the exception was caught 397 | self._config.api_url = old_api_url 398 | self._config.lang = old_lang 399 | raise MediaWikiAPIURLError(api_url) from exc 400 | self.clear_memoized() 401 | 402 | def _reset_session(self): 403 | """Set session information""" 404 | if self._session: 405 | self._session.close() 406 | 407 | headers = {"User-Agent": self._config.user_agent} 408 | self._session = requests.Session() 409 | self._session.auth = self._config.http_auth 410 | self._session.headers.update(headers) 411 | if self._config.proxies is not None: 412 | self._session.proxies.update(self._config.proxies) 413 | self._session.verify = self._config.verify_ssl 414 | 415 | self._is_logged_in = False 416 | self._config._reset_session = False 417 | 418 | def clear_memoized(self): 419 | """Clear memoized (cached) values""" 420 | if hasattr(self, "_cache"): 421 | self._cache.clear() 422 | self._config._clear_memoized = False 423 | 
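    # A short usage sketch for the setup methods above; the URL and the bot
    # credentials are placeholders:
    #
    #   >>> wiki = MediaWiki()
    #   >>> wiki.set_api_url("https://www.mediawiki.org/w/api.php")
    #   >>> wiki.login("MyUser@MyBot", "bot_password", strict=False)  # hypothetical bot credentials
    #
    # set_api_url clears memoized results on success; if validation fails, it
    # restores the previous URL and language before raising MediaWikiAPIURLError.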
424 |     # non-setup functions
425 |     @property
426 |     def supported_languages(self) -> Dict[str, str]:
427 |         """dict: All supported language prefixes on the MediaWiki site
428 | 
429 |         Note:
430 |             Not settable"""
431 |         if self.__supported_languages is None:
432 |             res = self.wiki_request({"meta": "siteinfo", "siprop": "languages"})
433 |             tmp = res["query"]["languages"]
434 |             supported = {lang["code"]: lang["*"] for lang in tmp}
435 |             self.__supported_languages = supported
436 |         return self.__supported_languages
437 | 
438 |     @property
439 |     def available_languages(self) -> Dict[str, bool]:
440 |         """dict: All available language prefixes on the MediaWiki site
441 | 
442 |         Note:
443 |             Not settable"""
444 |         if self.__available_languages is None:
445 |             available = {}
446 |             for lang in self.supported_languages:
447 |                 try:
448 |                     MediaWiki(lang=lang)
449 |                     available[lang] = True
450 |                 except (rex.ConnectionError, rex.ConnectTimeout, MediaWikiException, MediaWikiAPIURLError):
451 |                     available[lang] = False
452 |             self.__available_languages = available
453 |         return self.__available_languages
454 | 
455 |     @property
456 |     def logged_in(self) -> bool:
457 |         """bool: Whether the instance is logged into the MediaWiki site"""
458 |         return self._is_logged_in
459 | 
460 |     def random(self, pages: int = 1) -> Union[str, List[str]]:
461 |         """Request a random page title or list of random titles
462 | 
463 |         Args:
464 |             pages (int): Number of random pages to return
465 |         Returns:
466 |             list or str: A list of random page titles, or a single random page title if pages = 1"""
467 |         if pages is None or pages < 1:
468 |             raise ValueError("Number of pages must be greater than 0")
469 | 
470 |         query_params = {"list": "random", "rnnamespace": 0, "rnlimit": pages}
471 | 
472 |         request = self.wiki_request(query_params)
473 |         titles = [page["title"] for page in request["query"]["random"]]
474 | 
475 |         return titles[0] if len(titles) == 1 else titles
476 | 
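    # Sketch of the call above: `random` unwraps single results, so the
    # return type depends on `pages` (the titles shown are hypothetical):
    #
    #   >>> wiki.random()         # 'A Single Title'
    #   >>> wiki.random(pages=3)  # ['Title 1', 'Title 2', 'Title 3']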
477 |     @memoize
478 |     def allpages(self, query: str = "", results: int = 10) -> List[str]:
479 |         """Request all pages from the MediaWiki instance
480 | 
481 |         Args:
482 |             query (str): Search string to use for pulling pages
483 |             results (int): The number of pages to return
484 |         Returns:
485 |             list: The pages that meet the search query
486 |         Note:
487 |             Could add the ability to continue past the limit of 500
488 |         """
489 |         max_pull = 500
490 |         limit = min(results, max_pull) if results is not None else max_pull
491 |         query_params = {"list": "allpages", "aplimit": limit, "apfrom": query}
492 | 
493 |         request = self.wiki_request(query_params)
494 | 
495 |         self._check_error_response(request, query)
496 | 
497 |         return [page["title"] for page in request["query"]["allpages"]]
498 | 
499 |     @memoize
500 |     def search(
501 |         self, query: str, results: int = 10, suggestion: bool = False
502 |     ) -> Union[List[str], Tuple[List[str], Optional[str]]]:
503 |         """Search for similar titles
504 | 
505 |         Args:
506 |             query (str): Page title
507 |             results (int): Number of pages to return
508 |             suggestion (bool): Use suggestion
509 |         Returns:
510 |             tuple or list: tuple (list of results, suggestion) if suggestion is **True**; list of results otherwise
511 |         Note:
512 |             Could add the ability to continue past the limit of 500
513 |         """
514 | 
515 |         self._check_query(query, "Query must be specified")
516 | 
517 |         max_pull = 500
518 | 
519 |         search_params = {
520 |             "list": "search",
521 |             "srprop": "",
522 |             "srlimit": min(results, max_pull) if results is not None else max_pull,
523 |             "srsearch": query,
524 |             "sroffset": 0,  # this is what will be used to pull more than the max
525 |         }
526 |         if suggestion:
527 |             search_params["srinfo"] = "suggestion"
528 | 
529 |         raw_results = self.wiki_request(search_params)
530 | 
531 |         self._check_error_response(raw_results, query)
532 | 
533 |         search_results = [d["title"] for d in raw_results["query"]["search"]]
534 | 
535 |         if suggestion:
536 |             sug = raw_results["query"]["searchinfo"]["suggestion"] if raw_results["query"].get("searchinfo") else None
537 |             return search_results, sug
538 |         return search_results
539 | 
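    # Sketch: with suggestion=True, `search` returns a (results, suggestion)
    # tuple where the suggestion may be None; without it, just the list
    # (the query shown is hypothetical):
    #
    #   >>> titles, suggestion = wiki.search("chess", suggestion=True)
    #   >>> titles_only = wiki.search("chess")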
540 |     @memoize
541 |     def suggest(self, query: str) -> Optional[str]:
542 |         """Gather a suggestion based on the provided title, or **None** if
543 |         no suggestion is found
544 | 
545 |         Args:
546 |             query (str): Page title
547 |         Returns:
548 |             str or None: Suggested page title or **None** if no suggestion found
549 |         """
550 |         res, suggest = self.search(query, results=1, suggestion=True)
551 |         try:
552 |             title = res[0] or suggest
553 |         except IndexError:  # page doesn't exist
554 |             title = None
555 |         return title
556 | 
557 |     @memoize
558 |     def geosearch(
559 |         self,
560 |         latitude: Union[Decimal, float, None] = None,
561 |         longitude: Union[Decimal, float, None] = None,
562 |         radius: int = 1000,
563 |         title: Optional[str] = None,
564 |         auto_suggest: bool = True,
565 |         results: int = 10,
566 |     ) -> List[str]:
567 |         """Search for pages that relate to the provided geocoordinates or are
568 |         near the provided page
569 | 
570 |         Args:
571 |             latitude (Decimal or None): Latitude geocoord; must be coercible to Decimal
572 |             longitude (Decimal or None): Longitude geocoord; must be coercible to Decimal
573 |             radius (int): Radius, in meters, around the page or geocoords to pull back
574 |             title (str): Page title to use as a geocoordinate; this takes precedence over lat/long
575 |             auto_suggest (bool): Auto-suggest the page title
576 |             results (int): Number of pages within the radius to return
577 |         Returns:
578 |             list: A listing of page titles
579 |         Note:
580 |             The Geosearch API does not support pulling more than the maximum of 500
581 |         Note:
582 |             If the page doesn't match the provided title, try setting auto_suggest to `False`
583 |         Raises:
584 |             ValueError: If either the passed latitude or longitude is not coercible to a Decimal
585 |         """
586 | 
587 |         def test_lat_long(val):
588 |             """handle testing lat and long"""
589 |             if not isinstance(val, Decimal):
590 |                 error = (
591 |                     "Latitude and Longitude must be specified either as "
592 |                     "a Decimal or in formats that can be coerced into "
593 |                     "a Decimal."
594 |                 )
595 |                 try:
596 |                     return Decimal(val)
597 |                 except (DecimalException, TypeError) as exc:
598 |                     raise ValueError(error) from exc
599 |             return val
600 | 
601 |         # end local function
602 |         max_pull = 500
603 | 
604 |         limit = min(results, max_pull) if results is not None else max_pull
605 |         params = {"list": "geosearch", "gsradius": radius, "gslimit": limit}
606 |         if title is not None:
607 |             if auto_suggest:
608 |                 title = self.suggest(title)
609 |             params["gspage"] = title
610 |         else:
611 |             lat = test_lat_long(latitude)
612 |             lon = test_lat_long(longitude)
613 |             params["gscoord"] = f"{lat}|{lon}"
614 | 
615 |         raw_results = self.wiki_request(params)
616 | 
617 |         self._check_error_response(raw_results, title if title else "Page Title Not Provided")
618 | 
619 |         return [d["title"] for d in raw_results["query"]["geosearch"]]
620 | 
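    # Sketch: `geosearch` accepts either a coordinate pair or a page title,
    # and the title takes precedence; coordinates may be floats, Decimals, or
    # coercible strings (the values shown are hypothetical):
    #
    #   >>> from decimal import Decimal
    #   >>> wiki.geosearch(latitude=Decimal("38.8977"), longitude=Decimal("-77.0365"), radius=500)
    #   >>> wiki.geosearch(title="Eiffel Tower")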
621 |     @memoize
622 |     def opensearch(self, query: str, results: int = 10, redirect: bool = True) -> List[Tuple[str, str, str]]:
623 |         """Execute a MediaWiki opensearch request, similar to search box
624 |         suggestions and conforming to the OpenSearch specification
625 | 
626 |         Args:
627 |             query (str): Title to search for
628 |             results (int): Number of results to return
629 |             redirect (bool): If **False**, return the redirect itself; otherwise, resolve redirects
630 |         Returns:
631 |             list: List of results, each stored as a tuple (Title, Summary, URL)
632 |         Note:
633 |             The Opensearch API does not support pulling more than the maximum of 500
634 |         Raises:
635 |             ValueError: If the query is None or an empty string"""
636 | 
637 |         self._check_query(query, "Query must be specified")
638 |         max_pull = 500
639 | 
640 |         query_params = {
641 |             "action": "opensearch",
642 |             "search": query,
643 |             "limit": (min(results, max_pull) if results is not None else max_pull),
644 |             "redirects": ("resolve" if redirect else "return"),
645 |             "warningsaserror": True,
646 |             "namespace": "",
647 |         }
648 | 
649 |         out = self.wiki_request(query_params)
650 | 
651 |         self._check_error_response(out, query)
652 | 
653 |         return [(item, out[2][i], out[3][i]) for i, item in enumerate(out[1])]
654 | 
655 |     @memoize
656 |     def prefixsearch(self, prefix: str, results: int = 10) -> List[str]:
657 |         """Perform a prefix search using the provided prefix string
658 | 
659 |         Args:
660 |             prefix (str): Prefix string to use for the search
661 |             results (int): Number of pages with the prefix to return
662 |         Returns:
663 |             list: List of page titles
664 |         Note:
665 |             **Per the documentation:** "The purpose of this module is \
666 |             similar to action=opensearch: to take user input and provide \
667 |             the best-matching titles. Depending on the search engine \
668 |             backend, this might include typo correction, redirect \
669 |             avoidance, or other heuristics."
670 |         Note:
671 |             Could add the ability to continue past the limit of 500
672 |         """
673 | 
674 |         self._check_query(prefix, "Prefix must be specified")
675 | 
676 |         query_params = {
677 |             "list": "prefixsearch",
678 |             "pssearch": prefix,
679 |             "pslimit": ("max" if (results is None or results > 500) else results),
680 |             "psnamespace": 0,
681 |             "psoffset": 0,  # parameterize to skip to later in the list?
682 |         }
683 | 
684 |         raw_results = self.wiki_request(query_params)
685 | 
686 |         self._check_error_response(raw_results, prefix)
687 | 
688 |         return [rec["title"] for rec in raw_results["query"]["prefixsearch"]]
689 | 
690 |     @memoize
691 |     def summary(self, title: str, sentences: int = 0, chars: int = 0, auto_suggest: bool = True, redirect: bool = True):
692 |         """Get the summary for the title in question
693 | 
694 |         Args:
695 |             title (str): Page title to summarize
696 |             sentences (int): Number of sentences to return in summary
697 |             chars (int): Number of characters to return in summary
698 |             auto_suggest (bool): Run auto-suggest on title before summarizing
699 |             redirect (bool): Use page redirect on title before summarizing
700 |         Returns:
701 |             str: The summarized results of the page
702 |         Note:
703 |             Precedence for parameters: sentences then chars; if both are \
704 |             0 then the entire first section is returned
705 |         Note:
706 |             If the page doesn't match the provided title, try setting auto_suggest to `False`"""
707 |         page_info = self.page(title, auto_suggest=auto_suggest, redirect=redirect)
708 |         return page_info.summarize(sentences, chars)
709 | 
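    # Sketch of the lookups above (the titles shown are hypothetical):
    # `opensearch` returns (title, summary, URL) tuples, while `summary`
    # trims by sentences first, then by characters:
    #
    #   >>> wiki.opensearch("washington", results=2)
    #   >>> wiki.summary("Chess", sentences=2)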
710 |     @memoize
711 |     def categorymembers(
712 |         self, category: str, results: int = 10, subcategories: bool = True
713 |     ) -> Union[List[str], Tuple[List[str], List[str]]]:
714 |         """Get information about a category: pages and subcategories
715 | 
716 |         Args:
717 |             category (str): Category name
718 |             results (int): Number of results to return
719 |             subcategories (bool): Include subcategories (**True**) or not (**False**)
720 |         Returns:
721 |             tuple or list: Either a tuple ([pages], [subcategories]) or just the list of pages
722 |         Note:
723 |             Set results to **None** to get all results"""
724 |         self._check_query(category, "Category must be specified")
725 | 
726 |         max_pull = 500
727 |         search_params = {
728 |             "list": "categorymembers",
729 |             "cmprop": "ids|title|type",
730 |             "cmtype": ("page|subcat|file" if subcategories else "page|file"),
731 |             "cmlimit": (min(results, max_pull) if results is not None else max_pull),
732 |             "cmtitle": f"{self.category_prefix}:{category}",
733 |         }
734 |         pages = []
735 |         subcats = []
736 |         returned_results = 0
737 |         finished = False
738 |         last_cont: Dict = {}
739 |         while not finished:
740 |             params = search_params.copy()
741 |             params.update(last_cont)
742 |             raw_res = self.wiki_request(params)
743 | 
744 |             self._check_error_response(raw_res, category)
745 | 
746 |             current_pull = len(raw_res["query"]["categorymembers"])
747 |             for rec in raw_res["query"]["categorymembers"]:
748 |                 if rec["type"] in ("page", "file"):
749 |                     pages.append(rec["title"])
750 |                 elif rec["type"] == "subcat":
751 |                     tmp = rec["title"]
752 |                     if tmp.startswith(self.category_prefix):
753 |                         tmp = tmp[len(self.category_prefix) + 1 :]
754 |                     subcats.append(tmp)
755 | 
756 |             cont = raw_res.get("query-continue", False)
757 |             if cont and "categorymembers" in cont:
758 |                 cont = cont["categorymembers"]
759 |             else:
760 |                 cont = raw_res.get("continue", False)
761 | 
762 |             if cont is False or last_cont == cont:
763 |                 break
764 | 
765 |             returned_results += current_pull
766 |             if results is None or (results - returned_results > 0):
767 |                 last_cont = cont
768 |             else:
769 |                 finished = True
770 | 
771 |             if results is not None and results - returned_results < max_pull:
772 |                 search_params["cmlimit"] = results - returned_results
773 |         # end while loop
774 | 
775 |         return (pages, subcats) if subcategories else pages
776 | 
777 |     def categorytree(self, category: str, depth: int = 5) -> Dict[str, Any]:
778 |         """Generate the category tree for the given categories
779 | 
780 |         Args:
781 |             category (str or list of strings): Category name(s)
782 |             depth (int): Depth to traverse the tree
783 |         Returns:
784 |             dict: Category tree structure
785 |         Note:
786 |             Set depth to **None** to get the whole tree
787 |         Note:
788 |             Return data structure: each subcategory contains the same recursive structure
789 | 
790 |         >>> {
791 |                 'category': {
792 |                     'depth': Number,
793 |                     'links': list,
794 |                     'parent-categories': list,
795 |                     'sub-categories': dict
796 |                 }
797 |             }
798 | 
799 |         .. versionadded:: 0.3.10"""
800 | 
801 |         # make it simple to use either a list or a single category term
802 |         cats = [category] if not isinstance(category, list) else category
803 | 
804 |         self.__category_parameter_verification(cats, depth, category)
805 | 
806 |         results: Dict = {}
807 |         categories: Dict = {}
808 |         links: Dict = {}
809 | 
810 |         for cat in [x for x in cats if x]:
811 |             self.__cat_tree_rec(cat, depth, results, 0, categories, links)
812 |         return results
813 | 
814 |     def page(self, title=None, pageid=None, auto_suggest=True, redirect=True, preload=False):
815 |         """Get a MediaWiki page based on the provided title or pageid
816 | 
817 |         Args:
818 |             title (str): Page title
819 |             pageid (int): MediaWiki page identifier
820 |             auto_suggest (bool): **True:** Allow page title auto-suggest
821 |             redirect (bool): **True:** Follow page redirects
822 |             preload (bool): **True:** Load most page properties
823 |         Raises:
824 |             ValueError: when title is blank or None and no pageid is provided
825 |         Raises:
826 |             :py:func:`mediawiki.exceptions.PageError`: if the page does not exist
827 |         Note:
828 |             Title takes precedence over pageid if both are provided
829 |         Note:
830 |             If the page doesn't match the provided title, try setting auto_suggest to `False`"""
831 |         if (title is None or title.strip() == "") and pageid is None:
832 |             raise ValueError("Either a title or a pageid must be specified")
833 |         if title:
834 |             if auto_suggest:
835 |                 temp_title = self.suggest(title)
836 |                 if temp_title is None:  # page doesn't exist
837 |                     raise PageError(title=title)
838 |                 title = temp_title
839 |             return MediaWikiPage(self, title, redirect=redirect, preload=preload)
840 |         return MediaWikiPage(self, pageid=pageid, preload=preload)
841 | 
842 |     def wiki_request(self, params: Dict[str, Any]) -> Dict[Any, Any]:
843 |         """Make a request to the MediaWiki API using the given search
844 |         parameters
845 | 
846 |         Args:
847 |             params (dict): Request parameters
848 |         Returns:
849 |             dict: A parsed dict of the JSON response
850 |         Note:
851 |             Useful when wanting to query the MediaWiki site for some \
852 |             value that is not part of the wrapper API"""
853 | 
854 |         params["format"] = "json"
855 |         if "action" not in params:
856 |             params["action"] = "query"
857 | 
858 |         limit = self._config.rate_limit
859 |         last_call = self._config._rate_limit_last_call
860 |         if limit and last_call and last_call + self._config.rate_limit_min_wait > datetime.now():
861 |             # call came too quickly for rate-limited API requests; wait
862 |             wait_time = (last_call + self._config.rate_limit_min_wait) - datetime.now()
863 |             time.sleep(wait_time.total_seconds())
864 | 
865 |         req = self._get_response(params)
866 | 
867 |         if self._config.rate_limit:
868 |             self._config._rate_limit_last_call = datetime.now()
869 | 
870 |         return req
871 | 
872 |     # Protected functions
873 |     def _get_site_info(self):
874 |         """Parse out the MediaWiki site information, including the API version and extensions"""
875 | 
876 |         response = self.wiki_request({"meta": 
"siteinfo", "siprop": "extensions|general"}) 877 | 878 | # parse what we need out here! 879 | query = response.get("query", None) 880 | if query is None or query.get("general", None) is None: 881 | raise MediaWikiException("Missing query in response") 882 | 883 | gen = query.get("general", None) 884 | 885 | api_version = gen["generator"].split(" ")[1].split("-")[0] 886 | 887 | major_minor = [int(i) for i in api_version.split(".")] 888 | 889 | self._api_version = tuple(major_minor) 890 | self._api_version_str = ".".join([str(x) for x in self._api_version]) 891 | 892 | # parse the base url out 893 | tmp = gen.get("server", "") 894 | if tmp == "": 895 | raise MediaWikiException("Unable to parse base url") 896 | if tmp.startswith("http://") or tmp.startswith("https://"): 897 | self._base_url = tmp 898 | elif gen["base"].startswith("https:"): 899 | self._base_url = f"https:{tmp}" 900 | else: 901 | self._base_url = f"http:{tmp}" 902 | 903 | self._extensions = [ext["name"] for ext in query["extensions"]] 904 | self._extensions = sorted(list(set(self._extensions))) 905 | 906 | # end _get_site_info 907 | 908 | @staticmethod 909 | def _check_error_response(response, query: str): 910 | """check for default error messages and throw correct exception""" 911 | if "error" in response: 912 | http_error = ["HTTP request timed out.", "Pool queue is full"] 913 | geo_error = [ 914 | "Page coordinates unknown.", 915 | "One of the parameters gscoord, gspage, gsbbox is required", 916 | "Invalid coordinate provided", 917 | ] 918 | err = response["error"]["info"] 919 | if err in http_error: 920 | raise HTTPTimeoutError(query) 921 | if err in geo_error: 922 | raise MediaWikiGeoCoordError(err) 923 | raise MediaWikiException(err) 924 | 925 | @staticmethod 926 | def _check_query(value, message: str): 927 | """check if the query is 'valid'""" 928 | if value is None or value.strip() == "": 929 | raise ValueError(message) 930 | 931 | @staticmethod 932 | def __category_parameter_verification(cats, depth, category): 933 | # parameter verification 934 | if len(cats) == 1 and (cats[0] is None or cats[0] == ""): 935 | msg = ( 936 | "CategoryTree: Parameter 'category' must either " 937 | "be a list of one or more categories or a string; " 938 | f"provided: '{category}'" 939 | ) 940 | raise ValueError(msg) 941 | 942 | if depth is not None and depth < 1: 943 | msg = "CategoryTree: Parameter 'depth' must be either None (for the full tree) or be greater than 0" 944 | raise ValueError(msg) 945 | 946 | def __cat_tree_rec( 947 | self, cat: str, depth: int, tree: Dict[str, Any], level: int, categories: Dict[str, Any], links: Dict[str, Any] 948 | ): 949 | """recursive function to build out the tree""" 950 | tree[cat] = {} 951 | tree[cat]["depth"] = level 952 | tree[cat]["sub-categories"] = {} 953 | tree[cat]["links"] = [] 954 | tree[cat]["parent-categories"] = [] 955 | parent_cats = [] 956 | 957 | if cat not in categories: 958 | tries = 0 959 | while True: 960 | if tries > 10: 961 | raise MediaWikiCategoryTreeError(cat) 962 | try: 963 | pag = self.page(f"{self.category_prefix}:{cat}") 964 | categories[cat] = pag 965 | parent_cats = categories[cat].categories 966 | links[cat] = self.categorymembers(cat, results=None, subcategories=True) 967 | break 968 | except PageError as exc: 969 | raise PageError(f"{self.category_prefix}:{cat}") from exc 970 | except KeyboardInterrupt as exc: 971 | raise exc 972 | except Exception: 973 | tries = tries + 1 974 | # TODO: Should this really sleep? 
975 |                         time.sleep(1)
976 |         else:
977 |             parent_cats = categories[cat].categories
978 | 
979 |         tree[cat]["parent-categories"].extend(parent_cats)
980 |         tree[cat]["links"].extend(links[cat][0])
981 | 
982 |         if depth and level >= depth:
983 |             for ctg in links[cat][1]:
984 |                 tree[cat]["sub-categories"][ctg] = None
985 |         else:
986 |             for ctg in links[cat][1]:
987 |                 self.__cat_tree_rec(
988 |                     ctg,
989 |                     depth,
990 |                     tree[cat]["sub-categories"],
991 |                     level + 1,
992 |                     categories,
993 |                     links,
994 |                 )
995 | 
996 |     def _get_response(self, params: Dict[str, Any]) -> Dict[str, Any]:
997 |         """wrap the call to the requests package"""
998 |         try:
999 |             r = self._session.get(self._config.api_url, params=params, timeout=self._config.timeout)
1000 |             if r.status_code == 403:
1001 |                 raise MediaWikiForbidden(f"{self.api_url} returned a 403 Forbidden; you likely need to log in!")
1002 |             return r.json()
1003 |         except JSONDecodeError:
1004 |             return {}
1005 | 
1006 |     def _post_response(self, params: Dict[str, Any]) -> Dict[str, Any]:
1007 |         """wrap a post call to the requests package"""
1008 |         try:
1009 |             r = self._session.post(self._config.api_url, data=params, timeout=self._config.timeout)
1010 |             if r.status_code == 403:
1011 |                 raise MediaWikiForbidden(f"{self.api_url} returned a 403 Forbidden; you likely need to log in!")
1012 |             return r.json()
1013 |         except JSONDecodeError:
1014 |             return {}
1015 | 
1016 | 
1017 | # end MediaWiki class
1018 | 
--------------------------------------------------------------------------------