├── .codeclimate.yml ├── .deepsource.toml ├── .flake8 ├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ ├── code-quality.yml │ ├── dependabot-approve-updates.yml │ ├── deploy.yml │ ├── release.yml │ ├── scorecard.yml │ ├── stale.yml │ └── tox.yml ├── .gitignore ├── .isort.cfg ├── .mypy.ini ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── .travis.yml ├── API.rst ├── CHANGES.rst ├── CODE_OF_CONDUCT.md ├── DEVELOPMENT.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── SECURITY.md ├── SECURITY.rst ├── conf.py ├── example.py ├── index.rst ├── requirements-build.txt ├── requirements-dev.txt ├── requirements-doc.txt ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── backlinks_test.py ├── categories_test.py ├── categorymembers_test.py ├── extract_errors_test.py ├── extract_html_format_test.py ├── extract_wiki_format_test.py ├── langlinks_test.py ├── links_test.py ├── mock_data.py ├── wikipedia_page_test.py └── wikipedia_test.py ├── tox.ini └── wikipediaapi ├── __init__.py └── api.rst /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | # https://docs.codeclimate.com/docs/getting-started-configuration 2 | engines: 3 | duplication: 4 | enabled: true 5 | config: 6 | languages: 7 | python: 8 | python_version: 3 9 | fixme: 10 | enabled: true 11 | radon: 12 | enabled: true 13 | config: 14 | python_version: 3 15 | pep8: 16 | enabled: true 17 | 18 | ratings: 19 | paths: 20 | - "wikipediaapi/**.py" 21 | - "tests/**.py" 22 | -------------------------------------------------------------------------------- /.deepsource.toml: -------------------------------------------------------------------------------- 1 | # https://docs.deepsource.com/docs/analyzers-python 2 | 3 | version = 1 4 | 5 | test_patterns = [ 6 | 'tests/*' 7 | ] 8 | 9 | exclude_patterns = [ 10 | 11 | ] 12 | 13 | [[analyzers]] 14 | name = 'python' 15 | enabled = true 16 | dependency_file_paths = ["requirements.txt"] 17 | 18 | 
[analyzers.meta] 19 | runtime_version = "3.x.x" 20 | type_checker = "mypy" 21 | max_line_length = 100 22 | skip_doc_coverage = ["module", "magic", "init"] 23 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | extend-ignore = E203, W503 4 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [martin-majlis] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | # patreon: # Replace with a single Patreon username 5 | # open_collective: # Replace with a single Open Collective username 6 | # ko_fi: # Replace with a single Ko-fi username 7 | tidelift: pypi/Wikipedia-API # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | # community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | # liberapay: # Replace with a single Liberapay username 10 | # issuehunt: # Replace with a single IssueHunt username 11 | # lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | # polar: # Replace with a single Polar username 13 | buy_me_a_coffee: martin.majlis # Replace with a single Buy Me a Coffee username 14 | thanks_dev: gh/martin-majlis # Replace with a single thanks.dev username 15 | # custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package 
manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | - package-ecosystem: "github-actions" # See documentation for possible values 13 | directory: "/" # Location of package manifests 14 | schedule: 15 | interval: "weekly" 16 | -------------------------------------------------------------------------------- /.github/workflows/code-quality.yml: -------------------------------------------------------------------------------- 1 | name: Check code quality 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | code-quality: 13 | name: Code quality 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Set up Python 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: 3.11 21 | cache: "pip" 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install -U pip 25 | make requirements-dev 26 | make requirements 27 | - name: Install pre-commit hooks 28 | run: | 29 | pre-commit install 30 | - name: Run pre-commit 31 | run: | 32 | pre-commit run -a 33 | 34 | run-tests: 35 | name: Tests on ${{ matrix.os }} and ${{ matrix.python }} 36 | needs: [code-quality] 37 | runs-on: ${{ matrix.os }} 38 | strategy: 39 | matrix: 40 | os: [ubuntu-latest, windows-latest, macos-latest] 41 | python: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.9", "pypy3.10"] 42 | steps: 43 | - uses: actions/checkout@v4 44 | - uses: actions/setup-python@v5 45 | name: Install Python 46 | with: 47 | python-version: ${{ matrix.python }} 48 | cache: "pip" 49 | - name: Install dependencies 50 | run: | 51 | python -m pip install -U pip 52 | make 
requirements-dev 53 | make requirements 54 | - name: Code coverage 55 | run: | 56 | make run-tests 57 | 58 | code-coverage: 59 | name: Code coverage 60 | needs: [run-tests, code-quality] 61 | runs-on: ubuntu-latest 62 | steps: 63 | - uses: actions/checkout@v4 64 | - name: Set up Python 65 | uses: actions/setup-python@v5 66 | with: 67 | python-version: 3.11 68 | cache: "pip" 69 | - name: Install dependencies 70 | run: | 71 | python -m pip install -U pip 72 | make requirements-dev 73 | make requirements 74 | - name: Code coverage 75 | run: | 76 | make run-coverage 77 | - name: Upload coverage reports to Codecov 78 | uses: codecov/codecov-action@v5.4.3 79 | with: 80 | token: ${{ secrets.CODECOV_TOKEN }} 81 | slug: martin-majlis/Wikipedia-API 82 | -------------------------------------------------------------------------------- /.github/workflows/dependabot-approve-updates.yml: -------------------------------------------------------------------------------- 1 | name: Dependabot Pull Request 2 | on: pull_request 3 | 4 | permissions: 5 | contents: write 6 | pull-requests: write 7 | 8 | jobs: 9 | dependabot: 10 | runs-on: ubuntu-latest 11 | if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'martin-majlis/Wikipedia-API' 12 | steps: 13 | - name: Fetch Dependabot metadata 14 | id: dependabot-metadata 15 | uses: dependabot/fetch-metadata@v2 16 | with: 17 | github-token: ${{ secrets.GITHUB_TOKEN }} 18 | - name: Approve patch and minor updates 19 | if: ${{steps.dependabot-metadata.outputs.update-type == 'version-update:semver-patch' || steps.dependabot-metadata.outputs.update-type == 'version-update:semver-minor'}} 20 | run: | 21 | gh pr review $PR_URL --approve -b "I'm **approving** this pull request because **it includes a patch or minor update**" 22 | gh pr merge --auto --squash "$PR_URL" 23 | env: 24 | PR_URL: ${{github.event.pull_request.html_url}} 25 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 26 | - name: Approve major updates of 
development dependencies 27 | if: ${{steps.dependabot-metadata.outputs.update-type == 'version-update:semver-major' && steps.dependabot-metadata.outputs.dependency-type == 'direct:development'}} 28 | run: | 29 | gh pr review $PR_URL --approve -b "I'm **approving** this pull request because **it includes a major update of a dependency used only in development**" 30 | gh pr merge --auto --squash "$PR_URL" 31 | env: 32 | PR_URL: ${{github.event.pull_request.html_url}} 33 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 34 | - name: Comment on major updates of non-development dependencies 35 | if: ${{steps.dependabot-metadata.outputs.update-type == 'version-update:semver-major' && steps.dependabot-metadata.outputs.dependency-type == 'direct:production'}} 36 | run: | 37 | gh pr comment $PR_URL --body "I'm **not approving** this PR because **it includes a major update of a dependency used in production**" 38 | gh pr edit $PR_URL --add-label "requires-manual-qa" 39 | env: 40 | PR_URL: ${{github.event.pull_request.html_url}} 41 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 42 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Build and upload to PyPI 2 | 3 | # Build on every branch push, tag push, and pull request change: 4 | # on: [push, pull_request] 5 | on: [push] 6 | # Alternatively, to publish when a (published) GitHub Release is created, use the following: 7 | # on: 8 | # push: 9 | # pull_request: 10 | # release: 11 | # types: 12 | # - published 13 | 14 | jobs: 15 | build_wheels: 16 | name: Build wheels on ${{ matrix.os }} and ${{ matrix.python }} 17 | runs-on: ${{ matrix.os }} 18 | strategy: 19 | matrix: 20 | os: [ubuntu-latest, windows-latest, macos-latest] 21 | python: ["3.9", "3.10", "3.11", "3.12", "pypy3.9", "pypy3.10"] 22 | 23 | steps: 24 | - uses: actions/checkout@v4 25 | 26 | - uses: actions/setup-python@v5 27 | name: 
Install Python 28 | with: 29 | python-version: ${{ matrix.python }} 30 | cache: "pip" 31 | 32 | - name: Install Python 3 33 | if: runner.os == 'Windows' 34 | run: | 35 | choco install python3 -f -y 36 | - name: Install Visual Studio 2019 37 | if: runner.os == 'Windows' 38 | run: | 39 | choco install visualstudio2019buildtools 40 | - name: Install dependencies 41 | run: | 42 | python -m pip install --upgrade pip 43 | make requirements 44 | make requirements-build 45 | pip install --upgrade setuptools 46 | - name: Build wheels 47 | run: | 48 | pip wheel -w wheelhouse . 49 | env: 50 | CIBW_SKIP: cp27-win* 51 | - uses: actions/upload-artifact@v4 52 | with: 53 | path: wheelhouse/Wikipedia*.whl 54 | name: Wikipedia-api-${{ matrix.os }}-${{ matrix.python }}.whl 55 | 56 | build_sdist: 57 | name: Build source dist on ${{ matrix.os }} and ${{ matrix.python }} 58 | runs-on: ${{ matrix.os }} 59 | strategy: 60 | matrix: 61 | os: [ubuntu-latest, windows-latest, macos-latest] 62 | python: ["3.9", "3.10", "3.11", "3.12", "pypy3.9", "pypy3.10"] 63 | steps: 64 | - uses: actions/checkout@v4 65 | 66 | - uses: actions/setup-python@v5 67 | name: Install Python 68 | with: 69 | python-version: ${{ matrix.python }} 70 | cache: "pip" 71 | - name: Install dependencies 72 | run: | 73 | python -m pip install --upgrade pip 74 | make requirements 75 | make requirements-build 76 | pip install --upgrade setuptools 77 | - name: Build sdist 78 | run: python setup.py sdist 79 | 80 | - uses: actions/upload-artifact@v4 81 | with: 82 | path: dist/Wikipedia*.tar.gz 83 | name: Wikipedia-api-${{ matrix.os }}-${{ matrix.python }}.tar.gz 84 | 85 | upload_pypi: 86 | needs: [build_wheels, build_sdist] 87 | runs-on: ubuntu-latest 88 | # upload to PyPI on every tag starting with 'v' 89 | if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') 90 | # alternatively, to publish when a GitHub Release is created, use the following rule: 91 | # if: github.event_name == 'release' && 
github.event.action == 'published' 92 | environment: 93 | name: pypi 94 | url: https://pypi.org/p/Wikipedia-API 95 | permissions: 96 | id-token: write 97 | steps: 98 | - name: Download Artifacts 99 | uses: actions/download-artifact@v4 100 | with: 101 | pattern: Wikipedia-api-* 102 | path: downloaded 103 | - name: Display structure of downloaded files 104 | run: ls -lR downloaded 105 | - name: Prepare distribution structure 106 | run: | 107 | mkdir -p dist 108 | for f in $( find downloaded -type f ); do \ 109 | echo ${f}; \ 110 | d=$( dirname ${f} | rev | cut -d'/' -f 1 | rev ); \ 111 | echo ${d}; \ 112 | cp -v ${f} dist/${d}; \ 113 | done 114 | - name: Display structure of dist files 115 | run: ls -lR dist 116 | - uses: pypa/gh-action-pypi-publish@v1.12.4 117 | with: 118 | verbose: true 119 | print-hash: true 120 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | # GitHub recommends pinning actions to a commit SHA. 7 | # To get a newer version, you will need to update the SHA. 8 | # You can also reference a tag or branch, but the action may change without warning. 
9 | 10 | name: Upload Python Package 11 | 12 | on: 13 | release: 14 | types: 15 | - published 16 | 17 | permissions: 18 | contents: read 19 | 20 | jobs: 21 | code-quality: 22 | name: Code quality 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v4 26 | - name: Set up Python 27 | uses: actions/setup-python@v5 28 | with: 29 | python-version: 3.11 30 | cache: "pip" 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install -U pip 34 | make requirements-dev 35 | make requirements 36 | - name: Install pre-commit hooks 37 | run: | 38 | pre-commit install 39 | - name: Run pre-commit 40 | run: | 41 | pre-commit run -a 42 | 43 | run-tests: 44 | name: Tests on ${{ matrix.os }} and ${{ matrix.python }} 45 | needs: [code-quality] 46 | runs-on: ${{ matrix.os }} 47 | strategy: 48 | matrix: 49 | os: [ubuntu-latest, windows-latest, macos-latest] 50 | python: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.9", "pypy3.10"] 51 | steps: 52 | - uses: actions/checkout@v4 53 | - uses: actions/setup-python@v5 54 | name: Install Python 55 | with: 56 | python-version: ${{ matrix.python }} 57 | cache: "pip" 58 | - name: Install dependencies 59 | run: | 60 | python -m pip install -U pip 61 | make requirements-dev 62 | make requirements 63 | - name: Run tests 64 | run: | 65 | make run-tests 66 | 67 | release-build: 68 | runs-on: ubuntu-latest 69 | needs: 70 | - run-tests 71 | 72 | steps: 73 | - uses: actions/checkout@v4 74 | 75 | - uses: actions/setup-python@v5 76 | with: 77 | python-version: "3.11" 78 | 79 | - name: Install dependencies 80 | run: | 81 | python -m pip install --upgrade pip 82 | make requirements 83 | make requirements-build 84 | 85 | - name: Build release distributions 86 | run: | 87 | make build-package 88 | 89 | - name: Upload distributions 90 | uses: actions/upload-artifact@v4 91 | with: 92 | name: release-dists 93 | path: dist/ 94 | 95 | pypi-publish: 96 | runs-on: ubuntu-latest 97 | 98 | needs: 99 | - release-build 100 | 101 | 
permissions: 102 | # IMPORTANT: this permission is mandatory for trusted publishing 103 | id-token: write 104 | 105 | # Dedicated environments with protections for publishing are strongly recommended. 106 | environment: 107 | name: release 108 | # OPTIONAL: uncomment and update to include your PyPI project URL in the deployment status: 109 | # url: https://pypi.org/p/YOURPROJECT 110 | 111 | steps: 112 | - name: Retrieve release distributions 113 | uses: actions/download-artifact@v4 114 | with: 115 | name: release-dists 116 | path: dist/ 117 | 118 | - name: Publish release distributions to PyPI 119 | uses: pypa/gh-action-pypi-publish@release/v1 120 | -------------------------------------------------------------------------------- /.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. They are provided 2 | # by a third-party and are governed by separate terms of service, privacy 3 | # policy, and support documentation. 4 | 5 | name: Scorecard supply-chain security 6 | on: 7 | # For Branch-Protection check. Only the default branch is supported. See 8 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection 9 | branch_protection_rule: 10 | # To guarantee Maintained check is occasionally updated. See 11 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained 12 | schedule: 13 | - cron: "33 18 * * 0" 14 | push: 15 | branches: ["master"] 16 | 17 | # Declare default permissions as read only. 18 | permissions: read-all 19 | 20 | jobs: 21 | analysis: 22 | name: Scorecard analysis 23 | runs-on: ubuntu-latest 24 | permissions: 25 | # Needed to upload the results to code-scanning dashboard. 26 | security-events: write 27 | # Needed to publish results and get a badge (see publish_results below). 28 | id-token: write 29 | # Uncomment the permissions below if installing in a private repository. 
30 | # contents: read 31 | # actions: read 32 | 33 | steps: 34 | - name: "Checkout code" 35 | uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 36 | with: 37 | persist-credentials: false 38 | 39 | - name: "Run analysis" 40 | uses: ossf/scorecard-action@05b42c624433fc40578a4040d5cf5e36ddca8cde # v2.4.2 41 | with: 42 | results_file: results.sarif 43 | results_format: sarif 44 | # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: 45 | # - you want to enable the Branch-Protection check on a *public* repository, or 46 | # - you are installing Scorecard on a *private* repository 47 | # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional. 48 | # repo_token: ${{ secrets.SCORECARD_TOKEN }} 49 | 50 | # Public repositories: 51 | # - Publish results to OpenSSF REST API for easy access by consumers 52 | # - Allows the repository to include the Scorecard badge. 53 | # - See https://github.com/ossf/scorecard-action#publishing-results. 54 | # For private repositories: 55 | # - `publish_results` will always be set to `false`, regardless 56 | # of the value entered here. 57 | publish_results: true 58 | 59 | # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF 60 | # format to the repository Actions tab. 61 | - name: "Upload artifact" 62 | uses: actions/upload-artifact@97a0fba1372883ab732affbe8f94b823f91727db # v3.pre.node20 63 | with: 64 | name: SARIF file 65 | path: results.sarif 66 | retention-days: 5 67 | 68 | # Upload the results to GitHub's code scanning dashboard (optional). 
# Commenting out will disable upload of results to your repo's Code Scanning dashboard 70 | - name: "Upload to code-scanning" 71 | uses: github/codeql-action/upload-sarif@v3 72 | with: 73 | sarif_file: results.sarif 74 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: "Close stale issues and PRs" 2 | on: 3 | schedule: 4 | - cron: "30 1 * * *" 5 | 6 | permissions: 7 | contents: write # only for delete-branch option 8 | issues: write 9 | pull-requests: write 10 | 11 | jobs: 12 | stale: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/stale@v9 16 | with: 17 | stale-issue-message: "This issue is stale because it has been open 45 days with no activity. Remove stale label or comment or this will be closed in 10 days." 18 | stale-pr-message: "This PR is stale because it has been open 45 days with no activity. Remove stale label or comment or this will be closed in 10 days." 19 | close-issue-message: "This issue was closed because it has been stalled for 10 days with no activity." 20 | close-pr-message: "This PR was closed because it has been stalled for 10 days with no activity." 
21 | days-before-issue-stale: 45 22 | days-before-pr-stale: 45 23 | days-before-issue-close: 10 24 | days-before-pr-close: 10 25 | delete-branch: true 26 | ascending: true 27 | operations-per-run: 300 28 | -------------------------------------------------------------------------------- /.github/workflows/tox.yml: -------------------------------------------------------------------------------- 1 | name: Tests - TOX 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.9", "pypy3.10"] 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Setup Python 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: ${{ matrix.python }} 18 | cache: "pip" 19 | - name: Install Tox and any other packages 20 | run: make requirements-dev 21 | - name: Run Tox 22 | # Run tox using the version of Python in `PATH` 23 | run: tox --version; tox -e py 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | coverage.json 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # Eclipse 105 | .project 106 | .pydevproject 107 | .settings/ 108 | 109 | # Documentation 110 | _build/ 111 | pypi-doc.html 112 | 113 | 114 | # Created by https://www.gitignore.io/api/pycharm+all,jetbrains+all 115 | 116 | ### JetBrains+all ### 117 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 118 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 119 | 120 | # User-specific stuff: 121 | .idea/**/workspace.xml 122 | .idea/**/tasks.xml 123 | .idea/dictionaries 124 | 125 | # Sensitive or high-churn files: 126 | .idea/**/dataSources/ 127 | .idea/**/dataSources.ids 128 | .idea/**/dataSources.xml 129 | .idea/**/dataSources.local.xml 130 | .idea/**/sqlDataSources.xml 131 | .idea/**/dynamic.xml 132 | .idea/**/uiDesigner.xml 133 | 134 | # Gradle: 135 | .idea/**/gradle.xml 136 | .idea/**/libraries 
137 | 138 | # CMake 139 | cmake-build-debug/ 140 | 141 | # Mongo Explorer plugin: 142 | .idea/**/mongoSettings.xml 143 | 144 | ## File-based project format: 145 | *.iws 146 | 147 | ## Plugin-specific files: 148 | 149 | # IntelliJ 150 | /out/ 151 | 152 | # mpeltonen/sbt-idea plugin 153 | .idea_modules/ 154 | 155 | # JIRA plugin 156 | atlassian-ide-plugin.xml 157 | 158 | # Cursive Clojure plugin 159 | .idea/replstate.xml 160 | 161 | # Ruby plugin and RubyMine 162 | /.rakeTasks 163 | 164 | # Crashlytics plugin (for Android Studio and IntelliJ) 165 | com_crashlytics_export_strings.xml 166 | crashlytics.properties 167 | crashlytics-build.properties 168 | fabric.properties 169 | 170 | ### JetBrains+all Patch ### 171 | # Ignores the whole .idea folder and all .iml files 172 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 173 | 174 | .idea/ 175 | 176 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 177 | 178 | *.iml 179 | modules.xml 180 | .idea/misc.xml 181 | *.ipr 182 | 183 | ### PyCharm+all ### 184 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 185 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 186 | 187 | # User-specific stuff: 188 | 189 | # Sensitive or high-churn files: 190 | 191 | # Gradle: 192 | 193 | # CMake 194 | 195 | # Mongo Explorer plugin: 196 | 197 | ## File-based project format: 198 | 199 | ## Plugin-specific files: 200 | 201 | # IntelliJ 202 | 203 | # mpeltonen/sbt-idea plugin 204 | 205 | # JIRA plugin 206 | 207 | # Cursive Clojure plugin 208 | 209 | # Ruby plugin and RubyMine 210 | 211 | # Crashlytics plugin (for Android Studio and IntelliJ) 212 | 213 | ### PyCharm+all Patch ### 214 | # Ignores the whole .idea folder and all .iml files 215 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 216 | 
217 | 218 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 219 | 220 | 221 | 222 | # End of https://www.gitignore.io/api/pycharm+all,jetbrains+all 223 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | profile = google 3 | multi_line_output = 3 4 | include_trailing_comma = True -------------------------------------------------------------------------------- /.mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.10 3 | warn_return_any = True 4 | warn_unused_configs = True -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | 4 | exclude: original 5 | 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v4.4.0 9 | hooks: 10 | - id: check-case-conflict 11 | - id: check-merge-conflict 12 | - id: check-docstring-first 13 | - id: check-executables-have-shebangs 14 | - id: check-yaml 15 | - id: trailing-whitespace 16 | - id: end-of-file-fixer 17 | # - id: double-quote-string-fixer 18 | - id: check-yaml 19 | # - id: check-added-large-files 20 | - id: requirements-txt-fixer 21 | # - id: name-tests-test 22 | # args: ["--django"] 23 | 24 | - repo: https://github.com/pre-commit/mirrors-prettier 25 | rev: "v2.7.1" 26 | hooks: 27 | - id: prettier 28 | 29 | - repo: local 30 | hooks: 31 | - id: isort 32 | name: Run isort 33 | entry: isort 34 | language: python 35 | args: ["--profile", "google", "--filter-files"] 36 | types: [file, python] 37 | - id: black 38 | name: Run black 39 | entry: black 40 | language: python 41 | types: [file, python] 42 | - 
id: flake8 43 | name: Run flake8 44 | entry: flake8 45 | language: python 46 | additional_dependencies: [flake8-bugbear] 47 | types: [file, python] 48 | - id: mypy 49 | name: Run mypy 50 | entry: mypy 51 | language: python 52 | types: [file, python] 53 | - id: pyupgrade 54 | name: Run pyupgrade 55 | entry: pyupgrade 56 | language: python 57 | args: ["--py39-plus"] 58 | types: [file, python] 59 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-lts-latest" 5 | tools: 6 | python: "latest" 7 | 8 | sphinx: 9 | configuration: conf.py 10 | 11 | # Optional but recommended, declare the Python requirements required 12 | # to build your documentation 13 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 14 | python: 15 | install: 16 | - requirements: requirements-doc.txt 17 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # https://docs.codeclimate.com/v1.0/docs/travis-ci-test-coverage 2 | env: 3 | global: 4 | - CC_TEST_REPORTER_ID=d62b9e265b625e28f256147e9636d8fc1abe9f540d4fc82f9f8976171676cec4 5 | 6 | language: python 7 | python: 8 | - "3.9" 9 | - "3.10" 10 | - "3.11" 11 | - "3.12" 12 | install: 13 | - pip install -r requirements.txt 14 | - pip install coverage 15 | - pip install coveralls 16 | before_script: 17 | - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter 18 | - chmod +x ./cc-test-reporter 19 | - ./cc-test-reporter before-build 20 | script: 21 | - make run-coverage 22 | after_script: 23 | - ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT 24 | - coveralls 25 | deploy: 26 | provider: pypi 27 | skip_existing: true 28 | user: martin.majlis 29 | password: 30 | 
secure: GwW600dya+08B+vTpvMMzAannK880HSV/lSqedn3DjxOUSJTRAK0GzhG9IOYDfY4mR/1UYmklfYocjvOH/SBKEo3kfV5k6mhNYp0g2/Jem2ZOq5XyNKNCH3xuz4c7UlSA6CgRu0nzaZvACdsXA3jLJPFXBJkAJvZ4hQL5mwxurc+OeyPNTMRfgT6qB0YrZFtCW8E+rcM2Yn4HwWImDw7OaHm324wEXKTJGmeEn/x4/ZJropm8eTvNsupigp78bJGw3DdhlC2T+JM8ZtPpNOOfoP9K5OA0j5ckSWDHdQ1DVCh7kySPetrvGuWvwKQt4peAU4/03COcQzQJLf0v1I6kflTGSzRYGT9GskD/srt89znUj1MeYmR4W+nM+MqK+KUXvd2HgO8YozpngiEjfgox8VkOh7lJ/zuZNf0uBPxBWiaoYQkxN1gZ2PbqySYebpIDiVoiSiZtyxkEREEXyEoEnCcoZ9CKh90CiOXEdCCqPVFMq+Xky/AofdqCgYgJUXASB01aqUxfM6tPslBNVHn4KJSmKLrYjX9OcZhCcRKDyC+hVFnLFqsBA7ng8gjUA7itQZDmchBXqCc6PzsD3KFKGjqHFoafRGjX3pNSlTDvQojwZk6WPiJrq0llFAGs84sqJOlA7j5ofeF1XZwGLSl2SMznqFFDm57Wx+sg5p9iXs= 31 | on: 32 | tags: true 33 | after_success: 34 | - CODECLIMATE_REPO_TOKEN=d62b9e265b625e28f256147e9636d8fc1abe9f540d4fc82f9f8976171676cec4 codeclimate-test-reporter 35 | -------------------------------------------------------------------------------- /API.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | Wikipedia 5 | --------- 6 | * ``__init__(user_agent: str, language='en', variant: Optional[str] = None, extract_format=ExtractFormat.WIKI, headers: Optional[Dict[str, Any]] = None, extra_api_params: Optional[dict[str, Any]] = None, **request_kwargs)`` 7 | * ``page(title)`` 8 | 9 | WikipediaPage 10 | ------------- 11 | * ``exists()`` 12 | * ``pageid`` 13 | * ``title`` - title 14 | * ``summary`` - summary of the page 15 | * ``text`` - returns text of the page 16 | * ``sections`` - list of all sections (list of ``WikipediaPageSection``) 17 | * ``langlinks`` - language links to other languages ({lang: ``WikipediaLangLink``}) 18 | * ``section_by_title(name)`` - finds last section by title (``WikipediaPageSection``) 19 | * ``sections_by_title(name)`` - finds all sections by title (``WikipediaPageSection``) 20 | * ``links`` - links to other pages ({title: ``WikipediaPage``}) 21 | * ``categories`` - all categories ({title: 
``WikipediaPage``}) 22 | * ``displaytitle`` 23 | * ``varianttitles`` 24 | * ``canonicalurl`` 25 | * ``ns`` 26 | * ``contentmodel`` 27 | * ``pagelanguage`` 28 | * ``pagelanguagehtmlcode`` 29 | * ``pagelanguagedir`` 30 | * ``touched`` 31 | * ``lastrevid`` 32 | * ``length`` 33 | * ``protection`` 34 | * ``restrictiontypes`` 35 | * ``watchers`` 36 | * ``notificationtimestamp`` 37 | * ``talkid`` 38 | * ``fullurl`` 39 | * ``editurl`` 40 | * ``readable`` 41 | * ``preload`` 42 | 43 | 44 | WikipediaPageSection 45 | -------------------- 46 | * ``title`` 47 | * ``level`` 48 | * ``text`` 49 | * ``sections`` 50 | * ``section_by_title(title)`` 51 | 52 | ExtractFormat 53 | ------------- 54 | * ``WIKI`` 55 | * ``HTML`` 56 | -------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | 0.8.1 5 | ----- 6 | 7 | * More user friendly error messages to make ChatGPT users happy 8 | 9 | 0.8.0 10 | ----- 11 | 12 | * Allow specifying language variant - `Issue 209`_ 13 | * Allow passing additional parameters for API calls. 14 | * This breaks the API since `variant` is now the third parameter. 15 | 16 | .. _Issue 209: https://github.com/martin-majlis/Wikipedia-API/issues/209 17 | 18 | 0.7.3 19 | ----- 20 | 21 | * Regenerate stable documentation 22 | 23 | 0.7.2 24 | ----- 25 | 26 | * Add support for Python 3.13 27 | * Regenerate stable documentation 28 | 29 | 0.7.1 30 | ----- 31 | 32 | * Drop support for Python 3.8 33 | 34 | 0.6.9 35 | ----- 36 | 37 | * Update dependencies 38 | * Add tests for more platforms 39 | 40 | 0.6.0 41 | ----- 42 | 43 | * Make user agent mandatory - `Issue 63`_ 44 | * This breaks the API since `user_agent` is now the first parameter. 45 | 46 | .. 
_Issue 63: https://github.com/martin-majlis/Wikipedia-API/issues/63 47 | 48 | 49 | 0.5.8 50 | ----- 51 | 52 | * Adds support for retrieving all sections with given name - `Issue 39`_ 53 | 54 | .. _Issue 39: https://github.com/martin-majlis/Wikipedia-API/issues/39 55 | 56 | 0.5.4 57 | ----- 58 | 59 | * Namespace could be arbitrary integer - `Issue 29`_ 60 | 61 | .. _Issue 29: https://github.com/martin-majlis/Wikipedia-API/issues/29 62 | 63 | 64 | 0.5.3 65 | ----- 66 | 67 | * Adds persistent HTTP connection - `Issue 26`_ 68 | * Downloading 50 pages reduced from 13s to 8s => 40% speed up 69 | 70 | .. _Issue 26: https://github.com/martin-majlis/Wikipedia-API/issues/26 71 | 72 | 73 | 0.5.2 74 | ----- 75 | 76 | * Adds namespaces 102 - 105 - `Issue 24`_ 77 | 78 | .. _Issue 24: https://github.com/martin-majlis/Wikipedia-API/issues/24 79 | 80 | 0.5.1 81 | ----- 82 | 83 | * Adds tox for testing different Python versions 84 | 85 | 0.5.0 86 | ----- 87 | 88 | * Allows modifying API call parameters 89 | * Fixes `Issue 16`_ - hidden categories 90 | * Fixes `Issue 21`_ - summary extraction 91 | 92 | .. _Issue 16: https://github.com/martin-majlis/Wikipedia-API/issues/16 93 | .. _Issue 21: https://github.com/martin-majlis/Wikipedia-API/issues/21 94 | 95 | 96 | 0.4.5 97 | ----- 98 | 99 | * Handles missing sections correctly 100 | * Fixes `Issue 20`_ 101 | 102 | .. _Issue 20: https://github.com/martin-majlis/Wikipedia-API/issues/20 103 | 104 | 105 | 0.4.4 106 | ----- 107 | * Uses HTTPS directly instead of HTTP to avoid redirect 108 | 109 | 0.4.3 110 | ----- 111 | * Correctly extracts text from pages without sections 112 | * Adds support for quoted page titles 113 | 114 | .. 
code:: python 115 | 116 | api = wikipediaapi.Wikipedia( 117 | language='hi', 118 | ) 119 | python = api.article( 120 | title='%E0%A4%AA%E0%A4%BE%E0%A4%87%E0%A4%A5%E0%A4%A8', 121 | unquote=True, 122 | ) 123 | print(python.summary) 124 | 125 | 0.4.2 126 | ----- 127 | * Adds support for Python 3.4 by not using f-strings 128 | 129 | 0.4.1 130 | ----- 131 | * Uses code style enforced by flake8 132 | * Increased code coverage 133 | 134 | 0.4.0 135 | ----- 136 | * Uses type annotations => minimal requirement is now Python 3.5 137 | * Adds possibility to use more parameters for `request`_. For example: 138 | 139 | .. code:: python 140 | 141 | api = wikipediaapi.Wikipedia( 142 | language='en', 143 | proxies={'http': 'http://localhost:1234'} 144 | ) 145 | 146 | * Extends documentation 147 | 148 | .. _request: http://docs.python-requests.org/en/master/api/#requests.request 149 | 150 | 0.3.4 151 | ----- 152 | * Adds support for `property Categorymembers`_ 153 | * Adds property ``text`` for retrieving complete text of the page 154 | 155 | .. _property Categorymembers: https://www.mediawiki.org/wiki/API:Categorymembers 156 | 157 | 0.3.3 158 | ----- 159 | * Added support for `request timeout`_ 160 | * Add header: Accept-Encoding: gzip 161 | 162 | .. _request timeout: https://github.com/martin-majlis/Wikipedia-API/issues/1 163 | 164 | 0.3.2 165 | ----- 166 | * Added support for `property Categories`_ 167 | 168 | .. 
_property Categories: https://www.mediawiki.org/wiki/API:Categories 169 | 170 | 0.3.1 171 | ----- 172 | * Removing ``WikipediaLangLink`` 173 | * Page keeps track of its own language, so it's easier to jump between different translations of the same page 174 | 175 | 0.3.0 176 | ----- 177 | * Rename directory from ``wikipedia`` to ``wikipediaapi`` to avoid collisions 178 | 179 | 0.2.4 180 | ----- 181 | * Handle redirects properly 182 | 183 | 0.2.3 184 | ----- 185 | * Usage method ``page`` instead of ``article`` in ``Wikipedia`` 186 | 187 | 0.2.2 188 | ----- 189 | * Added support for `property Links`_ 190 | 191 | .. _property Links: https://www.mediawiki.org/wiki/API:Links 192 | 193 | 0.2.1 194 | ----- 195 | * Added support for `property Langlinks`_ 196 | 197 | .. _property Langlinks: https://www.mediawiki.org/wiki/API:Langlinks 198 | 199 | 0.2.0 200 | ----- 201 | * Use properties instead of functions 202 | * Added support for `property Info`_ 203 | 204 | .. _property Info: https://www.mediawiki.org/wiki/API:Info 205 | 206 | 0.1.6 207 | ----- 208 | * Support for extracting texts with HTML markdown 209 | * Added initial version of unit tests 210 | 211 | 0.1.4 212 | ----- 213 | * It's possible to extract summary and sections of the page 214 | * Added support for `property Extracts`_ 215 | 216 | .. 
_property Extracts: https://www.mediawiki.org/wiki/Extension:TextExtracts#API 217 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | - Using welcoming and inclusive language 12 | - Being respectful of differing viewpoints and experiences 13 | - Gracefully accepting constructive criticism 14 | - Focusing on what is best for the community 15 | - Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | - The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | - Trolling, insulting/derogatory comments, and personal or political attacks 21 | - Public or private harassment 22 | - Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | - Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at martin@majlis.cz. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /DEVELOPMENT.rst: -------------------------------------------------------------------------------- 1 | Development 2 | =========== 3 | 4 | Makefile targets 5 | ---------------- 6 | * ``make release`` - based on version specified in ``wikipedia/__init__.py`` creates new release as well as git tag 7 | * ``make run-tests`` - run unit tests 8 | * ``make run-coverage`` - run code coverage 9 | * ``make pypi-html`` - generates single HTML documentation into ``pypi-doc.html`` 10 | * ``make html`` - generates HTML documentation similar to RTFD into folder ``_build/html/`` 11 | * ``make requirements`` - install requirements 12 | * ``make requirements-dev`` - install development requirements 13 | 14 | Usage Statistics 15 | ---------------- 16 | 17 | * `PIP Downloads`_ 18 | 19 | .. _PIP Downloads: https://pypistats.org/packages/wikipedia-api 20 | 21 | 22 | Underlying API 23 | -------------- 24 | 25 | * `API - HP`_ 26 | * `Module - Parse`_ 27 | * `Module - Query`_ 28 | 29 | .. _API - HP: https://www.mediawiki.org/wiki/API:Main_page 30 | .. _Module - Parse: https://en.wikipedia.org/w/api.php?action=help&modules=parse 31 | .. 
_Module - Query: https://en.wikipedia.org/w/api.php?action=help&modules=query 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Martin Majlis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include CHANGES.rst 3 | include README.rst 4 | include DEVELOPMENT.rst 5 | include API.rst 6 | recursive-include wikipediaapi *.py 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python3 -msphinx 7 | SPHINXPROJ = Wikipedia-API 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | pypi-html: 18 | python3 setup.py --long-description | rst2html.py > pypi-doc.html 19 | echo file://$$( pwd )/pypi-doc.html 20 | 21 | run-pre-commit: 22 | pre-commit run -a 23 | 24 | run-tests: 25 | python3 -m unittest discover tests/ '*test.py' 26 | 27 | run-type-check: 28 | mypy ./wikipediaapi 29 | 30 | run-flake8: 31 | flake8 --max-line-length=100 wikipediaapi tests 32 | 33 | run-tox: 34 | tox 35 | 36 | run-coverage: 37 | coverage run --source=wikipediaapi -m unittest discover tests/ '*test.py' 38 | coverage report -m 39 | coverage xml 40 | 41 | run-example: 42 | ./example.py 43 | 44 | requirements-all: requirements requirements-dev requirements-doc 45 | 46 | requirements: 47 | pip install -r requirements.txt 48 | 49 | requirements-dev: 50 | pip install -r requirements-dev.txt 51 | 52 | requirements-doc: 53 | pip install -r requirements-doc.txt 54 | 55 | requirements-build: 56 | pip install -r requirements-build.txt 57 | 58 | pre-release-check: run-pre-commit run-type-check run-flake8 run-coverage pypi-html run-tox
run-example 59 | echo "Pre-release check was successful" 60 | 61 | release: pre-release-check 62 | if [ "x$(MSG)" = "x" -o "x$(VERSION)" = "x" ]; then \ 63 | echo "Use make release MSG='some msg' VERSION='1.2.3'"; \ 64 | exit 1; \ 65 | fi; \ 66 | version=`grep __version__ wikipediaapi/__init__.py | sed -r 's/.*= \( *(.*), *(.*), *(.*)\)/\1.\2.\3/'`; \ 67 | if [ "x$$version" = "x" ]; then \ 68 | echo "Unable to extract version"; \ 69 | exit 1; \ 70 | fi; \ 71 | echo "Current version: $$version"; \ 72 | as_number() { \ 73 | total=0; \ 74 | for p in `echo $$1 | tr "." "\n"`; do \ 75 | total=$$(( $$total * 1000 + $$p )); \ 76 | done; \ 77 | echo $$total; \ 78 | }; \ 79 | number_dots=`echo -n $(VERSION) | sed -r 's/[^.]//g' | wc -c`; \ 80 | if [ ! "$${number_dots}" = "2" ]; then \ 81 | echo "Version has to have format X.Y.Z"; \ 82 | echo "Specified version is $(VERSION)"; \ 83 | exit 2; \ 84 | fi; \ 85 | number_version=`as_number $$version`; \ 86 | number_VERSION=`as_number $(VERSION);`; \ 87 | if [ $$number_version -ge $$number_VERSION ]; then \ 88 | echo -n "Specified version $(VERSION) ($$number_VERSION) is lower than"; \ 89 | echo "current version $$version ($$number_version)"; \ 90 | echo "New version has to be greater"; \ 91 | exit 2; \ 92 | fi; \ 93 | has_documentation=`grep -c "^$(VERSION)\\$$" CHANGES.rst`; \ 94 | if [ $$has_documentation -eq 0 ]; then \ 95 | echo "There is no information about $(VERSION) in CHANGES.rst"; \ 96 | exit 3; \ 97 | fi; \ 98 | short_VERSION=`echo $(VERSION) | cut -f1-2 -d.`; \ 99 | commas_VERSION=`echo $(VERSION) | sed -r 's/\./, /g'`; \ 100 | echo "Short version: $$short_VERSION"; \ 101 | sed -ri 's/version=.*/version="'$(VERSION)'",/' setup.py; \ 102 | sed -ri 's/^release = .*/release = "'$(VERSION)'"/' conf.py; \ 103 | sed -ri 's/^version = .*/version = "'$$short_VERSION'"/' conf.py; \ 104 | sed -ri 's/^__version__ = .*/__version__ = ('"$$commas_VERSION"')/' wikipediaapi/__init__.py; \ 105 | git commit .github CHANGES.rst setup.py 
conf.py wikipediaapi/__init__.py -m "Update version to $(VERSION) for new release." && \ 106 | git push && \ 107 | git tag v$(VERSION) -m "$(MSG)" && \ 108 | git push --tags origin master 109 | 110 | 111 | build-package: 112 | python setup.py sdist 113 | 114 | # Catch-all target: route all unknown targets to Sphinx using the new 115 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 116 | %: Makefile 117 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 118 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Wikipedia API 2 | ============= 3 | 4 | ``Wikipedia-API`` is an easy to use Python wrapper for `Wikipedias'`_ API. It supports extracting texts, sections, links, categories, translations, etc from Wikipedia. Documentation provides code snippets for the most common use cases. 5 | 6 | .. _Wikipedias': https://www.mediawiki.org/wiki/API:Main_page 7 | 8 | |github-stars-flat| |cc-coverage| |docs| |version| |pyversions| 9 | 10 | Installation 11 | ------------ 12 | 13 | This package requires at least Python 3.9 to install because it's using IntEnum. 14 | 15 | .. code-block:: python 16 | 17 | pip3 install wikipedia-api 18 | 19 | 20 | Usage 21 | ----- 22 | 23 | Goal of ``Wikipedia-API`` is to provide simple and easy to use API for retrieving information from Wikipedia. Below are examples of common use cases. 24 | 25 | Importing 26 | ~~~~~~~~~ 27 | 28 | .. code-block:: python 29 | 30 | import wikipediaapi 31 | 32 | How To Get Single Page 33 | ~~~~~~~~~~~~~~~~~~~~~~ 34 | 35 | Getting single page is straightforward. You have to initialize ``Wikipedia`` object and ask for page by its name. 36 | To initialize it, you have to provide: 37 | 38 | * `user_agent` to identify your project. Please follow the recommended `format`_. 39 | * `language` to specify language mutation.
It has to be one of `supported languages`_. 40 | 41 | .. _format: https://meta.wikimedia.org/wiki/User-Agent_policy 42 | .. _supported languages: http://meta.wikimedia.org/wiki/List_of_Wikipedias 43 | 44 | .. code-block:: python 45 | 46 | import wikipediaapi 47 | wiki_wiki = wikipediaapi.Wikipedia(user_agent='MyProjectName (merlin@example.com)', language='en') 48 | 49 | page_py = wiki_wiki.page('Python_(programming_language)') 50 | 51 | 52 | How To Check If Wiki Page Exists 53 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 54 | 55 | For checking, whether page exists, you can use function ``exists``. 56 | 57 | .. code-block:: python 58 | 59 | page_py = wiki_wiki.page('Python_(programming_language)') 60 | print("Page - Exists: %s" % page_py.exists()) 61 | # Page - Exists: True 62 | 63 | page_missing = wiki_wiki.page('NonExistingPageWithStrangeName') 64 | print("Page - Exists: %s" % page_missing.exists()) 65 | # Page - Exists: False 66 | 67 | How To Get Page Summary 68 | ~~~~~~~~~~~~~~~~~~~~~~~ 69 | 70 | Class ``WikipediaPage`` has property ``summary``, which returns description of Wiki page. 71 | 72 | .. code-block:: python 73 | 74 | 75 | import wikipediaapi 76 | wiki_wiki = wikipediaapi.Wikipedia('MyProjectName (merlin@example.com)', 'en') 77 | 78 | print("Page - Title: %s" % page_py.title) 79 | # Page - Title: Python (programming language) 80 | 81 | print("Page - Summary: %s" % page_py.summary[0:60]) 82 | # Page - Summary: Python is a widely used high-level programming language for 83 | 84 | 85 | How To Get Page URL 86 | ~~~~~~~~~~~~~~~~~~~ 87 | 88 | ``WikipediaPage`` has two properties with URL of the page. It is ``fullurl`` and ``canonicalurl``. 89 | 90 | .. 
code-block:: python 91 | 92 | print(page_py.fullurl) 93 | # https://en.wikipedia.org/wiki/Python_(programming_language) 94 | 95 | print(page_py.canonicalurl) 96 | # https://en.wikipedia.org/wiki/Python_(programming_language) 97 | 98 | How To Get Full Text 99 | ~~~~~~~~~~~~~~~~~~~~ 100 | 101 | To get full text of Wikipedia page you should use property ``text`` which constructs text of the page 102 | as concatenation of summary and sections with their titles and texts. 103 | 104 | .. code-block:: python 105 | 106 | wiki_wiki = wikipediaapi.Wikipedia( 107 | user_agent='MyProjectName (merlin@example.com)', 108 | language='en', 109 | extract_format=wikipediaapi.ExtractFormat.WIKI 110 | ) 111 | 112 | p_wiki = wiki_wiki.page("Test 1") 113 | print(p_wiki.text) 114 | # Summary 115 | # Section 1 116 | # Text of section 1 117 | # Section 1.1 118 | # Text of section 1.1 119 | # ... 120 | 121 | 122 | wiki_html = wikipediaapi.Wikipedia( 123 | user_agent='MyProjectName (merlin@example.com)', 124 | language='en', 125 | extract_format=wikipediaapi.ExtractFormat.HTML 126 | ) 127 | p_html = wiki_html.page("Test 1") 128 | print(p_html.text) 129 | #
Summary
130 | #Text of section 1
132 | #Text of section 1.1
134 | # ... 135 | 136 | How To Get Page Sections 137 | ~~~~~~~~~~~~~~~~~~~~~~~~ 138 | 139 | To get all top level sections of page, you have to use property ``sections``. It returns list of 140 | ``WikipediaPageSection``, so you have to use recursion to get all subsections. 141 | 142 | .. code-block:: python 143 | 144 | def print_sections(sections, level=0): 145 | for s in sections: 146 | print("%s: %s - %s" % ("*" * (level + 1), s.title, s.text[0:40])) 147 | print_sections(s.sections, level + 1) 148 | 149 | 150 | print_sections(page_py.sections) 151 | # *: History - Python was conceived in the late 1980s, 152 | # *: Features and philosophy - Python is a multi-paradigm programming l 153 | # *: Syntax and semantics - Python is meant to be an easily readable 154 | # **: Indentation - Python uses whitespace indentation, rath 155 | # **: Statements and control flow - Python's statements include (among other 156 | # **: Expressions - Some Python expressions are similar to l 157 | 158 | How To Get Page Section By Title 159 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 160 | 161 | To get last section of page with given title, you have to use function ``section_by_title``. 162 | It returns the last ``WikipediaPageSection`` with this title. 163 | 164 | .. code-block:: python 165 | 166 | section_history = page_py.section_by_title('History') 167 | print("%s - %s" % (section_history.title, section_history.text[0:40])) 168 | 169 | # History - Python was conceived in the late 1980s b 170 | 171 | How To Get All Page Sections By Title 172 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 173 | 174 | To get all sections of page with given title, you have to use function ``sections_by_title``. 175 | It returns the all ``WikipediaPageSection`` with this title. 176 | 177 | .. 
code-block:: python 178 | 179 | page_1920 = wiki_wiki.page('1920') 180 | sections_january = page_1920.sections_by_title('January') 181 | for s in sections_january: 182 | print("* %s - %s" % (s.title, s.text[0:40])) 183 | 184 | # * January - January 1 185 | # Polish–Soviet War in 1920: The 186 | # * January - January 2 187 | # Isaac Asimov, American author 188 | # * January - January 1 – Zygmunt Gorazdowski, Polish 189 | 190 | How To Get Page In Other Languages 191 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 192 | 193 | If you want to get other translations of given page, you should use property ``langlinks``. It is map, 194 | where key is language code and value is ``WikipediaPage``. 195 | 196 | .. code-block:: python 197 | 198 | def print_langlinks(page): 199 | langlinks = page.langlinks 200 | for k in sorted(langlinks.keys()): 201 | v = langlinks[k] 202 | print("%s: %s - %s: %s" % (k, v.language, v.title, v.fullurl)) 203 | 204 | print_langlinks(page_py) 205 | # af: af - Python (programmeertaal): https://af.wikipedia.org/wiki/Python_(programmeertaal) 206 | # als: als - Python (Programmiersprache): https://als.wikipedia.org/wiki/Python_(Programmiersprache) 207 | # an: an - Python: https://an.wikipedia.org/wiki/Python 208 | # ar: ar - بايثون: https://ar.wikipedia.org/wiki/%D8%A8%D8%A7%D9%8A%D8%AB%D9%88%D9%86 209 | # as: as - পাইথন: https://as.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8 210 | 211 | page_py_cs = page_py.langlinks['cs'] 212 | print("Page - Summary: %s" % page_py_cs.summary[0:60]) 213 | # Page - Summary: Python (anglická výslovnost [ˈpaiθtən]) je vysokoúrovňový sk 214 | 215 | How To Get Links To Other Pages 216 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 217 | 218 | If you want to get all links to other wiki pages from given page, you need to use property ``links``. 219 | It's map, where key is page title and value is ``WikipediaPage``. 220 | 221 | .. 
code-block:: python 222 | 223 | def print_links(page): 224 | links = page.links 225 | for title in sorted(links.keys()): 226 | print("%s: %s" % (title, links[title])) 227 | 228 | print_links(page_py) 229 | # 3ds Max: 3ds Max (id: ??, ns: 0) 230 | # ?:: ?: (id: ??, ns: 0) 231 | # ABC (programming language): ABC (programming language) (id: ??, ns: 0) 232 | # ALGOL 68: ALGOL 68 (id: ??, ns: 0) 233 | # Abaqus: Abaqus (id: ??, ns: 0) 234 | # ... 235 | 236 | How To Get Page Categories 237 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 238 | 239 | If you want to get all categories under which page belongs, you should use property ``categories``. 240 | It's map, where key is category title and value is ``WikipediaPage``. 241 | 242 | .. code-block:: python 243 | 244 | def print_categories(page): 245 | categories = page.categories 246 | for title in sorted(categories.keys()): 247 | print("%s: %s" % (title, categories[title])) 248 | 249 | 250 | print("Categories") 251 | print_categories(page_py) 252 | # Category:All articles containing potentially dated statements: ... 253 | # Category:All articles with unsourced statements: ... 254 | # Category:Articles containing potentially dated statements from August 2016: ... 255 | # Category:Articles containing potentially dated statements from March 2017: ... 256 | # Category:Articles containing potentially dated statements from September 2017: ... 257 | 258 | How To Get All Pages From Category 259 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 260 | 261 | To get all pages from given category, you should use property ``categorymembers``. It returns all members of given category. 262 | You have to implement recursion and deduplication by yourself. 263 | 264 | .. 
code-block:: python 265 | 266 | def print_categorymembers(categorymembers, level=0, max_level=1): 267 | for c in categorymembers.values(): 268 | print("%s: %s (ns: %d)" % ("*" * (level + 1), c.title, c.ns)) 269 | if c.ns == wikipediaapi.Namespace.CATEGORY and level < max_level: 270 | print_categorymembers(c.categorymembers, level=level + 1, max_level=max_level) 271 | 272 | 273 | cat = wiki_wiki.page("Category:Physics") 274 | print("Category members: Category:Physics") 275 | print_categorymembers(cat.categorymembers) 276 | 277 | # Category members: Category:Physics 278 | # * Statistical mechanics (ns: 0) 279 | # * Category:Physical quantities (ns: 14) 280 | # ** Refractive index (ns: 0) 281 | # ** Vapor quality (ns: 0) 282 | # ** Electric susceptibility (ns: 0) 283 | # ** Specific weight (ns: 0) 284 | # ** Category:Viscosity (ns: 14) 285 | # *** Brookfield Engineering (ns: 0) 286 | 287 | Use Extra API Parameters 288 | ~~~~~~~~~~~~~~~~~~~~~~~~ 289 | 290 | Official API supports many different parameters. You can see them in the `sandbox`_. Not all these 291 | parameters are supported directly as parameters of the functions. If you want to specify them, 292 | you can pass them as additional parameters in the constructor. For the `info API call`_ you can 293 | specify parameter `converttitles`. If you want to specify it, you can use: 294 | 295 | .. code-block:: python 296 | 297 | import sys 298 | 299 | import wikipediaapi 300 | wiki_wiki = wikipediaapi.Wikipedia('MyProjectName (merlin@example.com)', 'zh', 'zh-tw', extra_api_params={'converttitles': 1}) 301 | page = wiki_wiki.page("孟卯") 302 | print(repr(page.varianttitles)) 303 | 304 | 305 | .. _sandbox: https://en.wikipedia.org/wiki/Special:ApiSandbox 306 | .. 
_info API call: https://zh.wikipedia.org/wiki/Special:API%E6%B2%99%E7%9B%92#action=query&format=json&variant=zh-tw&prop=info&titles=%E5%AD%9F%E5%8D%AF&converttitles=1&formatversion=2&inprop=varianttitles%7Cdisplaytitle 307 | 308 | How To See Underlying API Call 309 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 310 | 311 | If you have problems with retrieving data you can get URL of underlying API call. 312 | This will help you determine if the problem is in the library or somewhere else. 313 | 314 | .. code-block:: python 315 | 316 | import sys 317 | 318 | import wikipediaapi 319 | wikipediaapi.log.setLevel(level=wikipediaapi.logging.DEBUG) 320 | 321 | # Set handler if you use Python in interactive mode 322 | out_hdlr = wikipediaapi.logging.StreamHandler(sys.stderr) 323 | out_hdlr.setFormatter(wikipediaapi.logging.Formatter('%(asctime)s %(message)s')) 324 | out_hdlr.setLevel(wikipediaapi.logging.DEBUG) 325 | wikipediaapi.log.addHandler(out_hdlr) 326 | 327 | wiki = wikipediaapi.Wikipedia(user_agent='MyProjectName (merlin@example.com)', language='en') 328 | 329 | page_ostrava = wiki.page('Ostrava') 330 | print(page_ostrava.summary) 331 | # logger prints out: Request URL: http://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Ostrava&explaintext=1&exsectionformat=wiki 332 | 333 | External Links 334 | -------------- 335 | 336 | * `GitHub`_ 337 | * `PyPi`_ 338 | * `ReadTheDocs`_ 339 | 340 | .. _GitHub: https://github.com/martin-majlis/Wikipedia-API/ 341 | .. _PyPi: https://pypi.python.org/pypi/Wikipedia-API/ 342 | ..
_ReadTheDocs: http://wikipedia-api.readthedocs.io/ 343 | 344 | Other Badges 345 | ------------ 346 | 347 | |cc-badge| |cc-issues| |coveralls| |version| |pyversions| |implementations| 348 | |github-downloads| |github-tag| |github-release| 349 | |github-commits-since-latest| |github-forks| |github-stars| |github-watches| 350 | |github-commit-activity| |github-last-commit| |github-code-size| |github-repo-size| 351 | |pypi-license| |pypi-wheel| |pypi-format| |pypi-pyversions| |pypi-implementations| 352 | |pypi-status| |pypi-downloads-dd| |pypi-downloads-dw| |pypi-downloads-dm| 353 | |libraries-io-sourcerank| |libraries-io-dependent-repos| 354 | 355 | 356 | Other Pages 357 | ----------- 358 | 359 | .. PYPI-BEGIN 360 | .. toctree:: 361 | :maxdepth: 2 362 | 363 | API 364 | CHANGES 365 | DEVELOPMENT 366 | wikipediaapi/api 367 | 368 | .. PYPI-END 369 | 370 | 371 | .. |docs| image:: https://readthedocs.org/projects/wikipedia-api/badge/?version=latest 372 | :target: http://wikipedia-api.readthedocs.io/en/latest/?badge=latest 373 | :alt: Documentation Status 374 | 375 | .. |cc-badge| image:: https://codeclimate.com/github/martin-majlis/Wikipedia-API/badges/gpa.svg 376 | :target: https://codeclimate.com/github/martin-majlis/Wikipedia-API 377 | :alt: Code Climate 378 | 379 | .. |cc-issues| image:: https://codeclimate.com/github/martin-majlis/Wikipedia-API/badges/issue_count.svg 380 | :target: https://codeclimate.com/github/martin-majlis/Wikipedia-API 381 | :alt: Issue Count 382 | 383 | .. |cc-coverage| image:: https://api.codeclimate.com/v1/badges/6e2c24d72438b39e5c26/test_coverage 384 | :target: https://codeclimate.com/github/martin-majlis/Wikipedia-API 385 | :alt: Test Coverage 386 | 387 | .. |coveralls| image:: https://coveralls.io/repos/github/martin-majlis/Wikipedia-API/badge.svg?branch=master 388 | :target: https://coveralls.io/github/martin-majlis/Wikipedia-API?branch=master 389 | :alt: Coveralls 390 | 391 | .. 
|version| image:: https://img.shields.io/pypi/v/wikipedia-api.svg?style=flat 392 | :target: https://pypi.python.org/pypi/Wikipedia-API 393 | :alt: Version 394 | 395 | .. |pyversions| image:: https://img.shields.io/pypi/pyversions/wikipedia-api.svg?style=flat 396 | :target: https://pypi.python.org/pypi/Wikipedia-API 397 | :alt: Py Versions 398 | 399 | .. |implementations| image:: https://img.shields.io/pypi/implementation/wikipedia-api.svg?style=flat 400 | :target: https://pypi.python.org/pypi/Wikipedia-API 401 | :alt: Implementations 402 | 403 | .. |github-downloads| image:: https://img.shields.io/github/downloads/martin-majlis/Wikipedia-API/total.svg 404 | :target: https://github.com/martin-majlis/Wikipedia-API/releases 405 | :alt: Downloads 406 | 407 | .. |github-tag| image:: https://img.shields.io/github/tag/martin-majlis/Wikipedia-API.svg 408 | :target: https://github.com/martin-majlis/Wikipedia-API/tags 409 | :alt: Tags 410 | 411 | .. |github-release| image:: https://img.shields.io/github/release/martin-majlis/Wikipedia-API.svg 412 | :target: https://github.com/martin-majlis/Wikipedia-API/ 413 | 414 | .. |github-commits-since-latest| image:: https://img.shields.io/github/commits-since/martin-majlis/Wikipedia-API/latest.svg 415 | :target: https://github.com/martin-majlis/Wikipedia-API/ 416 | :alt: Github commits (since latest release) 417 | 418 | .. |github-forks| image:: https://img.shields.io/github/forks/martin-majlis/Wikipedia-API.svg?style=social&label=Fork 419 | :target: https://github.com/martin-majlis/Wikipedia-API/ 420 | :alt: GitHub forks 421 | 422 | .. |github-stars| image:: https://img.shields.io/github/stars/martin-majlis/Wikipedia-API.svg?style=social&label=Stars 423 | :target: https://github.com/martin-majlis/Wikipedia-API/ 424 | :alt: GitHub stars 425 | 426 | .. 
|github-stars-flat| image:: https://img.shields.io/github/stars/martin-majlis/Wikipedia-API.svg?style=flat&label=Stars 427 | :target: https://github.com/martin-majlis/Wikipedia-API/ 428 | :alt: GitHub stars 429 | 430 | .. |github-watches| image:: https://img.shields.io/github/watchers/martin-majlis/Wikipedia-API.svg?style=social&label=Watch 431 | :target: https://github.com/martin-majlis/Wikipedia-API/ 432 | :alt: GitHub watchers 433 | 434 | .. |github-commit-activity| image:: https://img.shields.io/github/commit-activity/y/martin-majlis/Wikipedia-API.svg 435 | :target: https://github.com/martin-majlis/Wikipedia-API/commits/master 436 | :alt: GitHub commit activity the past week, 4 weeks, year 437 | 438 | .. |github-last-commit| image:: https://img.shields.io/github/commits/martin-majlis/Wikipedia-API/last.svg 439 | :target: https://github.com/martin-majlis/Wikipedia-API/ 440 | :alt: Last commit 441 | 442 | .. |github-code-size| image:: https://img.shields.io/github/languages/code-size/martin-majlis/Wikipedia-API.svg 443 | :target: https://github.com/martin-majlis/Wikipedia-API/ 444 | :alt: GitHub code size in bytes 445 | 446 | .. |github-repo-size| image:: https://img.shields.io/github/repo-size/martin-majlis/Wikipedia-API.svg 447 | :target: https://github.com/martin-majlis/Wikipedia-API/ 448 | :alt: GitHub repo size in bytes 449 | 450 | .. |pypi-license| image:: https://img.shields.io/pypi/l/Wikipedia-API.svg 451 | :target: https://pypi.python.org/pypi/Wikipedia-API/ 452 | :alt: PyPi License 453 | 454 | .. |pypi-wheel| image:: https://img.shields.io/pypi/wheel/Wikipedia-API.svg 455 | :target: https://pypi.python.org/pypi/Wikipedia-API/ 456 | :alt: PyPi Wheel 457 | 458 | .. |pypi-format| image:: https://img.shields.io/pypi/format/Wikipedia-API.svg 459 | :target: https://pypi.python.org/pypi/Wikipedia-API/ 460 | :alt: PyPi Format 461 | 462 | .. 
|pypi-pyversions| image:: https://img.shields.io/pypi/pyversions/Wikipedia-API.svg 463 | :target: https://pypi.python.org/pypi/Wikipedia-API/ 464 | :alt: PyPi PyVersions 465 | 466 | .. |pypi-implementations| image:: https://img.shields.io/pypi/implementation/Wikipedia-API.svg 467 | :target: https://pypi.python.org/pypi/Wikipedia-API/ 468 | :alt: PyPi Implementations 469 | 470 | .. |pypi-status| image:: https://img.shields.io/pypi/status/Wikipedia-API.svg 471 | :target: https://pypi.python.org/pypi/Wikipedia-API/ 472 | :alt: PyPi Status 473 | 474 | .. |pypi-downloads-dd| image:: https://img.shields.io/pypi/dd/Wikipedia-API.svg 475 | :target: https://pypi.python.org/pypi/Wikipedia-API/ 476 | :alt: PyPi Downloads - Day 477 | 478 | .. |pypi-downloads-dw| image:: https://img.shields.io/pypi/dw/Wikipedia-API.svg 479 | :target: https://pypi.python.org/pypi/Wikipedia-API/ 480 | :alt: PyPi Downloads - Week 481 | 482 | .. |pypi-downloads-dm| image:: https://img.shields.io/pypi/dm/Wikipedia-API.svg 483 | :target: https://pypi.python.org/pypi/Wikipedia-API/ 484 | :alt: PyPi Downloads - Month 485 | 486 | .. |libraries-io-sourcerank| image:: https://img.shields.io/librariesio/sourcerank/pypi/Wikipedia-API.svg 487 | :target: https://libraries.io/pypi/Wikipedia-API 488 | :alt: Libraries.io - SourceRank 489 | 490 | .. |libraries-io-dependent-repos| image:: https://img.shields.io/librariesio/dependent-repos/pypi/Wikipedia-API.svg 491 | :target: https://libraries.io/pypi/Wikipedia-API 492 | :alt: Libraries.io - Dependent Repos 493 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Security contact information 2 | 3 | To report a security vulnerability, please use the 4 | [Tidelift security contact](https://tidelift.com/security). 5 | Tidelift will coordinate the fix and disclosure. 
6 | -------------------------------------------------------------------------------- /SECURITY.rst: -------------------------------------------------------------------------------- 1 | Security contact information 2 | ============================ 3 | 4 | To report a security vulnerability, please use the 5 | `Tidelift security contact`_. 6 | Tidelift will coordinate the fix and disclosure. 7 | 8 | .. _Tidelift security contact: https://tidelift.com/security 9 | -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Wikipedia-API documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Jul 3 09:08:55 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | import os 20 | import sys 21 | 22 | sys.path.append(os.path.abspath("wikipediaapi")) 23 | 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 
34 | extensions = ["sphinx.ext.autodoc"] 35 | 36 | # Autodoc settings 37 | autodoc_default_flags = ["members", "special-members"] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ["_templates"] 41 | 42 | # The suffix(es) of source filenames. 43 | # You can specify multiple suffix as a list of string: 44 | # 45 | # source_suffix = ['.rst', '.md'] 46 | source_suffix = ".rst" 47 | 48 | # The master toctree document. 49 | master_doc = "README" 50 | 51 | # General information about the project. 52 | project = "Wikipedia Python API" 53 | copyright = '2017-2025, Martin Majlis' 54 | author = "Martin Majlis" 55 | 56 | # The version info for the project you're documenting, acts as replacement for 57 | # |version| and |release|, also used in various other places throughout the 58 | # built documents. 59 | # 60 | # The short X.Y version. 61 | version = "0.8" 62 | # The full version, including alpha/beta/rc tags. 63 | release = "0.8.1" 64 | 65 | # The language for content autogenerated by Sphinx. Refer to documentation 66 | # for a list of supported languages. 67 | # 68 | # This is also used if you do content translation via gettext catalogs. 69 | # Usually you set "language" from the command line for these cases. 70 | language = "en" 71 | 72 | # List of patterns, relative to source directory, that match files and 73 | # directories to ignore when looking for source files. 74 | # This patterns also effect to html_static_path and html_extra_path 75 | exclude_patterns = [ 76 | "_build", 77 | "Thumbs.db", 78 | ".DS_Store", 79 | "venv", 80 | ".tox", 81 | ".mypy_cache", 82 | ] 83 | 84 | # The name of the Pygments (syntax highlighting) style to use. 85 | pygments_style = "sphinx" 86 | 87 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
88 | todo_include_todos = False 89 | 90 | 91 | # -- Options for HTML output ---------------------------------------------- 92 | 93 | # The theme to use for HTML and HTML Help pages. See the documentation for 94 | # a list of builtin themes. 95 | # 96 | html_theme = "alabaster" 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | # 102 | # html_theme_options = {} 103 | 104 | # Add any paths that contain custom static files (such as style sheets) here, 105 | # relative to this directory. They are copied after the builtin static files, 106 | # so a file named "default.css" will overwrite the builtin "default.css". 107 | html_static_path = [] # type: list[str] 108 | 109 | # Custom sidebar templates, must be a dictionary that maps document names 110 | # to template names. 111 | # 112 | # This is required for the alabaster theme 113 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 114 | html_sidebars = { 115 | "**": [ 116 | "about.html", 117 | "navigation.html", 118 | "relations.html", # needs 'show_related': True theme option to display 119 | "searchbox.html", 120 | "donate.html", 121 | ] 122 | } 123 | 124 | 125 | # -- Options for HTMLHelp output ------------------------------------------ 126 | 127 | # Output file base name for HTML help builder. 128 | htmlhelp_basename = "Wikipedia-API" 129 | 130 | 131 | # -- Options for LaTeX output --------------------------------------------- 132 | 133 | latex_elements = { 134 | # The paper size ('letterpaper' or 'a4paper'). 135 | # 136 | "papersize": "letterpaper", 137 | # The font size ('10pt', '11pt' or '12pt'). 138 | # 139 | # 'pointsize': '10pt', 140 | # Additional stuff for the LaTeX preamble. 
141 | # 142 | # 'preamble': '', 143 | # Latex figure (float) alignment 144 | # 145 | # 'figure_align': 'htbp', 146 | } 147 | 148 | # Grouping the document tree into LaTeX files. List of tuples 149 | # (source start file, target name, title, 150 | # author, documentclass [howto, manual, or own class]). 151 | latex_documents = [ 152 | ( 153 | master_doc, 154 | "Wikipedia-API.tex", 155 | "Wikipedia-API Documentation", 156 | "Martin Majlis", 157 | "manual", 158 | ), 159 | ] 160 | 161 | 162 | # -- Options for manual page output --------------------------------------- 163 | 164 | # One entry per manual page. List of tuples 165 | # (source start file, name, description, authors, manual section). 166 | man_pages = [(master_doc, "wikipedia-api", "Wikipedia-API Documentation", [author], 1)] 167 | 168 | 169 | # -- Options for Texinfo output ------------------------------------------- 170 | 171 | # Grouping the document tree into Texinfo files. List of tuples 172 | # (source start file, target name, title, author, 173 | # dir menu entry, description, category) 174 | texinfo_documents = [ 175 | ( 176 | master_doc, 177 | "Wikipedia-API", 178 | "Wikipedia-API Documentation", 179 | author, 180 | "Wikipedia-API", 181 | "Python wrapper around Wikipedia API.", 182 | "Miscellaneous", 183 | ), 184 | ] 185 | 186 | 187 | # -- Options for Epub output ---------------------------------------------- 188 | 189 | # Bibliographic Dublin Core info. 190 | epub_title = project 191 | epub_author = author 192 | epub_publisher = author 193 | epub_copyright = copyright 194 | 195 | # The unique identifier of the text. This can be a ISBN number 196 | # or the project homepage. 197 | # 198 | # epub_identifier = '' 199 | 200 | # A unique identification for the text. 201 | # 202 | # epub_uid = '' 203 | 204 | # A list of files that should not be packed into the epub file. 
205 | epub_exclude_files = ["search.html"] 206 | 207 | html_context = { 208 | "display_github": True, # Integrate GitHub 209 | "github_user": "martin-majlis", # Username 210 | "github_repo": "Wikipedia-API", # Repo name 211 | "github_version": "master", # Version 212 | "conf_py_path": "/_build/html/", # Path in the checkout to the docs root, 213 | "travis_button": True, 214 | "codecov_button": True, 215 | } 216 | -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | 4 | import wikipediaapi 5 | 6 | logging.basicConfig(level=logging.INFO) 7 | 8 | user_agent = "Wikipedia-API Example (merlin@example.com)" 9 | 10 | wiki_wiki = wikipediaapi.Wikipedia(user_agent=user_agent, language="en") 11 | 12 | page_py = wiki_wiki.page("Python_(programming_language)") 13 | 14 | print("Page - Exists: %s" % page_py.exists()) 15 | print("Page - Id: %s" % page_py.pageid) 16 | print("Page - Title: %s" % page_py.title) 17 | print("Page - Summary: %s" % page_py.summary[0:60]) 18 | 19 | 20 | def print_sections(sections, level=0): 21 | for s in sections: 22 | print("{}: {} - {}".format("*" * (level + 1), s.title, s.text[0:40])) 23 | print_sections(s.sections, level + 1) 24 | 25 | 26 | print("Sections:") 27 | print_sections(page_py.sections) 28 | 29 | 30 | def print_langlinks(page): 31 | langlinks = page.langlinks 32 | for k in sorted(langlinks.keys()): 33 | v = langlinks[k] 34 | print(f"{k}: {v.language} - {v.title}: {v.fullurl}") 35 | 36 | 37 | print("Lang links:") 38 | print_langlinks(page_py) 39 | 40 | 41 | def print_links(page): 42 | links = page.links 43 | for title in sorted(links.keys()): 44 | print(f"{title}: {links[title]}") 45 | 46 | 47 | print("Links:") 48 | print_links(page_py) 49 | 50 | 51 | def print_categories(page): 52 | categories = page.categories 53 | for title in sorted(categories.keys()): 54 | 
print(f"{title}: {categories[title]}") 55 | 56 | 57 | print("Categories") 58 | print_categories(page_py) 59 | 60 | section_py = page_py.section_by_title("Features and philosophy") 61 | if section_py is not None: 62 | print("Section - Title: %s" % section_py.title) 63 | print("Section - Text: %s" % section_py.text[0:60]) 64 | else: 65 | print("Section does not exist.") 66 | 67 | wiki_html = wikipediaapi.Wikipedia( 68 | user_agent=user_agent, language="cs", extract_format=wikipediaapi.ExtractFormat.HTML 69 | ) 70 | 71 | page_ostrava = wiki_html.page("Ostrava") 72 | print("Page - Exists: %s" % page_ostrava.exists()) 73 | print("Page - Id: %s" % page_ostrava.pageid) 74 | print("Page - Title: %s" % page_ostrava.title) 75 | print("Page - Summary: %s" % page_ostrava.summary[0:60]) 76 | print_sections(page_ostrava.sections) 77 | 78 | section_ostrava = page_ostrava.section_by_title("Heraldický znak") 79 | if section_ostrava is not None: 80 | print("Section - Title: %s" % section_ostrava.title) 81 | print("Section - Text: %s" % section_ostrava.text[0:60]) 82 | else: 83 | print("Section does not exists") 84 | 85 | page_nonexisting = wiki_wiki.page("Wikipedia-API-FooBar") 86 | print("Page - Exists: %s" % page_nonexisting.exists()) 87 | print("Page - Id: %s" % page_nonexisting.pageid) 88 | print("Page - Title: %s" % page_nonexisting.title) 89 | print("Page - Summary: %s" % page_nonexisting.summary[0:60]) 90 | 91 | 92 | wiki_de = wikipediaapi.Wikipedia(user_agent=user_agent, language="de") 93 | de_page = wiki_de.page("Deutsche Sprache") 94 | print(de_page.title + ": " + de_page.fullurl) 95 | print(de_page.summary[0:60]) 96 | 97 | en_page = de_page.langlinks["en"] 98 | print(en_page.title + ": " + en_page.fullurl) 99 | print(en_page.summary[0:60]) 100 | 101 | 102 | def print_categorymembers(categorymembers, level=0, max_level=2): 103 | for c in categorymembers.values(): 104 | print("%s %s (ns: %d)" % ("*" * (level + 1), c.title, c.ns)) 105 | if c.ns == 
wikipediaapi.Namespace.CATEGORY and level < max_level: 106 | print_categorymembers(c.categorymembers, level + 1, max_level=max_level) 107 | 108 | 109 | cat = wiki_wiki.page("Category:Physics") 110 | print("Category members: Category:Physics") 111 | print_categorymembers(cat.categorymembers, max_level=1) 112 | 113 | wiki_hi = wikipediaapi.Wikipedia(user_agent=user_agent, language="hi") 114 | # fetch page about Python in Hindu 115 | # https://hi.wikipedia.org/wiki/%E0%A4%AA%E0%A4%BE%E0%A4%87%E0%A4%A5%E0%A4%A8 116 | 117 | p_hi_python_quoted = wiki_hi.article( 118 | title="%E0%A4%AA%E0%A4%BE%E0%A4%87%E0%A4%A5%E0%A4%A8", 119 | unquote=True, 120 | ) 121 | print(p_hi_python_quoted.title) 122 | print(p_hi_python_quoted.summary[0:60]) 123 | 124 | # Fetch page about Python in Chinese 125 | wiki_zh = wikipediaapi.Wikipedia(user_agent=user_agent, language="zh") 126 | zh_page = wiki_zh.page("Python") 127 | print(zh_page.title + ": " + zh_page.fullurl) 128 | print(zh_page.summary[0:60]) 129 | print(repr(zh_page.varianttitles)) 130 | 131 | # https://zh.wikipedia.org/zh-cn/Python 132 | wiki_zh_cn = wikipediaapi.Wikipedia( 133 | user_agent=user_agent, language="zh", variant="zh-cn" 134 | ) 135 | zh_page_cn = wiki_zh_cn.page("Python") 136 | print(zh_page_cn.title + ": " + zh_page_cn.fullurl) 137 | print(zh_page_cn.summary[0:60]) 138 | print(repr(zh_page_cn.varianttitles)) 139 | 140 | # https://zh.wikipedia.org/zh-tw/Python 141 | wiki_zh_tw = wikipediaapi.Wikipedia( 142 | user_agent=user_agent, language="zh", variant="zh-tw" 143 | ) 144 | zh_page_tw = wiki_zh_tw.page("Python") 145 | print(zh_page_tw.title + ": " + zh_page_tw.fullurl) 146 | print(zh_page_tw.summary[0:60]) 147 | print(repr(zh_page_tw.varianttitles)) 148 | 149 | # https://zh.wikipedia.org/zh-sg/Python 150 | wiki_zh_sg = wikipediaapi.Wikipedia( 151 | user_agent=user_agent, language="zh", variant="zh-sg" 152 | ) 153 | zh_page_sg = wiki_zh_sg.page("Python") 154 | print(zh_page_sg.title + ": " + zh_page_sg.fullurl) 155 | 
print(zh_page_sg.summary[0:60]) 156 | print(repr(zh_page_sg.varianttitles)) 157 | -------------------------------------------------------------------------------- /index.rst: -------------------------------------------------------------------------------- 1 | README.rst -------------------------------------------------------------------------------- /requirements-build.txt: -------------------------------------------------------------------------------- 1 | setuptools==80.7.1 2 | wheel==0.45.1 3 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black==25.1.0 2 | coverage==7.8.0 3 | flake8==7.2.0 4 | isort==6.0.1 5 | mypy==1.15.0 6 | pre-commit==4.2.0 7 | pygments==2.19.1 8 | pyupgrade==3.19.1 9 | tox==4.26.0 10 | types-requests==2.32.0.20250515 11 | types-setuptools==80.7.0.20250516 12 | -------------------------------------------------------------------------------- /requirements-doc.txt: -------------------------------------------------------------------------------- 1 | sphinx==8.2.3 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.32.3 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import List # noqa 3 | 4 | from setuptools import setup 5 | 6 | 7 | def fix_doc(txt): 8 | """ 9 | Fixes documentation so that it's readable in pypi website. 10 | """ 11 | return re.sub( 12 | r"\.\. 
PYPI-BEGIN([\r\n]|[^\r\n])*?PYPI-END", "", txt, flags=re.DOTALL 13 | ) 14 | 15 | 16 | with open("README.rst", encoding="utf8") as fileR: 17 | README = fix_doc(fileR.read()) 18 | 19 | with open("CHANGES.rst", encoding="utf8") as fileC: 20 | CHANGES = fix_doc(fileC.read()) 21 | 22 | requires = [ 23 | "requests", 24 | ] 25 | 26 | tests_require = [] # type: List[str] 27 | 28 | setup( 29 | name="Wikipedia-API", 30 | version="0.8.1", 31 | description="Python Wrapper for Wikipedia", 32 | long_description=README + "\n\n" + CHANGES, 33 | classifiers=[ 34 | "Development Status :: 5 - Production/Stable", 35 | "Environment :: Web Environment", 36 | "Intended Audience :: Developers", 37 | "Programming Language :: Python", 38 | "Programming Language :: Python :: 3", 39 | "Programming Language :: Python :: 3.9", 40 | "Programming Language :: Python :: 3.10", 41 | "Programming Language :: Python :: 3.11", 42 | "Programming Language :: Python :: 3.12", 43 | "Programming Language :: Python :: 3.13", 44 | "Programming Language :: Python :: Implementation :: CPython", 45 | "Programming Language :: Python :: Implementation :: PyPy", 46 | "License :: OSI Approved :: MIT License", 47 | "Operating System :: OS Independent", 48 | "Topic :: Communications :: Email", 49 | "Topic :: Software Development :: Libraries :: Python Modules", 50 | ], 51 | author="Martin Majlis", 52 | author_email="martin@majlis.cz", 53 | license="MIT", 54 | url="https://github.com/martin-majlis/Wikipedia-API", 55 | download_url="https://github.com/martin-majlis/Wikipedia-API/archive/master.tar.gz", 56 | keywords="Wikipedia API wrapper", 57 | packages=["wikipediaapi"], 58 | include_package_data=True, 59 | zip_safe=False, 60 | extras_require={ 61 | "testing": tests_require, 62 | }, 63 | install_requires=requires, 64 | platforms="any", 65 | ) 66 | -------------------------------------------------------------------------------- /tests/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/martin-majlis/Wikipedia-API/41a8c5cd34a58d44d8a4631d0ef987704e2f9c8b/tests/__init__.py -------------------------------------------------------------------------------- /tests/backlinks_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from tests.mock_data import user_agent 4 | from tests.mock_data import wikipedia_api_request 5 | import wikipediaapi 6 | 7 | 8 | class TestBackLinks(unittest.TestCase): 9 | def setUp(self): 10 | self.wiki = wikipediaapi.Wikipedia(user_agent, "en") 11 | self.wiki._query = wikipedia_api_request(self.wiki) 12 | 13 | def test_backlinks_nonexistent_count(self): 14 | page = self.wiki.page("Non_Existent") 15 | self.assertEqual(len(page.backlinks), 0) 16 | 17 | def test_backlinks_single_page_count(self): 18 | page = self.wiki.page("Test_1") 19 | self.assertEqual(len(page.backlinks), 3) 20 | 21 | def test_backlinks_single_page_titles(self): 22 | page = self.wiki.page("Test_1") 23 | self.assertEqual( 24 | list(sorted(map(lambda s: s.title, page.backlinks.values()))), 25 | ["Title - " + str(i + 1) for i in range(3)], 26 | ) 27 | 28 | def test_backlinks_multi_page_count(self): 29 | page = self.wiki.page("Test_2") 30 | self.assertEqual(len(page.backlinks), 5) 31 | 32 | def test_backlinks_multi_page_titles(self): 33 | page = self.wiki.page("Test_2") 34 | self.assertEqual( 35 | list(sorted(map(lambda s: s.title, page.backlinks.values()))), 36 | ["Title - " + str(i + 1) for i in range(5)], 37 | ) 38 | -------------------------------------------------------------------------------- /tests/categories_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from tests.mock_data import user_agent 4 | from tests.mock_data import wikipedia_api_request 5 | import wikipediaapi 6 | 7 | 8 | class 
TestCategories(unittest.TestCase): 9 | def setUp(self): 10 | self.wiki = wikipediaapi.Wikipedia(user_agent, "en") 11 | self.wiki._query = wikipedia_api_request(self.wiki) 12 | 13 | def test_categories_count(self): 14 | page = self.wiki.page("Test_1") 15 | self.assertEqual(len(page.categories), 3) 16 | 17 | def test_categories_titles(self): 18 | page = self.wiki.page("Test_1") 19 | self.assertEqual( 20 | list(sorted(map(lambda s: s.title, page.categories.values()))), 21 | ["Category:C" + str(i + 1) for i in range(3)], 22 | ) 23 | 24 | def test_categories_nss(self): 25 | page = self.wiki.page("Test_1") 26 | self.assertEqual( 27 | list(sorted(map(lambda s: s.ns, page.categories.values()))), [14] * 3 28 | ) 29 | 30 | def test_no_categories_count(self): 31 | page = self.wiki.page("No_Categories") 32 | self.assertEqual(len(page.categories), 0) 33 | -------------------------------------------------------------------------------- /tests/categorymembers_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from tests.mock_data import user_agent 4 | from tests.mock_data import wikipedia_api_request 5 | import wikipediaapi 6 | 7 | 8 | class TestCategoryMembers(unittest.TestCase): 9 | def setUp(self): 10 | self.wiki = wikipediaapi.Wikipedia(user_agent, "en") 11 | self.wiki._query = wikipedia_api_request(self.wiki) 12 | 13 | def test_links_single_page_count(self): 14 | page = self.wiki.page("Category:C1") 15 | self.assertEqual(len(page.categorymembers), 3) 16 | 17 | def test_links_single_page_titles(self): 18 | page = self.wiki.page("Category:C1") 19 | self.assertEqual( 20 | list(sorted(map(lambda s: s.title, page.categorymembers.values()))), 21 | ["Title - " + str(i + 1) for i in range(3)], 22 | ) 23 | 24 | def test_links_multi_page_count(self): 25 | page = self.wiki.page("Category:C2") 26 | self.assertEqual(len(page.categorymembers), 5) 27 | 28 | def test_links_multi_page_titles(self): 29 | page = 
self.wiki.page("Category:C2") 30 | self.assertEqual( 31 | list(sorted(map(lambda s: s.title, page.categorymembers.values()))), 32 | ["Title - " + str(i + 1) for i in range(5)], 33 | ) 34 | -------------------------------------------------------------------------------- /tests/extract_errors_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from tests.mock_data import user_agent 4 | from tests.mock_data import wikipedia_api_request 5 | import wikipediaapi 6 | 7 | 8 | class TestErrorsExtracts(unittest.TestCase): 9 | def setUp(self): 10 | self.wiki = wikipediaapi.Wikipedia(user_agent, "en") 11 | self.wiki._query = wikipedia_api_request(self.wiki) 12 | 13 | def test_title_before_fetching(self): 14 | page = self.wiki.page("NonExisting") 15 | self.assertEqual(page.title, "NonExisting") 16 | 17 | def test_pageid(self): 18 | page = self.wiki.page("NonExisting") 19 | self.assertEqual(page.pageid, -1) 20 | -------------------------------------------------------------------------------- /tests/extract_html_format_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from tests.mock_data import user_agent 4 | from tests.mock_data import wikipedia_api_request 5 | import wikipediaapi 6 | 7 | 8 | class TestHtmlFormatExtracts(unittest.TestCase): 9 | def setUp(self): 10 | self.wiki = wikipediaapi.Wikipedia( 11 | user_agent, "en", extract_format=wikipediaapi.ExtractFormat.HTML 12 | ) 13 | self.wiki._query = wikipedia_api_request(self.wiki) 14 | 15 | def test_title_before_fetching(self): 16 | page = self.wiki.page("Test_1") 17 | self.assertEqual(page.title, "Test_1") 18 | 19 | def test_pageid(self): 20 | page = self.wiki.page("Test_1") 21 | self.assertEqual(page.pageid, 4) 22 | 23 | def test_title_after_fetching(self): 24 | page = self.wiki.page("Test_1") 25 | page._fetch("extracts") 26 | self.assertEqual(page.title, "Test 1") 27 | 28 | def 
test_summary(self): 29 | page = self.wiki.page("Test_1") 30 | self.assertEqual(page.summary, "Summary text\n\n
") 31 | 32 | def test_section_count(self): 33 | page = self.wiki.page("Test_1") 34 | self.assertEqual(len(page.sections), 5) 35 | 36 | def test_top_level_section_titles(self): 37 | page = self.wiki.page("Test_1") 38 | self.assertEqual( 39 | list(map(lambda s: s.title, page.sections)), 40 | ["Section " + str(i + 1) for i in range(5)], 41 | ) 42 | 43 | def test_subsection_by_title(self): 44 | page = self.wiki.page("Test_1") 45 | section = page.section_by_title("Section 4") 46 | self.assertEqual(section.title, "Section 4") 47 | self.assertEqual(section.level, 1) 48 | 49 | def test_subsection_by_title_with_multiple_spans(self): 50 | page = self.wiki.page("Test_1") 51 | section = page.section_by_title("Section 5") 52 | self.assertEqual(section.title, "Section 5") 53 | 54 | def test_subsection(self): 55 | page = self.wiki.page("Test_1") 56 | section = page.section_by_title("Section 4") 57 | self.assertEqual(section.title, "Section 4") 58 | self.assertEqual(section.text, "") 59 | self.assertEqual(len(section.sections), 2) 60 | 61 | def test_subsubsection(self): 62 | page = self.wiki.page("Test_1") 63 | section = page.section_by_title("Section 4.2.2") 64 | self.assertEqual(section.title, "Section 4.2.2") 65 | self.assertEqual(section.text, "Text for section 4.2.2\n\n\n
") 66 | self.assertEqual( 67 | repr(section), 68 | "Section: Section 4.2.2 (3):\n" 69 | + "Text for section 4.2.2\n\n\n
\n" 70 | + "Subsections (0):\n", 71 | ) 72 | self.assertEqual(len(section.sections), 0) 73 | 74 | def test_subsection_by_title_return_last(self): 75 | page = self.wiki.page("Test_Nested") 76 | section = page.section_by_title("Subsection B") 77 | self.assertEqual(section.title, "Subsection B") 78 | self.assertEqual(section.text, "Text for section 3.B\n\n\n
") 79 | self.assertEqual(len(section.sections), 0) 80 | 81 | def test_subsections_by_title(self): 82 | page = self.wiki.page("Test_Nested") 83 | sections = page.sections_by_title("Subsection B") 84 | self.assertEqual(len(sections), 3) 85 | self.assertEqual( 86 | [s.text for s in sections], 87 | [ 88 | "Text for section 1.B\n\n\n
", 89 | "Text for section 2.B\n\n\n
", 90 | "Text for section 3.B\n\n\n
", 91 | ], 92 | ) 93 | 94 | def test_text(self): 95 | page = self.wiki.page("Test_1") 96 | self.maxDiff = None 97 | self.assertEqual( 98 | page.text, 99 | ( 100 | "Summary text\n\n
\n\n" 101 | + "Text for section 1
\n\n" 103 | + "Text for section 1.1\n\n\n
\n\n" 105 | + "Text for section 1.2\n\n\n
\n\n" 107 | + "Text for section 2\n\n\n
\n\n" 109 | + "Text for section 3\n\n\n
\n\n" 111 | + "Text for section 4.1\n\n\n
\n\n" 114 | + "Text for section 4.2\n\n\n
\n\n" 116 | + "Text for section 4.2.1\n\n\n
\n\n" 118 | + "Text for section 4.2.2\n\n\n
\n\n" 120 | + "Text for section 5\n\n\n
\n\n" 122 | + "Text for section 5.1\n\n\n
" 124 | ), 125 | ) 126 | 127 | def test_with_erroneous_edit(self): 128 | page = self.wiki.page("Test_Edit") 129 | self.maxDiff = None 130 | section = page.section_by_title("Section with Edit") 131 | self.assertEqual(section.title, "Section with Edit") 132 | self.assertEqual( 133 | page.text, 134 | ( 135 | "Summary text\n\n
\n\n" 136 | + "Text for section 1
\n\n" 138 | "Text for section with edit\n\n\n
" 140 | ), 141 | ) 142 | -------------------------------------------------------------------------------- /tests/extract_wiki_format_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from tests.mock_data import user_agent 4 | from tests.mock_data import wikipedia_api_request 5 | import wikipediaapi 6 | 7 | 8 | class TestWikiFormatExtracts(unittest.TestCase): 9 | def setUp(self): 10 | self.wiki = wikipediaapi.Wikipedia(user_agent, "en") 11 | self.wiki._query = wikipedia_api_request(self.wiki) 12 | 13 | def test_title_before_fetching(self): 14 | page = self.wiki.page("Test_1") 15 | self.assertEqual(page.title, "Test_1") 16 | 17 | def test_pageid(self): 18 | page = self.wiki.page("Test_1") 19 | self.assertEqual(page.pageid, 4) 20 | 21 | def test_title_after_fetching(self): 22 | page = self.wiki.page("Test_1") 23 | page._fetch("extracts") 24 | self.assertEqual(page.title, "Test 1") 25 | 26 | def test_summary(self): 27 | page = self.wiki.page("Test_1") 28 | self.assertEqual(page.summary, "Summary text") 29 | 30 | def test_section_count(self): 31 | page = self.wiki.page("Test_1") 32 | self.assertEqual(len(page.sections), 5) 33 | 34 | def test_top_level_section_titles(self): 35 | page = self.wiki.page("Test_1") 36 | self.assertEqual( 37 | list(map(lambda s: s.title, page.sections)), 38 | ["Section " + str(i + 1) for i in range(5)], 39 | ) 40 | 41 | def test_subsection_by_title(self): 42 | page = self.wiki.page("Test_1") 43 | section = page.section_by_title("Section 4") 44 | self.assertEqual(section.title, "Section 4") 45 | self.assertEqual(section.level, 1) 46 | 47 | def test_subsection(self): 48 | page = self.wiki.page("Test_1") 49 | section = page.section_by_title("Section 4") 50 | self.assertEqual(section.title, "Section 4") 51 | self.assertEqual(section.text, "") 52 | self.assertEqual(len(section.sections), 2) 53 | 54 | def test_subsubsection(self): 55 | page = self.wiki.page("Test_1") 56 | section = 
page.section_by_title("Section 4.2.2") 57 | self.assertEqual(section.title, "Section 4.2.2") 58 | self.assertEqual(section.text, "Text for section 4.2.2") 59 | self.assertEqual( 60 | repr(section), 61 | "Section: Section 4.2.2 (3):\n" 62 | + "Text for section 4.2.2\n" 63 | + "Subsections (0):\n", 64 | ) 65 | self.assertEqual(len(section.sections), 0) 66 | 67 | def test_text(self): 68 | page = self.wiki.page("Test_1") 69 | self.maxDiff = None 70 | self.assertEqual( 71 | page.text, 72 | ( 73 | "Summary text\n\n" 74 | + "Section 1\n" 75 | + "Text for section 1\n\n" 76 | + "Section 1.1\n" 77 | + "Text for section 1.1\n\n" 78 | + "Section 1.2\n" 79 | + "Text for section 1.2\n\n" 80 | + "Section 2\n" 81 | + "Text for section 2\n\n" 82 | + "Section 3\n" 83 | + "Text for section 3\n\n" 84 | + "Section 4\n" 85 | + "Section 4.1\n" 86 | + "Text for section 4.1\n\n" 87 | + "Section 4.2\n" 88 | + "Text for section 4.2\n\n" 89 | + "Section 4.2.1\n" 90 | + "Text for section 4.2.1\n\n" 91 | + "Section 4.2.2\n" 92 | + "Text for section 4.2.2\n\n" 93 | + "Section 5\n" 94 | + "Text for section 5\n\n" 95 | + "Section 5.1\n" 96 | + "Text for section 5.1" 97 | ), 98 | ) 99 | 100 | def test_text_and_summary_without_sections(self): 101 | page = self.wiki.page("No_Sections") 102 | self.maxDiff = None 103 | self.assertEqual(page.text, ("Summary text")) 104 | self.assertEqual(page.summary, ("Summary text")) 105 | -------------------------------------------------------------------------------- /tests/langlinks_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from tests.mock_data import user_agent 4 | from tests.mock_data import wikipedia_api_request 5 | import wikipediaapi 6 | 7 | 8 | class TestLangLinks(unittest.TestCase): 9 | def setUp(self): 10 | self.wiki = wikipediaapi.Wikipedia(user_agent, "en") 11 | self.wiki._query = wikipedia_api_request(self.wiki) 12 | 13 | def test_langlinks_count(self): 14 | page = 
self.wiki.page("Test_1") 15 | self.assertEqual(len(page.langlinks), 3) 16 | 17 | def test_langlinks_titles(self): 18 | page = self.wiki.page("Test_1") 19 | self.assertEqual( 20 | list(sorted(map(lambda s: s.title, page.langlinks.values()))), 21 | ["Test 1 - " + str(i + 1) for i in range(3)], 22 | ) 23 | 24 | def test_langlinks_lang_values(self): 25 | page = self.wiki.page("Test_1") 26 | self.assertEqual( 27 | list(sorted(map(lambda s: s.language, page.langlinks.values()))), 28 | ["l" + str(i + 1) for i in range(3)], 29 | ) 30 | 31 | def test_langlinks_lang_keys(self): 32 | page = self.wiki.page("Test_1") 33 | self.assertEqual( 34 | list(sorted(page.langlinks.keys())), ["l" + str(i + 1) for i in range(3)] 35 | ) 36 | 37 | def test_langlinks_urls(self): 38 | page = self.wiki.page("Test_1") 39 | self.assertEqual( 40 | list(sorted(map(lambda s: s.fullurl, page.langlinks.values()))), 41 | [ 42 | ( 43 | "https://l" 44 | + str(i + 1) 45 | + ".wikipedia.org/wiki/Test_1_-_" 46 | + str(i + 1) 47 | ) 48 | for i in range(3) 49 | ], 50 | ) 51 | 52 | def test_jump_between_languages(self): 53 | page = self.wiki.page("Test_1") 54 | langlinks = page.langlinks 55 | p1 = langlinks["l1"] 56 | self.assertEqual(p1.language, "l1") 57 | self.assertEqual(p1.pageid, 10) 58 | 59 | def test_langlinks_no_langlink_count(self): 60 | page = self.wiki.page("No_LangLinks") 61 | self.assertEqual(len(page.langlinks), 0) 62 | -------------------------------------------------------------------------------- /tests/links_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from tests.mock_data import user_agent 4 | from tests.mock_data import wikipedia_api_request 5 | import wikipediaapi 6 | 7 | 8 | class TestLinks(unittest.TestCase): 9 | def setUp(self): 10 | self.wiki = wikipediaapi.Wikipedia(user_agent, "en") 11 | self.wiki._query = wikipedia_api_request(self.wiki) 12 | 13 | def test_links_single_page_count(self): 14 | page = 
self.wiki.page("Test_1") 15 | self.assertEqual(len(page.links), 3) 16 | 17 | def test_links_single_page_titles(self): 18 | page = self.wiki.page("Test_1") 19 | self.assertEqual( 20 | list(sorted(map(lambda s: s.title, page.links.values()))), 21 | ["Title - " + str(i + 1) for i in range(3)], 22 | ) 23 | 24 | def test_links_multi_page_count(self): 25 | page = self.wiki.page("Test_2") 26 | self.assertEqual(len(page.links), 5) 27 | 28 | def test_links_multi_page_titles(self): 29 | page = self.wiki.page("Test_2") 30 | self.assertEqual( 31 | list(sorted(map(lambda s: s.title, page.links.values()))), 32 | ["Title - " + str(i + 1) for i in range(5)], 33 | ) 34 | 35 | def test_links_no_links_count(self): 36 | page = self.wiki.page("No_Links") 37 | self.assertEqual(len(page.links), 0) 38 | 39 | def test_links_from_variant(self): 40 | wiki = wikipediaapi.Wikipedia(user_agent, "zh", "zh-tw") 41 | wiki._query = wikipedia_api_request(wiki) 42 | page = wiki.page("Test_Zh-Tw") 43 | self.assertEqual( 44 | list(sorted(map(lambda s: (s.title, s.variant), page.links.values()))), 45 | [("Title - Zh-Tw - " + str(i + 1), "zh-tw") for i in range(3)], 46 | ) 47 | -------------------------------------------------------------------------------- /tests/mock_data.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | user_agent = "UnitTests (bot@example.com)" 4 | 5 | 6 | def wikipedia_api_request(wiki): 7 | def api_request(page, params): 8 | used_params = wiki._construct_params(page, params) 9 | query = "" 10 | for k in sorted(used_params.keys()): 11 | query += k + "=" + str(used_params[k]) + "&" 12 | 13 | return _MOCK_DATA[page.language + ":" + query] 14 | 15 | return api_request 16 | 17 | 18 | _MOCK_DATA = { 19 | "en:action=query&explaintext=1&exsectionformat=wiki&format=json&prop=extracts&redirects=1&titles=Test_1&": { 20 | "batchcomplete": "", 21 | "warnings": { 22 | "extracts": { 23 | "*": '"exlimit" was too large for a whole 
article extracts request, lowered to 1.' 24 | } 25 | }, 26 | "query": { 27 | "normalized": [{"from": "Test_1", "to": "Test 1"}], 28 | "pages": { 29 | "4": { 30 | "pageid": 4, 31 | "ns": 0, 32 | "title": "Test 1", 33 | "extract": ( 34 | "Summary text\n\n\n" 35 | + "== Section 1 ==\n" 36 | + "Text for section 1\n\n\n" 37 | + "=== Section 1.1 ===\n" 38 | + "Text for section 1.1\n\n\n" 39 | + "=== Section 1.2 ===\n" 40 | + "Text for section 1.2\n\n\n" 41 | + "== Section 2 ==\n" 42 | + "Text for section 2\n\n\n" 43 | + "== Section 3 ==\n" 44 | + "Text for section 3\n\n\n" 45 | + "== Section 4 ==\n\n\n" 46 | + "=== Section 4.1 ===\n" 47 | + "Text for section 4.1\n\n\n" 48 | + "=== Section 4.2 ===\n" 49 | + "Text for section 4.2\n\n\n" 50 | + "==== Section 4.2.1 ====\n" 51 | + "Text for section 4.2.1\n\n\n" 52 | + "==== Section 4.2.2 ====\n" 53 | + "Text for section 4.2.2\n\n\n" 54 | + "== Section 5 ==\n" 55 | + "Text for section 5\n\n\n" 56 | + "=== Section 5.1 ===\n" 57 | + "Text for section 5.1\n" 58 | ), 59 | } 60 | }, 61 | }, 62 | }, 63 | "en:action=query&explaintext=1&exsectionformat=wiki&format=json&prop=extracts&redirects=1&titles=No_Sections&": { 64 | "batchcomplete": "", 65 | "warnings": { 66 | "extracts": { 67 | "*": '"exlimit" was too large for a whole article extracts request, lowered to 1.' 68 | } 69 | }, 70 | "query": { 71 | "normalized": [{"from": "No_Sections", "to": "No Sections"}], 72 | "pages": { 73 | "4": { 74 | "pageid": 5, 75 | "ns": 0, 76 | "title": "No Sections", 77 | "extract": ("Summary text\n\n\n"), 78 | } 79 | }, 80 | }, 81 | }, 82 | "en:action=query&format=json&prop=extracts&redirects=1&titles=Test_1&": { 83 | "batchcomplete": "", 84 | "warnings": { 85 | "extracts": { 86 | "*": '"exlimit" was too large for a whole article extracts request, lowered to 1.' 
87 | } 88 | }, 89 | "query": { 90 | "normalized": [{"from": "Test_1", "to": "Test 1"}], 91 | "pages": { 92 | "4": { 93 | "pageid": 4, 94 | "ns": 0, 95 | "title": "Test 1", 96 | "extract": ( 97 | "Summary text\n\n
\n" 98 | + "Text for section 1
\n\n\n" 100 | + 'Text for section 1.1\n\n\n
" 102 | + "Text for section 1.2\n\n\n
" 104 | + 'Text for section 2\n\n\n
" 106 | + "Text for section 3\n\n\n
" 108 | + 'Text for section 4.1\n\n\n
" 111 | + 'Text for section 4.2\n\n\n
" 113 | + 'Text for section 4.2.1\n\n\n
" 115 | + 'Text for section 4.2.2\n\n\n
" 117 | + 'Text for section 5\n\n\n
" 119 | + 'Text for section 5.1\n\n\n
" 121 | ), 122 | } 123 | }, 124 | }, 125 | }, 126 | "en:action=query&format=json&prop=extracts&redirects=1&titles=Test_Nested&": { 127 | "batchcomplete": "", 128 | "warnings": { 129 | "extracts": { 130 | "*": '"exlimit" was too large for a whole article extracts request, lowered to 1.' 131 | } 132 | }, 133 | "query": { 134 | "normalized": [{"from": "Test_Nested", "to": "Test Nested"}], 135 | "pages": { 136 | "4": { 137 | "pageid": 14, 138 | "ns": 0, 139 | "title": "Test Nested", 140 | "extract": ( 141 | "Summary text\n\n
\n" 142 | + "Text for section 1
\n\n\n" 144 | + 'Text for section 1.A\n\n\n
" 146 | + "Text for section 1.B\n\n\n
" 148 | + 'Text for section 2\n\n\n
" 150 | + 'Text for section 2.A\n\n\n
" 152 | + "Text for section 2.B\n\n\n
" 154 | + "Text for section 3\n\n\n
" 156 | + 'Text for section 3.A\n\n\n
" 158 | + "Text for section 3.B\n\n\n
" 160 | ), 161 | } 162 | }, 163 | }, 164 | }, 165 | "en:action=query&format=json&prop=extracts&redirects=1&titles=Test_Edit&": { 166 | "batchcomplete": "", 167 | "warnings": { 168 | "extracts": { 169 | "*": '"exlimit" was too large for a whole article extracts request, lowered to 1.' 170 | } 171 | }, 172 | "query": { 173 | "normalized": [{"from": "Test_Edit", "to": "Test Edit"}], 174 | "pages": { 175 | "4": { 176 | "pageid": 4, 177 | "ns": 0, 178 | "title": "Test Edit", 179 | "extract": ( 180 | "Summary text\n\n
\n" 181 | + "Text for section 1
\n\n\n" 183 | + 'Text for section with edit\n\n\n
" 185 | ), 186 | } 187 | }, 188 | }, 189 | }, 190 | "en:action=query&format=json&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle|varianttitles&prop=info&redirects=1&titles=Test_1&": { 191 | "batchcomplete": "", 192 | "query": { 193 | "normalized": [{"from": "Test_1", "to": "Test 1"}], 194 | "pages": { 195 | "4": { 196 | "pageid": 4, 197 | "ns": 0, 198 | "title": "Test 1", 199 | "missing": "", 200 | "contentmodel": "wikitext", 201 | "pagelanguage": "en", 202 | "pagelanguagehtmlcode": "en", 203 | "pagelanguagedir": "ltr", 204 | "protection": [ 205 | {"type": "create", "level": "sysop", "expiry": "infinity"} 206 | ], 207 | "restrictiontypes": ["create"], 208 | "notificationtimestamp": "", 209 | "fullurl": "https://en.wikipedia.org/wiki/Test_1", 210 | "editurl": "https://en.wikipedia.org/w/index.php?title=Test_1&action=edit", 211 | "canonicalurl": "https://en.wikipedia.org/wiki/Test_1", 212 | "readable": "", 213 | "preload": None, 214 | "displaytitle": "Test 1", 215 | } 216 | }, 217 | }, 218 | }, 219 | "l1:action=query&format=json&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle|varianttitles&prop=info&redirects=1&titles=Test 1 - 1&": { 220 | "batchcomplete": "", 221 | "query": { 222 | "pages": { 223 | "10": { 224 | "pageid": 10, 225 | "ns": 0, 226 | "title": "Test 1 - 1", 227 | "missing": "", 228 | "contentmodel": "wikitext", 229 | "pagelanguage": "l1", 230 | "pagelanguagehtmlcode": "l1", 231 | "pagelanguagedir": "ltr", 232 | "protection": [ 233 | {"type": "create", "level": "sysop", "expiry": "infinity"} 234 | ], 235 | "restrictiontypes": ["create"], 236 | "notificationtimestamp": "", 237 | "fullurl": "https://l1.wikipedia.org/wiki/Test 1 - 1", 238 | "editurl": "https://l1.wikipedia.org/w/index.php?title=Test 1 - 1&action=edit", 239 | "canonicalurl": "https://l1.wikipedia.org/wiki/Test 1 - 1", 240 | "readable": "", 
241 | "preload": None, 242 | "displaytitle": "Test 1 - 1", 243 | } 244 | } 245 | }, 246 | }, 247 | "en:action=query&format=json&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle|varianttitles&prop=info&redirects=1&titles=NonExisting&": { 248 | "batchcomplete": "", 249 | "query": { 250 | "pages": { 251 | "-1": { 252 | "ns": 0, 253 | "title": "NonExisting", 254 | "missing": "", 255 | "contentmodel": "wikitext", 256 | "pagelanguage": "en", 257 | "pagelanguagehtmlcode": "en", 258 | "pagelanguagedir": "ltr", 259 | "protection": [], 260 | "restrictiontypes": ["create"], 261 | "notificationtimestamp": "", 262 | "fullurl": "https://en.wikipedia.org/wiki/NonExisting", 263 | "editurl": "https://en.wikipedia.org/w/index.php?title=NonExisting&action=edit", 264 | "canonicalurl": "https://en.wikipedia.org/wiki/NonExisting", 265 | "readable": "", 266 | "preload": None, 267 | "displaytitle": "NonExisting", 268 | } 269 | } 270 | }, 271 | }, 272 | "en:action=query&format=json&lllimit=500&llprop=url&prop=langlinks&redirects=1&titles=Test_1&": { 273 | "batchcomplete": "", 274 | "query": { 275 | "pages": { 276 | "4": { 277 | "pageid": 4, 278 | "ns": 0, 279 | "title": "Test 1", 280 | "langlinks": [ 281 | { 282 | "lang": "l1", 283 | "url": "https://l1.wikipedia.org/wiki/Test_1_-_1", 284 | "*": "Test 1 - 1", 285 | }, 286 | { 287 | "lang": "l2", 288 | "url": "https://l2.wikipedia.org/wiki/Test_1_-_2", 289 | "*": "Test 1 - 2", 290 | }, 291 | { 292 | "lang": "l3", 293 | "url": "https://l3.wikipedia.org/wiki/Test_1_-_3", 294 | "*": "Test 1 - 3", 295 | }, 296 | ], 297 | } 298 | } 299 | }, 300 | }, 301 | "en:action=query&format=json&lllimit=500&llprop=url&prop=langlinks&redirects=1&titles=No_LangLinks&": { 302 | "batchcomplete": "", 303 | "query": { 304 | "pages": { 305 | "10": { 306 | "pageid": 10, 307 | "ns": 0, 308 | "title": "No LangLinks", 309 | } 310 | } 311 | }, 312 | }, 313 | 
"en:action=query&format=json&pllimit=500&prop=links&redirects=1&titles=Test_1&": { 314 | "query": { 315 | "pages": { 316 | "4": { 317 | "pageid": 4, 318 | "ns": 0, 319 | "title": "Test 1", 320 | "links": [ 321 | {"ns": 0, "title": "Title - 1"}, 322 | {"ns": 0, "title": "Title - 2"}, 323 | {"ns": 0, "title": "Title - 3"}, 324 | ], 325 | } 326 | } 327 | } 328 | }, 329 | "en:action=query&format=json&pllimit=500&prop=links&redirects=1&titles=Test_2&": { 330 | "continue": {"plcontinue": "5|0|Title_-_4", "continue": "||"}, 331 | "query": { 332 | "pages": { 333 | "4": { 334 | "pageid": 5, 335 | "ns": 0, 336 | "title": "Test 2", 337 | "links": [ 338 | {"ns": 0, "title": "Title - 1"}, 339 | {"ns": 0, "title": "Title - 2"}, 340 | {"ns": 0, "title": "Title - 3"}, 341 | ], 342 | } 343 | } 344 | }, 345 | }, 346 | "en:action=query&format=json&plcontinue=5|0|Title_-_4&pllimit=500&prop=links&redirects=1&titles=Test_2&": { 347 | "query": { 348 | "pages": { 349 | "4": { 350 | "pageid": 5, 351 | "ns": 0, 352 | "title": "Test 2", 353 | "links": [ 354 | {"ns": 0, "title": "Title - 4"}, 355 | {"ns": 0, "title": "Title - 5"}, 356 | ], 357 | } 358 | } 359 | } 360 | }, 361 | "en:action=query&format=json&pllimit=500&prop=links&redirects=1&titles=No_Links&": { 362 | "query": { 363 | "pages": { 364 | "4": { 365 | "pageid": 11, 366 | "ns": 0, 367 | "title": "No_Links", 368 | } 369 | } 370 | } 371 | }, 372 | "en:action=query&cllimit=500&format=json&prop=categories&redirects=1&titles=Test_1&": { 373 | "batchcomplete": "", 374 | "query": { 375 | "pages": { 376 | "4": { 377 | "pageid": 4, 378 | "ns": 0, 379 | "title": "Test 1", 380 | "categories": [ 381 | {"ns": 14, "title": "Category:C1"}, 382 | {"ns": 14, "title": "Category:C2"}, 383 | {"ns": 14, "title": "Category:C3"}, 384 | ], 385 | } 386 | } 387 | }, 388 | }, 389 | "en:action=query&cmlimit=500&cmtitle=Category:C1&format=json&list=categorymembers&redirects=1&": { 390 | "query": { 391 | "categorymembers": [ 392 | {"ns": 0, "pageid": 4, 
"title": "Title - 1"}, 393 | {"ns": 0, "pageid": 5, "title": "Title - 2"}, 394 | {"ns": 0, "pageid": 6, "title": "Title - 3"}, 395 | ] 396 | } 397 | }, 398 | "en:action=query&cmlimit=500&cmtitle=Category:C2&format=json&list=categorymembers&redirects=1&": { 399 | "continue": {"cmcontinue": "5|0|Title_-_4", "continue": "-||"}, 400 | "query": { 401 | "categorymembers": [ 402 | {"ns": 0, "pageid": 4, "title": "Title - 1"}, 403 | {"ns": 0, "pageid": 5, "title": "Title - 2"}, 404 | {"ns": 0, "pageid": 6, "title": "Title - 3"}, 405 | ] 406 | }, 407 | }, 408 | "en:action=query&cmcontinue=5|0|Title_-_4&cmlimit=500&cmtitle=Category:C2&format=json&list=categorymembers&redirects=1&": { 409 | "query": { 410 | "categorymembers": [ 411 | {"ns": 0, "pageid": 7, "title": "Title - 4"}, 412 | {"ns": 0, "pageid": 8, "title": "Title - 5"}, 413 | ] 414 | } 415 | }, 416 | "en:action=query&cllimit=500&format=json&prop=categories&redirects=1&titles=No_Categories&": { 417 | "batchcomplete": "", 418 | "query": { 419 | "pages": { 420 | "4": { 421 | "pageid": 4, 422 | "ns": 0, 423 | "title": "Test 1", 424 | } 425 | } 426 | }, 427 | }, 428 | "en:action=query&bllimit=500&bltitle=Non_Existent&format=json&list=backlinks&redirects=1&": { 429 | "query": {"backlinks": []} 430 | }, 431 | "en:action=query&bllimit=500&bltitle=Test_1&format=json&list=backlinks&redirects=1&": { 432 | "query": { 433 | "backlinks": [ 434 | {"ns": 0, "title": "Title - 1"}, 435 | {"ns": 0, "title": "Title - 2"}, 436 | {"ns": 0, "title": "Title - 3"}, 437 | ] 438 | } 439 | }, 440 | "en:action=query&bllimit=500&bltitle=Test_2&format=json&list=backlinks&redirects=1&": { 441 | "continue": {"blcontinue": "5|0|Title_-_4", "continue": "||"}, 442 | "query": { 443 | "backlinks": [ 444 | {"ns": 0, "title": "Title - 1"}, 445 | {"ns": 0, "title": "Title - 2"}, 446 | {"ns": 0, "title": "Title - 3"}, 447 | ] 448 | }, 449 | }, 450 | 
"en:action=query&blcontinue=5|0|Title_-_4&bllimit=500&bltitle=Test_2&format=json&list=backlinks&redirects=1&": { 451 | "query": { 452 | "backlinks": [ 453 | {"ns": 0, "title": "Title - 4"}, 454 | {"ns": 0, "title": "Title - 5"}, 455 | ] 456 | } 457 | }, 458 | "hi:action=query&format=json&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle|varianttitles&prop=info&redirects=1&titles=पाइथन&": { 459 | "batchcomplete": "", 460 | "query": { 461 | "pages": { 462 | "10": { 463 | "pageid": 10, 464 | "ns": 0, 465 | "title": "पाइथन", 466 | "missing": "", 467 | "contentmodel": "wikitext", 468 | "pagelanguage": "hi", 469 | "pagelanguagehtmlcode": "hi", 470 | "pagelanguagedir": "ltr", 471 | "protection": [ 472 | {"type": "create", "level": "sysop", "expiry": "infinity"} 473 | ], 474 | "restrictiontypes": ["create"], 475 | "notificationtimestamp": "", 476 | "fullurl": "https://l1.wikipedia.org/wiki/Test 1 - 1", 477 | "editurl": "https://l1.wikipedia.org/w/index.php?title=Test 1 - 1&action=edit", 478 | "canonicalurl": "https://l1.wikipedia.org/wiki/Test 1 - 1", 479 | "readable": "", 480 | "preload": None, 481 | "displaytitle": "पाइथन", 482 | } 483 | } 484 | }, 485 | }, 486 | "zh:action=query&explaintext=1&exsectionformat=wiki&format=json&prop=extracts&redirects=1&titles=Test_Zh-Tw&variant=zh-tw&": { 487 | "batchcomplete": "", 488 | "warnings": { 489 | "extracts": { 490 | "*": '"exlimit" was too large for a whole article extracts request, lowered to 1.' 
491 | } 492 | }, 493 | "query": { 494 | "normalized": [{"from": "Test_Zh-Tw", "to": "Test Zh-Tw"}], 495 | "pages": { 496 | "4": { 497 | "pageid": 44, 498 | "ns": 0, 499 | "title": "Test Zh-Tw", 500 | "extract": ("ZH-TW\n\n\n"), 501 | } 502 | }, 503 | }, 504 | }, 505 | "zh:action=query&format=json&pllimit=500&prop=links&redirects=1&titles=Test_Zh-Tw&variant=zh-tw&": { 506 | "query": { 507 | "pages": { 508 | "44": { 509 | "pageid": 44, 510 | "ns": 0, 511 | "title": "Test Zh-Tw", 512 | "links": [ 513 | {"ns": 0, "title": "Title - Zh-Tw - 1"}, 514 | {"ns": 0, "title": "Title - Zh-Tw - 2"}, 515 | {"ns": 0, "title": "Title - Zh-Tw - 3"}, 516 | ], 517 | } 518 | } 519 | } 520 | }, 521 | "zh:action=query&format=json&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle|varianttitles&prop=info&redirects=1&titles=Test_Zh-Tw&variant=zh-tw&": { 522 | "batchcomplete": "", 523 | "query": { 524 | "pages": { 525 | "44": { 526 | "pageid": 44, 527 | "ns": 0, 528 | "title": "Test Zh-Tw", 529 | "missing": "", 530 | "contentmodel": "wikitext", 531 | "pagelanguage": "zh", 532 | "pagelanguagehtmlcode": "zh", 533 | "pagelanguagedir": "ltr", 534 | "protection": [ 535 | {"type": "create", "level": "sysop", "expiry": "infinity"} 536 | ], 537 | "restrictiontypes": ["create"], 538 | "notificationtimestamp": "", 539 | "fullurl": "https://zh.wikipedia.org/wiki/Test Zh-Tw", 540 | "editurl": "https://zh.wikipedia.org/w/index.php?title=Test Zh-Tw&action=edit", 541 | "canonicalurl": "https://zh.wikipedia.org/wiki/Test Zh-Tw", 542 | "readable": "", 543 | "preload": None, 544 | "displaytitle": "Test Zh-Tw", 545 | "varianttitles": { 546 | "zh": "Test Zh", 547 | "zh-hans": "Test Zh-Hans", 548 | "zh-tw": "Test Zh-Tw", 549 | }, 550 | } 551 | } 552 | }, 553 | }, 554 | 
"en:action=query&foo=bar&format=json&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle|varianttitles&prop=info&redirects=1&titles=Extra_API_Params&": { 555 | "batchcomplete": "", 556 | "query": { 557 | "normalized": [{"from": "Extra_API_Params", "to": "Extra API Params"}], 558 | "pages": { 559 | "4": { 560 | "pageid": 9, 561 | "ns": 0, 562 | "title": "Extra API Params", 563 | "missing": "", 564 | "contentmodel": "wikitext", 565 | "pagelanguage": "en", 566 | "pagelanguagehtmlcode": "en", 567 | "pagelanguagedir": "ltr", 568 | "protection": [ 569 | {"type": "create", "level": "sysop", "expiry": "infinity"} 570 | ], 571 | "restrictiontypes": ["create"], 572 | "notificationtimestamp": "", 573 | "fullurl": "https://en.wikipedia.org/wiki/Extra_API_Params", 574 | "editurl": "https://en.wikipedia.org/w/index.php?title=Extra_API_Params&action=edit", 575 | "canonicalurl": "https://en.wikipedia.org/wiki/Extra_API_Params", 576 | "readable": "", 577 | "preload": None, 578 | "displaytitle": "Extra API Params", 579 | } 580 | }, 581 | }, 582 | }, 583 | } 584 | -------------------------------------------------------------------------------- /tests/wikipedia_page_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from tests.mock_data import user_agent 4 | from tests.mock_data import wikipedia_api_request 5 | import wikipediaapi 6 | 7 | 8 | class TestWikipediaPage(unittest.TestCase): 9 | def setUp(self): 10 | self.wiki = wikipediaapi.Wikipedia(user_agent, "en") 11 | self.wiki._query = wikipedia_api_request(self.wiki) 12 | 13 | def test_repr_before_fetching(self): 14 | page = self.wiki.page("Test_1") 15 | self.assertEqual(repr(page), "Test_1 (lang: en, variant: None, id: ??, ns: 0)") 16 | 17 | def test_repr_after_fetching(self): 18 | page = self.wiki.page("Test_1") 19 | self.assertEqual(repr(page), "Test_1 (lang: en, variant: None, id: ??, ns: 
0)") 20 | self.assertEqual(page.pageid, 4) 21 | self.assertEqual(repr(page), "Test 1 (lang: en, variant: None, id: 4, ns: 0)") 22 | 23 | def test_extract(self): 24 | page = self.wiki.page("Test_1") 25 | self.assertEqual(page.pageid, 4) 26 | self.assertEqual(page.title, "Test 1") 27 | self.assertEqual(page.ns, 0) 28 | self.assertEqual(page.contentmodel, "wikitext") 29 | self.assertEqual(page.pagelanguage, "en") 30 | self.assertEqual(page.pagelanguagedir, "ltr") 31 | self.assertEqual(page.fullurl, "https://en.wikipedia.org/wiki/Test_1") 32 | self.assertEqual( 33 | page.editurl, 34 | "https://en.wikipedia.org/w/index.php?title=Test_1&action=edit", 35 | ) 36 | self.assertEqual(page.canonicalurl, "https://en.wikipedia.org/wiki/Test_1") 37 | self.assertEqual(page.displaytitle, "Test 1") 38 | self.assertEqual(page.variant, None) 39 | 40 | def test_unknown_property(self): 41 | page = self.wiki.page("Test_1") 42 | with self.assertRaises(AttributeError): 43 | page.unknown_property 44 | 45 | def test_nonexisting(self): 46 | page = self.wiki.page("NonExisting") 47 | self.assertFalse(page.exists()) 48 | 49 | def test_existing(self): 50 | page = self.wiki.page("Test_1") 51 | self.assertTrue(page.exists()) 52 | 53 | def test_article_method(self): 54 | p = self.wiki.page("Test_1") 55 | a = self.wiki.article("Test_1") 56 | self.assertEqual(p.pageid, a.pageid) 57 | 58 | def test_article_title_unquote(self): 59 | # https://github.com/goldsmith/Wikipedia/issues/190 60 | w = wikipediaapi.Wikipedia(user_agent, "hi") 61 | w._query = wikipedia_api_request(w) 62 | p_encoded = w.article( 63 | "%E0%A4%AA%E0%A4%BE%E0%A4%87%E0%A4%A5%E0%A4%A8", 64 | unquote=True, 65 | ) 66 | p_decoded = w.article("पाइथन") 67 | self.assertEqual(p_encoded.pageid, p_decoded.pageid) 68 | 69 | def test_page_title_unquote(self): 70 | # https://github.com/goldsmith/Wikipedia/issues/190 71 | w = wikipediaapi.Wikipedia(user_agent, "hi") 72 | w._query = wikipedia_api_request(w) 73 | p_encoded = w.page( 74 | 
"%E0%A4%AA%E0%A4%BE%E0%A4%87%E0%A4%A5%E0%A4%A8", 75 | unquote=True, 76 | ) 77 | p_decoded = w.page("पाइथन") 78 | self.assertEqual(p_encoded.pageid, p_decoded.pageid) 79 | 80 | def test_page_with_int_namespace(self): 81 | page = self.wiki.page("NonExisting", ns=110) 82 | self.assertFalse(page.exists()) 83 | self.assertEqual(110, page.namespace) 84 | 85 | def test_page_with_variant(self): 86 | wiki = wikipediaapi.Wikipedia(user_agent, "zh", "zh-tw") 87 | wiki._query = wikipedia_api_request(wiki) 88 | page = wiki.page("Test_Zh-Tw") 89 | self.assertTrue(page.exists()) 90 | self.assertEqual(page.pageid, 44) 91 | self.assertEqual(page.title, "Test Zh-Tw") 92 | self.assertEqual(page.variant, "zh-tw") 93 | self.assertEqual( 94 | page.varianttitles, 95 | {"zh": "Test Zh", "zh-hans": "Test Zh-Hans", "zh-tw": "Test Zh-Tw"}, 96 | ) 97 | 98 | def test_page_with_extra_parameters(self): 99 | wiki = wikipediaapi.Wikipedia(user_agent, "en", extra_api_params={"foo": "bar"}) 100 | wiki._query = wikipedia_api_request(wiki) 101 | page = wiki.page("Extra_API_Params") 102 | self.assertTrue(page.exists()) 103 | -------------------------------------------------------------------------------- /tests/wikipedia_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import wikipediaapi 4 | 5 | 6 | class TestWikipedia(unittest.TestCase): 7 | def test_missing_user_agent_should_fail(self): 8 | with self.assertRaises(AssertionError) as e: 9 | wikipediaapi.Wikipedia("en") 10 | self.assertEqual( 11 | str(e.exception), 12 | str( 13 | AssertionError( 14 | "Please, be nice to Wikipedia and specify user agent - " 15 | + "https://meta.wikimedia.org/wiki/User-Agent_policy. " 16 | + "Current user_agent: 'en' is not sufficient. 
" 17 | + "Use Wikipedia(user_agent='your-user-agent', language='en')" 18 | ) 19 | ), 20 | ) 21 | 22 | def test_swapped_parameters_in_constructor(self): 23 | with self.assertRaises(AssertionError) as e: 24 | wikipediaapi.Wikipedia("en", "my-user-agent") 25 | self.assertEqual( 26 | str(e.exception), 27 | str( 28 | AssertionError( 29 | "Please, be nice to Wikipedia and specify user agent - " 30 | + "https://meta.wikimedia.org/wiki/User-Agent_policy. " 31 | + "Current user_agent: 'en' is not sufficient. " 32 | + "Use Wikipedia(user_agent='your-user-agent', language='en')" 33 | ) 34 | ), 35 | ) 36 | 37 | def test_empty_parameters_in_constructor(self): 38 | with self.assertRaises(AssertionError) as e: 39 | wikipediaapi.Wikipedia("", "") 40 | self.assertEqual( 41 | str(e.exception), 42 | str( 43 | AssertionError( 44 | "Please, be nice to Wikipedia and specify user agent - " 45 | + "https://meta.wikimedia.org/wiki/User-Agent_policy. " 46 | + "Current user_agent: '' is not sufficient. " 47 | + "Use Wikipedia(user_agent='your-user-agent', language='your-language')" 48 | ) 49 | ), 50 | ) 51 | 52 | def test_empty_language_in_constructor(self): 53 | with self.assertRaises(AssertionError) as e: 54 | wikipediaapi.Wikipedia("test-user-agent", "") 55 | self.assertEqual( 56 | str(e.exception), 57 | str( 58 | AssertionError( 59 | "Specify language. Current language: '' is not sufficient. 
" 60 | + "Use Wikipedia(user_agent='test-user-agent', language='your-language')" 61 | ) 62 | ), 63 | ) 64 | 65 | def test_long_language_and_user_agent(self): 66 | wiki = wikipediaapi.Wikipedia( 67 | user_agent="param-user-agent", language="very-long-language" 68 | ) 69 | self.assertIsNotNone(wiki) 70 | self.assertEqual(wiki.language, "very-long-language") 71 | self.assertIsNone(wiki.variant) 72 | 73 | def test_user_agent_is_used(self): 74 | wiki = wikipediaapi.Wikipedia( 75 | user_agent="param-user-agent", 76 | ) 77 | self.assertIsNotNone(wiki) 78 | user_agent = wiki._session.headers.get("User-Agent") 79 | self.assertEqual( 80 | user_agent, 81 | "param-user-agent (" + wikipediaapi.USER_AGENT + ")", 82 | ) 83 | self.assertEqual(wiki.language, "en") 84 | 85 | def test_user_agent_in_headers_is_fine(self): 86 | wiki = wikipediaapi.Wikipedia( 87 | "en", 88 | headers={"User-Agent": "header-user-agent"}, 89 | ) 90 | self.assertIsNotNone(wiki) 91 | user_agent = wiki._session.headers.get("User-Agent") 92 | self.assertEqual( 93 | user_agent, 94 | "header-user-agent (" + wikipediaapi.USER_AGENT + ")", 95 | ) 96 | 97 | def test_user_agent_in_headers_win(self): 98 | wiki = wikipediaapi.Wikipedia( 99 | user_agent="param-user-agent", 100 | headers={"User-Agent": "header-user-agent"}, 101 | ) 102 | self.assertIsNotNone(wiki) 103 | user_agent = wiki._session.headers.get("User-Agent") 104 | self.assertEqual( 105 | user_agent, 106 | "header-user-agent (" + wikipediaapi.USER_AGENT + ")", 107 | ) 108 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Install old pythons 2 | # https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa 3 | [tox] 4 | envlist = py39, py310, py311, py312, py313 5 | 6 | [testenv] 7 | deps = 8 | typing 9 | 10 | commands = 11 | python3 -m unittest discover tests/ '*test.py' 12 | 
-------------------------------------------------------------------------------- /wikipediaapi/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wikipedia-API is easy to use wrapper for extracting information from Wikipedia. 3 | 4 | It supports extracting texts, sections, links, categories, translations, etc. 5 | from Wikipedia. Documentation provides code snippets for the most common use 6 | cases. 7 | """ 8 | 9 | __version__ = (0, 8, 1) 10 | 11 | from collections import defaultdict 12 | from enum import IntEnum 13 | import logging 14 | import re 15 | from typing import Any, Optional, Union 16 | from urllib import parse 17 | 18 | import requests 19 | 20 | USER_AGENT = ( 21 | "Wikipedia-API/" 22 | + ".".join(str(s) for s in __version__) 23 | + "; https://github.com/martin-majlis/Wikipedia-API/" 24 | ) 25 | 26 | MIN_USER_AGENT_LEN = 5 27 | MAX_LANG_LEN = 5 28 | 29 | log = logging.getLogger(__name__) 30 | 31 | 32 | # https://www.mediawiki.org/wiki/API:Main_page 33 | PagesDict = dict[str, "WikipediaPage"] 34 | 35 | 36 | class ExtractFormat(IntEnum): 37 | """Represents extraction format.""" 38 | 39 | WIKI = 1 40 | """ 41 | Allows recognizing subsections 42 | 43 | Example: https://goo.gl/PScNVV 44 | """ 45 | 46 | HTML = 2 47 | """ 48 | Alows retrieval of HTML tags 49 | 50 | Example: https://goo.gl/1Jwwpr 51 | """ 52 | 53 | # Plain: https://goo.gl/MAv2qz 54 | # Doesn't allow to recognize subsections 55 | # PLAIN = 3 56 | 57 | 58 | class Namespace(IntEnum): 59 | """ 60 | Represents namespace in Wikipedia 61 | 62 | You can gen list of possible namespaces here: 63 | 64 | * https://en.wikipedia.org/wiki/Wikipedia:Namespace 65 | * https://en.wikipedia.org/wiki/Wikipedia:Namespace#Programming 66 | 67 | Currently following namespaces are supported: 68 | """ 69 | 70 | MAIN = 0 71 | TALK = 1 72 | USER = 2 73 | USER_TALK = 3 74 | WIKIPEDIA = 4 75 | WIKIPEDIA_TALK = 5 76 | FILE = 6 77 | FILE_TALK = 7 78 | MEDIAWIKI = 8 79 | 
MEDIAWIKI_TALK = 9 80 | TEMPLATE = 10 81 | TEMPLATE_TALK = 11 82 | HELP = 12 83 | HELP_TALK = 13 84 | CATEGORY = 14 85 | CATEGORY_TALK = 15 86 | PORTAL = 100 87 | PORTAL_TALK = 101 88 | PROJECT = 102 89 | PROJECT_TALK = 103 90 | REFERENCE = 104 91 | REFERENCE_TALK = 105 92 | BOOK = 108 93 | BOOK_TALK = 109 94 | DRAFT = 118 95 | DRAFT_TALK = 119 96 | EDUCATION_PROGRAM = 446 97 | EDUCATION_PROGRAM_TALK = 447 98 | TIMED_TEXT = 710 99 | TIMED_TEXT_TALK = 711 100 | MODULE = 828 101 | MODULE_TALK = 829 102 | GADGET = 2300 103 | GADGET_TALK = 2301 104 | GADGET_DEFINITION = 2302 105 | GADGET_DEFINITION_TALK = 2303 106 | 107 | 108 | WikiNamespace = Union[Namespace, int] 109 | 110 | 111 | def namespace2int(namespace: WikiNamespace) -> int: 112 | """Converts namespace into integer""" 113 | if isinstance(namespace, Namespace): 114 | return namespace.value 115 | 116 | return namespace 117 | 118 | 119 | RE_SECTION = { 120 | ExtractFormat.WIKI: re.compile(r"\n\n *(==+) (.*?) (==+) *\n"), 121 | ExtractFormat.HTML: re.compile( 122 | r"\n? *