├── .github └── workflows │ ├── codeql-analysis.yml │ └── publish.yml ├── .gitignore ├── .idea ├── .gitignore ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── nonebot-plugin-mediawiki.iml └── vcs.xml ├── LICENSE ├── README.md ├── nonebot_plugin_mediawiki ├── __init__.py ├── config.py ├── config_manager.py ├── constants.py ├── exception.py ├── fakemwapi.py ├── mediawiki │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── constants.py │ ├── exceptions.py │ ├── mediawiki.py │ ├── mediawikipage.py │ └── utilities.py ├── utilities.py └── worker.py └── pyproject.toml /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ main ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ main ] 20 | schedule: 21 | - cron: '42 17 * * 2' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v4 42 | 43 | # Initializes the CodeQL tools for scanning. 44 | - name: Initialize CodeQL 45 | uses: github/codeql-action/init@v3 46 | with: 47 | languages: ${{ matrix.language }} 48 | # If you wish to specify custom queries, you can do so here or in a config file. 49 | # By default, queries listed here will override any specified in a config file. 50 | # Prefix the list here with "+" to use these queries and those in the config file. 51 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 52 | 53 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 54 | # If this step fails, then you should remove it and run the build manually (see below) 55 | - name: Autobuild 56 | uses: github/codeql-action/autobuild@v3 57 | 58 | # ℹ️ Command-line programs to run using the OS shell. 
59 | # 📚 https://git.io/JvXDl 60 | 61 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 62 | # and modify them (or add more) to build your code if your project 63 | # uses a compiled language 64 | 65 | #- run: | 66 | # make bootstrap 67 | # make release 68 | 69 | - name: Perform CodeQL Analysis 70 | uses: github/codeql-action/analyze@v3 71 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package to PyPI when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Build and Upload Python Package 10 | 11 | on: 12 | push: 13 | branches: 14 | - main 15 | paths-ignore: 16 | - '**/*.md' 17 | tags-ignore: 18 | - '**' 19 | workflow_dispatch: 20 | inputs: 21 | release: 22 | description: Publish a Release 23 | required: false 24 | default: false 25 | type: boolean 26 | 27 | permissions: 28 | contents: read 29 | 30 | jobs: 31 | release-build: 32 | runs-on: ubuntu-latest 33 | 34 | steps: 35 | - uses: actions/checkout@v4 36 | 37 | - uses: actions/setup-python@v5 38 | with: 39 | python-version: "3.x" 40 | 41 | - name: Build release distributions 42 | run: | 43 | # NOTE: put your own distribution build steps here. 
44 | pipx install poetry 45 | poetry install 46 | poetry build 47 | 48 | - name: Publish to PyPI 49 | if: ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.release == 'true') || contains(github.event.head_commit.message, '[release]') }} 50 | env: 51 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }} 52 | run: | 53 | poetry publish 54 | 55 | - name: Upload distributions 56 | uses: actions/upload-artifact@v4 57 | with: 58 | name: release-dists 59 | path: dist/ 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # poetry 88 | poetry.lock 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/nonebot-plugin-mediawiki.iml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 
26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 
64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 
101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. 
For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 
162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 
197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. 
Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. 
If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 
297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 
324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 
348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. 
If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 
416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | 633 | Copyright (C) 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published 637 | by the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see . 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | . 662 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | nonebot 3 |

4 | 5 |
6 | 7 | # nonebot-plugin-mediawiki 8 | 9 | _适用于 [NoneBot2](https://v2.nonebot.dev) 的 MediaWiki 查询插件_ 10 | 11 |
12 | 13 | ------ 14 | 15 |
16 | 17 | ![Python](https://img.shields.io/badge/python-3.8%2B-lightgrey) 18 | ![nonebot2](https://img.shields.io/badge/nonebot2-2.0.0b2-yellowgreen) 19 | [![GitHub license](https://img.shields.io/github/license/KoishiMoe/nonebot-plugin-mediawiki)](https://github.com/KoishiMoe/nonebot-plugin-mediawiki/blob/main/LICENSE) 20 | [![pypi](https://img.shields.io/pypi/v/nonebot-plugin-mediawiki?color=blue)](https://pypi.org/project/nonebot-plugin-mediawiki/) 21 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/nonebot-plugin-mediawiki) 22 | 23 | [![GitHub issues](https://img.shields.io/github/issues/KoishiMoe/nonebot-plugin-mediawiki)](https://github.com/KoishiMoe/nonebot-plugin-mediawiki/issues) 24 | [![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/KoishiMoe/nonebot-plugin-mediawiki?include_prereleases)](https://github.com/KoishiMoe/nonebot-plugin-mediawiki/releases) 25 | ![GitHub contributors](https://img.shields.io/github/contributors/KoishiMoe/nonebot-plugin-mediawiki) 26 | ![GitHub Repo stars](https://img.shields.io/github/stars/KoishiMoe/nonebot-plugin-mediawiki?style=social) 27 | 28 |
29 | 30 | ------ 31 | 32 | 本项目是 [Flandre](https://github.com/KoishiMoe/Flandre) 的 33 | [wiki](https://github.com/KoishiMoe/Flandre/tree/main/src/plugins/wiki) 组件,经简单修改成为独立插件发布 34 | 35 | ## 旧版本用户请注意 36 | 本插件在1.0版本进行了一次重构,同时更改了设置命令的语法(查询不受影响),请阅读文档的相应部分 37 | 38 | ## 关于更新 39 | 40 | 这个插件是我很久之前写的,由于个人精力有限,目前**并未**积极跟进上游更新,也**没有**进行兼容性测试。如果你在最新版的nonebot2上使用它时出现了问题,请在issue区指出。 41 | 42 | 功能更新会在后续有时间的时候在进行。当前计划中的有:跟进最新版nonebot,解除onebot适配器依赖,添加条目跟踪等功能,以及简化命令、优化管理相关操作,并对整个项目进行重构以优化性能、提升可维护性。 43 | 44 | ETA:无 45 | 46 | 47 | 48 | ## FAQ 49 | 50 |
51 | 这个插件是干什么的? 52 | 53 | * 在**群聊**中查wiki用的。此处的wiki特指基于[mediawiki](https://mediawiki.org)开设的wiki网站 54 | * 对大多数群可能都有用,但是使用频率大概会很低。从功能上说,我是为了一些具有特定的专业性话题的讨论群设计的。 55 | 例如Linux群,成员如果问了一个Wiki上已经写的很详细的问题,其他成员就可以直接利用bot来指引提问者去查看对应页面,而非自己去wiki上找, 56 | 然后再复制链接发出来。另外,对于一些wiki项目的编辑交流群来说,这种插件也可能有助于提高编辑间的交流效率 57 | > 简单举例: 58 | > 59 | > A: dalao们,我这钓鱼怎么钓不出来附魔书啊 60 | > 61 | > B: 参考[[钓鱼#垃圾与宝藏]] 62 | > 63 | > Bot: https://zh.minecraft.wiki/w/%E9%92%93%E9%B1%BC#%E5%9E%83%E5%9C%BE%E4%B8%8E%E5%AE%9D%E8%97%8F 64 |
65 | 66 |
67 | 这个插件和其他wiki插件有什么不同吗? 68 | 69 | 在我发布这个插件时,nonebot还没有这类插件,而我在的群用得上这个,因此就顺便搓了一个发布了。现在nb市场也有些其他的wiki插件(或者包含wiki功能的bot), 70 | 其中的一些是适配特定wiki的,不具有通用性(但它们可能适配了本插件无法适配的wiki,例如scpwiki——它使用的系统是wikidot,且不开放api); 71 | 另一些(目前插件市场里只有一个,叫nonebot-plugin-wiki)同样针对mediawiki设计,意味着它们和本插件的功能在相当程度上可能是重叠的, 72 | 此时你可以比较差异功能、更新频率、兼容性、稳定性等选择适合自己的(例如前面提到的插件,接入了bwiki的api,可以获取bwiki上条目的简介之类) 73 |
74 | 75 |
76 | MediaWiki是啥?API是啥?条目路径是啥? 77 | 78 | * MediaWiki是维基媒体基金会开发的一套wiki引擎,大名鼎鼎的维基百科就是由它驱动的。 79 | 目前世界上有很多wiki网站均使用不同版本的该引擎驱动,尤其是很多游戏、动漫等的wiki 80 | * 本插件提到的API都指MediaWiki的API。利用API,bot可以与wiki站点通信,来快速从wiki站点获取想要的信息。在本插件中,bot就利用api搜索指定标题是否存在、 81 | 获取页面的链接、查询消歧义义项等。 82 | * 部分wiki出于各种原因可能不开放API,或者有非常严格的使用限制,这使得利用API获取链接的方法无法使用。所幸MediaWiki的链接格式非常稳定, 83 | 都是 `一个固定链接/条目名`,基于此,在已知这个固定链接的前提下,我们可以直接将其和条目名拼接生成链接。当然,这样bot就无法检查条目是否存在, 84 | 损失了一部分功能。在MediaWiki中,这个固定链接一般被叫做`条目路径` 85 |
86 | 87 |
88 | 我怎么知道我要查的wiki用的是不是MediaWiki? 89 | 90 | * 直接在目标wiki搜索框搜`Special:Version`就行,一般会跳到版本页面;没有搜索功能的wiki也可以直接把浏览器地址栏末尾的条目名改成前面的那一串然后回车。 91 | 如果提示没有权限访问、不存在的特殊页面之类,应该也是MediaWiki 92 | * 其实一般的MediaWiki站点不会刻意隐藏自己使用了MediaWiki,没什么意义,所以一般在网站的关于、版权信息之类的地方也能找到相关说明 93 |
94 | 95 |
96 | 查询命令怎么这么奇怪 97 | 98 | * 因为MediaWiki中,同wiki内部互相引用条目用的就是双方括号,引用模板则是双花括号,这样设计是为了和wiki保持一致。 99 | * 至于圆括号,MediaWiki中确实没有,不过上面都用了其他两种括号了,下面用圆括号也比较自然(确信) 100 | * 为了方便手机查询 (issue#1) ,本插件也有简化的条目查询命令,即 `wiki xxx` 101 |
102 | 103 |
104 | 这插件怎么用 105 | 106 | * 如果你是一般路过群友,只需要知道 `wiki 前缀:条目名` 这种查询语法一般就可以了,前缀的定义在下面有写。如果你的群只绑定了一个wiki,前缀是可以省略的 107 | * 如果你是群管理员……看下面文档的说明吧~ 108 |
109 | 110 | ## 使用说明 111 | 112 | ### TL;DR 113 | 114 | 查询条目: `[[条目名]]` `[[prefix:条目名]]` 115 | 116 | 查询条目(方法2): `wiki 条目名` `wiki prefix:条目名` 117 | 118 | 查询模板: `{{模板名}}` `{{prefix:模板名}}` 119 | 120 | 绕过api查询条目: `((条目名))` `((prefix:条目名))` 121 | 122 | 页面截图: `wiki.shot prefix:条目名` 123 | 124 | 添加:wiki.add <前缀> <通用url地址> < -g(添加该参数表示操作全局wiki)> 125 | 126 | 删除:wiki.delete <前缀> < -g > 127 | 128 | 列表:wiki.list < -g > 129 | 130 | 设置默认:wiki.default <前缀> < -g > 131 | 132 | **其中所有非全局指令均需要在目标群中进行,所有全局指令(除查询)均只有Bot管理员能执行** 133 | 134 | ### 查询功能 135 | 136 | 查询功能的语法和标准的mediawiki内链格式基本一致: 137 | 138 | 使用半角中括号包裹要查询的条目名,如 `[[帮助]]` 139 | 140 | 使用半角大括号包裹要查询的模板名,如 `{{测试模板}}` 141 | 142 | (PS:直接使用`[[Template:模板名]]`也是可行的) 143 | 144 | 此外,方便起见,也可以用`wiki 条目名` `wiki prefix:条目名`的方法查询 145 | 146 | 使用`wiki.shot prefix:条目名`可以获取对应页面的截图 **(测试版功能,使用时请注意安全风险,如调取敏感条目,泄漏服务器ip,或者使用浏览器漏洞对服务器进行攻击。如不需要本功能,请先使用pypi上的正式版)** 147 | 148 | Bot会尝试去调取目标wiki的api,并获取对应标题的页面信息(默认允许重定向、跨wiki、简繁转换)。如果未找到对应条目,或者对应页面是消歧义页面, 149 | 则会提供数字来选择。如果调用api失败或者未配置api,会回落到字符串拼接的方式生成链接。 150 | 151 | > Tip:如果api返回的结果不是你想要的,可以使用半角小括号包裹条目名以绕过api,如 ((帮助)) 152 | 153 | 当绑定了多个wiki时,需要指定前缀以查询默认wiki之外的wiki,例如,假如将某个wiki的前缀设置为flan,且不是默认wiki,则查询命令对应为[[flan:帮助]] 154 | 155 | ### 管理功能 156 | 157 | * wiki列表 158 | * 权限:所有人可用 159 | * 语法:`wiki.list` 160 | * 返回:当前群绑定的wiki列表,以及全局wiki列表 161 | 162 | 163 | * 添加wiki 164 | * 语法 `wiki.add` 165 | * 参数: 166 | * 前缀:用于区分wiki的前缀,仅支持字母、数字和下划线,不能和本群已有的重复,但可以和全局已有的重复,此时本地设置优先。另外,为了防止和mediawiki的名字空间冲突,bot默认屏蔽了部分名字空间名作为前缀的能力,也请在绑定前先了解目标wiki的名字空间情况。 167 | * api地址(可选):目标wiki的mediawiki api的地址。某些wiki可能限制api调用,此时可以不设置api。该地址通常可以在目标wiki的`Special:版本#接入点URL`页面中找到。或者也可以尝试这些一般的格式: 168 | 169 | > https://www.example.org/api.php (如萌娘百科) 170 | > 171 | > https://www.example.org/w/api.php (如维基百科) 172 | 173 | * 通用url:目标wiki的条目路径。通常来讲,在该url后加上正确的条目名即可访问目标条目。可以在目标wiki的`Special:版本#接入点URL`中找到(“条目路径”中的$1即条目名) 174 | 175 | > 例如,对维基百科:https://www.example.org/wiki 176 | > 177 | > 对萌百等:https://www.example.org/ 178 | 179 | 180 | 181 | * 删除wiki 182 | * 语法 
`wiki.delete` 183 | * 参数: 184 | * 前缀:要删除的wiki的前缀 185 | 186 | 187 | * 设置默认wiki 188 | * 语法 `wiki.default` 189 | * 参数: 190 | * 前缀:要设置默认的wiki的前缀 191 | 192 | > Tip:本群/全局绑定的的一个wiki将被自动设置为本地/全局的默认wiki,当本地/全局绑定的默认wiki被删除时会自动清除对应的默认wiki设置,无需手动操作。 193 | 194 | 195 | ### 附加说明 196 | #### 本地和全局 197 | 198 | bot管理员可以设置全局的wiki,全局wiki的设计意图在于回落,换句话说,本地设置无条件优先于全局设置。当且仅当在以下情况下,全局设置会被应用: 199 | 200 | 1. 本地没有绑定任何wiki 201 | 2. 本地没有设置默认前缀,而查询请求中又不包含前缀 202 | 203 | > 注意:如果本地有和全局默认前缀相同的wiki时,本地的wiki仍将被优先调用 204 | 205 | 3. 本地设置了默认前缀,但是本地不存在该wiki 206 | 207 | > 注意:当前缀在全局中也不存在时,前缀将被视为名字空间,直接和条目名一并传入api进行查询 208 | 209 | 4. 查询请求中包含的前缀在本地不存在 210 | 211 | #### API调用 212 | 213 | 为了提供更准确的结果,默认情况下bot会调用mediawiki api查询条目。当api无法正常调用时,会使用通用url和条目名拼接作为回落。 214 | 如果返回了错误的结果,可以使用小括号查询来绕过api。 215 | 216 | #### 使用代理 217 | 218 | 如果你需要使用代理来优化某些wiki的访问速度,可以在`.env`文件中设置`WIKI_PROXY`环境变量,该变量的值为代理地址,格式为`scheme://(user:password@)host:port`,例如: 219 | 220 | ```dotenv 221 | WIKI_PROXY=http://127.0.0.1:1080 222 | ``` 223 | 224 | ```dotenv 225 | WIKI_PROXY=socks5://user:mysecret@example.org:11451 226 | ``` 227 | 228 | 该变量会被传递给aiohttp和playwright,因此可以用于本插件中的所有网络请求。 229 | 230 | 如果`.env`文件中的配置无法正常生效,你也可以: 231 | 232 | a) 在系统环境变量中添加代理设置 233 | ```shell 234 | # windows cmd 235 | set WIKI_PROXY 'http://127.0.0.1:1080' 236 | # windows powershell 237 | $Env:WIKI_PROXY='http://127.0.0.1:1080' 238 | # linux/macOS 239 | export WIKI_PROXY='http://127.0.0.1:1080' 240 | ``` 241 | 242 | b) 在bot的入口文件(通常是`bot.py`)中对`config.wiki_proxy`直接赋值: 243 | ```python 244 | import nonebot 245 | 246 | # 初始化时 247 | nonebot.init(wiki_proxy="http://127.0.0.1:1080") 248 | 249 | # 或者在初始化后 250 | config = nonebot.get_driver().config 251 | config.wiki_proxy = "socks5://user:pass@proxy.example.org" 252 | ``` 253 | 254 | 请参考[nonebot文档](https://nonebot.dev/docs/appendices/config#%E9%85%8D%E7%BD%AE%E9%A1%B9%E7%9A%84%E5%8A%A0%E8%BD%BD)获取更多信息 255 | 256 | > 注意:该代理设置不支持按wiki分流,即所有wiki的请求都会使用同一个代理。如果你有此类需求,建议使用代理客户端内置的分流功能,它们通常会提供更灵活的配置选项。 257 | > 258 | > 
由于众所周知的原因,传播某些需要代理才能访问的wiki内的内容可能影响帐号安全,请谨慎添加此类wiki,或者使用保证内容合法的境内镜像站(如有) 259 | 260 | #### 截图功能 261 | 262 | 本插件支持截图功能,但是需要额外安装依赖。如果你不需要截图功能,可以跳过这一节。 263 | 264 | * 如果你还没安装该插件 265 | ```shell 266 | pip install nonebot-plugin-mediawiki[shot] 267 | ``` 268 | 这样,pip会在安装该插件时自动安装截图功能所需的依赖(目前仅有playwright) 269 | 270 | * 如果你已经安装了该插件,则需要在安装了该插件的虚拟环境中安装playwright 271 | ```shell 272 | # 激活虚拟环境 273 | # linux, venv 274 | source venv/bin/activate 275 | # windows, venv 276 | venv\Scripts\activate.bat 277 | # 安装playwright 278 | pip install playwright 279 | ``` 280 | 281 | * 然后再安装chromium 282 | ```shell 283 | # 安装chromium 284 | playwright install chromium 285 | # 对于无头linux服务器,建议在系统中安装完整的chromium以补全缺失的依赖 286 | # Debian / Ubuntu 287 | sudo apt install chromium-browser 288 | # CentOS 289 | sudo yum install chromium 290 | # Arch 291 | sudo pacman -S chromium 292 | ``` 293 | 294 | * 页面加载设置 295 | 296 | 如果需要调整页面加载时间限制(默认30秒),可以在`.env`文件中设置`WIKI_SHOT_TIMEOUT`环境变量,单位为秒,例如: 297 | 298 | ```dotenv 299 | WIKI_SHOT_TIMEOUT=60 300 | ``` 301 | 302 | 无头浏览器操作的时间内,bot不会向用户发送进度消息,因此过长的超时时间可能被认为是无响应,调整时间限制时请注意这一点。 303 | 304 | 如果需要调整何时进行截图,可以在`.env`文件中设置`WIKI_SHOT_WAIT_UNTIL`环境变量,有效值有`load`、`domcontentloaded`、`networkidle`、`commit`,例如: 305 | 306 | ```dotenv 307 | WIKI_SHOT_WAIT_UNTIL=networkidle 308 | ``` 309 | 310 | 请参考[playwright文档](https://playwright.dev/python/docs/api/class-page#page-goto)获取这些值的具体含义 311 | 312 | 需要注意的是,当前版本的插件硬编码使用chromium。如果您确实需要使用其他浏览器,可以自行修改`worker.py`中的相关语句。 313 | 314 | 当前该功能**仍处于测试阶段**,不建议在生产环境中使用。以下是一些您可能需要注意的问题: 315 | * chromium会占用大量服务器资源,如果您的服务器配置较低,建议不要使用截图功能。 316 | * 本插件对输出的内容没有过滤,您可能需要考虑安全性问题(例如,如果您的bot在公开群中使用,可能会被恶意利用来发送一些不适合在某些地区显示的内容)。 317 | * 某些wiki有奇怪的弹窗、广告等,也有些wiki的防火墙比较严格,或者有人机验证等,可能会导致无头浏览器无法正常获取页面。 318 | * 恶意的群成员可能会利用一些wiki的特殊页面来获取bot的服务器ip等敏感信息 319 | * 攻击者可能会利用浏览器漏洞来入侵服务器,建议定期更新playwright和chromium 320 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/__init__.py: 
--------------------------------------------------------------------------------
# from dotenv import load_dotenv

from nonebot import logger, get_driver

# Importing these submodules registers their nonebot matchers (command
# handlers) as an import-time side effect; nothing from them is used here.
from . import config_manager
from . import worker

# Help-system integration: these dunder strings are read by external help
# plugins to render this plugin's usage page.
__usage__ = '使用:\n' \
            '快速使用:wiki 前缀:条目名\n' \
            '查询条目/模板:[[前缀:条目名]] {{前缀:模板名}} ((前缀:条目名))\n' \
            '截图:wiki.shot 前缀:条目名\n' \
            '其中中括号、大括号匹配后会调用api搜索条目/模板名,如果有误,可以使用小括号方式绕过api直接生成链接\n' \
            '前缀由群管和bot超管配置,没有指定前缀或前缀无效时,会回落到默认前缀\n' \
            '配置:\n' \
            '添加:wiki.add <前缀> <通用url地址> < -g(若添加该参数,则操作全局wiki,需要超管权限,下同)>\n' \
            '删除:wiki.delete <前缀> < -g >\n' \
            '列表:wiki.list < -g >\n' \
            '设置默认:wiki.default <前缀> < -g >\n' \
            '按提示提供相应参数即可\n' \
            '注意:私聊状态下该插件仅会响应超管的命令,且仅能管理全局wiki\n' \
            '完整文档请前往 https://github.com/KoishiMoe/nonebot-plugin-mediawiki 查看'

__help_version__ = '1.2.2'

__help_plugin_name__ = 'Wiki推送'

# load_dotenv()  # Dont do this, nonebot has its own logic.

# if os.getenv("WIKI_PROXY"):
#     logger.info(f"Wiki: using proxy {os.getenv('WIKI_PROXY')}")

# Ensure `config.wiki_proxy` always exists: nonebot's config raises
# AttributeError for unset keys, so default it to None here. Later code
# (e.g. config_manager) can then test its truthiness without getattr.
# NOTE(review): when the attribute exists but is None this still logs
# "using proxy None" — presumably harmless, but worth confirming.
try:
    logger.info(f"Wiki: using proxy {get_driver().config.wiki_proxy}")
except AttributeError:
    get_driver().config.wiki_proxy = None


# Publish plugin metadata when the running nonebot version supports it;
# on older versions the import fails and metadata is simply skipped.
try:
    from nonebot.plugin import PluginMetadata

    __plugin_meta__ = PluginMetadata(
        name=__help_plugin_name__,
        description="适用于 nonebot2 的 MediaWiki 查询插件",
        usage=__usage__,
        homepage="https://github.com/KoishiMoe/nonebot-plugin-mediawiki",
        type="application",
        extra={},
        supported_adapters={"~onebot.v11"},
    )
except ImportError:
    pass
-------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/config.py: --------------------------------------------------------------------------------
import json
import os
from pathlib import Path

from .exception import NoSuchPrefixException, NoDefaultPrefixException
WIKI_DIR = Path("") / "data" / "database" / "wiki" 8 | 9 | 10 | class Config: 11 | def __init__(self, group_id: int): 12 | self.__gid = group_id 13 | self.__default: str = "" 14 | self.__default_global: str = "" 15 | self.__wikis: dict = {} 16 | self.__wikis_global: dict = {} 17 | 18 | self.__parse_data(self.__get_config()) 19 | self.__parse_global_data(self.__get_global_config()) 20 | 21 | def add_wiki(self, prefix: str, api_url: str, url: str) -> bool: 22 | self.__wikis[prefix] = [api_url, url] 23 | if self.__default == "": 24 | self.__default = prefix 25 | 26 | return self.save_data() 27 | 28 | def add_wiki_global(self, prefix: str, api_url: str, url: str) -> bool: 29 | self.__wikis_global[prefix] = [api_url, url] 30 | if self.__default_global == "": 31 | self.__default_global = prefix 32 | 33 | return self.save_global_data() 34 | 35 | def del_wiki(self, prefix: str) -> bool: 36 | if prefix == self.__default: 37 | self.__default = "" 38 | return self.__wikis.pop(prefix, "") != "" and self.save_data() 39 | 40 | def del_wiki_global(self, prefix: str) -> bool: 41 | if prefix == self.__default_global: 42 | self.__default_global = "" 43 | return self.__wikis_global.pop(prefix, "") != "" and self.save_global_data() 44 | 45 | def __get_config(self) -> dict: 46 | file_name = f'{self.__gid}.json' 47 | return self.__get_config_parse(file_name) 48 | 49 | def __get_global_config(self) -> dict: 50 | file_name = 'global.json' 51 | return self.__get_config_parse(file_name) 52 | 53 | @staticmethod 54 | def __get_config_parse(file_name: str) -> dict: 55 | path = WIKI_DIR / file_name 56 | if not WIKI_DIR.is_dir(): 57 | os.makedirs(WIKI_DIR) 58 | if not path.is_file(): 59 | with open(path, "w", encoding="utf-8") as w: 60 | w.write(json.dumps({})) 61 | data = json.loads(path.read_bytes()) 62 | return data 63 | 64 | def __parse_data(self, data: dict): 65 | self.__default = data.get("default", "") 66 | self.__wikis = data.get("wikis", {}) 67 | 68 | def __parse_global_data(self, data: 
dict): 69 | self.__default_global = data.get("default", "") 70 | self.__wikis_global = data.get("wikis", {}) 71 | 72 | def get_from_prefix(self, prefix: str) -> list: 73 | if prefix == "": # 没有匹配到前缀,尝试使用默认前缀 74 | if self.__default == "" and self.__default_global == "": # 没有配置默认前缀 75 | raise NoDefaultPrefixException 76 | if self.__default != "": # 本群设置了默认前缀 77 | temp_data: list = self.__wikis.get(self.__default, None) 78 | if not temp_data: # 没有从本群的列表中找到对应wiki,回落到全局 79 | temp_global_data = self.__wikis_global.get(self.__default, None) 80 | if not temp_global_data: 81 | raise NoSuchPrefixException 82 | return temp_global_data 83 | return temp_data 84 | # 有全局默认前缀(此时强制使用全局数据库) 85 | temp_global_data: list = self.__wikis_global.get(self.__default_global, None) 86 | if not temp_global_data: 87 | raise NoSuchPrefixException 88 | return temp_global_data 89 | 90 | temp_data: list = self.__wikis.get(prefix, None) 91 | if not temp_data: 92 | temp_global_data = self.__wikis_global.get(prefix, None) 93 | if not temp_global_data: 94 | raise NoSuchPrefixException 95 | return temp_global_data 96 | return temp_data 97 | 98 | def save_data(self) -> bool: 99 | file_name = f"{self.__gid}.json" 100 | data: dict = {"default": self.__default, "wikis": self.__wikis} 101 | return self.__save_data_parse(file_name, data) 102 | 103 | def save_global_data(self) -> bool: 104 | file_name = "global.json" 105 | data: dict = {"default": self.__default_global, "wikis": self.__wikis_global} 106 | return self.__save_data_parse(file_name, data) 107 | 108 | @staticmethod 109 | def __save_data_parse(file_name: str, data: dict) -> bool: 110 | path = WIKI_DIR / file_name 111 | if not path.is_file(): 112 | with open(path, "w", encoding="utf-8") as w: 113 | w.write(json.dumps({})) 114 | with open(path, "w", encoding="utf-8") as w: 115 | w.write(json.dumps(data, indent=4)) 116 | return True 117 | 118 | def set_default(self, default: str) -> bool: 119 | if default in self.__wikis: 120 | self.__default = default 
121 | self.save_data() 122 | return True 123 | return False 124 | 125 | def set_default_global(self, default: str) -> bool: 126 | if default in self.__wikis_global: 127 | self.__default_global = default 128 | self.save_global_data() 129 | return True 130 | return False 131 | 132 | @property 133 | def list_data(self) -> tuple: 134 | count: int = 0 135 | temp_list: str = "" 136 | temp_list += f"本群默认:{self.__default}\n" 137 | temp_list += "本群所有wiki:\n" 138 | for prefix in self.__wikis: 139 | count += 1 140 | temp_str: str = f"{count}.前缀:{prefix}\n" + \ 141 | f"API地址:{self.__wikis.get(prefix)[0]}\n" + \ 142 | f"通用链接:{self.__wikis.get(prefix)[1]}\n" 143 | temp_list += temp_str 144 | 145 | count = 0 146 | temp_list_global: str = "" 147 | temp_list_global += f"全局默认:{self.__default_global}\n" 148 | temp_list_global += "所有全局wiki:\n" 149 | for prefix in self.__wikis_global: 150 | count += 1 151 | temp_str: str = f"{count}.前缀:{prefix}\n" + \ 152 | f"API地址:{self.__wikis_global.get(prefix)[0]}\n" + \ 153 | f"通用链接:{self.__wikis_global.get(prefix)[1]}\n" 154 | temp_list_global += temp_str 155 | 156 | return temp_list, temp_list_global 157 | 158 | @property 159 | def prefixes(self) -> set: 160 | prefixes = set(self.__wikis.keys()).union(set(self.__wikis_global.keys())) 161 | return prefixes 162 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/config_manager.py: -------------------------------------------------------------------------------- 1 | import re 2 | from asyncio import TimeoutError 3 | 4 | from nonebot import on_command, get_driver 5 | from nonebot.adapters.onebot.v11 import GroupMessageEvent, Bot, MessageEvent 6 | from nonebot.adapters.onebot.v11.permission import GROUP_OWNER, GROUP_ADMIN 7 | from nonebot.params import RawCommand 8 | from nonebot.permission import SUPERUSER 9 | from nonebot.typing import T_State 10 | from nonebot.log import logger 11 | 12 | from .config import Config 13 | from .mediawiki import 
MediaWiki, MediaWikiAPIURLError 14 | from .utilities import process_command 15 | 16 | QUIT_LIST = ["取消", "quit", "退出", "exit", "算了"] 17 | BotConfig = get_driver().config 18 | reserved = ["talk", "user", "user talk", "project", "project talk", "file", "file talk", "mediawiki", 19 | "mediawiki talk", "template", "template talk", "help", "help talk", "category", "category talk", 20 | "special", "media", "t", "u"] 21 | 22 | add_wiki = on_command("wiki.add", permission=SUPERUSER | GROUP_ADMIN | GROUP_OWNER) 23 | 24 | 25 | @add_wiki.handle() 26 | async def _add_wiki(bot: Bot, event: MessageEvent, raw_command: str = RawCommand()): 27 | msg = str(event.message).strip() 28 | param_list, param_dict = process_command(raw_command, msg) 29 | 30 | # check if is global 31 | is_global = False 32 | if param_dict.get("g"): 33 | if await SUPERUSER(bot, event): 34 | is_global = True 35 | else: 36 | await add_wiki.finish("您没有权限使用此命令!") 37 | 38 | # parse params 39 | if len(param_list) <= 1: 40 | await add_wiki.finish("请输入正确的参数!格式为: wiki.add <前缀> <条目路径> < -g (添加该参数修改全局)>") 41 | return # 糊弄下IDE 42 | elif len(param_list) == 2: 43 | prefix = param_list[0].strip().lower() 44 | api = '' 45 | url = param_list[1].strip().rstrip('/') # 防止之后拼接的时候多出来斜杠 46 | else: 47 | prefix = param_list[0].strip().lower() 48 | api = param_list[1].strip().rstrip('/') 49 | url = param_list[2].strip().rstrip('/') 50 | 51 | # check params 52 | if url.endswith('api.php'): 53 | await add_wiki.finish("参数错误!如果您只提供了一个地址,则其必须是条目路径而非api地址") 54 | if api and not re.match(r'^https?:/{2}\w.+$', api): 55 | await add_wiki.finish("非法的api地址,请重新输入!") 56 | if not re.match(r'^https?:/{2}\w.+$', url): 57 | await add_wiki.finish("非法的条目路径,请重新输入!") 58 | if prefix in reserved or ":" in prefix or ":" in prefix: 59 | await add_wiki.finish("该前缀为保留前缀或含有非法字符,请重新输入!") 60 | 61 | if api: 62 | success = False 63 | for i in range(3): 64 | try: 65 | if get_driver().config.wiki_proxy: 66 | await MediaWiki.create(url=api, timeout=10, 
proxies=get_driver().config.wiki_proxy) 67 | else: 68 | await MediaWiki.create(url=api, timeout=10) 69 | success = True 70 | break 71 | except (MediaWikiAPIURLError, TimeoutError): 72 | continue 73 | except Exception as e: 74 | logger.error(f"添加wiki时发生错误:{e}") 75 | await add_wiki.finish("因未知错误无法连接到api,请bot管理员检查日志") 76 | if not success: 77 | await add_wiki.finish("无法连接到wiki,请检查api地址是否正确!如果确认无误,可能是网络故障或者防火墙拦截," 78 | "您可以不提供api地址,直接提供条目路径即可") 79 | 80 | # 进行插入操作 81 | group_id = event.group_id if isinstance(event, GroupMessageEvent) else 0 82 | config = Config(group_id=group_id) 83 | if (is_global and config.add_wiki_global(prefix, api, url)) \ 84 | or (not is_global and config.add_wiki(prefix, api, url)): 85 | await add_wiki.finish(f"添加/编辑Wiki:{prefix}成功!") 86 | else: 87 | await add_wiki.finish("呜……出错了……请联系bot管理员进行处理……") 88 | 89 | 90 | list_wiki = on_command("wiki.list") 91 | 92 | 93 | @list_wiki.handle() 94 | async def _list_wiki(bot: Bot, event: MessageEvent, raw_command: str = RawCommand()): 95 | msg = str(event.message).strip() 96 | param_list, param_dict = process_command(raw_command, msg) 97 | 98 | # check if is global 99 | is_global = bool(param_dict.get("g")) 100 | 101 | if is_global: 102 | config = Config(group_id=0) 103 | await list_wiki.finish(config.list_data[1]) 104 | elif isinstance(event, GroupMessageEvent): 105 | config = Config(group_id=event.group_id) 106 | await list_wiki.finish(config.list_data[0]) 107 | 108 | 109 | del_wiki = on_command("wiki.delete", permission=SUPERUSER | GROUP_ADMIN | GROUP_OWNER) 110 | 111 | 112 | @del_wiki.handle() 113 | async def _del_wiki(bot: Bot, event: MessageEvent, raw_command: str = RawCommand()): 114 | msg = str(event.message).strip() 115 | param_list, param_dict = process_command(raw_command, msg) 116 | 117 | # check if is global 118 | is_global = False 119 | if param_dict.get("g"): 120 | if await SUPERUSER(bot, event): 121 | is_global = True 122 | else: 123 | await del_wiki.finish("您没有权限使用此命令!") 124 | 125 | if not 
param_list: 126 | await del_wiki.finish("你似乎没有提供要删除的前缀的说……") 127 | prefix = param_list[0] 128 | group_id = event.group_id if isinstance(event, GroupMessageEvent) else 0 129 | config = Config(group_id=group_id) 130 | 131 | if (is_global and config.del_wiki_global(prefix)) or (not is_global and config.del_wiki(prefix)): 132 | await del_wiki.finish("删除成功") 133 | else: 134 | await del_wiki.finish("呜……删除失败了……请检查前缀是否有误") 135 | 136 | 137 | set_default = on_command("wiki.default", permission=SUPERUSER | GROUP_ADMIN | GROUP_OWNER) 138 | 139 | 140 | @set_default.handle() 141 | async def _set_default(bot: Bot, event: MessageEvent, state: T_State, raw_command: str = RawCommand()): 142 | msg = str(event.message).strip() 143 | param_list, param_dict = process_command(raw_command, msg) 144 | 145 | # check if is global 146 | is_global = False 147 | if param_dict.get("g"): 148 | if await SUPERUSER(bot, event): 149 | is_global = True 150 | else: 151 | await set_default.finish("您没有权限使用此命令!") 152 | 153 | if not param_list: 154 | await set_default.finish("你似乎没有提供要设置的前缀的说……") 155 | prefix = param_list[0] 156 | group_id = event.group_id if isinstance(event, GroupMessageEvent) else 0 157 | config = Config(group_id=group_id) 158 | 159 | if (is_global and config.set_default_global(prefix)) or (not is_global and config.set_default(prefix)): 160 | await set_default.finish("设置成功") 161 | else: 162 | await set_default.finish("呜……设置失败了……请检查前缀是否有误") 163 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/constants.py: -------------------------------------------------------------------------------- 1 | # 用于正则匹配的模板字符串 2 | ARTICLE_RAW = r"[[(.*?)]]" # adapter出于安全原因会把中括号转义,此处用于让事件响应器能正确响应事件 3 | ARTICLE = r"\[\[(.*?)\]\]" 4 | TEMPLATE = r"\{\{(.*?)\}\}" 5 | RAW = r"\(\((.*?)\)\)" 6 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/exception.py: 
-------------------------------------------------------------------------------- 1 | from .mediawiki.exceptions import MediaWikiBaseException 2 | 3 | 4 | class NoDefaultPrefixException(MediaWikiBaseException): 5 | def __init__(self, group: int = None): 6 | self._group = group 7 | msg = f"群{group}没有配置默认的Wiki前缀" 8 | super(NoDefaultPrefixException, self).__init__(msg) 9 | 10 | @property 11 | def group(self): 12 | return self._group 13 | 14 | 15 | class NoSuchPrefixException(MediaWikiBaseException): 16 | def __init__(self, group: int = None, prefix: str = None): 17 | self._group = group 18 | self._prefix = prefix 19 | msg = f"群{group}的wiki列表以及全局列表中均不存在前缀{prefix}" 20 | super(NoSuchPrefixException, self).__init__(msg) 21 | 22 | @property 23 | def group(self): 24 | return self._group 25 | 26 | @property 27 | def prefix(self): 28 | return self._prefix 29 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/fakemwapi.py: -------------------------------------------------------------------------------- 1 | from urllib import parse 2 | 3 | 4 | class DummyPage: 5 | def __init__(self, url: str, title: str): 6 | self._title = title 7 | self._url = f"{url}/{parse.quote(title)}" 8 | 9 | @property 10 | def title(self): 11 | return self._title 12 | 13 | @property 14 | def url(self): 15 | return self._url 16 | 17 | 18 | class DummyMediaWiki: 19 | def __init__(self, url: str): 20 | self._url = url 21 | 22 | async def page(self, title: str, *args, **kwargs) -> DummyPage: 23 | return DummyPage(self.url, title) 24 | 25 | @property 26 | def url(self): 27 | return self._url 28 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/mediawiki/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Tyler Barrus 4 | Copyright (c) 2022 KoishiMoe 5 | 6 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/mediawiki/README.md: -------------------------------------------------------------------------------- 1 | 此处使用的是 [pymediawiki](https://github.com/barrust/mediawiki) 库的一个异步fork。目前还处于测试阶段,我不保证它在所有情况下都能正常工作。因此我还没有将其单独发布到pypi,而是暂时将其与本插件捆绑分发。 2 | 3 | 该fork本身依然以MIT许可证授权,仓库地址为:https://github.com/KoishiMoe/pymediawiki-async 。本目录中包含LICENSE文件是为了符合MIT许可证的要求,而本项目(nonebot-plugin-mediawiki)的许可不受其影响。 4 | 5 | ~~在测试完善之后,我将会将其单独发布到pypi,并不再与本插件捆绑发布。~~ 6 | 7 | Edit:如果需要异步支持的话建议用原生支持异步的那些库吧,这个是之前不会写协程的时候乱写的…… 8 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/mediawiki/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | mediawiki module initialization 3 | """ 4 | import asyncio 5 | import platform 6 | 7 | from .constants import URL, VERSION 8 | from .exceptions import ( 9 | MediaWikiException, 10 | PageError, 11 | MediaWikiGeoCoordError, 12 | RedirectError, 13 | DisambiguationError, 14 | MediaWikiAPIURLError, 15 | HTTPTimeoutError, 16 | MediaWikiCategoryTreeError, 17 | MediaWikiLoginError, 18 | ) 19 | from .mediawiki import MediaWiki 20 | from .mediawikipage import MediaWikiPage 21 | 22 | __author__ = "KoishiMoe" 23 | __maintainer__ = "KoishiMoe" 24 | __license__ = "MIT" 25 | __version__ = VERSION 26 | __credits__ = ["Tyler Barrus", "Jonathan Goldsmith"] 27 | __url__ = URL 28 | __bugtrack_url__ = "{0}/issues".format(__url__) 29 | __download_url__ = "{0}/tarball/v{1}".format(__url__, __version__) 30 | 31 | __all__ = [ 32 | "MediaWiki", 33 | "MediaWikiPage", 34 | "PageError", 35 | "RedirectError", 36 | "MediaWikiException", 37 | "DisambiguationError", 38 | "MediaWikiAPIURLError", 39 | "HTTPTimeoutError", 40 | "MediaWikiGeoCoordError", 41 | "MediaWikiCategoryTreeError", 42 | "MediaWikiLoginError", 43 | ] 44 | 45 | # fix some proxy issues on Windows 46 | if "windows" 
in platform.system().lower(): 47 | asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) 48 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/mediawiki/constants.py: -------------------------------------------------------------------------------- 1 | URL = "https://github.com/KoishiMoe/pymediawiki-async" 2 | VERSION = "0.1.0" 3 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/mediawiki/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | MediaWiki Exceptions 3 | """ 4 | from .constants import URL 5 | from .utilities import str_or_unicode 6 | 7 | ODD_ERROR_MESSAGE = ( 8 | "This should not happen. If the MediaWiki site you are " 9 | "querying is available, then please report this issue on " 10 | "GitHub: {URL}/issues".format(URL=URL) 11 | ) 12 | 13 | 14 | class MediaWikiBaseException(Exception): 15 | """ Base MediaWikiException 16 | 17 | Args: 18 | message: The message of the exception """ 19 | 20 | def __init__(self, message): 21 | self._message = message 22 | super(MediaWikiBaseException, self).__init__(self.message) 23 | 24 | def __unicode__(self): 25 | return self.message 26 | 27 | def __str__(self): 28 | return str_or_unicode(self.__unicode__()) 29 | 30 | @property 31 | def message(self): 32 | """ str: The MediaWiki exception message """ 33 | return self._message 34 | 35 | 36 | class MediaWikiException(MediaWikiBaseException): 37 | """ MediaWiki Exception Class 38 | 39 | Args: 40 | error (str): The error message that the MediaWiki site returned """ 41 | 42 | def __init__(self, error): 43 | self._error = error 44 | msg = 'An unknown error occurred: "{0}". 
Please report it on GitHub!'.format( 45 | self.error 46 | ) 47 | super(MediaWikiException, self).__init__(msg) 48 | 49 | @property 50 | def error(self): 51 | """ str: The error message that the MediaWiki site returned """ 52 | return self._error 53 | 54 | 55 | class PageError(MediaWikiBaseException): 56 | """ Exception raised when no MediaWiki page matched a query 57 | 58 | Args: 59 | title (str): Title of the page 60 | pageid (int): MediaWiki page id of the page""" 61 | 62 | def __init__(self, title=None, pageid=None): 63 | if title: 64 | self._title = title 65 | msg = '"{0}" does not match any pages. Try another query!'.format( 66 | self.title 67 | ) 68 | elif pageid: 69 | self._pageid = pageid 70 | msg = 'Page id "{0}" does not match any pages. Try another id!'.format( 71 | self.pageid 72 | ) 73 | else: 74 | self._title = "" 75 | msg = '"{0}" does not match any pages. Try another query!'.format( 76 | self.title 77 | ) 78 | super(PageError, self).__init__(msg) 79 | 80 | @property 81 | def title(self): 82 | """ str: The title that caused the page error """ 83 | return self._title 84 | 85 | @property 86 | def pageid(self): 87 | """ int: The page id that caused the page error """ 88 | return self._pageid 89 | 90 | 91 | class RedirectError(MediaWikiBaseException): 92 | """ Exception raised when a page title unexpectedly resolves to 93 | a redirect 94 | 95 | Args: 96 | title (str): Title of the page that redirected 97 | Note: 98 | This should only occur if both auto_suggest and redirect \ 99 | are set to **False** """ 100 | 101 | def __init__(self, title): 102 | self._title = title 103 | msg = ( 104 | '"{0}" resulted in a redirect. Set the redirect property to True ' 105 | "to allow automatic redirects." 
106 | ).format(self.title) 107 | 108 | super(RedirectError, self).__init__(msg) 109 | 110 | @property 111 | def title(self): 112 | """ str: The title that was redirected """ 113 | return self._title 114 | 115 | 116 | class DisambiguationError(MediaWikiBaseException): 117 | """ Exception raised when a page resolves to a Disambiguation page 118 | 119 | Args: 120 | title (str): Title that resulted in a disambiguation page 121 | may_refer_to (list): List of possible titles 122 | url (str): Full URL to the disambiguation page 123 | details (list[dict]): A list of dictionaries with more information of \ 124 | possible results 125 | Note: 126 | `options` only includes titles that link to valid \ 127 | MediaWiki pages """ 128 | 129 | def __init__(self, title, may_refer_to, url, details=None): 130 | self._title = title 131 | self._options = sorted(may_refer_to) 132 | self._details = details 133 | self._url = url 134 | msg = '\n"{0}" may refer to: \n ' "{1}".format( 135 | self.title, "\n ".join(self.options) 136 | ) 137 | super(DisambiguationError, self).__init__(msg) 138 | 139 | @property 140 | def url(self): 141 | """ str: The url, if possible, of the disambiguation page """ 142 | return self._url 143 | 144 | @property 145 | def title(self): 146 | """ str: The title of the page """ 147 | return self._title 148 | 149 | @property 150 | def options(self): 151 | """ list: The list of possible page titles """ 152 | return self._options 153 | 154 | @property 155 | def details(self): 156 | """ list: The details of the proposed non-disambigous pages """ 157 | return self._details 158 | 159 | 160 | class HTTPTimeoutError(MediaWikiBaseException): 161 | """ Exception raised when a request to the Mediawiki site times out. 162 | 163 | Args: 164 | query (str): The query that timed out""" 165 | 166 | def __init__(self, query): 167 | self._query = query 168 | msg = ( 169 | 'Searching for "{0}" resulted in a timeout. 
' 170 | "Try again in a few seconds, and ensure you have rate limiting " 171 | "set to True." 172 | ).format(self.query) 173 | super(HTTPTimeoutError, self).__init__(msg) 174 | 175 | @property 176 | def query(self): 177 | """ str: The query that timed out """ 178 | return self._query 179 | 180 | 181 | class MediaWikiAPIURLError(MediaWikiBaseException): 182 | """ Exception raised when the MediaWiki server does not support the API 183 | 184 | Args: 185 | api_url (str): The API URL that was not recognized """ 186 | 187 | def __init__(self, api_url): 188 | self._api_url = api_url 189 | msg = "{0} is not a valid MediaWiki API URL".format(self.api_url) 190 | super(MediaWikiAPIURLError, self).__init__(msg) 191 | 192 | @property 193 | def api_url(self): 194 | """ str: The api url that raised the exception """ 195 | return self._api_url 196 | 197 | 198 | class MediaWikiGeoCoordError(MediaWikiBaseException): 199 | """ Exceptions to handle GeoData exceptions 200 | 201 | Args: 202 | error (str): Error message from the MediaWiki site related to \ 203 | GeoCoordinates """ 204 | 205 | def __init__(self, error): 206 | self._error = error 207 | msg = ( 208 | "GeoData search resulted in the following error: {0}" 209 | " - Please use valid coordinates or a proper page title." 210 | ).format(self.error) 211 | super(MediaWikiGeoCoordError, self).__init__(msg) 212 | 213 | @property 214 | def error(self): 215 | """ str: The error that was thrown when pulling GeoCoordinates """ 216 | return self._error 217 | 218 | 219 | class MediaWikiCategoryTreeError(MediaWikiBaseException): 220 | """ Exception when the category tree is unable to complete for an unknown 221 | reason 222 | 223 | Args: 224 | category (str): The category that threw an exception """ 225 | 226 | def __init__(self, category): 227 | self._category = category 228 | msg = ( 229 | "Categorytree threw an exception for trying to get the " 230 | "same category '{}' too many times. 
Please try again later " 231 | "and perhaps use the rate limiting " 232 | "option." 233 | ).format(self._category) 234 | super(MediaWikiCategoryTreeError, self).__init__(msg) 235 | 236 | @property 237 | def category(self): 238 | """ str: The category that threw an exception during category tree \ 239 | generation """ 240 | return self._category 241 | 242 | 243 | class MediaWikiLoginError(MediaWikiBaseException): 244 | """ Exception raised when unable to login to the MediaWiki site 245 | 246 | Args: 247 | error (str): The error message that the MediaWiki site returned """ 248 | 249 | def __init__(self, error): 250 | self._error = error 251 | super(MediaWikiLoginError, self).__init__(error) 252 | 253 | @property 254 | def error(self): 255 | """ str: The error message that the MediaWiki site returned """ 256 | return self._error 257 | 258 | 259 | # Exception add by KoishiMoe 260 | class InterWikiError(MediaWikiBaseException): 261 | """ Exception raised when a page resolves to be an interwiki link 262 | 263 | Args: 264 | title (str): Title that results in a interwiki link 265 | url (str): Full URL to the link 266 | """ 267 | 268 | def __init__(self, title, url): 269 | self._title = title 270 | self._url = url 271 | msg = "{0} is an interwiki link to {1}".format(self._title, self._url) 272 | super(InterWikiError, self).__init__(msg) 273 | 274 | @property 275 | def title(self): 276 | return self._title 277 | 278 | @property 279 | def url(self): 280 | return self._url 281 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/mediawiki/mediawiki.py: -------------------------------------------------------------------------------- 1 | """ 2 | MediaWiki class module 3 | """ 4 | # MIT License 5 | # Author: Tyler Barrus (barrust@gmail.com); KoishiMoe 6 | 7 | import asyncio 8 | from datetime import datetime, timedelta 9 | from decimal import Decimal, DecimalException 10 | from json import JSONDecodeError 11 | 12 | import 
aiohttp 13 | 14 | from .constants import VERSION, URL 15 | from .exceptions import ( 16 | HTTPTimeoutError, 17 | MediaWikiAPIURLError, 18 | MediaWikiCategoryTreeError, 19 | MediaWikiException, 20 | MediaWikiGeoCoordError, 21 | MediaWikiLoginError, 22 | PageError, 23 | ) 24 | from .mediawikipage import MediaWikiPage 25 | from .utilities import memoize 26 | 27 | 28 | class MediaWiki(object): 29 | """ MediaWiki API Wrapper Instance 30 | 31 | Warning: 32 | This should never need to be used directly! Please use \ 33 | :func:`MediaWiki.create` instead. 34 | """ 35 | 36 | __slots__ = [ 37 | "_version", 38 | "_lang", 39 | "_api_url", 40 | "_cat_prefix", 41 | "_timeout", 42 | "_user_agent", 43 | "_session", 44 | "_rate_limit", 45 | "_rate_limit_last_call", 46 | "_min_wait", 47 | "_extensions", 48 | "_api_version", 49 | "_api_version_str", 50 | "_base_url", 51 | "__supported_languages", 52 | "__available_languages", 53 | "_cache", 54 | "_refresh_interval", 55 | "_use_cache", 56 | "_is_logged_in", 57 | "_proxies", 58 | ] 59 | 60 | def __init__(self, 61 | url="https://{lang}.wikipedia.org/w/api.php", 62 | lang="en", 63 | timeout=15.0, 64 | rate_limit=False, 65 | rate_limit_wait=timedelta(milliseconds=50), 66 | cat_prefix="Category", 67 | ): 68 | """ DO NOT USE ME, USE MediaWiki.create() INSTEAD !!! 
""" 69 | self._version = VERSION 70 | self._lang = lang.lower() 71 | self._api_url = url.format(lang=self._lang) 72 | self._cat_prefix = None 73 | self.category_prefix = cat_prefix 74 | self._timeout = None 75 | self.timeout = timeout 76 | # requests library parameters 77 | self._session = None 78 | self._user_agent = "python-mediawiki/VERSION-{0}" "/({1})/BOT".format(VERSION, URL) 79 | self._proxies = None 80 | 81 | self._rate_limit = None 82 | self.rate_limit = bool(rate_limit) 83 | self._rate_limit_last_call = None 84 | self._min_wait = rate_limit_wait 85 | self._extensions = None 86 | self._api_version = None 87 | self._api_version_str = None 88 | self._base_url = None 89 | self.__supported_languages = None 90 | self.__available_languages = None 91 | 92 | # for memoized results 93 | self._cache = dict() 94 | self._refresh_interval = None 95 | self._use_cache = True 96 | 97 | # for login information 98 | self._is_logged_in = False 99 | 100 | @classmethod 101 | async def create( 102 | cls, 103 | url="https://{lang}.wikipedia.org/w/api.php", 104 | lang="en", 105 | timeout=15.0, 106 | rate_limit=False, 107 | rate_limit_wait=timedelta(milliseconds=50), 108 | cat_prefix="Category", 109 | user_agent=None, 110 | username=None, 111 | password=None, 112 | proxies: str = None, 113 | ): 114 | 115 | """ MediaWiki API Wrapper Instance 116 | 117 | Args: 118 | url (str): API URL of the MediaWiki site; defaults to Wikipedia 119 | lang (str): Language of the MediaWiki site; used to help change API URL 120 | timeout (float): HTTP timeout setting; None means no timeout 121 | rate_limit (bool): Use rate limiting to limit calls to the site 122 | rate_limit_wait (timedelta): Amount of time to wait between requests 123 | cat_prefix (str): The prefix for categories used by the mediawiki site; defaults to Category (en) 124 | user_agent (str): The user agent string to use when making requests; defaults to a library version but \ 125 | per the MediaWiki API documentation it recommends 
setting a unique one and not using the \ 126 | library's default user-agent string 127 | username (str): The username to use to log into the MediaWiki 128 | password (str): The password to use to log into the MediaWiki 129 | proxies (str): Proxy **URL** for aiohttp library to use. 130 | It looks like 'http://your_proxy_url:your_proxy_port' or 131 | 'http://your_user:your_password@your_proxy_url:your_proxy_port' (If your proxy requires authentication) 132 | """ 133 | 134 | self = MediaWiki( 135 | url=url, 136 | lang=lang, 137 | timeout=timeout, 138 | rate_limit=rate_limit, 139 | rate_limit_wait=rate_limit_wait, 140 | cat_prefix=cat_prefix, 141 | ) 142 | 143 | # set library parameters 144 | if user_agent is not None: 145 | await self.set_user_agent(user_agent) 146 | await self.set_proxies(proxies) # this will call self._reset_session() 147 | 148 | if password is not None and username is not None: 149 | await self.login(username, password) 150 | 151 | try: 152 | await self._get_site_info() 153 | except MediaWikiException: 154 | raise MediaWikiAPIURLError(url) 155 | 156 | return self 157 | 158 | def __del__(self): 159 | if self._session: 160 | loop = asyncio.get_event_loop() 161 | loop.create_task(self._session.close()) 162 | 163 | # non-settable properties 164 | @property 165 | def version(self): 166 | """ str: The version of the pymediawiki library 167 | 168 | Note: 169 | Not settable """ 170 | return self._version 171 | 172 | @property 173 | def api_version(self): 174 | """ str: API Version of the MediaWiki site 175 | 176 | Note: 177 | Not settable """ 178 | return self._api_version_str 179 | 180 | @property 181 | def base_url(self): 182 | """ str: Base URL for the MediaWiki site 183 | 184 | Note: 185 | Not settable """ 186 | return self._base_url 187 | 188 | @property 189 | def extensions(self): 190 | """ list: Extensions installed on the MediaWiki site 191 | 192 | Note: 193 | Not settable """ 194 | return self._extensions 195 | 196 | # settable properties 197 | 
@property 198 | def rate_limit(self): 199 | """ bool: Turn on or off Rate Limiting """ 200 | return self._rate_limit 201 | 202 | @rate_limit.setter 203 | def rate_limit(self, rate_limit): 204 | """ Turn on or off rate limiting """ 205 | self._rate_limit = bool(rate_limit) 206 | self._rate_limit_last_call = None 207 | self.clear_memoized() 208 | 209 | @property 210 | def proxies(self): 211 | return self._proxies 212 | 213 | async def set_proxies(self, proxies): 214 | """ Turn on, off, or set proxy use through the aiohttp library """ 215 | if proxies and isinstance(proxies, str): 216 | self._proxies = proxies 217 | else: 218 | self._proxies = None 219 | await self._reset_session() 220 | 221 | @property 222 | def use_cache(self): 223 | """ bool: Whether caching should be used; on (**True**) or off \ 224 | (**False**) """ 225 | return self._use_cache 226 | 227 | @use_cache.setter 228 | def use_cache(self, use_cache): 229 | """ toggle using the cache or not """ 230 | self._use_cache = bool(use_cache) 231 | 232 | @property 233 | def rate_limit_min_wait(self): 234 | """ timedelta: Time to wait between calls 235 | 236 | Note: 237 | Only used if rate_limit is **True** """ 238 | return self._min_wait 239 | 240 | @rate_limit_min_wait.setter 241 | def rate_limit_min_wait(self, min_wait): 242 | """ Set minimum wait to use for rate limiting """ 243 | self._min_wait = min_wait 244 | self._rate_limit_last_call = None 245 | 246 | @property 247 | def timeout(self): 248 | """ float: Response timeout for API requests 249 | 250 | Note: 251 | Use **None** for no response timeout """ 252 | return self._timeout 253 | 254 | @timeout.setter 255 | def timeout(self, timeout): 256 | """ Set request timeout in seconds (or fractions of a second) """ 257 | 258 | if timeout is None: 259 | self._timeout = None # no timeout 260 | return 261 | self._timeout = float(timeout) # allow the exception to be raised 262 | 263 | @property 264 | def language(self): 265 | """ str: The API URL language, if 
possible this will update the API \ 266 | URL 267 | 268 | Note: 269 | Use correct language titles with the updated API URL 270 | Note: 271 | Some API URLs do not encode language; unable to update if \ 272 | this is the case """ 273 | return self._lang 274 | 275 | @language.setter 276 | def language(self, lang): 277 | """ Set the language to use; attempts to change the API URL """ 278 | lang = lang.lower() 279 | if self._lang == lang: 280 | return 281 | 282 | url = self._api_url 283 | tmp = url.replace("/{0}.".format(self._lang), "/{0}.".format(lang)) 284 | 285 | self._api_url = tmp 286 | self._lang = lang 287 | self.clear_memoized() 288 | 289 | @property 290 | def category_prefix(self): 291 | """ str: The category prefix to use when using category based functions 292 | 293 | Note: 294 | Use the correct category name for the language selected """ 295 | return self._cat_prefix 296 | 297 | @category_prefix.setter 298 | def category_prefix(self, prefix): 299 | """ Set the category prefix correctly """ 300 | if prefix[-1:] == ":": 301 | prefix = prefix[:-1] 302 | self._cat_prefix = prefix 303 | 304 | @property 305 | def user_agent(self): 306 | """ str: User agent string 307 | 308 | Note: If using in as part of another project, this should be \ 309 | changed """ 310 | return self._user_agent 311 | 312 | async def set_user_agent(self, user_agent): 313 | """ Set the new user agent string 314 | 315 | Note: Will need to re-log into the MediaWiki if user agent string \ 316 | is changed """ 317 | self._user_agent = user_agent 318 | await self._reset_session() 319 | 320 | @property 321 | def api_url(self): 322 | """ str: API URL of the MediaWiki site 323 | 324 | Note: 325 | Not settable; See :py:func:`mediawiki.MediaWiki.set_api_url`""" 326 | return self._api_url 327 | 328 | @property 329 | def memoized(self): 330 | """ dict: Return the memoize cache 331 | 332 | Note: 333 | Not settable; see 334 | :py:func:`mediawiki.MediaWiki.clear_memoized` """ 335 | return self._cache 336 | 
337 | @property 338 | def refresh_interval(self): 339 | """ int: The interval at which the memoize cache is to be refreshed """ 340 | return self._refresh_interval 341 | 342 | @refresh_interval.setter 343 | def refresh_interval(self, refresh_interval): 344 | """ Set the new cache refresh interval """ 345 | if isinstance(refresh_interval, int) and refresh_interval > 0: 346 | self._refresh_interval = refresh_interval 347 | else: 348 | self._refresh_interval = None 349 | 350 | async def login(self, username, password, strict=True): 351 | """ Login as specified user 352 | 353 | Args: 354 | username (str): The username to log in with 355 | password (str): The password for the user 356 | strict (bool): `True` to throw an error on failure 357 | Returns: 358 | bool: `True` if successfully logged in; `False` otherwise 359 | Raises: 360 | :py:func:`mediawiki.exceptions.MediaWikiLoginError`: if unable to login 361 | 362 | Note: 363 | Per the MediaWiki API, one should use the `bot password`; \ 364 | see https://www.mediawiki.org/wiki/API:Login for more information """ 365 | # get login token 366 | params = { 367 | "action": "query", 368 | "meta": "tokens", 369 | "type": "login", 370 | "format": "json", 371 | } 372 | token_res = await self._get_response(params) 373 | if "query" in token_res and "tokens" in token_res["query"]: 374 | token = token_res["query"]["tokens"]["logintoken"] 375 | 376 | params = { 377 | "action": "login", 378 | "lgname": username, 379 | "lgpassword": password, 380 | "lgtoken": token, 381 | "format": "json", 382 | } 383 | 384 | res = await self._post_response(params) 385 | if res["login"]["result"] == "Success": 386 | self._is_logged_in = True 387 | return True 388 | self._is_logged_in = False 389 | reason = res["login"]["reason"] 390 | if strict: 391 | msg = "MediaWiki login failure: {}".format(reason) 392 | raise MediaWikiLoginError(msg) 393 | return False 394 | 395 | # non-properties 396 | async def set_api_url( 397 | self, 
api_url="https://{lang}.wikipedia.org/w/api.php", lang="en", username=None, password=None, 398 | ): 399 | """ Set the API URL and language 400 | 401 | Args: 402 | api_url (str): API URL to use 403 | lang (str): Language of the API URL 404 | username (str): The username, if needed, to log into the MediaWiki site 405 | password (str): The password, if needed, to log into the MediaWiki site 406 | Raises: 407 | :py:func:`mediawiki.exceptions.MediaWikiAPIURLError`: if the \ 408 | url is not a valid MediaWiki site or login fails """ 409 | old_api_url = self._api_url 410 | old_lang = self._lang 411 | self._lang = lang.lower() 412 | self._api_url = api_url.format(lang=self._lang) 413 | 414 | self._is_logged_in = False 415 | try: 416 | if username is not None and password is not None: 417 | await self.login(username, password) 418 | await self._get_site_info() 419 | self.__supported_languages = None # reset this 420 | self.__available_languages = None # reset this 421 | except (asyncio.TimeoutError, MediaWikiException): 422 | # reset api url and lang in the event that the exception was caught 423 | self._api_url = old_api_url 424 | self._lang = old_lang 425 | raise MediaWikiAPIURLError(api_url) 426 | self.clear_memoized() 427 | 428 | async def _reset_session(self): 429 | """ Set session information """ 430 | if self._session: 431 | await self._session.close() 432 | 433 | headers = {"User-Agent": self._user_agent} 434 | self._session = aiohttp.ClientSession() 435 | self._session.headers.update(headers) 436 | self._is_logged_in = False 437 | 438 | def clear_memoized(self): 439 | """ Clear memoized (cached) values """ 440 | if hasattr(self, "_cache"): 441 | self._cache.clear() 442 | 443 | # non-setup functions 444 | async def supported_languages(self): 445 | """ dict: All supported language prefixes on the MediaWiki site 446 | 447 | Note: 448 | Not Settable """ 449 | if self.__supported_languages is None: 450 | res = await self.wiki_request({"meta": "siteinfo", "siprop": 
"languages"}) 451 | tmp = res["query"]["languages"] 452 | supported = {lang["code"]: lang["*"] for lang in tmp} 453 | self.__supported_languages = supported 454 | return self.__supported_languages 455 | 456 | async def available_languages(self): 457 | """ dict: All available language prefixes on the MediaWiki site 458 | 459 | Note: 460 | Not Settable """ 461 | if self.__available_languages is None: 462 | available = {} 463 | supported_languages = await self.supported_languages() 464 | for lang in supported_languages: 465 | try: 466 | MediaWiki(lang=lang) 467 | available[lang] = True 468 | except (aiohttp.ClientConnectionError, asyncio.TimeoutError, MediaWikiException, 469 | MediaWikiAPIURLError): 470 | available[lang] = False 471 | self.__available_languages = available 472 | return self.__available_languages 473 | 474 | @property 475 | def logged_in(self): 476 | """ bool: Returns if logged into the MediaWiki site """ 477 | return self._is_logged_in 478 | 479 | async def random(self, pages=1): 480 | """ Request a random page title or list of random titles 481 | 482 | Args: 483 | pages (int): Number of random pages to return 484 | Returns: 485 | list or int: A list of random page titles or a random page title if pages = 1 """ 486 | if pages is None or pages < 1: 487 | raise ValueError("Number of pages must be greater than 0") 488 | 489 | query_params = {"list": "random", "rnnamespace": 0, "rnlimit": pages} 490 | 491 | request = await self.wiki_request(query_params) 492 | titles = [page["title"] for page in request["query"]["random"]] 493 | 494 | if len(titles) == 1: 495 | return titles[0] 496 | return titles 497 | 498 | @memoize 499 | async def allpages(self, query="", results=10): 500 | """ Request all pages from mediawiki instance 501 | 502 | Args: 503 | query (str): Search string to use for pulling pages 504 | results (int): The number of pages to return 505 | Returns: 506 | list: The pages that meet the search query 507 | Note: 508 | Could add ability to 
continue past the limit of 500 509 | """ 510 | max_pull = 500 511 | limit = min(results, max_pull) if results is not None else max_pull 512 | query_params = {"list": "allpages", "aplimit": limit, "apfrom": query} 513 | 514 | request = await self.wiki_request(query_params) 515 | 516 | self._check_error_response(request, query) 517 | 518 | titles = [page["title"] for page in request["query"]["allpages"]] 519 | return titles 520 | 521 | @memoize 522 | async def search(self, query, results=10, suggestion=False): 523 | """ Search for similar titles 524 | 525 | Args: 526 | query (str): Page title 527 | results (int): Number of pages to return 528 | suggestion (bool): Use suggestion 529 | Returns: 530 | tuple or list: tuple (list results, suggestion) if suggestion is **True**; list of results otherwise 531 | Note: 532 | Could add ability to continue past the limit of 500 533 | """ 534 | 535 | self._check_query(query, "Query must be specified") 536 | 537 | max_pull = 500 538 | 539 | search_params = { 540 | "list": "search", 541 | "srprop": "", 542 | "srlimit": min(results, max_pull) if results is not None else max_pull, 543 | "srsearch": query, 544 | "sroffset": 0, # this is what will be used to pull more than the max 545 | } 546 | if suggestion: 547 | search_params["srinfo"] = "suggestion" 548 | 549 | raw_results = await self.wiki_request(search_params) 550 | 551 | self._check_error_response(raw_results, query) 552 | 553 | search_results = [d["title"] for d in raw_results["query"]["search"]] 554 | 555 | if suggestion: 556 | sug = None 557 | if raw_results["query"].get("searchinfo"): 558 | sug = raw_results["query"]["searchinfo"]["suggestion"] 559 | return search_results, sug 560 | return search_results 561 | 562 | @memoize 563 | async def suggest(self, query): 564 | """ Gather suggestions based on the provided title or None if no 565 | suggestions found 566 | 567 | Args: 568 | query (str): Page title 569 | Returns: 570 | String or None: Suggested page title or **None** if 
no suggestion found
        """
        res, suggest = await self.search(query, results=1, suggestion=True)
        try:
            # prefer the top search hit; fall back to the engine's suggestion
            title = res[0] or suggest
        except IndexError:  # page doesn't exist
            title = None
        return title

    @memoize
    async def geosearch(
        self, latitude=None, longitude=None, radius=1000, title=None, auto_suggest=True, results=10,
    ):
        """ Search for pages that relate to the provided geocoords or near
        the page

        Args:
            latitude (Decimal or None): Latitude geocoord; must be coercible to decimal
            longitude (Decimal or None): Longitude geocoord; must be coercible to decimal
            radius (int): Radius around page or geocoords to pull back; in meters
            title (str): Page title to use as a geocoordinate; this has precedence over lat/long
            auto_suggest (bool): Auto-suggest the page title
            results (int): Number of pages within the radius to return
        Returns:
            list: A listing of page titles
        Note:
            The Geosearch API does not support pulling more than the maximum of 500
        Raises:
            ValueError: If either the passed latitude or longitude are not coercible to a Decimal
        """

        def test_lat_long(val):
            """ coerce val to Decimal, raising ValueError on failure """
            if not isinstance(val, Decimal):
                error = (
                    "Latitude and Longitude must be specified either as "
                    "a Decimal or in formats that can be coerced into "
                    "a Decimal."
                )
                try:
                    return Decimal(val)
                except (DecimalException, TypeError):
                    raise ValueError(error)
            return val

        # end local function
        max_pull = 500

        limit = min(results, max_pull) if results is not None else max_pull
        params = {"list": "geosearch", "gsradius": radius, "gslimit": limit}
        if title is not None:
            # a page title takes precedence over explicit coordinates
            if auto_suggest:
                title = await self.suggest(title)
            params["gspage"] = title
        else:
            lat = test_lat_long(latitude)
            lon = test_lat_long(longitude)
            params["gscoord"] = "{0}|{1}".format(lat, lon)

        raw_results = await self.wiki_request(params)

        self._check_error_response(raw_results, title)

        return [d["title"] for d in raw_results["query"]["geosearch"]]

    @memoize
    async def opensearch(self, query, results=10, redirect=True):
        """ Execute a MediaWiki opensearch request, similar to search box
        suggestions and conforming to the OpenSearch specification

        Args:
            query (str): Title to search for
            results (int): Number of pages within the radius to return
            redirect (bool): If **False** return the redirect itself, otherwise resolve redirects
        Returns:
            List: List of results that are stored in a tuple (Title, Summary, URL)
        Note:
            The Opensearch API does not support pulling more than the maximum of 500
        """

        self._check_query(query, "Query must be specified")
        max_pull = 500

        query_params = {
            "action": "opensearch",
            "search": query,
            "limit": (min(results, max_pull) if results is not None else max_pull),
            "redirects": ("resolve" if redirect else "return"),
            "warningsaserror": True,
            "namespace": "",
        }

        # NOTE: `results` is rebound here from the requested count to the raw
        # OpenSearch response (a 4-element array: query, titles, summaries, urls)
        results = await self.wiki_request(query_params)

        self._check_error_response(results, query)

        res = list()
        for i, item in enumerate(results[1]):
res.append((item, results[2][i], results[3][i])) 670 | return res 671 | 672 | @memoize 673 | async def prefixsearch(self, prefix, results=10): 674 | """ Perform a prefix search using the provided prefix string 675 | 676 | Args: 677 | prefix (str): Prefix string to use for search 678 | results (int): Number of pages with the prefix to return 679 | Returns: 680 | list: List of page titles 681 | Note: 682 | **Per the documentation:** "The purpose of this module is \ 683 | similar to action=opensearch: to take user input and provide \ 684 | the best-matching titles. Depending on the search engine \ 685 | backend, this might include typo correction, redirect \ 686 | avoidance, or other heuristics." 687 | Note: 688 | Could add ability to continue past the limit of 500 689 | """ 690 | 691 | self._check_query(prefix, "Prefix must be specified") 692 | 693 | query_params = { 694 | "list": "prefixsearch", 695 | "pssearch": prefix, 696 | "pslimit": ("max" if (results > 500 or results is None) else results), 697 | "psnamespace": 0, 698 | "psoffset": 0, # parameterize to skip to later in the list? 
699 | } 700 | 701 | raw_results = await self.wiki_request(query_params) 702 | 703 | self._check_error_response(raw_results, prefix) 704 | 705 | return [rec["title"] for rec in raw_results["query"]["prefixsearch"]] 706 | 707 | @memoize 708 | async def summary(self, title, sentences=0, chars=0, auto_suggest=True, redirect=True): 709 | """ Get the summary for the title in question 710 | 711 | Args: 712 | title (str): Page title to summarize 713 | sentences (int): Number of sentences to return in summary 714 | chars (int): Number of characters to return in summary 715 | auto_suggest (bool): Run auto-suggest on title before summarizing 716 | redirect (bool): Use page redirect on title before summarizing 717 | Returns: 718 | str: The summarized results of the page 719 | Note: 720 | Precedence for parameters: sentences then chars; if both are \ 721 | 0 then the entire first section is returned """ 722 | page_info = await self.page(title, auto_suggest=auto_suggest, redirect=redirect) 723 | return await page_info.summarize(sentences, chars) 724 | 725 | @memoize 726 | async def categorymembers(self, category, results=10, subcategories=True): 727 | """ Get information about a category: pages and subcategories 728 | 729 | Args: 730 | category (str): Category name 731 | results (int): Number of result 732 | subcategories (bool): Include subcategories (**True**) or not (**False**) 733 | Returns: 734 | Tuple or List: Either a tuple ([pages], [subcategories]) or just the list of pages 735 | Note: 736 | Set results to **None** to get all results """ 737 | self._check_query(category, "Category must be specified") 738 | 739 | max_pull = 500 740 | search_params = { 741 | "list": "categorymembers", 742 | "cmprop": "ids|title|type", 743 | "cmtype": ("page|subcat|file" if subcategories else "page|file"), 744 | "cmlimit": (min(results, max_pull) if results is not None else max_pull), 745 | "cmtitle": "{0}:{1}".format(self.category_prefix, category), 746 | } 747 | pages = list() 748 | 
        subcats = list()
        returned_results = 0
        finished = False
        last_cont = dict()
        # page through the category members until we run out of continuation
        # tokens or have gathered the requested number of results
        while not finished:
            params = search_params.copy()
            params.update(last_cont)
            raw_res = await self.wiki_request(params)

            self._check_error_response(raw_res, category)

            current_pull = len(raw_res["query"]["categorymembers"])
            for rec in raw_res["query"]["categorymembers"]:
                if rec["type"] in ("page", "file"):
                    pages.append(rec["title"])
                elif rec["type"] == "subcat":
                    tmp = rec["title"]
                    # strip the localized "Category:" prefix from subcategory titles
                    if tmp.startswith(self.category_prefix):
                        tmp = tmp[len(self.category_prefix) + 1:]
                    subcats.append(tmp)

            # older MediaWiki versions report "query-continue"; newer use "continue"
            cont = raw_res.get("query-continue", False)
            if cont and "categorymembers" in cont:
                cont = cont["categorymembers"]
            else:
                cont = raw_res.get("continue", False)

            # stop when there is no continuation or it did not advance
            if cont is False or last_cont == cont:
                break

            returned_results += current_pull
            if results is None or (results - returned_results > 0):
                last_cont = cont
            else:
                finished = True

            # shrink the final pull so we do not overshoot the requested count
            if results is not None and results - returned_results < max_pull:
                search_params["cmlimit"] = results - returned_results
        # end while loop

        if subcategories:
            return pages, subcats
        return pages

    async def categorytree(self, category, depth=5):
        """ Generate the Category Tree for the given categories

        Args:
            category(str or list of strings): Category name(s)
            depth(int): Depth to traverse the tree
        Returns:
            dict: Category tree structure
        Note:
            Set depth to **None** to get the whole tree
        Note:
            Return Data Structure: Subcategory contains the same recursive structure

            >>> {
                    'category': {
                        'depth': Number,
                        'links': list,
                        'parent-categories': list,
                        'sub-categories': dict
                    }
                }

        .. versionadded:: 0.3.10 """

        # make it simple to use both a list or a single category term
        cats = [category] if not isinstance(category, list) else category

        self.__category_parameter_verification(cats, depth, category)

        results = dict()
        categories = dict()
        links = dict()

        # build one subtree per (non-empty) requested category
        for cat in [x for x in cats if x]:
            await self.__cat_tree_rec(cat, depth, results, 0, categories, links)
        return results

    async def page(self, title=None, pageid=None, auto_suggest=True, redirect=True, preload=False,
                   convert_titles=False, iwurl=True):
        """ Get MediaWiki page based on the provided title or pageid

        Args:
            title (str): Page title
            pageid (int): MediaWiki page identifier
            auto_suggest (bool): **True:** Allow page title auto-suggest
            redirect (bool): **True:** Follow page redirects
            preload (bool): **True:** Load most page properties
            convert_titles (bool): **False:** Convert titles to other variants if necessary. \
                Only works if the wiki's content language supports variant conversion.
            iwurl (bool): **False:** Whether to get the full URL if the title is an interwiki link.
        Raises:
            ValueError: when title is blank or None and no pageid is provided
        Raises:
            :py:func:`mediawiki.exceptions.PageError`: if page does not exist
        Note:
            Title takes precedence over pageid if both are provided """
        if (title is None or title.strip() == "") and pageid is None:
            raise ValueError("Either a title or a pageid must be specified")
        if title:
            if auto_suggest:
                temp_title = await self.suggest(title)
                if temp_title is None:  # page doesn't exist
                    raise PageError(title=title)
                title = temp_title
            return await MediaWikiPage.create(self, title, redirect=redirect, preload=preload,
                                              convert_titles=convert_titles, iwurl=iwurl)
        return await MediaWikiPage.create(self, pageid=pageid, preload=preload,
                                          convert_titles=convert_titles, iwurl=iwurl)

    async def wiki_request(self, params):
        """ Make a request to the MediaWiki API using the given search
        parameters

        Args:
            params (dict): Request parameters
        Returns:
            A parsed dict of the JSON response
        Note:
            Useful when wanting to query the MediaWiki site for some \
            value that is not part of the wrapper API """

        params["format"] = "json"
        if "action" not in params:
            params["action"] = "query"

        limit = self._rate_limit
        last_call = self._rate_limit_last_call
        if limit and last_call and last_call + self._min_wait > datetime.now():
            # call came too quickly for rate-limited API requests; wait it out
            wait_time = (last_call + self._min_wait) - datetime.now()
            await asyncio.sleep(wait_time.total_seconds())

        req = await self._get_response(params)

        if self._rate_limit:
            self._rate_limit_last_call = datetime.now()

        return req

    # Protected functions
    async def _get_site_info(self):
        """ Parse out the Wikimedia site information including API Version and Extensions """
        response = await self.wiki_request({"meta": "siteinfo", "siprop": "extensions|general"})

        # parse what we need out here!
        query = response.get("query", None)
        if query is None or query.get("general", None) is None:
            raise MediaWikiException("Missing query in response")

        gen = query.get("general", None)

        # e.g. "MediaWiki 1.38.2" -> "1.38.2"
        # NOTE(review): assumes "generator" is present and of the form
        # "<name> <version>"; a malformed value raises KeyError/IndexError here
        api_version = gen["generator"].split(" ")[1].split("-")[0]

        major_minor = api_version.split(".")
        for i, item in enumerate(major_minor):
            major_minor[i] = int(item)
        self._api_version = tuple(major_minor)
        self._api_version_str = ".".join([str(x) for x in self._api_version])

        # parse the base url out
        tmp = gen.get("server", "")
        if tmp == "":
            raise MediaWikiException("Unable to parse base url")
        if tmp.startswith("http://") or tmp.startswith("https://"):
            self._base_url = tmp
        elif gen["base"].startswith("https:"):
            # protocol-relative server value, e.g. "//en.wikipedia.org"
            self._base_url = "https:{}".format(tmp)
        else:
            self._base_url = "http:{}".format(tmp)

        self._extensions = [ext["name"] for ext in query["extensions"]]
        self._extensions = sorted(list(set(self._extensions)))

    # end _get_site_info

    @staticmethod
    def _check_error_response(response, query):
        """ check for default error messages and throw correct exception """
        if "error" in response:
            http_error = ["HTTP request timed out.", "Pool queue is full"]
            geo_error = [
                "Page coordinates unknown.",
                "One of the parameters gscoord, gspage, gsbbox is required",
                "Invalid coordinate provided",
            ]
            err = response["error"]["info"]
            if err in http_error:
                raise HTTPTimeoutError(query)
            if err in geo_error:
                raise MediaWikiGeoCoordError(err)
            raise MediaWikiException(err)

    @staticmethod
    def _check_query(value, message):
        """ check if the query is 'valid' """
        if value is None or value.strip() == "":
            raise ValueError(message)

    @staticmethod
    def __category_parameter_verification(cats, depth, category):
        """ validate categorytree parameters; raises ValueError on bad input """
        # parameter verification
        if len(cats) == 1 and (cats[0] is None or cats[0] == ""):
            msg = (
                "CategoryTree: Parameter 'category' must either "
                "be a list of one or more categories or a string; "
                "provided: '{}'".format(category)
            )
            raise ValueError(msg)

        if depth is not None and depth < 1:
            msg = "CategoryTree: Parameter 'depth' must be either None " "(for the full tree) or be greater than 0"
            raise ValueError(msg)

    async def __cat_tree_rec(self, cat, depth, tree, level, categories, links):
        """ recursive function to build out the tree """
        tree[cat] = dict()
        tree[cat]["depth"] = level
        tree[cat]["sub-categories"] = dict()
        tree[cat]["links"] = list()
        tree[cat]["parent-categories"] = list()
        parent_cats = list()

        if cat not in categories:
            tries = 0
            while True:
                # give up after 10 consecutive failures for this category
                if tries > 10:
                    raise MediaWikiCategoryTreeError(cat)
                try:
                    pag = await self.page("{0}:{1}".format(self.category_prefix, cat))
                    categories[cat] = pag
                    parent_cats = await categories[cat].categories()
                    links[cat] = await self.categorymembers(cat, results=None, subcategories=True)
                    break
                except PageError:
                    raise PageError("{0}:{1}".format(self.category_prefix, cat))
                except KeyboardInterrupt:
                    raise
                except Exception:
                    # NOTE(review): deliberately broad retry -- any other error
                    # sleeps one second and retries (up to 10 times); confirm
                    # this best-effort behavior is intended
                    tries = tries + 1
                    await asyncio.sleep(1)
        else:
            parent_cats = await categories[cat].categories()

        tree[cat]["parent-categories"].extend(parent_cats)
        tree[cat]["links"].extend(links[cat][0])

        if depth and level >= depth:
            # at maximum depth: record subcategories but do not recurse
            for ctg in links[cat][1]:
                tree[cat]["sub-categories"][ctg] = None
        else:
            for ctg in links[cat][1]:
                await self.__cat_tree_rec(
                    ctg, depth, tree[cat]["sub-categories"],
level + 1, categories, links, 1005 | ) 1006 | 1007 | async def _get_response(self, params): 1008 | """ wrap the call to the requests package """ 1009 | try: 1010 | resp = await self._session.get(self._api_url, params=params, timeout=self._timeout, proxy=self._proxies) 1011 | return await resp.json() 1012 | except (JSONDecodeError, aiohttp.ContentTypeError): 1013 | return {} 1014 | 1015 | async def _post_response(self, params): 1016 | """ wrap a post call to the requests package """ 1017 | try: 1018 | resp = await self._session.post(self._api_url, data=params, timeout=self._timeout, proxy=self._proxies) 1019 | return await resp.json() 1020 | except JSONDecodeError: 1021 | return {} 1022 | 1023 | # end MediaWiki class 1024 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/mediawiki/mediawikipage.py: -------------------------------------------------------------------------------- 1 | """ 2 | MediaWikiPage class module 3 | """ 4 | # MIT License 5 | # Author: Tyler Barrus (barrust@gmail.com); KoishiMoe 6 | 7 | import re 8 | from collections import OrderedDict 9 | from decimal import Decimal 10 | 11 | from bs4 import BeautifulSoup, Tag 12 | 13 | from .exceptions import ( 14 | MediaWikiBaseException, 15 | MediaWikiException, 16 | PageError, 17 | RedirectError, 18 | DisambiguationError, 19 | ODD_ERROR_MESSAGE, 20 | InterWikiError, 21 | ) 22 | from .utilities import str_or_unicode, is_relative_url 23 | 24 | 25 | class MediaWikiPage(object): 26 | """ MediaWiki Page Instance 27 | 28 | Warning: 29 | This should never need to be used directly! Please use \ 30 | :func:`MediaWikiPage.create` instead. 
31 | """ 32 | __slots__ = [ 33 | "mediawiki", 34 | "url", 35 | "title", 36 | "original_title", 37 | "pageid", 38 | "_content", 39 | "_revision_id", 40 | "_parent_id", 41 | "_html", 42 | "_soup", 43 | "_images", 44 | "_references", 45 | "_categories", 46 | "_coordinates", 47 | "_links", 48 | "_redirects", 49 | "_backlinks", 50 | "_langlinks", 51 | "_summary", 52 | "_sections", 53 | "_table_of_contents", 54 | "_logos", 55 | "_hatnotes", 56 | "_wikitext", 57 | "_preview", 58 | "_converttitles", 59 | "_iwurl", 60 | ] 61 | 62 | def __init__( 63 | self, 64 | mediawiki, 65 | title=None, 66 | pageid=None, 67 | preload=False, 68 | original_title="", 69 | convert_titles=False, 70 | iwurl=True, 71 | ): 72 | 73 | self.mediawiki = mediawiki 74 | self.url = None 75 | if title is not None: 76 | self.title = title 77 | self.original_title = original_title or title 78 | elif pageid is not None: 79 | self.pageid = pageid 80 | else: 81 | raise ValueError("Either a title or a pageid must be specified") 82 | 83 | self._content = None 84 | self._revision_id = None 85 | self._parent_id = None 86 | self._html = False # None signifies nothing returned... 87 | self._images = None 88 | self._references = None 89 | self._categories = None 90 | self._coordinates = False # None signifies nothing returned... 
91 | self._links = None 92 | self._redirects = None 93 | self._backlinks = None 94 | self._langlinks = None 95 | self._summary = None 96 | self._sections = None 97 | self._table_of_contents = None 98 | self._logos = None 99 | self._hatnotes = None 100 | self._soup = None 101 | self._wikitext = None 102 | self._preview = None 103 | self._converttitles = convert_titles 104 | self._iwurl = iwurl 105 | 106 | preload_props = [ 107 | "content", 108 | "summary", 109 | "images", 110 | "references", 111 | "links", 112 | "sections", 113 | "redirects", 114 | "coordinates", 115 | "backlinks", 116 | "categories", 117 | ] 118 | if preload: 119 | for prop in preload_props: 120 | getattr(self, prop) 121 | 122 | # end __init__ 123 | 124 | @classmethod 125 | async def create( 126 | cls, 127 | mediawiki, 128 | title=None, 129 | pageid=None, 130 | redirect=True, 131 | preload=False, 132 | original_title="", 133 | convert_titles=False, 134 | iwurl=True, 135 | ): 136 | """ create a MediaWikiPage instance 137 | 138 | Args: 139 | mediawiki (MediaWiki): MediaWiki class object from which to pull 140 | title (str): Title of page to retrieve 141 | pageid (int): MediaWiki site pageid to retrieve 142 | redirect (bool): **True:** Follow redirects 143 | preload (bool): **True:** Load most properties after getting page 144 | original_title (str): Not to be used from the caller; used to \ 145 | help follow redirects 146 | convert_titles (bool): Convert titles to other variants if necessary. \ 147 | Only works if the wiki's content language supports variant conversion. 148 | iwurl (bool): Whether to get the full URL if the title is an interwiki link. 
149 | Raises: 150 | :py:func:`mediawiki.exceptions.PageError`: if page provided does \ 151 | not exist 152 | Raises: 153 | :py:func:`mediawiki.exceptions.DisambiguationError`: if page \ 154 | provided is a disambiguation page 155 | Raises: 156 | :py:func:`mediawiki.exceptions.RedirectError`: if redirect is \ 157 | **False** and the pageid or title provided redirects to another \ 158 | page 159 | Warning: 160 | This should never need to be used directly! Please use \ 161 | :func:`mediawiki.MediaWiki.page` instead.""" 162 | self = MediaWikiPage(mediawiki, title, pageid, preload, original_title, convert_titles, iwurl) 163 | await self.__load(redirect=redirect, preload=preload) 164 | return self 165 | 166 | def __repr__(self): 167 | """ repr """ 168 | return self.__str__() 169 | 170 | def __unicode__(self): 171 | """ python 2.7 unicode """ 172 | return """""".format(self.title) 173 | 174 | def __str__(self): 175 | """ python > 3 unicode python 2.7 byte str """ 176 | return str_or_unicode(self.__unicode__()) 177 | 178 | def __eq__(self, other): 179 | """ base eq function """ 180 | try: 181 | return ( 182 | self.pageid == other.pageid 183 | and self.title == other.title 184 | and self.url == other.url 185 | ) 186 | except AttributeError: 187 | return False 188 | 189 | # Properties 190 | async def _pull_content_revision_parent(self): 191 | """ combine the pulling of these three properties """ 192 | 193 | if self._revision_id is None: 194 | query_params = { 195 | "prop": "extracts|revisions", 196 | "explaintext": "", 197 | "rvprop": "ids", 198 | } 199 | query_params.update(self.__title_query_param()) 200 | request = await self.mediawiki.wiki_request(query_params) 201 | page_info = request["query"]["pages"][self.pageid] 202 | self._content = page_info.get("extract", None) 203 | self._revision_id = page_info["revisions"][0]["revid"] 204 | self._parent_id = page_info["revisions"][0]["parentid"] 205 | 206 | if self._content is None and 'TextExtracts' not in 
self.mediawiki.extensions: 207 | msg = "Unable to extract page content; the TextExtracts extension must be installed!" 208 | raise MediaWikiBaseException(msg) 209 | return self._content, self._revision_id, self._parent_id 210 | 211 | async def content(self): 212 | """ str: The page content in text format 213 | 214 | Note: 215 | Not settable 216 | Note: 217 | Side effect is to also get revision_id and parent_id """ 218 | if self._content is None: 219 | await self._pull_content_revision_parent() 220 | return self._content 221 | 222 | async def revision_id(self): 223 | """ int: The current revision id of the page 224 | 225 | Note: 226 | Not settable 227 | Note: 228 | Side effect is to also get content and parent_id """ 229 | if self._revision_id is None: 230 | await self._pull_content_revision_parent() 231 | return self._revision_id 232 | 233 | async def parent_id(self): 234 | """ int: The parent id of the page 235 | 236 | Note: 237 | Not settable 238 | Note: 239 | Side effect is to also get content and revision_id """ 240 | if self._parent_id is None: 241 | await self._pull_content_revision_parent() 242 | return self._parent_id 243 | 244 | async def html(self): 245 | """ str: HTML representation of the page 246 | 247 | Note: 248 | Not settable 249 | Warning: 250 | This can be slow for very large pages """ 251 | if self._html is False: 252 | self._html = None 253 | query_params = { 254 | "prop": "revisions", 255 | "rvprop": "content", 256 | "rvlimit": 1, 257 | "rvparse": "", 258 | "titles": self.title, 259 | } 260 | request = await self.mediawiki.wiki_request(query_params) 261 | page = request["query"]["pages"][self.pageid] 262 | self._html = page["revisions"][0]["*"] 263 | return self._html 264 | 265 | async def wikitext(self): 266 | """ str: Wikitext representation of the page 267 | 268 | Note: 269 | Not settable """ 270 | if self._wikitext is None: 271 | query_params = { 272 | "action": "parse", 273 | "pageid": self.pageid, 274 | "prop": "wikitext", 275 | 
"formatversion": "latest", 276 | } 277 | request = await self.mediawiki.wiki_request(query_params) 278 | self._wikitext = request["parse"]["wikitext"] 279 | return self._wikitext 280 | 281 | async def images(self): 282 | """ list: Images on the page 283 | 284 | Note: 285 | Not settable """ 286 | if self._images is None: 287 | self._images = list() 288 | params = { 289 | "generator": "images", 290 | "gimlimit": "max", 291 | "prop": "imageinfo", # this will be replaced by fileinfo 292 | "iiprop": "url", 293 | } 294 | async for page in self._continued_query(params): 295 | if "imageinfo" in page and "url" in page["imageinfo"][0]: 296 | self._images.append(page["imageinfo"][0]["url"]) 297 | self._images = sorted(self._images) 298 | return self._images 299 | 300 | async def logos(self): 301 | """ list: Parse images within the infobox signifying either the main \ 302 | image or logo 303 | 304 | Note: 305 | Not settable 306 | Note: 307 | Side effect is to also pull the html which can be slow 308 | Note: 309 | This is a parsing operation and not part of the standard API""" 310 | if self._logos is None: 311 | self._logos = list() 312 | # Cache the results of parsing the html, so that multiple calls happen much faster 313 | if not self._soup: 314 | self._soup = BeautifulSoup(await self.html(), "html.parser") 315 | info = self._soup.find("table", {"class": "infobox"}) 316 | if info is not None: 317 | children = info.find_all("a", class_="image") 318 | for child in children: 319 | self._logos.append("https:" + child.img["src"]) 320 | return self._logos 321 | 322 | async def hatnotes(self): 323 | """ list: Parse hatnotes from the HTML 324 | 325 | Note: 326 | Not settable 327 | Note: 328 | Side effect is to also pull the html which can be slow 329 | Note: 330 | This is a parsing operation and not part of the standard API""" 331 | if self._hatnotes is None: 332 | self._hatnotes = list() 333 | # Cache the results of parsing the html, so that multiple calls happen much faster 334 | 
if not self._soup: 335 | self._soup = BeautifulSoup(await self.html(), "html.parser") 336 | notes = self._soup.find_all("div", class_="hatnote") 337 | if notes is not None: 338 | for note in notes: 339 | tmp = list() 340 | for child in note.children: 341 | if hasattr(child, "text"): 342 | tmp.append(child.text) 343 | else: 344 | tmp.append(child) 345 | self._hatnotes.append("".join(tmp)) 346 | return self._hatnotes 347 | 348 | async def references(self): 349 | """ list: External links, or references, listed anywhere on the \ 350 | MediaWiki page 351 | Note: 352 | Not settable 353 | Note 354 | May include external links within page that are not \ 355 | technically cited anywhere """ 356 | if self._references is None: 357 | self._references = list() 358 | await self.__pull_combined_properties() 359 | return self._references 360 | 361 | async def categories(self): 362 | """ list: Non-hidden categories on the page 363 | 364 | Note: 365 | Not settable """ 366 | if self._categories is None: 367 | self._categories = list() 368 | await self.__pull_combined_properties() 369 | return self._categories 370 | 371 | async def coordinates(self): 372 | """ Tuple: GeoCoordinates of the place referenced; results in \ 373 | lat/long tuple or None if no geocoordinates present 374 | 375 | Note: 376 | Not settable 377 | Note: 378 | Requires the GeoData extension to be installed """ 379 | if self._coordinates is False: 380 | self._coordinates = None 381 | await self.__pull_combined_properties() 382 | return self._coordinates 383 | 384 | async def links(self): 385 | """ list: List of all MediaWiki page links on the page 386 | 387 | Note: 388 | Not settable """ 389 | if self._links is None: 390 | self._links = list() 391 | await self.__pull_combined_properties() 392 | return self._links 393 | 394 | async def redirects(self): 395 | """ list: List of all redirects to this page; **i.e.,** the titles \ 396 | listed here will redirect to this page title 397 | 398 | Note: 399 | Not settable """ 
400 | if self._redirects is None: 401 | self._redirects = list() 402 | await self.__pull_combined_properties() 403 | return self._redirects 404 | 405 | async def backlinks(self): 406 | """ list: Pages that link to this page 407 | 408 | Note: 409 | Not settable """ 410 | if self._backlinks is None: 411 | self._backlinks = list() 412 | params = { 413 | "action": "query", 414 | "list": "backlinks", 415 | "bltitle": self.title, 416 | "bllimit": "max", 417 | "blfilterredir": "nonredirects", 418 | "blnamespace": 0, 419 | } 420 | tmp = [link["title"] async for link in self._continued_query(params, "backlinks")] 421 | self._backlinks = sorted(tmp) 422 | return self._backlinks 423 | 424 | async def langlinks(self): 425 | """ dict: Names of the page in other languages for which page is \ 426 | where the key is the language code and the page name is the name \ 427 | of the page in that language. 428 | 429 | Note: 430 | Not settable 431 | Note: 432 | list of all language links from the provided pages to other \ 433 | languages according to: \ 434 | https://www.mediawiki.org/wiki/API:Langlinks """ 435 | 436 | if self._langlinks is None: 437 | params = {"prop": "langlinks", "cllimit": "max"} 438 | query_result = self._continued_query(params) 439 | 440 | langlinks = dict() 441 | async for lang_info in query_result: 442 | langlinks[lang_info["lang"]] = lang_info["*"] 443 | self._langlinks = langlinks 444 | return self._langlinks 445 | 446 | async def preview(self): 447 | """ dict: Page preview information that builds the preview hover """ 448 | if self._preview is None: 449 | params = { 450 | "action": "query", 451 | "formatversion": "2", 452 | "prop": "info|extracts|pageimages|revisions|pageterms|coordinates", 453 | "exsentences": "5", 454 | "explaintext": "true", 455 | "piprop": "thumbnail|original", 456 | "pithumbsize": "320", 457 | "pilicense": "any", 458 | "rvprop": "timestamp|ids", 459 | "wbptterms": "description", 460 | "titles": self.title, 461 | } 462 | raw = await 
    async def summary(self):
        """ str: Default page summary

        Note:
            Not settable """
        if self._summary is None:
            # populated as a side effect of the combined-properties request
            await self.__pull_combined_properties()
        return self._summary

    async def summarize(self, sentences=0, chars=0):
        """ Summarize page either by number of sentences, chars, or first
        section (**default**)

        Args:
            sentences (int): Number of sentences to use in summary \
                (first `x` sentences)
            chars (int): Number of characters to use in summary \
                (first `x` characters)
        Returns:
            str: The summary of the MediaWiki page
        Note:
            Precedence for parameters: sentences then chars; if both are \
            0 then the entire first section is returned """
        query_params = {"prop": "extracts", "explaintext": "", "titles": self.title}
        if sentences:
            # the TextExtracts API caps exsentences at 10
            query_params["exsentences"] = 10 if sentences > 10 else sentences
        elif chars:
            query_params["exchars"] = 1 if chars < 1 else chars
        else:
            query_params["exintro"] = ""

        request = await self.mediawiki.wiki_request(query_params)
        summary = request["query"]["pages"][self.pageid].get("extract")
        return summary

    async def sections(self):
        """ list: Table of contents sections

        Note:
            Not settable """
        # NOTE: Due to MediaWiki sites adding superscripts or italics or bold
        #       information in the sections, moving to regex to get the
        #       `non-decorated` name instead of using the query api!
        if self._sections is None:
            await self._parse_sections()
        return self._sections

    async def table_of_contents(self):
        """ OrderedDict: Dictionary of sections and subsections

        Note:
            Leaf nodes are empty OrderedDict objects
        Note:
            Not Settable"""

        if self._table_of_contents is None:
            await self._parse_sections()
        return self._table_of_contents

    async def section(self, section_title):
        """ Plain text section content

        Args:
            section_title (str): Name of the section to pull or None \
                for the header section
        Returns:
            str: The content of the section
        Note:
            Use **None** if the header section is desired
        Note:
            Returns **None** if section title is not found; only text \
            between title and next section or subsection title is returned
        Note:
            Side effect is to also pull the content which can be slow
        Note:
            This is a parsing operation and not part of the standard API"""
        if not section_title:
            # header section: start from the very top of the page text
            try:
                content = await self.content()
                index = 0
            except ValueError:
                return None
            except IndexError:
                pass
        else:
            section = "== {0} ==".format(section_title)
            try:
                content = await self.content()
                index = content.index(section) + len(section)

                # ensure we have the full section header...
                # (subsections use longer '=' runs; walk past extra '=')
                while True:
                    if content[index + 1] == "=":
                        index += 1
                    else:
                        break
            except ValueError:
                # section title not found in the page content
                return None
            except IndexError:
                pass

        content = await self.content()
        try:
            next_index = content.index("==", index)
        except ValueError:
            # no following section header: take everything to the end
            next_index = len(await self.content())

        return content[index:next_index].lstrip("=").strip()

    async def parse_section_links(self, section_title):
        """ Parse all links within a section

        Args:
            section_title (str): Name of the section to pull or, if \
                None is provided, the links between the main heading and \
                the first section
        Returns:
            list: List of (title, url) tuples
        Note:
            Use **None** to pull the links from the header section
        Note:
            Returns **None** if section title is not found
        Note:
            Side effect is to also pull the html which can be slow
        Note:
            This is a parsing operation and not part of the standard API"""
        # Cache the results of parsing the html, so that multiple calls happen much faster
        if not self._soup:
            self._soup = BeautifulSoup(await self.html(), "html.parser")

        if not section_title:
            return self._parse_section_links(None)

        # case-insensitive match of the requested title against rendered headlines
        headlines = self._soup.find_all("span", class_="mw-headline")
        tmp_soup = BeautifulSoup(section_title, "html.parser")
        tmp_sec_title = tmp_soup.get_text().lower()
        id_tag = None
        for headline in headlines:
            tmp_id = headline.text
            if tmp_id.lower() == tmp_sec_title:
                id_tag = headline.get("id")
                break

        if id_tag is not None:
            return self._parse_section_links(id_tag)
        return None
620 | "redirects": "", 621 | } 622 | query_params.update(self.__title_query_param()) 623 | 624 | # params add by KoishiMoe 625 | if self._converttitles: 626 | query_params.update({"converttitles": 1}) 627 | if self._iwurl: 628 | query_params.update({"iwurl": 1}) 629 | 630 | request = await self.mediawiki.wiki_request(query_params) 631 | 632 | query = request["query"] 633 | if query.get("pages"): 634 | pageid = list(query["pages"].keys())[0] 635 | page = query["pages"][pageid] 636 | 637 | # determine result of the request 638 | # interwiki is present in query if page is a interwiki; in this case, there's no `pages` in query 639 | if "interwiki" in query: 640 | self._handle_interwiki(query) 641 | # converted may be present in query if convert_titles == True 642 | if "converted" in query: 643 | self.title = query["converted"][0].get('to') or self.title 644 | # missing is present if the page is missing 645 | if "missing" in page or pageid == '-1': # sometimes it doesn't return missing, but pageid == -1 646 | self._raise_page_error() 647 | # redirects is present in query if page is a redirect 648 | elif "redirects" in query: 649 | await self._handle_redirect(redirect, preload, query, page) 650 | # if pageprops is returned, it must be a disambiguation error 651 | elif "pageprops" in page: 652 | await self._raise_disambiguation_error(page, pageid) 653 | else: 654 | self.pageid = pageid 655 | self.title = page["title"] 656 | self.url = page["fullurl"] 657 | 658 | def _raise_page_error(self): 659 | """ raise the correct type of page error """ 660 | if hasattr(self, "title"): 661 | raise PageError(title=self.title) 662 | raise PageError(pageid=self.pageid) 663 | 664 | async def _raise_disambiguation_error(self, page, pageid): 665 | """ parse and throw a disambiguation error """ 666 | query_params = { 667 | "prop": "revisions", 668 | "rvprop": "content", 669 | "rvparse": "", 670 | "rvlimit": 1, 671 | } 672 | query_params.update(self.__title_query_param()) 673 | request = 
await self.mediawiki.wiki_request(query_params) 674 | html = request["query"]["pages"][pageid]["revisions"][0]["*"] 675 | 676 | lis = BeautifulSoup(html, "html.parser").find_all("li") 677 | filtered_lis = [ 678 | li for li in lis if "tocsection" not in "".join(li.get("class", list())) 679 | ] 680 | may_refer_to = [li.a.get_text() for li in filtered_lis if li.a] 681 | 682 | disambiguation = list() 683 | for lis_item in filtered_lis: 684 | item = lis_item.find_all("a") 685 | one_disambiguation = dict() 686 | one_disambiguation["description"] = lis_item.text 687 | if item and item[0].has_attr("title"): 688 | one_disambiguation["title"] = item[0]["title"] 689 | else: 690 | # these are non-linked records so double up the text 691 | one_disambiguation["title"] = lis_item.text 692 | disambiguation.append(one_disambiguation) 693 | raise DisambiguationError( 694 | getattr(self, "title", page["title"]), 695 | may_refer_to, 696 | page["fullurl"], 697 | disambiguation, 698 | ) 699 | 700 | # method add by KoishiMoe 701 | def _handle_interwiki(self, query): 702 | inter_wiki = query["interwiki"][0] 703 | title = inter_wiki.get("title", '')[len(f'{inter_wiki.get("iw", "")}:'):] 704 | url = inter_wiki.get("url", '') 705 | raise InterWikiError(title, url) 706 | 707 | async def _handle_redirect(self, redirect, preload, query, page): 708 | """ handle redirect """ 709 | if redirect: 710 | redirects = query["redirects"][0] 711 | 712 | if "normalized" in query: 713 | normalized = query["normalized"][0] 714 | if normalized["from"] != self.title: 715 | raise MediaWikiException(ODD_ERROR_MESSAGE) 716 | from_title = normalized["to"] 717 | else: 718 | if not getattr(self, "title", None): 719 | self.title = redirects["from"] 720 | delattr(self, "pageid") 721 | from_title = self.title 722 | if redirects["from"] != from_title: 723 | raise MediaWikiException(ODD_ERROR_MESSAGE) 724 | 725 | # change the title and reload the whole object 726 | self.__init__( 727 | self.mediawiki, 728 | 
title=redirects["to"], 729 | preload=preload, 730 | ) 731 | await self.__load(redirect=redirect, preload=preload) 732 | else: 733 | raise RedirectError(getattr(self, "title", page["title"])) 734 | 735 | async def _continued_query(self, query_params, key="pages"): 736 | """ Based on 737 | https://www.mediawiki.org/wiki/API:Query#Continuing_queries """ 738 | query_params.update(self.__title_query_param()) 739 | 740 | last_cont = dict() 741 | prop = query_params.get("prop") 742 | 743 | while True: 744 | params = query_params.copy() 745 | params.update(last_cont) 746 | 747 | request = await self.mediawiki.wiki_request(params) 748 | 749 | if "query" not in request: 750 | break 751 | 752 | pages = request["query"][key] 753 | if "generator" in query_params: 754 | for datum in pages.values(): 755 | yield datum 756 | elif isinstance(pages, list): 757 | for datum in list(enumerate(pages)): 758 | yield datum[1] 759 | else: 760 | for datum in pages[self.pageid].get(prop, list()): 761 | yield datum 762 | 763 | if "continue" not in request or request["continue"] == last_cont: 764 | break 765 | 766 | last_cont = request["continue"] 767 | 768 | def _parse_section_links(self, id_tag): 769 | """ given a section id, parse the links in the unordered list """ 770 | all_links = list() 771 | 772 | if id_tag is None: 773 | root = self._soup.find("div", {"class": "mw-parser-output"}) 774 | if root is None: 775 | return all_links 776 | candidates = root.children 777 | else: 778 | root = self._soup.find("span", {"id": id_tag}) 779 | if root is None: 780 | return all_links 781 | candidates = self._soup.find(id=id_tag).parent.next_siblings 782 | 783 | for node in candidates: 784 | if not isinstance(node, Tag): 785 | continue 786 | if node.get("role", "") == "navigation": 787 | continue 788 | elif "infobox" in node.get("class", []): 789 | continue 790 | 791 | # If the classname contains "toc", the element is a table of contents. 
792 | # The comprehension is necessary because there are several possible 793 | # types of tocs: "toclevel", "toc", ... 794 | toc_classnames = [cname for cname in node.get("class", []) if "toc" in cname] 795 | if toc_classnames: 796 | continue 797 | 798 | # this is actually the child node's class... 799 | is_headline = node.find("span", {"class": "mw-headline"}) 800 | if is_headline is not None: 801 | break 802 | if node.name == "a": 803 | all_links.append(self.__parse_link_info(node)) 804 | else: 805 | for link in node.find_all("a"): 806 | all_links.append(self.__parse_link_info(link)) 807 | return all_links 808 | 809 | def __parse_link_info(self, link): 810 | """ parse the tag for the link """ 811 | href = link.get("href", "") 812 | txt = link.string or href 813 | is_rel = is_relative_url(href) 814 | if is_rel is True: 815 | tmp = "{0}{1}".format(self.mediawiki.base_url, href) 816 | elif is_rel is None: 817 | tmp = "{0}{1}".format(self.url, href) 818 | else: 819 | tmp = href 820 | return txt, tmp 821 | 822 | async def _parse_sections(self): 823 | """ parse sections and TOC """ 824 | 825 | def _list_to_dict(_dict, path, sec): 826 | tmp = _dict 827 | for elm in path[:-1]: 828 | tmp = tmp[elm] 829 | tmp[sec] = OrderedDict() 830 | 831 | self._sections = list() 832 | section_regexp = r"\n==* .* ==*\n" # '== {STUFF_NOT_\n} ==' 833 | found_obj = re.findall(section_regexp, await self.content()) 834 | 835 | res = OrderedDict() 836 | path = list() 837 | last_depth = 0 838 | for obj in found_obj: 839 | depth = obj.count("=") / 2 # this gets us to the single side... 
840 | depth -= 2 # now, we can calculate depth 841 | 842 | sec = obj.lstrip("\n= ").rstrip(" =\n") 843 | if depth == 0: 844 | last_depth = 0 845 | path = [sec] 846 | res[sec] = OrderedDict() 847 | elif depth > last_depth: 848 | last_depth = depth 849 | path.append(sec) 850 | _list_to_dict(res, path, sec) 851 | elif depth < last_depth: 852 | # path.pop() 853 | while last_depth > depth: 854 | path.pop() 855 | last_depth -= 1 856 | path.pop() 857 | path.append(sec) 858 | _list_to_dict(res, path, sec) 859 | last_depth = depth 860 | else: 861 | path.pop() 862 | path.append(sec) 863 | _list_to_dict(res, path, sec) 864 | last_depth = depth 865 | self._sections.append(sec) 866 | 867 | self._table_of_contents = res 868 | 869 | def __title_query_param(self): 870 | """ util function to determine which parameter method to use """ 871 | if getattr(self, "title", None) is not None: 872 | return {"titles": self.title} 873 | return {"pageids": self.pageid} 874 | 875 | async def __pull_combined_properties(self): 876 | """ something here... """ 877 | 878 | query_params = { 879 | "titles": self.title, 880 | "prop": "extracts|redirects|links|coordinates|categories|extlinks", 881 | # "continue": dict(), 882 | "continue": "", 883 | # summary 884 | "explaintext": "", 885 | "exintro": "", # full first section for the summary! 
886 | # redirects 887 | "rdprop": "title", 888 | "rdlimit": "max", 889 | # links 890 | "plnamespace": 0, 891 | "pllimit": "max", 892 | # coordinates 893 | "colimit": "max", 894 | # categories 895 | "cllimit": "max", 896 | "clshow": "!hidden", 897 | # references 898 | "ellimit": "max", 899 | } 900 | 901 | last_cont = dict() 902 | results = dict() 903 | idx = 0 904 | while True: 905 | params = query_params.copy() 906 | params.update(last_cont) 907 | 908 | request = await self.mediawiki.wiki_request(params) 909 | idx += 1 910 | 911 | if "query" not in request: 912 | break 913 | 914 | keys = [ 915 | "extracts", 916 | "redirects", 917 | "links", 918 | "coordinates", 919 | "categories", 920 | "extlinks", 921 | ] 922 | new_cont = request.get("continue") 923 | request = request["query"]["pages"][self.pageid] 924 | if not results: 925 | results = request 926 | else: 927 | for key in keys: 928 | if key in request and request.get(key) is not None: 929 | val = request.get(key) 930 | tmp = results.get(key) 931 | if isinstance(tmp, (list, tuple)): 932 | results[key] = results.get(key, list) + val 933 | if new_cont is None or new_cont == last_cont: 934 | break 935 | 936 | last_cont = new_cont 937 | 938 | # redirects 939 | tmp = [link["title"] for link in results.get("redirects", list())] 940 | self._redirects = sorted(tmp) 941 | 942 | # summary 943 | self._summary = results.get("extract") 944 | 945 | # links 946 | tmp = [link["title"] for link in results.get("links", list())] 947 | self._links = sorted(tmp) 948 | 949 | # categories 950 | def _get_cat(val): 951 | """ parse the category correctly """ 952 | tmp = val["title"] 953 | if tmp.startswith(self.mediawiki.category_prefix): 954 | return tmp[len(self.mediawiki.category_prefix) + 1:] 955 | return tmp 956 | 957 | tmp = [_get_cat(link) for link in results.get("categories", list())] 958 | self._categories = sorted(tmp) 959 | 960 | # coordinates 961 | if "coordinates" in results: 962 | self._coordinates = ( 963 | 
Decimal(results["coordinates"][0]["lat"]), 964 | Decimal(results["coordinates"][0]["lon"]), 965 | ) 966 | 967 | # references 968 | tmp = [link["*"] for link in results.get("extlinks", list())] 969 | self._references = sorted(tmp) 970 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/mediawiki/utilities.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions 3 | """ 4 | import asyncio 5 | import functools 6 | import inspect 7 | import sys 8 | import time 9 | 10 | 11 | def parse_all_arguments(func): 12 | """ determine all positional and named arguments as a dict """ 13 | args = dict() 14 | 15 | func_args = inspect.signature(func) 16 | for itm in list(func_args.parameters)[1:]: 17 | param = func_args.parameters[itm] 18 | if param.default is not param.empty: 19 | args[param.name] = param.default 20 | return args 21 | 22 | 23 | def memoize(func): 24 | """ quick memoize decorator for class instance methods 25 | NOTE: this assumes that the class that the functions to be 26 | memoized already has a memoized and refresh_interval 27 | property """ 28 | 29 | @functools.wraps(func) 30 | async def wrapper(*args, **kwargs): 31 | """ wrap it up and store info in a cache """ 32 | cache = args[0].memoized 33 | refresh = args[0].refresh_interval 34 | use_cache = args[0].use_cache 35 | 36 | # short circuit if not using cache 37 | if use_cache is False: 38 | return await func(*args, **kwargs) 39 | 40 | if func.__name__ not in cache: 41 | cache[func.__name__] = dict() 42 | if "defaults" not in cache: 43 | cache["defaults"] = dict() 44 | if "$locks" not in cache: 45 | cache["$locks"] = dict() 46 | cache["defaults"][func.__name__] = parse_all_arguments(func) 47 | if func.__name__ not in cache["$locks"]: 48 | cache["$locks"][func.__name__] = dict() 49 | locks: dict = cache["$locks"][func.__name__] 50 | # build a key; should also consist of the default values 51 | defaults 
= cache["defaults"][func.__name__].copy() 52 | for key, val in kwargs.items(): 53 | defaults[key] = val 54 | tmp = list() 55 | tmp.extend(args[1:]) 56 | for k in sorted(defaults.keys()): 57 | tmp.append("({0}: {1})".format(k, defaults[k])) 58 | 59 | tmp = [str(x) for x in tmp] 60 | key = " - ".join(tmp) 61 | 62 | # to avoid dog-piling 63 | if locks.get(key): 64 | if key in cache[func.__name__]: 65 | return cache[func.__name__][key][1] # when locked, return the stale value if possible 66 | else: 67 | while locks.get(key): 68 | await asyncio.sleep(args[0].refresh_interval / 2) 69 | # set the value in the cache if missing or needs to be refreshed 70 | if key not in cache[func.__name__]: 71 | locks[key] = True 72 | try: 73 | cache[func.__name__][key] = (time.time(), await func(*args, **kwargs)) 74 | except Exception as e: 75 | raise e 76 | finally: 77 | locks.pop(key) 78 | else: 79 | tmp = cache[func.__name__][key] 80 | # determine if we need to refresh the data... 81 | if refresh is not None and time.time() - tmp[0] > refresh: 82 | locks[key] = True 83 | try: 84 | cache[func.__name__][key] = (time.time(), await func(*args, **kwargs)) 85 | except Exception as e: 86 | raise e 87 | finally: 88 | locks.pop(key) 89 | return cache[func.__name__][key][1] 90 | 91 | return wrapper 92 | 93 | 94 | def str_or_unicode(text): 95 | """ handle python 3 unicode """ 96 | encoding = sys.stdout.encoding 97 | return text.encode(encoding).decode(encoding) 98 | 99 | 100 | def is_relative_url(url): 101 | """ simple method to determine if a url is relative or absolute """ 102 | if url.startswith("#"): 103 | return None 104 | if url.find("://") > 0 or url.startswith("//"): 105 | # either 'http(s)://...' or '//cdn...' 
and therefore absolute 106 | return False 107 | return True 108 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/utilities.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from io import StringIO 3 | from urllib.parse import urlparse, parse_qs, urlencode, urlunparse 4 | 5 | 6 | # 从 Flandre 里拆出来的解析器,用于解析命令和参数 7 | # TODO: 用argparse吧……谁手搓这玩意 8 | def process_command(command: str, user_input: str) -> tuple: 9 | """ 10 | :param command: 命令本体 11 | :param user_input: 用户输入 12 | :return: 处理后的参数元组,格式为([无名参数列表], {命名参数字典}) 13 | """ 14 | user_input = user_input.strip() 15 | command = command.strip() 16 | if user_input.startswith(command): 17 | user_input = user_input[len(command):].lstrip() # 去掉命令本体,用这个写法是为了与旧版python兼容 18 | 19 | f = StringIO(user_input) 20 | reader = csv.reader(f, delimiter=" ", escapechar="\\", skipinitialspace=True) 21 | input_list = [] 22 | for i in reader: 23 | input_list += [j for j in i if j] 24 | 25 | out_list = [] 26 | out_dict = {} 27 | 28 | i = 0 29 | while i < len(input_list): 30 | if _startswith(input_list[i], "--"): 31 | next_item = _get_item(input_list, i + 1) 32 | if not _startswith(next_item, "--"): 33 | out_dict[input_list[i].lstrip("--")] = next_item if next_item is not None else True 34 | # 此处将只提供参数名不提供值的认为是True(简化语法) 35 | i += 2 36 | else: 37 | out_dict[input_list[i].lstrip("--")] = True 38 | i += 1 39 | else: 40 | out_list.append(input_list[i]) 41 | i += 1 42 | 43 | return out_list, out_dict 44 | 45 | 46 | def _get_item(ls: list, item: int): 47 | """ 48 | 为列表实现类似字典的get方法 49 | :param ls: 要查询的列表 50 | :param item: 元素的下标 51 | :return: 若元素存在,返回该元素;否则返回None 52 | """ 53 | try: 54 | return ls[item] 55 | except IndexError: 56 | return None 57 | 58 | 59 | def _startswith(string: str, prefix: str) -> bool: 60 | """ 61 | 替换原版的startswith方法 62 | :param string: 要检测的字符串 63 | :param prefix: 要检测的前缀,字符串中的每个字符都将被单独检测 64 | :return: 判定结果 65 | """ 66 
| if not string: 67 | return False 68 | for i in prefix: 69 | if string.startswith(i): 70 | return True 71 | return False 72 | 73 | def ensure_url_param(url, host_to_check, param_name, param_value): 74 | """ 75 | Check if URL is under a specific host and add a specific parameter if not present. 76 | 77 | Args: 78 | url (str): The URL to check 79 | host_to_check (str): The host to match (e.g., "example.com") 80 | param_name (str): The parameter to check for and potentially add 81 | param_value (str): The value to use if parameter needs to be added 82 | 83 | Returns: 84 | str: The original or modified URL 85 | """ 86 | parsed_url = urlparse(url) 87 | 88 | # Check if URL is under the specific host 89 | if parsed_url.netloc == host_to_check or parsed_url.netloc.endswith('.' + host_to_check): 90 | # Parse query parameters 91 | query_params = parse_qs(parsed_url.query) 92 | 93 | # Check if the specific parameter exists 94 | if param_name not in query_params: 95 | # Parameter doesn't exist, let's add it 96 | query_params[param_name] = [param_value] 97 | 98 | # Rebuild the query string 99 | new_query = urlencode(query_params, doseq=True) 100 | 101 | # Rebuild the URL with the new query string 102 | modified_url = urlunparse( 103 | (parsed_url.scheme, parsed_url.netloc, parsed_url.path, 104 | parsed_url.params, new_query, parsed_url.fragment) 105 | ) 106 | return modified_url 107 | 108 | # Return original URL if host doesn't match or parameter already exists 109 | return url 110 | -------------------------------------------------------------------------------- /nonebot_plugin_mediawiki/worker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import urllib.parse 4 | from asyncio import TimeoutError 5 | from urllib import parse 6 | 7 | import nonebot 8 | from aiohttp import ContentTypeError, ClientProxyConnectionError, ClientConnectorError 9 | from nonebot import on_regex, on_command, logger 10 | from 
from nonebot.adapters.onebot.v11 import Bot, utils, GroupMessageEvent, GROUP, MessageSegment
from nonebot.internal.matcher import Matcher
from nonebot.typing import T_State

from .config import Config
from .constants import ARTICLE_RAW, ARTICLE, RAW, TEMPLATE
from .exception import NoDefaultPrefixException, NoSuchPrefixException

__all__ = ['wiki_preprocess', 'wiki_parse']

from .fakemwapi import DummyMediaWiki, DummyPage

from .mediawiki import MediaWiki, HTTPTimeoutError, MediaWikiException, MediaWikiGeoCoordError, PageError, \
    DisambiguationError
from .mediawiki.exceptions import InterWikiError, MediaWikiAPIURLError, MediaWikiBaseException
from .utilities import ensure_url_param

# Existing MediaWiki instances, keyed by API URL (shared across all events)
wiki_instances = {}

# Lazily-initialised Playwright state for the screenshot ("shot") mode
playwright = None
browser = None
playwright_not_installed = False
playwright_launch_error = False


@nonebot.get_driver().on_shutdown
async def shutdown():
    """Release the shared Playwright browser and driver on bot shutdown."""
    global playwright, browser
    if browser:
        await browser.close()
        browser = None
    if playwright:
        # FIX: the async Playwright object's stop() is a coroutine; it was
        # previously called without `await`, so the driver process was never
        # actually stopped (coroutine-never-awaited warning).
        await playwright.stop()
        playwright = None

# Matchers
# TODO: use matcher group
wiki_article = on_regex(ARTICLE_RAW, permission=GROUP, state={"mode": "article"})
wiki_template = on_regex(TEMPLATE, permission=GROUP, state={"mode": "template"})
wiki_raw = on_regex(RAW, permission=GROUP, state={"mode": "raw"})
wiki_quick = on_command("wiki ", permission=GROUP, state={"mode": "quick"})
wiki_shot = on_command("wiki.shot ", permission=GROUP, state={"mode": "shot"})


@wiki_article.handle()
@wiki_template.handle()
@wiki_raw.handle()
@wiki_quick.handle()
@wiki_shot.handle()
async def wiki_preprocess(bot: Bot, event: GroupMessageEvent, state: T_State, matcher: Matcher):
    """Extract the requested title from the raw message and stash it in state.

    For "shot" mode this also lazily starts Playwright and launches the shared
    Chromium browser (honouring the ``wiki_proxy`` config if set).
    """
    message = utils.unescape(str(event.message).strip())
    mode = state["mode"]
    if mode == "article":
        # NOTE(review): re.findall returns tuples when the pattern has several
        # groups — assumes these constants capture at most one group; confirm.
        title = re.findall(ARTICLE, message)
    elif mode == "template":
        title = re.findall(TEMPLATE, message)
        state["is_template"] = True
    elif mode == "raw":
        title = re.findall(RAW, message)
        state["is_raw"] = True
    elif mode == "quick":
        title = message[4:].lstrip()  # drop the "wiki" command prefix
        if not title:
            await matcher.finish()
        title = [title]
    elif mode == "shot":
        global playwright, browser, playwright_launch_error, playwright_not_installed
        if playwright_not_installed:
            await matcher.finish("Playwright未安装")
        if playwright_launch_error:
            await matcher.finish("Playwright启动失败,如果您已安装Chromium,请重启Bot")
        if not playwright:
            try:
                from playwright.async_api import async_playwright, Error
                playwright = await async_playwright().start()
                if not browser:
                    try:
                        p = nonebot.get_driver().config.wiki_proxy
                        if p:
                            p = urllib.parse.urlparse(p)
                            proxy = {
                                "server": f"{p.scheme}://{p.hostname}:{p.port}",
                                "username": p.username,
                                "password": p.password
                            }
                            browser = await playwright.chromium.launch(proxy=proxy)
                        else:
                            browser = await playwright.chromium.launch()
                    except Error as e:
                        playwright_launch_error = True
                        logger.warning("Playwright启动失败,请检查是否安装了Chromium\n"
                                       "安装方法:在bot的虚拟环境中执行:playwright install chromium")
                        logger.warning("注意:对于无头服务器,您可能需要使用系统的包管理器安装完整版的Chromium以保证系统中有可用的依赖\n"
                                       "例如:在Ubuntu 20.04中,您可以使用apt安装:sudo apt install chromium-browser\n"
                                       "在Archlinux中,您可以使用pacman安装:sudo pacman -S chromium")
                        logger.warning(f"下面是Playwright的错误信息,可能对您有帮助:\n{e}")
                        await matcher.finish("Playwright启动失败,请检查是否安装了Chromium")
            except ImportError:
                playwright_not_installed = True
                await matcher.finish("Playwright未安装")

        title = message[9:].lstrip()  # drop the "wiki.shot" command prefix
        if not title:
            await matcher.finish()
        title = [title]
        state["is_shot"] = True

    if not title:
        await matcher.finish()
    state["title"] = title[0]
    state["is_user_choice"] = False


@wiki_article.got("title", "请从上面选择一项,或回复0来根据原标题直接生成链接,回复”取消“退出")
@wiki_template.got("title", "请从上面选择一项,或回复0来根据原标题直接生成链接,回复”取消“退出")
@wiki_raw.got("title", "请从上面选择一项,或回复0来根据原标题直接生成链接,回复”取消“退出")
@wiki_quick.got("title", "请从上面选择一项,或回复0来根据原标题直接生成链接,回复”取消“退出")
@wiki_shot.got("title", "请从上面选择一项,或回复0来根据原标题直接生成链接,回复”取消“退出")
async def wiki_parse(bot: Bot, event: GroupMessageEvent, state: T_State, matcher: Matcher):
    """Resolve the stored title to a wiki page and reply with a link.

    Handles prefix lookup, redirects, search suggestions, disambiguation pages
    (both re-prompt the user via matcher.reject), interwiki links, anchors, and
    the optional Playwright screenshot mode. Falls back to a dummy direct-link
    page whenever the real API is unreachable or errors out.
    """
    # Flags
    page = None
    exception = None

    if state.get("is_user_choice"):  # selection round: reuse previously stored data
        msg = str(state["title"]).strip()
        if (not msg.isdigit()) or int(msg) not in range(len(state["options"]) + 1):  # not a choice, or out of range
            await matcher.finish()

        choice = int(msg)
        if not choice:  # 0 chosen: build a direct link from the raw title
            if state.get("disambiguation"):
                page = DummyPage(state['disambiguation'].url, state['raw_title'])
            else:
                instance = state["dummy_instance"]
                page = await instance.page(state["raw_title"])
        else:
            title = state["options"][choice - 1]
            wiki_instance = state["instance"]
            dummy_instance = state["dummy_instance"]
            api = state["api"]
    else:
        config = Config(event.group_id)
        title = state["title"]
        # Accept both the ASCII and the full-width colon after the prefix
        prefix = re.match(r'\w+:|\w+:', title)
        if not prefix:
            prefix = ''
        else:
            prefix = prefix.group(0).lower().rstrip("::")
        if prefix in config.prefixes:
            title = re.sub(f"{prefix}:|{prefix}:", '', title, count=1, flags=re.I)
        else:
            prefix = ''

        if title is None or title.strip() == "":
            await matcher.finish()

        # Check for an anchor ("#section")
        anchor_list = re.split('#', title, maxsplit=1)
        title = anchor_list[0]
        state["anchor"] = anchor_list[1] if len(anchor_list) > 1 else state.get("anchor")

    if not state.get("is_user_choice"):
        if state.get("is_template"):
            title = "Template:" + title
        try:
            api, url = config.get_from_prefix(prefix)[:2]
        except NoDefaultPrefixException:
            await matcher.finish("没有找到默认前缀,请群管或bot管理员先设置默认前缀")
            return
        except NoSuchPrefixException:
            await matcher.finish("指定的默认前缀对应的wiki不存在,请管理员检查设置")
            return

        state["api"] = api  # selection rounds don't re-read the config, so stash the api url for link generation

        dummy_instance = DummyMediaWiki(url)  # MediaWiki instance used only to build direct links
        if state.get("is_raw"):
            wiki_instance = dummy_instance
        else:
            # Reuse an existing MediaWiki instance, keyed by api url
            global wiki_instances
            if api in wiki_instances.keys():
                wiki_instance = wiki_instances[api]
            else:
                if api:
                    try:
                        p = nonebot.get_driver().config.wiki_proxy
                        if p:
                            wiki_instance = await MediaWiki.create(url=api, proxies=p)
                        else:
                            wiki_instance = await MediaWiki.create(url=api)
                        wiki_instances[api] = wiki_instance
                    except (MediaWikiBaseException, TimeoutError, ClientProxyConnectionError, ConnectionRefusedError, AssertionError, ClientConnectorError) as e:
                        logger.info(f"连接到MediaWiki API 时发生了错误:{e}")
                        exception = "Api连接失败"
                        wiki_instance = dummy_instance
                else:  # no api url configured — just use the dummy
                    wiki_instance = dummy_instance

    if not page:
        try:
            page = await wiki_instance.page(title=title, auto_suggest=False, convert_titles=True, iwurl=True)
            exception = exception or None
        except (HTTPTimeoutError, TimeoutError):
            exception = "连接超时"
            page = await dummy_instance.page(title=title)
        except (MediaWikiException, MediaWikiGeoCoordError, ContentTypeError) as e:  # ContentTypeError: non-JSON response
            exception = "Api调用出错"
            logger.info(f"MediaWiki API 返回了错误信息:{e}")
            page = await dummy_instance.page(title=title)
        except PageError:
            # Page missing: offer search suggestions and re-prompt the user
            try:
                search = await wiki_instance.search(title)
                if search:
                    result = f"页面 {title} 不存在;你是不是想找:"
                    for k, v in enumerate(search):
                        result += f"\n{k + 1}. {v}"
                    state["is_user_choice"] = True
                    state["options"] = search
                    state["raw_title"] = title
                    state["instance"] = wiki_instance
                    state["dummy_instance"] = dummy_instance
                    state.pop("title")
                    await matcher.reject(result)
                    return  # unreachable; keeps the IDE happy
                else:
                    page = await dummy_instance.page(title=title)
            except (MediaWikiBaseException, TimeoutError):
                page = await dummy_instance.page(title=title)
            exception = "未找到页面"
        except DisambiguationError as e:
            result = f"条目 {e.title} 是一个消歧义页面,有以下含义:"
            for k, v in enumerate(e.options):
                result += f"\n{k + 1}. {v}"
            state["is_user_choice"] = True
            state["disambiguation"] = e
            state["options"] = e.options
            state["raw_title"] = title
            state["instance"] = wiki_instance
            state["dummy_instance"] = dummy_instance
            state.pop("title")
            await matcher.reject(result)
            return
        except InterWikiError as e:
            result = f"跨维基链接:{e.title}\n" \
                     f"链接:{e.url}"
            await matcher.finish(result)
            return
        except Exception as e:
            exception = "未知错误"
            logger.warning(f"MediaWiki API 发生了未知异常:{e}")
            page = await dummy_instance.page(title=title)

    if not exception and state.get("mode") == "shot":
        if browser:
            try:
                pg = await browser.new_page()
                try:
                    await pg.set_viewport_size({"width": 1920, "height": 1080})
                    # Force the classic skin on moegirl unless explicitly disabled
                    u = ensure_url_param(page.url, "moegirl.org.cn", "useskin", "vector") if not os.getenv("MOEGIRL_USE_NEW_SKIN") else page.url
                    try:
                        timeout = float(nonebot.get_driver().config.wiki_shot_timeout)
                    except AttributeError:
                        # NOTE(review): Playwright timeouts are milliseconds;
                        # 30.0 reads like seconds — confirm the intended unit.
                        timeout = 30.0
                    try:
                        wait_until = nonebot.get_driver().config.wiki_shot_wait_until
                    except AttributeError:
                        wait_until = "load"
                    await pg.goto(u, timeout=timeout, wait_until=wait_until)
                    img = await pg.screenshot(full_page=True, type="jpeg", quality=80)
                    await matcher.send(MessageSegment.image(img))
                except TimeoutError:
                    logger.warning(f"页面{page.url}加载超时")
                    exception = "截图失败:页面加载超时"
                except Exception as e:
                    logger.warning(f"截图时发生了错误:{e}")
                    exception = "截图失败:页面加载失败"
                finally:
                    await pg.close()
            except Exception as e:
                logger.warning(f"截图时发生了错误:{e}")
                exception = "截图失败"

    result = f"错误:{exception}\n" if exception else ""
    if page.title != title:
        result += f"重定向 {title} → {page.title}\n"
    else:
        result += f"标题:{page.title}\n"
    if hasattr(page, "pageid"):
        result += f"链接:{api[:-7]}index.php?curid={page.pageid}"  # use the page id to shorten the link
    else:
        result += f"链接:{page.url}"
    if state.get("anchor"):
        result += parse.quote("#" + state["anchor"])

    await matcher.finish(result)