├── .gitattributes ├── MANIFEST.in ├── docs ├── source │ ├── logo.png │ ├── reference │ │ ├── page.rst │ │ ├── image.rst │ │ ├── site.rst │ │ ├── errors.rst │ │ └── index.rst │ ├── user │ │ ├── index.rst │ │ ├── files.rst │ │ ├── page-ops.rst │ │ └── connecting.rst │ ├── development │ │ └── index.rst │ ├── logo.svg │ ├── index.rst │ └── conf.py ├── Makefile └── make.bat ├── examples ├── test-image.png ├── upload.py └── basic_edit.py ├── .gitignore ├── .landscape.yaml ├── mwclient ├── util.py ├── __init__.py ├── sleep.py ├── errors.py ├── image.py ├── ex.py ├── listing.py ├── page.py └── client.py ├── tox.ini ├── .travis.yml ├── setup.cfg ├── tests ├── test_util.py ├── test_sleep.py ├── test_listing.py ├── test_page.py └── test_client.py ├── LICENSE.md ├── setup.py ├── CREDITS.md ├── REFERENCE.md ├── README.rst └── CHANGELOG.md /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.md 2 | -------------------------------------------------------------------------------- /docs/source/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CoR/mwclient/master/docs/source/logo.png -------------------------------------------------------------------------------- /examples/test-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CoR/mwclient/master/examples/test-image.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build 3 | dist 4 | .cache 5 | *~ 6 | *.egg 7 | *.egg-info 8 | .eggs 9 | .tox 10 | 
-------------------------------------------------------------------------------- /docs/source/reference/page.rst: -------------------------------------------------------------------------------- 1 | :class:`Page` 2 | -------------- 3 | 4 | .. autoclass:: mwclient.page.Page 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/reference/image.rst: -------------------------------------------------------------------------------- 1 | :class:`Image` 2 | -------------- 3 | 4 | .. autoclass:: mwclient.image.Image 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/reference/site.rst: -------------------------------------------------------------------------------- 1 | :class:`Site` 2 | -------------- 3 | 4 | .. autoclass:: mwclient.client.Site 5 | :members: 6 | -------------------------------------------------------------------------------- /.landscape.yaml: -------------------------------------------------------------------------------- 1 | python-targets: 2 | - 2 3 | - 3 4 | pylint: 5 | disable: 6 | - redefined-builtin 7 | - too-many-arguments -------------------------------------------------------------------------------- /docs/source/reference/errors.rst: -------------------------------------------------------------------------------- 1 | .. _errors: 2 | 3 | :class:`InsufficientPermission` 4 | ------------------------------- 5 | 6 | .. 
def parse_timestamp(t):
    """Convert a MediaWiki-style UTC timestamp string into a time tuple.

    `t` is expected in the form ``YYYY-MM-DDTHH:MM:SSZ``. A missing value
    (``None``) or the all-zero placeholder ``'0000-00-00T00:00:00Z'`` yields
    a plain nine-item tuple of zeros; every other value is handed to
    ``time.strptime`` and comes back as a ``time.struct_time``.
    """
    has_value = t is not None and t != '0000-00-00T00:00:00Z'
    if has_value:
        return time.strptime(t, '%Y-%m-%dT%H:%M:%SZ')
    # Same nine-field layout as a struct_time, but with every field zeroed.
    return (0,) * 9
toctree:: 10 | :maxdepth: 1 11 | 12 | site 13 | page 14 | image 15 | errors 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # http://about.travis-ci.org/docs/user/languages/python/ 2 | 3 | sudo: false 4 | language: python 5 | python: 6 | - "2.7" 7 | - "3.3" 8 | - "3.4" 9 | - "3.5" 10 | - "3.6" 11 | 12 | install: 13 | - python setup.py install 14 | - pip install coveralls 15 | 16 | script: 17 | - python setup.py test 18 | 19 | after_success: 20 | - coveralls 21 | -------------------------------------------------------------------------------- /docs/source/user/index.rst: -------------------------------------------------------------------------------- 1 | .. _userguide: 2 | 3 | User guide 4 | ================= 5 | 6 | This guide is intended as an introductory overview, and 7 | explains how to make use of the most important features of 8 | mwclient. For detailed reference documentation of the functions and 9 | classes contained in the package, see the :ref:`reference`. 10 | 11 | .. 
class TestUtil(unittest.TestCase):
    """Checks for ``parse_timestamp`` from ``mwclient.util``."""

    def test_parse_empty_timestamp(self):
        # The all-zero placeholder timestamp maps to a tuple of nine zeros.
        parsed = parse_timestamp('0000-00-00T00:00:00Z')
        assert (0, 0, 0, 0, 0, 0, 0, 0, 0) == parsed

    def test_parse_nonempty_timestamp(self):
        # Fields: year, month, day, hour, minute, second, weekday, yearday, isdst.
        expected = time.struct_time([2015, 1, 2, 20, 18, 36, 4, 2, -1])
        parsed = parse_timestamp('2015-01-02T20:18:36Z')
        assert expected == parsed
23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding=utf-8 3 | from __future__ import print_function 4 | import os 5 | import sys 6 | from setuptools import setup 7 | 8 | here = os.path.abspath(os.path.dirname(__file__)) 9 | README = open(os.path.join(here, 'README.rst')).read() 10 | 11 | setup(name='mwclient', 12 | version='0.8.6', # Use bumpversion to update 13 | description='MediaWiki API client', 14 | long_description=README, 15 | classifiers=[ 16 | 'Programming Language :: Python', 17 | 'Programming Language :: Python :: 2.7', 18 | 'Programming Language :: Python :: 3.3', 19 | 'Programming Language :: Python :: 3.4', 20 | 'Programming Language :: Python :: 3.5', 21 | 'Programming Language :: Python :: 3.6', 22 | ], 23 | keywords='mediawiki wikipedia', 24 | author='Bryan Tong Minh', 25 | author_email='bryan.tongminh@gmail.com', 26 | url='https://github.com/btongminh/mwclient', 27 | license='MIT', 28 | packages=['mwclient'], 29 | install_requires=['requests_oauthlib', 'six'], 30 | setup_requires=['pytest-runner'], 31 | tests_require=['pytest', 'pytest-pep8', 'pytest-cache', 'pytest-cov', 32 | 'responses>=0.6.0', 'mock'], 33 | zip_safe=True 34 | ) 35 | -------------------------------------------------------------------------------- /examples/upload.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from past.builtins import xrange 3 | from StringIO import StringIO 4 | import sys 5 | import os 6 | import pprint 7 | import random 8 | 9 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../'))) 10 | import mwclient 11 | 12 | if len(sys.argv) < 3: 13 | print(sys.argv[0], 'username', 'password', '[host=test.wikipedia.org]', '[path=/w/]') 14 | sys.exit() 15 | if len(sys.argv) > 3: 16 | host = 
sys.argv[3] 17 | else: 18 | host = 'test.wikipedia.org' 19 | if len(sys.argv) > 4: 20 | path = sys.argv[4] 21 | else: 22 | path = '/w/' 23 | 24 | site = mwclient.Site(host, path) 25 | site.login(sys.argv[1], sys.argv[2]) 26 | 27 | name = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for i in xrange(8)) + '.png' 28 | 29 | print('Using http://%s%sindex.php?title=File:' % (host, path) + name) 30 | print('Regular upload test') 31 | 32 | res = site.upload(open('test-image.png', 'rb'), name, 'Regular upload test', ignore=True) 33 | pprint.pprint(res) 34 | assert res['result'] == 'Success' 35 | assert 'exists' not in res['warnings'] 36 | 37 | print('Overwriting; should give a warning') 38 | res = site.upload(open('test-image.png', 'rb'), name, 'Overwrite upload test') 39 | pprint.pprint(res) 40 | assert res['result'] == 'Warning' 41 | assert 'exists' in res['warnings'] 42 | 43 | ses = res['sessionkey'] 44 | 45 | print('Overwriting with stashed file') 46 | res = site.upload(filename=name, filekey=ses) 47 | pprint.pprint(res) 48 | assert res['result'] == 'Warning' 49 | assert 'duplicate' in res['warnings'] 50 | assert 'exists' in res['warnings'] 51 | 52 | print('Uploading empty file; error expected') 53 | res = site.upload(StringIO(), name, 'Empty upload test') 54 | -------------------------------------------------------------------------------- /mwclient/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2006-2011 Bryan Tong Minh 3 | 4 | Permission is hereby granted, free of charge, to any person 5 | obtaining a copy of this software and associated documentation 6 | files (the "Software"), to deal in the Software without 7 | restriction, including without limitation the rights to use, 8 | copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the 10 | Software is furnished to do so, subject to the following 11 | conditions: 12 | 13 
| The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 18 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 20 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 21 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 23 | OTHER DEALINGS IN THE SOFTWARE. 24 | """ 25 | 26 | from mwclient.errors import * # pylint: disable=unused-import 27 | from mwclient.client import Site, __ver__ # pylint: disable=unused-import 28 | import logging 29 | import warnings 30 | 31 | # Show DeprecationWarning 32 | warnings.simplefilter('always', DeprecationWarning) 33 | 34 | # Logging: Add a null handler to avoid "No handler found" warnings. 
class Sleepers(object):
    """
    Factory that stores the retry settings (`max_retries`, `retry_timeout`,
    `callback`) and hands them to every `Sleeper` it creates.
    """

    def __init__(self, max_retries, retry_timeout, callback=lambda *x: None):
        self.callback = callback
        self.retry_timeout = retry_timeout
        self.max_retries = max_retries

    def make(self, args=None):
        """
        Create a new `Sleeper` carrying `args` and this factory's settings.
        """
        return Sleeper(
            args,
            max_retries=self.max_retries,
            retry_timeout=self.retry_timeout,
            callback=self.callback,
        )


class Sleeper(object):
    """
    Retry counter for a single operation. Every call to `sleep` counts as one
    retry and waits longer than the previous one; once more than
    `max_retries` retries have been requested, `MaximumRetriesExceeded` is
    raised. Discard the object when the operation has succeeded.
    """

    def __init__(self, args, max_retries, retry_timeout, callback):
        self.args = args
        self.max_retries = max_retries
        self.retry_timeout = retry_timeout
        self.callback = callback
        self.retries = 0

    def sleep(self, min_time=0):
        """
        Register one more retry and sleep for at least `min_time` seconds.
        The waiting time grows linearly with the number of retries.
        """
        self.retries += 1
        if self.retries > self.max_retries:
            raise MaximumRetriesExceeded(self, self.args)

        self.callback(self, self.retries, self.args)

        # First retry waits 0 * retry_timeout, the second 1 * retry_timeout,
        # and so on; min_time acts as the lower bound.
        timeout = max(self.retry_timeout * (self.retries - 1), min_time)
        log.debug('Sleeping for %d seconds', timeout)
        time.sleep(timeout)
class TestSleepers(unittest.TestCase):
    """Tests for `Sleepers` and `Sleeper`, run with ``time.sleep`` patched out."""

    def setUp(self):
        self.max_retries = 10
        # Replace time.sleep with a mock so the tests neither block nor lose
        # track of the requested delays.
        self.sleep = mock.patch('time.sleep').start()
        self.sleepers = Sleepers(self.max_retries, 30)

    def tearDown(self):
        mock.patch.stopall()

    def test_make(self):
        made = self.sleepers.make()
        assert type(made) == Sleeper
        assert made.retries == 0

    def test_sleep(self):
        # With a retry timeout of 30 the first two waits are 0 and 30 seconds.
        sleeper = self.sleepers.make()
        for _ in range(2):
            sleeper.sleep()
        expected_calls = [mock.call(0), mock.call(30)]
        self.sleep.assert_has_calls(expected_calls)

    def test_min_time(self):
        sleeper = self.sleepers.make()
        sleeper.sleep(5)
        expected_calls = [mock.call(5)]
        self.sleep.assert_has_calls(expected_calls)

    def test_retries_count(self):
        sleeper = self.sleepers.make()
        for _ in range(2):
            sleeper.sleep()
        assert sleeper.retries == 2

    def test_max_retries(self):
        # max_retries sleeps are allowed; the next one must raise.
        sleeper = self.sleepers.make()
        for _ in range(self.max_retries):
            sleeper.sleep()
        with pytest.raises(MaximumRetriesExceeded):
            sleeper.sleep()
58 | 59 | If it's been some time since you forked, please consider rebasing your branch 60 | on the main master branch to ease merging: 61 | 62 | .. code:: bash 63 | 64 | $ git remote add upstream https://github.com/mwclient/mwclient.git 65 | $ git pull --rebase upstream master 66 | 67 | Then push your code and open a pull request on GitHub. 68 | -------------------------------------------------------------------------------- /CREDITS.md: -------------------------------------------------------------------------------- 1 | The **mwclient** framework was originally written by Bryan Tong Minh 2 | ([@btongminh](https://github.com/btongminh)) 3 | and released in 2008 [on Sourceforge](http://sourceforge.net/projects/mwclient/). 4 | Bryan maintained the project until version 0.6.5, released on 6 May 2011. 5 | 6 | In 2013, Waldir Pimenta ([@waldyrious](https://github.com/waldyrious)) 7 | contacted Bryan and proposed helping out with a conversion from SVN to git 8 | and moving the project to GitHub. 9 | After getting the appropriate permissions, 10 | he performed the repository conversion 11 | using [sf2github](http://github.com/ttencate/sf2github) 12 | ([#1](https://github.com/mwclient/mwclient/issues/1)), 13 | converted the wiki previously hosted on sourceforge 14 | ([#12](https://github.com/mwclient/mwclient/issues/12)), 15 | updated the sourceforge project page 16 | ([#15](https://github.com/mwclient/mwclient/issues/15)), 17 | identified the users who had created bug reports 18 | ([#1, comment](https://github.com/mwclient/mwclient/issues/1#issuecomment-13972022)), 19 | contacted the authors of forks of the project 20 | suggesting that they provide their changes as PRs 21 | ([#14](https://github.com/mwclient/mwclient/issues/14)), 22 | and handed the repository to Bryan 23 | ([#11](https://github.com/mwclient/mwclient/issues/11)). 24 | 25 | Dan Michael O. 
Heggø ([@danmichaelo](https://github.com/danmichaelo)) 26 | was the author of one of those forks, 27 | and the most prolific submitter of PRs 28 | in the early history of mwclient as a git repository. 29 | Not long after the git transition, the repository was moved to an organization 30 | ([#12, comment](https://github.com/mwclient/mwclient/issues/12#issuecomment-20447515)), 31 | and Dan became the main force behind the 2014 release of version 0.7.0 32 | (the first after a 3-year hiatus). 33 | Since then, he has been the lead maintainer of the project, 34 | which has attracted contributions from 35 | [several other people](../../graphs/contributors). 36 | 37 | For more details on the technical history of the project, 38 | see the [CHANGELOG.md](CHANGELOG.md) document. 39 | -------------------------------------------------------------------------------- /examples/basic_edit.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import os 4 | 5 | if len(sys.argv) > 3: 6 | sys.path.append(os.path.abspath(sys.argv[3])) 7 | if len(sys.argv) < 3: 8 | print('python basic_edit_test.py []\n') 9 | sys.exit() 10 | 11 | # Create a config file containing: 12 | # host = 'test.wikipedia.org' 13 | # path = '/w/' 14 | # ext = '.php' 15 | # username = 'Bryan' 16 | # password = 'xyz' 17 | 18 | prefix = sys.argv[2] 19 | 20 | # import cgitb; cgitb.enable(format = 'text') 21 | try: 22 | import apiedit as mwclient 23 | except ImportError: 24 | import mwclient 25 | site = mwclient.ex.ConfiguredSite(sys.argv[1]) 26 | site.compress = False 27 | 28 | print('Running configured site', sys.argv[1]) 29 | print('Site has writeapi:', getattr(site, 'writeapi', False)) 30 | 31 | page = site.Pages[prefix + '/text1'] 32 | 33 | print('Editing page1') 34 | 35 | page.edit() 36 | text1 = u"""== [[Test page]] == 37 | This is a [[test]] page generated by [http://mwclient.sourceforge.org/ mwclient]. 
38 | This test is done using the [[w:mw:API]].""" 39 | comment1 = 'Test page1' 40 | page.save(text1, comment1) 41 | 42 | rev = page.revisions(limit=1, prop='timestamp|comment|content').next() 43 | assert rev['comment'] == comment1, rev 44 | assert rev['*'] == rev['*'], rev 45 | print('Page edited on', rev['timestamp']) 46 | print('Links:', list(page.links(generator=False))) 47 | print('External links:', list(page.extlinks())) 48 | 49 | print('Uploading image') 50 | site.upload(open('test-image.png', 'rb'), prefix + '-test-image.png', 'desc', ignore=True) 51 | print('Uploading image for the second time') 52 | site.upload(open('test-image.png', 'rb'), prefix + '-test-image.png', 'desc', ignore=True) 53 | image = site.Images[prefix + '-test-image.png'] 54 | print('Imageinfo:', image.imageinfo) 55 | history = list(image.imagehistory()) 56 | print('History:', history) 57 | 58 | if site.writeapi: 59 | print('Deleting old version') 60 | archivename = history[1]['archivename'] 61 | image.delete('Testing history deletion', oldimage=archivename) 62 | print('History:', list(image.imagehistory())) 63 | 64 | text = page.edit() 65 | text += u'\n[[Image:%s-test-image.png]]' % prefix 66 | page.save(text, 'Adding image') 67 | print('Images:', list(page.images(generator=False))) 68 | 69 | print('Cleaning up') 70 | image.delete('Cleanup') 71 | page.delete('Cleanup') 72 | 73 | print('Done') 74 | -------------------------------------------------------------------------------- /docs/source/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 60 | -------------------------------------------------------------------------------- /docs/source/user/page-ops.rst: -------------------------------------------------------------------------------- 1 | .. 
_`page-ops`: 2 | 3 | Page operations 4 | =============== 5 | 6 | Start by :ref:`connecting ` to your site: 7 | 8 | >>> import mwclient 9 | >>> site = mwclient.Site('en.wikipedia.org') 10 | 11 | For information about authenticating, please see 12 | :ref:`the section on authenticating `. 13 | 14 | Editing or creating a page 15 | -------------------------- 16 | 17 | To get the wikitext for a specific page: 18 | 19 | >>> page = site.pages['Greater guinea pig'] 20 | >>> text = page.text() 21 | 22 | If a page doesn't exist, :meth:`Page.text() ` 23 | just returns an empty string. If you need to test the existence of the 24 | page, use ``page.exists``: 25 | 26 | >>> page.exists 27 | True 28 | 29 | Edit the text as you like before saving it back to the wiki: 30 | 31 | >>> page.save(text, 'Edit summary') 32 | 33 | If the page didn't exist, this operation will create it. 34 | 35 | Listing page revisions 36 | ---------------------- 37 | 38 | :meth:`Page.revisions() ` returns a List object 39 | that you can iterate over using a for loop. Continuation 40 | is handled under the hood so you don't have to worry about it. 
41 | 42 | *Example:* Let's find out which users made the most edits to a page: 43 | 44 | >>> users = [rev['user'] for rev in page.revisions()] 45 | >>> unique_users = set(users) 46 | >>> user_revisions = [{'user': user, 'count': users.count(user)} for user in unique_users] 47 | >>> sorted(user_revisions, key=lambda x: x['count'], reverse=True)[:5] 48 | [{'count': 6, 'user': u'Wolf12345'}, 49 | {'count': 4, 'user': u'Test-bot'}, 50 | {'count': 4, 'user': u'Mirxaeth'}, 51 | {'count': 3, 'user': u'192.251.192.201'}, 52 | {'count': 3, 'user': u'78.50.51.180'}] 53 | 54 | *Tip:* If you want to retrieve a specific number of revisions, the 55 | :code:`itertools.islice` function can come in handy: 56 | 57 | >>> from datetime import datetime 58 | >>> from time import mktime 59 | >>> from itertools import islice 60 | >>> for revision in islice(page.revisions(), 5): 61 | ... dt = datetime.fromtimestamp(mktime(revision['timestamp'])) 62 | ... print('{}'.format(dt.strftime('%F %T'))) 63 | 64 | 65 | Other page operations 66 | --------------------- 67 | 68 | There are many other page operations like 69 | :meth:`backlinks() `, 70 | :meth:`embeddedin() `, 71 | etc. See the :class:`API reference ` for more. 72 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | 2 | mwclient: lightweight MediaWiki client 3 | ====================================== 4 | 5 | .. image:: logo.png 6 | :align: right 7 | :width: 30% 8 | 9 | Mwclient is an :ref:`MIT licensed ` client library to the `MediaWiki API`_ 10 | that should work well with both Wikimedia wikis and other wikis running 11 | MediaWiki 1.16 or above. It works with Python 2.7 and 3.3+. 12 | 13 | .. _install: 14 | 15 | Installation 16 | ------------ 17 | 18 | Installing Mwclient is simple with `pip `_, just run 19 | this in your terminal: 20 | 21 | .. 
code:: bash 22 | 23 | pip install mwclient 24 | 25 | Quickstart 26 | ---------- 27 | 28 | .. code-block:: python 29 | 30 | >>> site = mwclient.Site(('https', 'en.wikipedia.org')) 31 | >>> page = site.pages[u'Leipäjuusto'] 32 | >>> page.text() 33 | u'{{Unreferenced|date=September 2009}}\n[[Image:Leip\xe4juusto cheese with cloudberry jam.jpg|thumb|Leip\xe4juusto with [[cloudberry]] jam]]\n\'\'\'Leip\xe4juusto\'\'\' (bread cheese) or \'\'juustoleip\xe4\'\', which is also known in English as \'\'\'Finnish squeaky cheese\'\'\', is a fresh [[cheese]] traditionally made from cow\'s [[beestings]], rich milk from a cow that has recently calved.' 34 | >>> [x for x in page.categories()] 35 | [>, 36 | >, 37 | >, 38 | >] 39 | 40 | 41 | User guide 42 | ---------- 43 | 44 | This guide is intended as an introductory overview, and 45 | explains how to make use of the most important features of 46 | mwclient. 47 | 48 | .. toctree:: 49 | :maxdepth: 2 50 | 51 | user/index 52 | 53 | 54 | Reference guide 55 | --------------- 56 | 57 | If you are looking for information on a specific function, class or method, 58 | this part of the documentation is for you. It's autogenerated 59 | from the source code. 60 | 61 | .. toctree:: 62 | :maxdepth: 3 63 | 64 | reference/index 65 | 66 | 67 | Development 68 | ----------- 69 | 70 | Looking for information on contributing to mwclient development? 71 | 72 | .. toctree:: 73 | :maxdepth: 3 74 | 75 | development/index 76 | 77 | 78 | .. _license: 79 | 80 | MIT License 81 | ----------- 82 | 83 | .. include:: ../../LICENSE.md 84 | 85 | Indices and tables 86 | ------------------ 87 | 88 | * :ref:`genindex` 89 | * :ref:`modindex` 90 | * :ref:`search` 91 | 92 | .. 
_`MediaWiki API`: https://www.mediawiki.org/wiki/API 93 | -------------------------------------------------------------------------------- /mwclient/errors.py: -------------------------------------------------------------------------------- 1 | class MwClientError(RuntimeError): 2 | pass 3 | 4 | 5 | class MediaWikiVersionError(MwClientError): 6 | pass 7 | 8 | 9 | class APIDisabledError(MwClientError): 10 | pass 11 | 12 | 13 | class MaximumRetriesExceeded(MwClientError): 14 | pass 15 | 16 | 17 | class APIError(MwClientError): 18 | 19 | def __init__(self, code, info, kwargs): 20 | self.code = code 21 | self.info = info 22 | super(APIError, self).__init__(code, info, kwargs) 23 | 24 | 25 | class InsufficientPermission(MwClientError): 26 | pass 27 | 28 | 29 | class UserBlocked(InsufficientPermission): 30 | pass 31 | 32 | 33 | class EditError(MwClientError): 34 | pass 35 | 36 | 37 | class ProtectedPageError(EditError, InsufficientPermission): 38 | 39 | def __init__(self, page, code=None, info=None): 40 | self.page = page 41 | self.code = code 42 | self.info = info 43 | 44 | def __str__(self): 45 | if self.info is not None: 46 | return self.info 47 | return 'You do not have the "edit" right.' 48 | 49 | 50 | class FileExists(EditError): 51 | pass 52 | 53 | 54 | class LoginError(MwClientError): 55 | pass 56 | 57 | 58 | class OAuthAuthorizationError(LoginError): 59 | 60 | def __init__(self, code, info): 61 | self.code = code 62 | self.info = info 63 | 64 | def __str__(self): 65 | return self.info 66 | 67 | 68 | class AssertUserFailedError(LoginError): 69 | 70 | def __init__(self): 71 | self.message = 'By default, mwclient protects you from ' + \ 72 | 'accidentally editing without being logged in. If you ' + \ 73 | 'actually want to edit without logging in, you can set ' + \ 74 | 'force_login on the Site object to False.' 
import mwclient.listing
import mwclient.page


class Image(mwclient.page.Page):
    """A page describing a file, with access to its ``imageinfo`` data.

    Attributes set by the constructor:
        imagerepository: Value of ``imagerepository`` in the page info,
            or ``''`` when absent.
        imageinfo: First ``imageinfo`` entry of the page info, or an empty
            dict when the page info has none.
    """

    def __init__(self, site, name, info=None):
        super(Image, self).__init__(
            site, name, info,
            extra_properties={
                'imageinfo': (
                    ('iiprop',
                     'timestamp|user|comment|url|size|sha1|metadata|archivename'),
                )
            })
        self.imagerepository = self._info.get('imagerepository', '')
        self.imageinfo = self._info.get('imageinfo', ({}, ))[0]

    def imagehistory(self):
        """
        Get file revision info for the given file.

        API doc: https://www.mediawiki.org/wiki/API:Imageinfo
        """
        return mwclient.listing.PageProperty(
            self, 'imageinfo', 'ii',
            iiprop='timestamp|user|comment|url|size|sha1|metadata|archivename')

    def imageusage(self, namespace=None, filterredir='all', redirect=False,
                   limit=None, generator=True):
        """
        List pages that use the given file.

        API doc: https://www.mediawiki.org/wiki/API:Imageusage
        """
        prefix = mwclient.listing.List.get_prefix('iu', generator)
        kwargs = dict(mwclient.listing.List.generate_kwargs(
            prefix, title=self.name, namespace=namespace,
            filterredir=filterredir))
        if redirect:
            kwargs['%sredirect' % prefix] = '1'
        return mwclient.listing.List.get_list(generator)(
            self.site, 'imageusage', 'iu', limit=limit,
            return_values='title', **kwargs)

    def duplicatefiles(self, limit=None):
        """
        List duplicates of the current file.

        API doc: https://www.mediawiki.org/wiki/API:Duplicatefiles
        """
        return mwclient.listing.PageProperty(self, 'duplicatefiles', 'df',
                                             dflimit=limit)

    def download(self, destination=None):
        """
        Download the file. If `destination` is given, the file will be written
        directly to the stream. Otherwise the file content will be stored in memory
        and returned (with the risk of running out of memory for large files).

        Recommended usage:

            >>> with open(filename, 'wb') as fd:
            ...     image.download(fd)

        Args:
            destination (file object): Destination file
        """
        url = self.imageinfo['url']
        if destination is not None:
            # Stream in 1 KiB chunks so large files never sit in memory.
            res = self.site.connection.get(url, stream=True)
            for chunk in res.iter_content(1024):
                destination.write(chunk)
        else:
            return self.site.connection.get(url).content

    def __repr__(self):
        # The format string needs one placeholder per argument; an empty
        # string here raises "not all arguments converted" on every repr().
        # NOTE(review): exact wording of the literal is an assumption -
        # confirm against the sibling Page/Category reprs.
        return "<Image object '%s' for %s>" % (self.name.encode('utf-8'),
                                               self.site)
def read_config(config_files, **predata):
    """Merge the settings defined by a sequence of configuration files.

    Args:
        config_files: Iterable of paths to Python configuration files. They
            are processed in order, so later files override earlier ones.
        **predata: Names placed in the namespace the files are executed in
            (for example ``sites=SiteList()``). The same mapping is shared
            by all files.

    Returns:
        dict: Every public (non-underscore) name found in that namespace.
    """
    cfg = {}
    for config_file in config_files:
        cfg.update(_read_config_file(config_file, predata))
    return cfg


def _read_config_file(_config_file, predata):
    """Execute one configuration file and yield its public ``(name, value)`` pairs.

    This is a generator: the file is only read and executed once iteration
    starts.

    Args:
        _config_file: Path of the configuration file.
        predata: Mapping used as the local namespace of the executed code;
            it is updated in place with whatever the file assigns.
    """
    # SECURITY: the configuration file is executed as Python code, so it must
    # come from a trusted location.
    with open(_config_file) as _file:
        _source = _file.read()
    # Compiling the text (rather than exec'ing the file object) works on both
    # Python 2 and 3 and keeps the file name in tracebacks.
    exec(compile(_source, _config_file, 'exec'), globals(), predata)

    for _k, _v in list(predata.items()):
        if not _k.startswith('_'):
            yield _k, _v
    # Backward compatibility: this function used to dump its own locals too,
    # and the only public one was the ``predata`` mapping itself.
    yield 'predata', predata


class SiteList(object):
    """Collection of per-site settings dicts, created on first access."""

    def __init__(self):
        self.sites = {}

    def __getitem__(self, key):
        # Unknown keys get a fresh dict so config files can write
        # ``sites['name']['host'] = ...`` without declaring the site first.
        return self.sites.setdefault(key, {})

    def __iter__(self):
        # Iterates over the settings dicts, not over the site names.
        return iter(self.sites.values())
import requests

# Absolute import: the implicit relative ``import client`` only resolves on
# Python 2.
from mwclient import client


class ConfiguredSite(client.Site):
    """A ``Site`` whose connection settings come from configuration files.

    Args:
        *config_files: Paths handed to ``read_config``.
        **kwargs: If ``name`` is given, the settings stored under
            ``sites[name]`` are merged over the global ones.

    The resulting settings must contain ``host`` and ``path``. When both
    ``username`` and ``password`` are present the site logs in right after
    construction.
    """

    def __init__(self, *config_files, **kwargs):
        self.config = read_config(config_files, sites=SiteList())

        if 'name' in kwargs:
            self.config.update(self.config['sites'][kwargs['name']])

        do_login = 'username' in self.config and 'password' in self.config

        super(ConfiguredSite, self).__init__(
            host=self.config['host'],
            path=self.config['path'],
            ext=self.config.get('ext', '.php'),
            # Initialisation is left to login() when credentials exist.
            do_init=not do_login,
            retry_timeout=self.config.get('retry_timeout', 30),
            max_retries=self.config.get('max_retries', -1),
        )

        if do_login:
            self.login(self.config['username'],
                       self.config['password'])


class ConfiguredPool(list):
    """List of ``Site`` objects, one per configured site.

    All sites are created with the same ``requests.Session`` (``self.pool``).
    Global settings act as defaults for every entry of ``sites``; each
    site's merged settings are stored on its ``config`` attribute.

    Args:
        *config_files: Paths handed to ``read_config``.
    """

    def __init__(self, *config_files):
        super(ConfiguredPool, self).__init__()
        self.config = read_config(config_files, sites=SiteList())
        self.pool = requests.Session()

        # Everything except the site table is a default for each site.
        defaults = {k: v for k, v in self.config.items() if k != 'sites'}

        for site in self.config['sites']:
            # Fill in defaults without overriding the site's own values.
            merged = defaults.copy()
            merged.update(site)
            site.update(merged)

            do_login = 'username' in site and 'password' in site

            self.append(client.Site(
                host=site['host'],
                path=site['path'],
                ext=site.get('ext', '.php'),
                pool=self.pool,
                do_init=not do_login,
                retry_timeout=site.get('retry_timeout', 30),
                max_retries=site.get('max_retries', -1)))
            if do_login:
                self[-1].login(site['username'], site['password'])
            self[-1].config = site
class TestList(unittest.TestCase):
    """Tests for ``List`` and ``GeneratorList`` against a mocked ``Site``."""

    def setupDummyResponses(self, mock_site, result_member, ns=None):
        """Queue two canned API responses on ``mock_site.get``.

        The first response holds two items and a ``continue`` block, the
        second holds one item and no continuation, so a listing that follows
        continuation yields three items in total.

        Args:
            mock_site: The patched ``Site`` mock.
            result_member: Key under ``query`` that holds the items.
            ns: Namespace numbers for the three items (default: all 0).
        """
        if ns is None:
            ns = [0, 0, 0]
        mock_site.get.side_effect = [
            {
                'continue': {
                    'apcontinue': 'Kre_Mbaye',
                    'continue': '-||'
                },
                'query': {
                    result_member: [
                        {
                            "pageid": 19839654,
                            "ns": ns[0],
                            "title": "Kre'fey",
                        },
                        {
                            "pageid": 19839654,
                            "ns": ns[1],
                            "title": "Kre-O",
                        }
                    ]
                }
            },
            {
                'query': {
                    result_member: [
                        {
                            "pageid": 30955295,
                            "ns": ns[2],
                            "title": "Kre-O Transformers",
                        }
                    ]
                }
            },
        ]

    @mock.patch('mwclient.client.Site')
    def test_list_continuation(self, mock_site):
        # Test that the list follows the continuation, yielding all three
        # items from both responses, and yields dicts when return_values
        # is not set

        lst = List(mock_site, 'allpages', 'ap', limit=2)
        self.setupDummyResponses(mock_site, 'allpages')
        vals = [x for x in lst]

        assert len(vals) == 3
        assert isinstance(vals[0], dict)

    @mock.patch('mwclient.client.Site')
    def test_list_with_str_return_value(self, mock_site):
        # Test that the List yields strings when return_values is string

        lst = List(mock_site, 'allpages', 'ap', limit=2, return_values='title')
        self.setupDummyResponses(mock_site, 'allpages')
        vals = [x for x in lst]

        assert len(vals) == 3
        assert isinstance(vals[0], str)

    @mock.patch('mwclient.client.Site')
    def test_list_with_tuple_return_value(self, mock_site):
        # Test that the List yields tuples when return_values is tuple

        lst = List(mock_site, 'allpages', 'ap', limit=2,
                   return_values=('title', 'ns'))
        self.setupDummyResponses(mock_site, 'allpages')
        vals = [x for x in lst]

        assert len(vals) == 3
        assert isinstance(vals[0], tuple)

    @mock.patch('mwclient.client.Site')
    def test_generator_list(self, mock_site):
        # Test that the GeneratorList yields Page objects

        lst = GeneratorList(mock_site, 'pages', 'p')
        self.setupDummyResponses(mock_site, 'pages', ns=[0, 6, 14])
        vals = [x for x in lst]

        assert len(vals) == 3
        # Exact type checks on purpose: Image derives from Page, so
        # isinstance() would not tell a plain Page from its subclasses.
        assert type(vals[0]) is mwclient.page.Page
        assert type(vals[1]) is mwclient.image.Image
        assert type(vals[2]) is mwclient.listing.Category
assert len(vals) == 3 100 | assert type(vals[0]) == tuple 101 | 102 | @mock.patch('mwclient.client.Site') 103 | def test_generator_list(self, mock_site): 104 | # Test that the GeneratorList yields Page objects 105 | 106 | lst = GeneratorList(mock_site, 'pages', 'p') 107 | self.setupDummyResponses(mock_site, 'pages', ns=[0, 6, 14]) 108 | vals = [x for x in lst] 109 | 110 | assert len(vals) == 3 111 | assert type(vals[0]) == mwclient.page.Page 112 | assert type(vals[1]) == mwclient.image.Image 113 | assert type(vals[2]) == mwclient.listing.Category 114 | 115 | if __name__ == '__main__': 116 | unittest.main() 117 | -------------------------------------------------------------------------------- /REFERENCE.md: -------------------------------------------------------------------------------- 1 | This file is intended to be a reference to mwclient. 2 | The current version is mwclient 0.6.5. 3 | 4 | The mwclient framework provides access to the MediaWiki API. 5 | It provides the functions of the MediaWiki API in a Pythonic manner. 6 | 7 | ## Sites ## 8 | The `Site` object is the most important class. 9 | It represents a MediaWiki site. 10 | Its constructor accepts various arguments, 11 | of which the first two, `host` and `path`, are the most important. 12 | They represent respectively 13 | the hostname without protocol 14 | and the root directory where `api.php` is located. 15 | The path parameter should end with a slash, /. 16 | Protocols other than HTTP and HTTPS are currently not supported. 17 | 18 | ```python 19 | #http 20 | site = mwclient.Site(host, path = '/w/', ...) 21 | 22 | #https 23 | site = mwclient.Site(('https', host), path = '/w/', ...) 24 | ``` 25 | 26 | ### Pages ### 27 | Sites provide access to pages via various generators and the Pages object. 28 | The base Page object is called Page 29 | and from that derive Category and Image. 
30 | When the page is retrieved via `Site.Pages` or a generator, 31 | it will check automatically which of those three specific types 32 | should be returned. 33 | To get a page by its name, call `Site.Pages` as a subscriptable object: 34 | 35 | ```python 36 | page = site.Pages['Template:Stub'] 37 | image = site.Pages['Image:Wiki.png'] # This will return an Image object 38 | image2 = site.Images['Wiki.png'] # The same image 39 | ``` 40 | 41 | Alternatively, `Site.Images` and `Site.Categories` are provided, 42 | which do exactly the same as `Site.Pages`, 43 | except that they require the page name without its namespace prefixed. 44 | 45 | #### PageProperties #### 46 | The `Page` object provides many generators available in the API. 47 | In addition to the page properties listed in the API documentation, 48 | the lists backlinks and embeddedin are also members of the Page object. For more information about using generators 49 | see the section on generators below. 50 | 51 | `Category` objects provide an extra function, `members` 52 | to list all members of a category. 53 | The Category object can also be used itself 54 | as an iterator yielding all its members. 55 | 56 | ```python 57 | #list pages in Category:Help by name 58 | category = site.Pages['Category:Help'] 59 | for page in category: 60 | print page.name 61 | ``` 62 | 63 | `Image` objects have additional functions `imagehistory` and `imageusage` 64 | which represent the old versions of the image and its usage, respectively. 65 | `Image.download` returns a file object to the full size image. 66 | 67 | ```python 68 | fr = image.download() 69 | fw = open('Wiki.png', 'wb') 70 | while True: 71 | s = fr.read(4096) 72 | if not s: break 73 | fw.write(s) 74 | fr.close() # Always close those file objects !!! 75 | fw.close() 76 | ``` 77 | 78 | #### Editing pages #### 79 | Call `Page.text()` to retrieve the page content. 80 | Use `Page.save(text, summary = u'', ...)` to save the page. 
81 | If available, `Page.save` uses the API to edit, 82 | but falls back to the old way if the write API is not available. 83 | 84 | ## Generators ## 85 | 86 | ## Exceptions ## 87 | 88 | ## Implementation notes ## 89 | Most properties and generators accept the same parameters as the API, 90 | without their two-letter prefix. 91 | Exceptions: 92 | * `Image.imageinfo` is the `imageinfo` of the *latest* image. 93 | Earlier versions can be fetched using `imagehistory()` 94 | * `Site.all*`: parameter `(ap)from` renamed to `start` 95 | * `categorymembers` is implemented as `Category.members` 96 | * `deletedrevs` is `deletedrevisions` 97 | * `usercontribs` is `usercontributions` 98 | * First parameters of `search` and `usercontributions` 99 | are `search` and `user`, respectively 100 | 101 | Properties and generators are implemented as Python generators. 102 | Their limit parameter is only an indication 103 | of the number of items in one chunk. 104 | It is not the total limit. 105 | Doing `list(generator(limit = limit))` will return 106 | ALL items of generator, and not be limited by the limit value. 107 | Use `list(generator(max_items = max_items))` 108 | to limit the amount of items returned. 109 | Default chunk size is generally the maximum chunk size. 110 | 111 | ## Links ## 112 | * Project page at GitHub: https://github.com/mwclient/mwclient 113 | * More in-depth documentation on the GitHub wiki: 114 | https://github.com/mwclient/mwclient/wiki 115 | * MediaWiki API documentation: https://mediawiki.org/wiki/API 116 | -------------------------------------------------------------------------------- /docs/source/user/connecting.rst: -------------------------------------------------------------------------------- 1 | .. 
_`connecting`: 2 | 3 | Connecting to your site 4 | ======================= 5 | 6 | Begin by importing mwclient: 7 | 8 | >>> import mwclient 9 | 10 | Then try to connect to a site: 11 | 12 | >>> site = mwclient.Site('test.wikipedia.org') 13 | 14 | By default, mwclient will connect using https. If your site doesn't support 15 | https, you need to explicitly request http like so: 16 | 17 | >>> site = mwclient.Site(('http', 'test.wikipedia.org')) 18 | 19 | .. _endpoint: 20 | 21 | The API endpoint location 22 | ------------------------- 23 | 24 | The API endpoint location on a MediaWiki site depends on the configurable 25 | `$wgScriptPath`_. Mwclient defaults to the script path '/w/' used by the 26 | Wikimedia wikis. If you get a 404 Not Found or a 27 | :class:`mwclient.errors.InvalidResponse` error upon connecting, your site might 28 | use a different script path. You can specify it using the ``path`` argument: 29 | 30 | >>> site = mwclient.Site('my-awesome-wiki.org', path='/wiki/', ) 31 | 32 | .. _$wgScriptPath: https://www.mediawiki.org/wiki/Manual:$wgScriptPath 33 | 34 | .. _user-agent: 35 | 36 | Specifying a user agent 37 | ----------------------- 38 | 39 | If you are connecting to a Wikimedia site, you should follow the 40 | `Wikimedia User-Agent policy`_ and identify your tool like so: 41 | 42 | >>> ua = 'MyCoolTool/0.2 run by User:Xyz' 43 | >>> site = mwclient.Site('test.wikipedia.org', clients_useragent=ua) 44 | 45 | Note that Mwclient appends ' - MwClient/{version} ({url})' to your string. 46 | 47 | .. _Wikimedia User-Agent policy: https://meta.wikimedia.org/wiki/User-Agent_policy 48 | 49 | .. _auth: 50 | 51 | Errors and warnings 52 | ------------------- 53 | 54 | Deprecations and other warnings from the API are logged using the 55 | `standard Python logging facility`_, so you can handle them in any way you like. 56 | To print them to stderr: 57 | 58 | >>> import logging 59 | >>> logging.basicConfig(level=logging.WARNING) 60 | 61 | .. 
_standard Python logging facility: https://docs.python.org/3/library/logging.html 62 | 63 | Errors are thrown as exceptions. All exceptions inherit 64 | :class:`mwclient.errors.MwClientError`. 65 | 66 | Authenticating 67 | -------------- 68 | 69 | Mwclient supports several methods for authentication described below. By default 70 | it will also protect you from editing when not authenticated by raising a 71 | :class:`mwclient.errors.LoginError`. If you actually *do* want to edit 72 | unauthenticated, just set 73 | 74 | >>> site.force_login = False 75 | 76 | .. _oauth: 77 | 78 | OAuth 79 | ^^^^^ 80 | 81 | On Wikimedia wikis, the recommended authentication method is to authenticate as 82 | a `owner-only consumer`_. Once you have obtained the *consumer token* (also 83 | called *consumer key*), the *consumer secret*, the *access token* and the 84 | *access secret*, you can authenticate like so: 85 | 86 | >>> site = mwclient.Site('test.wikipedia.org', 87 | consumer_token='my_consumer_token', 88 | consumer_secret='my_consumer_secret', 89 | access_token='my_access_token', 90 | access_secret='my_access_secret') 91 | 92 | 93 | .. _owner-only consumer: https://www.mediawiki.org/wiki/OAuth/Owner-only_consumers 94 | .. _old_login: 95 | 96 | Old-school login 97 | ^^^^^^^^^^^^^^^^ 98 | 99 | To use old-school login, call the login method: 100 | 101 | >>> site.login('my_username', 'my_password') 102 | 103 | If login fails, a :class:`mwclient.errors.LoginError` will be thrown. 104 | 105 | .. _http-auth: 106 | 107 | HTTP authentication 108 | ^^^^^^^^^^^^^^^^^^^ 109 | 110 | If your server is configured to use HTTP authentication, you can 111 | authenticate using the ``httpauth`` parameter. 
For Basic HTTP authentication: 112 | 113 | >>> site = mwclient.Site('awesome.site', httpauth=('my_username', 'my_password')) 114 | 115 | You can also pass in any other :ref:`authentication mechanism ` 116 | based on the :class:`requests.auth.AuthBase`, such as Digest authentication: 117 | 118 | >>> from requests.auth import HTTPDigestAuth 119 | >>> site = mwclient.Site('awesome.site', httpauth=HTTPDigestAuth('my_username', 'my_password')) 120 | 121 | SSL client certificate authentication 122 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 123 | 124 | If your server requires an SSL client certificate to authenticate, you can 125 | pass the ``client_certificate`` parameter: 126 | 127 | >>> site = mwclient.Site('awesome.site', client_certificate='/path/to/client-and-key.pem') 128 | 129 | This parameter being a proxy to :class:`requests`' cert_ parameter, you can also specify a tuple (certificate, key) like: 130 | 131 | >>> site = mwclient.Site('awesome.site', client_certificate=('client.pem', 'key.pem')) 132 | 133 | Please note that the private key must not be encrypted. 134 | 135 | .. _cert: http://docs.python-requests.org/en/master/user/advanced/#ssl-cert-verification 136 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | .. figure:: docs/source/logo.svg 3 | :alt: Logo 4 | :align: center 5 | :width: 270px 6 | 7 | mwclient 8 | ======== 9 | 10 | .. image:: https://img.shields.io/travis/mwclient/mwclient.svg 11 | :target: https://travis-ci.org/mwclient/mwclient 12 | :alt: Build status 13 | 14 | .. image:: https://img.shields.io/coveralls/mwclient/mwclient.svg 15 | :target: https://coveralls.io/r/mwclient/mwclient 16 | :alt: Test coverage 17 | 18 | .. image:: https://landscape.io/github/mwclient/mwclient/master/landscape.svg?style=flat 19 | :target: https://landscape.io/github/mwclient/mwclient/master 20 | :alt: Code health 21 | 22 | .. 
image:: https://img.shields.io/pypi/v/mwclient.svg 23 | :target: https://pypi.python.org/pypi/mwclient 24 | :alt: Latest version 25 | 26 | .. image:: https://img.shields.io/pypi/dw/mwclient.svg 27 | :target: https://pypi.python.org/pypi/mwclient 28 | :alt: Downloads 29 | 30 | .. image:: https://img.shields.io/github/license/mwclient/mwclient.svg 31 | :target: http://opensource.org/licenses/MIT 32 | :alt: MIT license 33 | 34 | .. image:: https://readthedocs.org/projects/mwclient/badge/?version=master 35 | :target: http://mwclient.readthedocs.io/en/latest/ 36 | :alt: Documentation status 37 | 38 | .. image:: http://isitmaintained.com/badge/resolution/mwclient/mwclient.svg 39 | :target: http://isitmaintained.com/project/mwclient/mwclient 40 | :alt: Issue statistics 41 | 42 | mwclient is a lightweight Python client library to the `MediaWiki API `_ 43 | which provides access to most API functionality. 44 | It works with Python 2.7, 3.3 and above, and supports MediaWiki 1.16 and above. 45 | For functions not available in the current MediaWiki, a ``MediaWikiVersionError`` is raised. 46 | 47 | The current stable `version 0.8.6 `_ 48 | is `available through PyPI `_: 49 | 50 | .. code-block:: console 51 | 52 | $ pip install mwclient 53 | 54 | The current `development version `_ 55 | can be installed from GitHub: 56 | 57 | .. code-block:: console 58 | 59 | $ pip install git+https://github.com/mwclient/mwclient.git 60 | 61 | Please see the 62 | `changelog document `_ 63 | for a list of changes. 64 | 65 | Getting started 66 | --------------- 67 | 68 | See the `user guide `_ 69 | to get started using mwclient. 70 | 71 | For more information, see the 72 | `REFERENCE.md `_ file 73 | and the `documentation on the wiki `_. 74 | 75 | 76 | Contributing 77 | -------------------- 78 | 79 | mwclient ships with a test suite based on `pytest `_. 80 | Only a small part of mwclient is currently tested, 81 | but hopefully coverage will improve in the future. 
82 | 83 | The easiest way to run tests is: 84 | 85 | .. code-block:: console 86 | 87 | $ python setup.py test 88 | 89 | This will make an in-place build and download test dependencies locally 90 | if needed. To make tests run faster, you can use pip to do an 91 | `"editable" install `_: 92 | 93 | .. code-block:: console 94 | 95 | $ pip install pytest pytest-pep8 responses 96 | $ pip install -e . 97 | $ py.test 98 | 99 | To run tests with different Python versions in isolated virtualenvs, you 100 | can use `Tox `_: 101 | 102 | .. code-block:: console 103 | 104 | $ pip install tox 105 | $ tox 106 | 107 | *Documentation* consists of both a manually compiled user guide (under ``docs/user``) 108 | and a reference guide generated from the docstrings, 109 | using Sphinx autodoc with the napoleon extension. 110 | Documentation is built automatically on `ReadTheDocs`_ after each commit. 111 | To build documentation locally for testing, do: 112 | 113 | .. code-block:: console 114 | 115 | $ cd docs 116 | $ make html 117 | 118 | When writing docstrings, try to adhere to the `Google style`_. 119 | 120 | .. _Google style: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html 121 | .. _ReadTheDocs: https://mwclient.readthedocs.io/ 122 | 123 | Implementation notes 124 | -------------------- 125 | 126 | Most properties and generators accept the same parameters as the API, 127 | without their two-letter prefix. Exceptions to this rule: 128 | 129 | * ``Image.imageinfo`` is the imageinfo of the latest image. 
130 | Earlier versions can be fetched using ``imagehistory()`` 131 | * ``Site.all*``: parameter ``[ap]from`` renamed to ``start`` 132 | * ``categorymembers`` is implemented as ``Category.members`` 133 | * ``deletedrevs`` is ``deletedrevisions`` 134 | * ``usercontribs`` is ``usercontributions`` 135 | * First parameters of ``search`` and ``usercontributions`` are ``search`` and ``user`` 136 | respectively 137 | 138 | Properties and generators are implemented as Python generators. 139 | Their limit parameter is only an indication of the number of items in one chunk. 140 | It is not the total limit. 141 | Doing ``list(generator(limit = limit))`` will return ALL items of generator, 142 | and not be limited by the limit value. 143 | Default chunk size is generally the maximum chunk size. 144 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source

# Every target below is a command, not a file; list them all so that a stray
# file or directory with the same name can never make a target "up to date".
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes xml pseudoxml linkcheck doctest

help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/mwclient.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/mwclient.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/mwclient"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/mwclient"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. 
htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 
76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\mwclient.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\mwclient.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 
137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 
205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # mwclient documentation build configuration file, created by 4 | # sphinx-quickstart on Sat Sep 27 11:19:56 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import datetime 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. 
If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | sys.path.insert(0, os.path.abspath('../..')) 23 | import sphinx_rtd_theme 24 | import mwclient 25 | from mwclient import __ver__ 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | #needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.intersphinx', 38 | 'sphinx.ext.todo', 39 | 'sphinx.ext.coverage', 40 | 'sphinx.ext.viewcode', 41 | 'sphinx.ext.napoleon' 42 | ] 43 | 44 | # Add any paths that contain templates here, relative to this directory. 45 | # templates_path = ['_templates'] 46 | 47 | # The suffix of source filenames. 48 | source_suffix = '.rst' 49 | 50 | # The encoding of source files. 51 | #source_encoding = 'utf-8-sig' 52 | 53 | # The master toctree document. 54 | master_doc = 'index' 55 | 56 | # General information about the project. 57 | project = u'mwclient' 58 | copyright = u'{0}, Bryan Tong Minh'.format(datetime.datetime.now().year) 59 | 60 | # The version info for the project you're documenting, acts as replacement for 61 | # |version| and |release|, also used in various other places throughout the 62 | # built documents. 63 | # 64 | # The short X.Y version. 65 | version = __ver__ 66 | # The full version, including alpha/beta/rc tags. 67 | release = version 68 | 69 | # The language for content autogenerated by Sphinx. Refer to documentation 70 | # for a list of supported languages. 71 | #language = None 72 | 73 | # There are two options for replacing |today|: either, you set today to some 74 | # non-false value, then it is used: 75 | #today = '' 76 | # Else, today_fmt is used as the format for a strftime call. 
77 | #today_fmt = '%B %d, %Y' 78 | 79 | # List of patterns, relative to source directory, that match files and 80 | # directories to ignore when looking for source files. 81 | exclude_patterns = [] 82 | 83 | # The reST default role (used for this markup: `text`) to use for all 84 | # documents. 85 | #default_role = None 86 | 87 | # If true, '()' will be appended to :func: etc. cross-reference text. 88 | #add_function_parentheses = True 89 | 90 | # If true, the current module name will be prepended to all description 91 | # unit titles (such as .. function::). 92 | #add_module_names = True 93 | 94 | # If true, sectionauthor and moduleauthor directives will be shown in the 95 | # output. They are ignored by default. 96 | #show_authors = False 97 | 98 | # The name of the Pygments (syntax highlighting) style to use. 99 | pygments_style = 'sphinx' 100 | 101 | # A list of ignored prefixes for module index sorting. 102 | #modindex_common_prefix = [] 103 | 104 | # If true, keep warnings as "system message" paragraphs in the built documents. 105 | #keep_warnings = False 106 | 107 | 108 | # -- Options for HTML output ---------------------------------------------- 109 | 110 | # The theme to use for HTML and HTML Help pages. See the documentation for 111 | # a list of builtin themes. 112 | 113 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 114 | html_theme = 'sphinx_rtd_theme' 115 | # html_theme_options = {'github_fork': 'mwclient/mwclient'} 116 | 117 | html_theme_options = { 118 | # 'sticky_navigation': True # Set to False to disable the sticky nav while scrolling. 119 | # 'logo_only': True, # if we have a html_logo below, this shows /only/ the logo with no title text 120 | } 121 | 122 | # html_style = 'css/my_theme.css' 123 | 124 | # The name for this set of Sphinx documents. If None, it defaults to 125 | # "<project> v<release> documentation". 126 | #html_title = None 127 | 128 | # A shorter title for the navigation bar. Default is the same as html_title. 
129 | #html_short_title = None 130 | 131 | # The name of an image file (relative to this directory) to place at the top 132 | # of the sidebar. 133 | # html_logo = 'logo.png' 134 | 135 | # The name of an image file (within the static path) to use as favicon of the 136 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 137 | # pixels large. 138 | #html_favicon = None 139 | 140 | # Add any paths that contain custom static files (such as style sheets) here, 141 | # relative to this directory. They are copied after the builtin static files, 142 | # so a file named "default.css" will overwrite the builtin "default.css". 143 | # html_static_path = ['_static'] 144 | 145 | # Add any extra paths that contain custom files (such as robots.txt or 146 | # .htaccess) here, relative to this directory. These files are copied 147 | # directly to the root of the documentation. 148 | #html_extra_path = [] 149 | 150 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 151 | # using the given strftime format. 152 | #html_last_updated_fmt = '%b %d, %Y' 153 | 154 | # If true, SmartyPants will be used to convert quotes and dashes to 155 | # typographically correct entities. 156 | html_use_smartypants = True 157 | 158 | # Custom sidebar templates, maps document names to template names. 159 | #html_sidebars = {} 160 | 161 | # Additional templates that should be rendered to pages, maps page names to 162 | # template names. 163 | #html_additional_pages = {} 164 | 165 | # If false, no module index is generated. 166 | #html_domain_indices = True 167 | 168 | # If false, no index is generated. 169 | html_use_index = True 170 | 171 | # If true, the index is split into individual pages for each letter. 172 | html_split_index = False 173 | 174 | # If true, links to the reST sources are added to the pages. 175 | html_show_sourcelink = True 176 | 177 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 
178 | html_show_sphinx = True 179 | 180 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 181 | #html_show_copyright = True 182 | 183 | # If true, an OpenSearch description file will be output, and all pages will 184 | # contain a <link> tag referring to it. The value of this option must be the 185 | # base URL from which the finished HTML is served. 186 | #html_use_opensearch = '' 187 | 188 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 189 | #html_file_suffix = None 190 | 191 | # Output file base name for HTML help builder. 192 | htmlhelp_basename = 'mwclientdoc' 193 | 194 | 195 | # -- Options for LaTeX output --------------------------------------------- 196 | 197 | latex_elements = { 198 | # The paper size ('letterpaper' or 'a4paper'). 199 | #'papersize': 'letterpaper', 200 | 201 | # The font size ('10pt', '11pt' or '12pt'). 202 | #'pointsize': '10pt', 203 | 204 | # Additional stuff for the LaTeX preamble. 205 | #'preamble': '', 206 | } 207 | 208 | # Grouping the document tree into LaTeX files. List of tuples 209 | # (source start file, target name, title, 210 | # author, documentclass [howto, manual, or own class]). 211 | latex_documents = [ 212 | ('index', 'mwclient.tex', u'mwclient Documentation', 213 | u'Bryan Tong Minh', 'manual'), 214 | ] 215 | 216 | # The name of an image file (relative to this directory) to place at the top of 217 | # the title page. 218 | latex_logo = 'logo.png' 219 | 220 | # For "manual" documents, if this is true, then toplevel headings are parts, 221 | # not chapters. 222 | #latex_use_parts = False 223 | 224 | # If true, show page references after internal links. 225 | #latex_show_pagerefs = False 226 | 227 | # If true, show URL addresses after external links. 228 | #latex_show_urls = False 229 | 230 | # Documents to append as an appendix to all manuals. 231 | #latex_appendices = [] 232 | 233 | # If false, no module index is generated. 
234 | #latex_domain_indices = True 235 | 236 | 237 | # -- Options for manual page output --------------------------------------- 238 | 239 | # One entry per manual page. List of tuples 240 | # (source start file, name, description, authors, manual section). 241 | man_pages = [ 242 | ('index', 'mwclient', u'mwclient Documentation', 243 | [u'Bryan Tong Minh'], 1) 244 | ] 245 | 246 | # If true, show URL addresses after external links. 247 | #man_show_urls = False 248 | 249 | 250 | # -- Options for Texinfo output ------------------------------------------- 251 | 252 | # Grouping the document tree into Texinfo files. List of tuples 253 | # (source start file, target name, title, author, 254 | # dir menu entry, description, category) 255 | texinfo_documents = [ 256 | ('index', 'mwclient', u'mwclient Documentation', 257 | u'Bryan Tong Minh', 'mwclient', 'One line description of project.', 258 | 'Miscellaneous'), 259 | ] 260 | 261 | # Documents to append as an appendix to all manuals. 262 | #texinfo_appendices = [] 263 | 264 | # If false, no module index is generated. 265 | #texinfo_domain_indices = True 266 | 267 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 268 | #texinfo_show_urls = 'footnote' 269 | 270 | # If true, do not generate a @detailmenu in the "Top" node's menu. 271 | #texinfo_no_detailmenu = False 272 | 273 | 274 | # Configuration for intersphinx: refer to the Requests documentation. 275 | intersphinx_mapping = { 276 | 'requests': ('http://requests.readthedocs.org/en/latest/', None) 277 | } 278 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Release Notes for mwclient 2 | 3 | See [GitHub releases](https://github.com/mwclient/mwclient/releases/) for 4 | release notes for mwclient 0.7.1+. 5 | 6 | ## Changes in version 0.7.0 7 | 8 | Mwclient 0.7.0 was released on 27 September 2014. 
9 | 10 | Upgrade notices: 11 | - This version requires minimum Python 2.6 and MediaWiki 1.16. 12 | Support for Python 2.4–2.5 and MediaWiki 1.11–1.15 has been dropped. 13 | - The `Page.edit()` method has been renamed to `Page.text()`. 14 | While `Page.edit()` is deprecated, it will be available for a long time. 15 | The old `Page.text` attribute, that used to store a copy of the wikitext 16 | from the last `Page.edit()` call, has been removed entirely. 17 | The `readonly` argument has also been removed (it was never really 18 | implemented, so it acted only as a dummy argument before the removal). 19 | - The `Page.get_expanded()` method has been deprecated in favour of 20 | calling `Page.text(expandtemplates=True)`. 21 | 22 | Detailed changelog: 23 | * [2012-08-30] [@btongminh](https://github.com/btongminh): 24 | Allow setting both the upload description and the page content separately. 25 | [0aa748f](https://github.com/mwclient/mwclient/commit/0aa748f). 26 | * [2012-08-30] [@tommorris](https://github.com/tommorris): 27 | Improve documentation. 28 | [a2723e7](https://github.com/mwclient/mwclient/commit/a2723e7). 29 | * [2013-02-15] [@waldyrious](https://github.com/waldyrious): 30 | Converted the repository to git and moved from sourceforge to github. 31 | [#1](https://github.com/mwclient/mwclient/issues/1) (also 32 | [#11](https://github.com/mwclient/mwclient/issues/11), 33 | [#13](https://github.com/mwclient/mwclient/issues/13) and 34 | [#15](https://github.com/mwclient/mwclient/issues/15)). 35 | * [2013-03-20] [@eug48](https://github.com/eug48): 36 | Support for customising the useragent. 37 | [773adf9](https://github.com/mwclient/mwclient/commit/773adf9), 38 | [#16](https://github.com/mwclient/mwclient/pull/16). 39 | * [2013-03-20] [@eug48](https://github.com/eug48): 40 | Removed unused `Request` class. 41 | [99e786d](https://github.com/mwclient/mwclient/commit/99e786d), 42 | [#16](https://github.com/mwclient/mwclient/pull/16). 
43 | * [2013-05-13] [@danmichaelo](https://github.com/danmichaelo): 44 | Support for requesting pages by their page id (`site.pages[page_id]`). 45 | [a1a2ced](https://github.com/danmichaelo/mwclient/commit/a1a2ced), 46 | [#19](https://github.com/mwclient/mwclient/pull/19). 47 | * [2013-05-13] [@danmichaelo](https://github.com/danmichaelo): 48 | Support for editing sections. 49 | [546f77d](https://github.com/danmichaelo/mwclient/commit/546f77d), 50 | [#19](https://github.com/mwclient/mwclient/pull/19). 51 | * [2013-05-13] [@danmichaelo](https://github.com/danmichaelo): 52 | New method `Page.redirects_to()` and helper method `Page.resolve_redirect()`. 53 | [3b851cb](https://github.com/danmichaelo/mwclient/commit/3b851cb), 54 | [36e8dcc](https://github.com/danmichaelo/mwclient/commit/36e8dcc), 55 | [#19](https://github.com/mwclient/mwclient/pull/19). 56 | * [2013-05-13] [@danmichaelo](https://github.com/danmichaelo): 57 | Support argument `action` with `logevents()`. 58 | [241ed37](https://github.com/danmichaelo/mwclient/commit/241ed37), 59 | [#19](https://github.com/mwclient/mwclient/pull/19). 60 | * [2013-05-13] [@danmichaelo](https://github.com/danmichaelo): 61 | Support argument `page` with `parse()`. 62 | [223aa0](https://github.com/danmichaelo/mwclient/commit/223aa0), 63 | [#19](https://github.com/mwclient/mwclient/pull/19). 64 | * [2013-11-14] [@kyv](https://github.com/kyv): 65 | Allow setting HTTP `Authorization` header. 66 | [HTTP headers](http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.8). 67 | [72fc49a](https://github.com/kyv/mwclient/commit/72fc49a). 68 | * [2013-11-15] [@kyv](https://github.com/kyv): 69 | Add support for the `ask` API action 70 | [provided by Semantic MediaWiki](http://semantic-mediawiki.org/wiki/Ask_API). 71 | [0a16afc](https://github.com/kyv/mwclient/commit/0a16afc). 72 | * [2014-05-02] [@danmichaelo](https://github.com/danmichaelo): 73 | Quickfix for [#38](https://github.com/mwclient/mwclient/issues/38). 
74 | [98b850b](https://github.com/mwclient/mwclient/commit/98b850b). 75 | * [2014-06-13] [@tuffnatty](https://github.com/tuffnatty): 76 | Fix updating of Page.last_rev_time upon save(). 77 | [d0cc7db](https://github.com/mwclient/mwclient/commit/d0cc7db), 78 | [#41](https://github.com/mwclient/mwclient/issues/41). 79 | * [2014-06-13] [@jimt](https://github.com/jimt), [@danmichaelo](https://github.com/danmichaelo): 80 | Support more arguments to `list=allusers`. 81 | [7cb4383](https://github.com/mwclient/mwclient/commit/7cb4383), 82 | [#8](https://github.com/mwclient/mwclient/issues/8). 83 | * [2014-08-18] [@danmichaelo](https://github.com/danmichaelo): 84 | Replace http.py with the Requests library. 85 | [593cb44](https://github.com/mwclient/mwclient/commit/593cb44), 86 | [#45](https://github.com/mwclient/mwclient/issues/45). 87 | * [2014-08-18] [@jaloren](https://github.com/jaloren), [@danmichaelo](https://github.com/danmichaelo): 88 | Don't crash if edit response does not contain timestamp. 89 | [bd7bc3b](https://github.com/mwclient/mwclient/commit/bd7bc3b), 90 | [0ef9a17](https://github.com/mwclient/mwclient/commit/0ef9a17), 91 | [#57](https://github.com/mwclient/mwclient/issues/57). 92 | * [2014-08-31] [@danmichaelo](https://github.com/danmichaelo): 93 | Retry on internal_api_error_DBQueryError. 94 | [d0ce831](https://github.com/mwclient/mwclient/commit/d0ce831). 95 | * [2014-09-22] [@danmichaelo](https://github.com/danmichaelo): 96 | Rename `Page.edit()` to `Page.text()`. Note that `text` is now a required 97 | parameter to `Page.save()`. 98 | [61155f1](https://github.com/mwclient/mwclient/commit/61155f1), 99 | [#51](https://github.com/mwclient/mwclient/issues/51). 100 | * [2014-09-27] [@danmichaelo](https://github.com/danmichaelo): 101 | Add `expandtemplates` argument to `Page.text()` and deprecate `Page.get_expanded()` 102 | [57df5f4](https://github.com/mwclient/mwclient/commit/57df5f4). 
103 | 104 | ## Changes in version 0.6.5 105 | Mwclient 0.6.5 was released on 6 May 2011. 106 | * [2011-02-16] Fix for upload by URL. 107 | [7ceb14b](https://github.com/mwclient/mwclient/commit/7ceb14b). 108 | * [2011-05-06] Explicitly convert the `Content-Length` header to `str`, 109 | avoiding a `TypeError` on some versions of Python. 110 | [4a829bc](https://github.com/mwclient/mwclient/commit/4a829bc), 111 | [2ca1fbd](https://github.com/mwclient/mwclient/commit/2ca1fbd). 112 | * [2011-05-06] Handle `readapidenied` error in site init. 113 | [c513396](https://github.com/mwclient/mwclient/commit/c513396). 114 | * [2011-05-06] Fix version parsing for almost any sane version string. 115 | [9f5339f](https://github.com/mwclient/mwclient/commit/9f5339f). 116 | 117 | ## Changes in version 0.6.4 118 | Mwclient 0.6.4 was released on 7 April 2010. 119 | * [2009-08-27] Added support for upload API. 120 | [56eeb9b](https://github.com/mwclient/mwclient/commit/56eeb9b), 121 | [0d7caab](https://github.com/mwclient/mwclient/commit/0d7caab) (see also 122 | [610411a](https://github.com/mwclient/mwclient/commit/610411a)). 123 | * [2009-11-02] Added `prop=duplicatefiles`. 124 | [241e5d6](https://github.com/mwclient/mwclient/commit/241e5d6). 125 | * [2009-11-02] Properly fix detection of alpha versions. 126 | [241e5d6](https://github.com/mwclient/mwclient/commit/241e5d6). 127 | * [2009-11-14] Added support for built-in JSON library. 128 | [73e9cd6](https://github.com/mwclient/mwclient/commit/73e9cd6). 129 | * [2009-11-15] Handle badtoken once. 130 | [4b384e1](https://github.com/mwclient/mwclient/commit/4b384e1). 131 | * [2010-02-23] Fix module conflict with simplejson-1.x 132 | by inserting mwclient path at the beginning of `sys.path` 133 | instead of the end. 134 | [cd37ef0](https://github.com/mwclient/mwclient/commit/cd37ef0). 135 | * [2010-02-23] Fix revision iteration. 
136 | [09b68e9](https://github.com/mwclient/mwclient/commit/09b68e9), 137 | [2ad32f1](https://github.com/mwclient/mwclient/commit/2ad32f1), 138 | [afdd5e8](https://github.com/mwclient/mwclient/commit/afdd5e8), 139 | [993b346](https://github.com/mwclient/mwclient/commit/993b346), 140 | [#3](https://github.com/mwclient/mwclient/issues/3). 141 | * [2010-04-07] Supply token on login if necessary. 142 | [3731de5](https://github.com/mwclient/mwclient/commit/3731de5). 143 | 144 | ## Changes in version 0.6.3 145 | Mwclient 0.6.3 was released on 16 July 2009. 146 | * Added domain parameter to login 147 | * Applied edit fix to `page_nowriteapi` 148 | * Allow arbitrary data to be passed to `page.save` 149 | * Fix mwclient on WMF wikis 150 | 151 | ## Changes in version 0.6.2 152 | Mwclient 0.6.2 was released on 2 May 2009. 153 | * Compatibility fixes for MediaWiki 1.13 154 | * Download fix for images 155 | * Full support for editing pages via write API 156 | and split of compatibility to another file. 157 | * Added `expandtemplates` API call 158 | * Added and fixed moving via API 159 | * Raise an `ApiDisabledError` if the API is disabled 160 | * Added support for HTTPS 161 | * Fixed email code 162 | * Mark edits as bots by default. 163 | * Added `action=parse`. Modified patch by Brian Mingus. 164 | * Improved general HTTP and upload handling. 165 | 166 | ## Changes in version 0.6.1 167 | Mwclient 0.6.1 was released in May 2008. 168 | No release notes were kept for this version. 169 | 170 | ## Changes in version 0.6.0 171 | Mwclient 0.6.0 was released in February 2008. 172 | This was the first official release via Sourceforge. 173 | This version removed some Pywikipedia influences added in 0.4. 174 | 175 | ## Changes in version 0.5 176 | Mwclient 0.5 was an architectural redesign 177 | which made the library easy to extend 178 | and added proper support for continuations. 
class List(object):
    """Base class for lazy iteration over API response content.

    The content is loaded in chunks: a further chunk is requested when
    the current one is exhausted, for as long as the response hints at
    continuing content.
    """

    def __init__(self, site, list_name, prefix,
                 limit=None, return_values=None, max_items=None,
                 *args, **kwargs):
        """Prepare the query; nothing is requested until iteration starts.

        Args:
            site: Object providing ``get()`` and ``api_limit``.
            list_name (str): Name of the API module to query; also the
                default key looked up in the ``query`` part of a response.
            prefix (str): Parameter prefix used for the ``limit`` argument.
            limit: Chunk size; defaults to ``site.api_limit``.
            return_values: ``None`` to yield whole items, a single key to
                yield one value per item, or a tuple of keys to yield
                tuples of values.
            max_items (int): Stop after this many items (``None``: no cap).
            *args: Extra ``(key, value)`` pairs merged into the query.
            **kwargs: Extra query parameters.
        """
        # NOTE: Fix limit
        self.site = site
        self.list_name = list_name
        self.generator = 'list'
        self.prefix = prefix

        kwargs.update(args)
        self.args = kwargs

        if limit is None:
            limit = site.api_limit
        self.args[self.prefix + 'limit'] = text_type(limit)

        self.count = 0
        self.max_items = max_items

        # Start with an exhausted iterator so that the first __next__()
        # call falls through to load_chunk().
        self._iter = iter(())

        self.last = False
        self.result_member = list_name
        self.return_values = return_values

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next item, loading a further chunk when needed.

        Raises:
            StopIteration: when `max_items` has been reached or no more
                data is available.
        """
        if self.max_items is not None and self.count >= self.max_items:
            raise StopIteration
        try:
            item = next(self._iter)
        except StopIteration:
            if self.last:
                raise
            self.load_chunk()
            item = next(self._iter)

        self.count += 1
        if 'timestamp' in item:
            item['timestamp'] = parse_timestamp(item['timestamp'])

        # GeneratorList post-processes the raw item itself.
        if isinstance(self, GeneratorList):
            return item
        if isinstance(self.return_values, tuple):
            return tuple(item[i] for i in self.return_values)
        if self.return_values is not None:
            return item[self.return_values]
        return item

    def next(self, *args, **kwargs):
        """ For Python 2.x support """
        return self.__next__(*args, **kwargs)

    def load_chunk(self):
        """Query a new chunk of data

        If the query is empty, `raise StopIteration`.

        Else, update the iterator accordingly.

        If 'continue' is in the response, it is added to `self.args`
        (new style continuation, added in MediaWiki 1.21).

        If not, but 'query-continue' is in the response, query its
        item called `self.list_name` and add this to `self.args` (old
        style continuation).

        Else, set `self.last` to True.
        """
        data = self.site.get(
            'query', (self.generator, self.list_name),
            *[(text_type(k), v) for k, v in six.iteritems(self.args)]
        )
        if not data:
            # Non existent page
            raise StopIteration
        self.set_iter(data)

        if data.get('continue'):
            # New style continuation, added in MediaWiki 1.21
            self.args.update(data['continue'])

        elif self.list_name in data.get('query-continue', ()):
            # Old style continuation
            self.args.update(data['query-continue'][self.list_name])

        else:
            self.last = True

    def set_iter(self, data):
        """Set `self._iter` to the API response `data`."""
        query = data['query']
        if self.result_member not in query:
            self._iter = iter(())
        elif isinstance(query[self.result_member], list):
            self._iter = iter(query[self.result_member])
        else:
            # Otherwise the member is treated as a mapping; iterate its values.
            self._iter = six.itervalues(query[self.result_member])

    def __repr__(self):
        # The format string must contain both placeholders: formatting an
        # empty string with two arguments raises TypeError.
        return "<List object '%s' for %s>" % (self.list_name, self.site)

    @staticmethod
    def generate_kwargs(_prefix, *args, **kwargs):
        """Yield ``(_prefix + key, value)`` for every usable argument.

        Arguments whose value is ``None`` or ``False`` are skipped.
        """
        kwargs.update(args)
        for key, value in six.iteritems(kwargs):
            if value is not None and value is not False:
                yield _prefix + key, value

    @staticmethod
    def get_prefix(prefix, generator=False):
        """Return `prefix`, with a leading ``'g'`` when `generator` is set."""
        return ('g' if generator else '') + prefix

    @staticmethod
    def get_list(generator=False):
        """Return the class to use: GeneratorList if `generator`, else List."""
        return GeneratorList if generator else List


class NestedList(List):
    """List whose items sit one level deeper, under `nested_param`."""

    def __init__(self, nested_param, *args, **kwargs):
        super(NestedList, self).__init__(*args, **kwargs)
        self.nested_param = nested_param

    def set_iter(self, data):
        """Iterate over ``data['query'][result_member][nested_param]``."""
        self._iter = iter(data['query'][self.result_member][self.nested_param])
class GeneratorList(List):
    """Lazy-loaded list of Page, Image or Category objects

    While the standard List class yields raw response data
    (optionally filtered based on the value of List.return_values),
    this subclass turns the data into Page, Image or Category objects.
    """

    def __init__(self, site, list_name, prefix, *args, **kwargs):
        super(GeneratorList, self).__init__(site, list_name, prefix,
                                            *args, **kwargs)

        # The base class stored the limit under '<prefix>limit'; move it
        # to the 'g'-prefixed key used in generator mode.
        self.args['g' + self.prefix + 'limit'] = self.args.pop(self.prefix + 'limit')
        self.generator = 'generator'

        self.args['prop'] = 'info|imageinfo'
        self.args['inprop'] = 'protection'

        self.result_member = 'pages'

        self.page_class = mwclient.page.Page

    def __next__(self):
        """Return the next item wrapped in a class chosen by its ``ns``.

        ``ns`` 14 gives a Category, 6 an Image, anything else a Page.
        """
        info = super(GeneratorList, self).__next__()
        if info['ns'] == 14:
            return Category(self.site, u'', info)
        if info['ns'] == 6:
            return mwclient.image.Image(self.site, u'', info)
        return mwclient.page.Page(self.site, u'', info)

    def load_chunk(self):
        # Put this here so that the constructor does not fail
        # on uninitialized sites
        self.args['iiprop'] = 'timestamp|user|comment|url|size|sha1|metadata|archivename'
        return super(GeneratorList, self).load_chunk()


class Category(mwclient.page.Page, GeneratorList):
    """A category page that can also be iterated to list its members."""

    def __init__(self, site, name, info=None, namespace=None):
        mwclient.page.Page.__init__(self, site, name, info)
        kwargs = {'gcmtitle': self.name}
        if namespace:
            kwargs['gcmnamespace'] = namespace
        GeneratorList.__init__(self, site, 'categorymembers', 'cm', **kwargs)

    def __repr__(self):
        name = self.name
        if not isinstance(name, str):
            # Python 2 ``unicode``: __repr__ has to return a byte string there.
            name = name.encode('utf-8')
        # Both placeholders are required; an empty format string raises
        # TypeError as soon as the object is printed.
        return "<Category object '%s' for %s>" % (name, self.site)

    def members(self, prop='ids|title', namespace=None, sort='sortkey',
                dir='asc', start=None, end=None, generator=True):
        """Return a new listing of this category's members.

        Args:
            prop (str): Value for the ``prop`` parameter.
            namespace: Value for the ``namespace`` parameter.
            sort (str): Value for the ``sort`` parameter.
            dir (str): Value for the ``dir`` parameter.
            start: Value for the ``start`` parameter.
            end: Value for the ``end`` parameter.
            generator (bool): Return a GeneratorList if true, else a List.

        Arguments that are ``None`` or ``False`` are left out of the query.
        """
        prefix = self.get_prefix('cm', generator)
        kwargs = dict(self.generate_kwargs(prefix, prop=prop, namespace=namespace,
                                           sort=sort, dir=dir, start=start, end=end,
                                           title=self.name))
        return self.get_list(generator)(self.site, 'categorymembers', 'cm', **kwargs)
class PageList(GeneratorList):
    """Listing of all pages of one namespace, with lookup of pages by name."""

    def __init__(self, site, prefix=None, start=None, namespace=0, redirects='all', end=None):
        self.namespace = namespace

        # Forward only those optional filters that were actually given.
        optional = (('gapprefix', prefix), ('gapfrom', start), ('gapto', end))
        kwargs = dict((key, value) for key, value in optional if value)

        super(PageList, self).__init__(site, 'allpages', 'ap',
                                       gapnamespace=text_type(namespace),
                                       gapfilterredir=redirects,
                                       **kwargs)

    def __getitem__(self, name):
        return self.get(name, None)

    def get(self, name, info=()):
        """Return the page called `name` as an object.

        With a non-zero `self.namespace` the page name becomes
        {namespace}:{name}; otherwise the namespace is guessed from the
        name by `self.guess_namespace`.

        Returns:
            A Category, an Image or (by default) a Page, depending on
            the namespace.
        """
        if self.namespace == 0:
            full_page_name = name
            try:
                namespace = self.guess_namespace(name)
            except AttributeError:
                # `name` has no `startswith` attribute
                namespace = 0
        else:
            full_page_name = u"{namespace}:{name}".format(
                namespace=self.site.namespaces[self.namespace],
                name=name,
            )
            namespace = self.namespace

        if namespace == 14:
            cls = Category
        elif namespace == 6:
            cls = mwclient.image.Image
        else:
            cls = mwclient.page.Page

        return cls(self.site, full_page_name, info)

    def guess_namespace(self, name):
        """Guess the namespace id from a page name.

        The name is compared against each of the site's namespace names
        and, failing that, the matching entry of default_namespaces.

        Args:
            name (str): The pagename as a string (having `.startswith`)

        Returns:
            The id of the first matching namespace, or zero.
        """
        for ns in self.site.namespaces:
            if ns == 0:
                continue
            local_prefix = u'%s:' % self.site.namespaces[ns].replace(' ', '_')
            if name.startswith(local_prefix):
                return ns
            if ns in self.site.default_namespaces:
                default_prefix = u'%s:' % self.site.default_namespaces[ns].replace(' ', '_')
                if name.startswith(default_prefix):
                    return ns
        return 0


class PageProperty(List):
    """List of one property of a single page, queried by title."""

    def __init__(self, page, prop, prefix, *args, **kwargs):
        super(PageProperty, self).__init__(page.site, prop, prefix,
                                           titles=page.name,
                                           *args, **kwargs)
        self.page = page
        self.generator = 'prop'

    def set_iter(self, data):
        """Iterate over this page's entry in `data`; stop if it is absent."""
        candidates = six.itervalues(data['query']['pages'])
        own = next((entry for entry in candidates
                    if entry['title'] == self.page.name), None)
        if own is None:
            raise StopIteration
        self._iter = iter(own.get(self.list_name, ()))


class PagePropertyGenerator(GeneratorList):
    """Generator variant of a page property listing."""

    def __init__(self, page, prop, prefix, *args, **kwargs):
        super(PagePropertyGenerator, self).__init__(page.site, prop, prefix,
                                                    titles=page.name,
                                                    *args, **kwargs)
        self.page = page


class RevisionsIterator(PageProperty):
    """Page property listing for revisions."""

    def load_chunk(self):
        # When both are set, drop 'rvstart' and keep 'rvstartid'.
        if 'rvstartid' in self.args:
            self.args.pop('rvstart', None)
        return super(RevisionsIterator, self).load_chunk()
class TestPage(unittest.TestCase):
    """Tests for how ``Page`` interprets page info from a mocked site."""

    def setUp(self):
        pass

    @staticmethod
    def _page_from(site, title, page_id, info):
        """Make ``site.get`` return `info` under `page_id`, then build the Page."""
        site.get.return_value = {'query': {'pages': {page_id: info}}}
        return Page(site, title)

    @mock.patch('mwclient.client.Site')
    def test_api_call_on_page_init(self, mock_site):
        # Creating a Page must trigger exactly one site.get() call
        title = 'Some page'
        self._page_from(mock_site, title, '1', {})

        # ... and that call must carry the expected query parameters
        mock_site.get.assert_called_once_with('query', inprop='protection', titles=title, prop='info')

    @mock.patch('mwclient.client.Site')
    def test_nonexisting_page(self, mock_site):
        # A 'missing' marker in the response means page.exists is False
        page = self._page_from(mock_site, 'Some nonexisting page', '-1', {'missing': ''})

        assert page.exists is False

    @mock.patch('mwclient.client.Site')
    def test_existing_page(self, mock_site):
        # Without a 'missing' marker page.exists is True
        page = self._page_from(mock_site, 'Norge', '728', {})

        assert page.exists is True

    @mock.patch('mwclient.client.Site')
    def test_pageprops(self, mock_site):
        # Various page props must be read correctly from the response
        title = 'Some page'
        info = {
            'contentmodel': 'wikitext',
            'counter': '',
            'lastrevid': 13355471,
            'length': 58487,
            'ns': 0,
            'pageid': 728,
            'pagelanguage': 'nb',
            'protection': [],
            'title': title,
            'touched': '2014-09-14T21:11:52Z'
        }
        page = self._page_from(mock_site, title, '728', info)

        assert page.exists is True
        assert page.redirect is False
        assert page.revision == 13355471
        assert page.length == 58487
        assert page.namespace == 0
        assert page.name == title
        assert page.page_title == title

    @mock.patch('mwclient.client.Site')
    def test_protection_levels(self, mock_site):
        # Protection entries must be parsed and honoured by page.can()
        edit_protection = {
            'expiry': 'infinity',
            'level': 'autoconfirmed',
            'type': 'edit'
        }
        move_protection = {
            'expiry': 'infinity',
            'level': 'sysop',
            'type': 'move'
        }
        mock_site.get.return_value = {
            'query': {'pages': {'728': {
                'protection': [edit_protection, move_protection]
            }}}
        }
        mock_site.rights = ['read', 'edit', 'move']

        page = Page(mock_site, 'Some page')

        assert page.protection == {'edit': ('autoconfirmed', 'infinity'), 'move': ('sysop', 'infinity')}
        assert page.can('read') is True
        assert page.can('edit') is False  # 'autoconfirmed' right is missing
        assert page.can('move') is False  # 'editprotected' right is missing

        mock_site.rights = ['read', 'edit', 'move', 'autoconfirmed']

        assert page.can('edit') is True  # 'autoconfirmed' right is present
        assert page.can('move') is False  # 'editprotected' right is still missing

        mock_site.rights = ['read', 'edit', 'move', 'autoconfirmed', 'editprotected']

        assert page.can('edit') is True  # 'autoconfirmed' right is present
        assert page.can('move') is True  # 'sysop' level is met by 'editprotected'

    @mock.patch('mwclient.client.Site')
    def test_redirect(self, mock_site):
        # A 'redirect' marker in the response sets page.redirect
        title = 'Some redirect page'
        info = {
            "contentmodel": "wikitext",
            "counter": "",
            "lastrevid": 9342494,
            "length": 70,
            "ns": 0,
            "pageid": 796917,
            "pagelanguage": "nb",
            "protection": [],
            "redirect": "",
            "title": title,
            "touched": "2014-08-29T22:25:15Z"
        }
        page = self._page_from(mock_site, title, "796917", info)

        assert page.exists is True
        assert page.redirect is True

    @mock.patch('mwclient.client.Site')
    def test_captcha(self, mock_site):
        # A captcha challenge in the edit response ends in an EditError
        mock_site.blocked = False
        mock_site.rights = ['read', 'edit']

        page = self._page_from(mock_site, 'Norge', '728', {'protection': []})
        captcha = {
            'type': 'math',
            'mime': 'text/tex',
            'id': '509895952',
            'question': '36 + 4 = '
        }
        mock_site.post.return_value = {
            'edit': {'result': 'Failure', 'captcha': captcha}
        }

        # For now, mwclient will just raise an EditError.
        with pytest.raises(mwclient.errors.EditError):
            page.save('Some text')
class TestPageApiArgs(unittest.TestCase):
    """Tests for the API arguments ``Page`` sends and the errors it maps."""

    def setUp(self):
        title = 'Some page'
        self.page_text = 'Hello world'

        MockSite = mock.patch('mwclient.client.Site').start()
        self.site = MockSite()

        # Response consumed by Page.__init__
        self.site.get.return_value = {'query': {'pages': {'1': {'title': title}}}}
        self.site.rights = ['read']

        self.page = Page(self.site, title)

        # Response for any later GET, e.g. the revisions query behind text()
        self.site.get.return_value = {'query': {'pages': {'2': {
            'ns': 0, 'pageid': 2, 'revisions': [{'*': 'Hello world', 'timestamp': '2014-08-29T22:25:15Z'}], 'title': title
        }}}}

    def get_last_api_call_args(self, http_method='POST'):
        """Return the arguments of the last mocked GET or POST call as a dict.

        The first positional argument (the action) is dropped; remaining
        positional ``(key, value)`` pairs are merged into the keyword
        arguments.
        """
        if http_method == 'GET':
            args, kwargs = self.site.get.call_args
        else:
            args, kwargs = self.site.post.call_args
        kwargs.update(args[1:])
        return kwargs

    def tearDown(self):
        mock.patch.stopall()

    def test_get_page_text(self):
        # Check that page.text() works, and that a correct API call is made
        text = self.page.text()
        args = self.get_last_api_call_args(http_method='GET')

        assert text == self.page_text
        assert args == {
            'prop': 'revisions',
            'rvdir': 'older',
            'titles': self.page.page_title,
            'rvprop': 'content|timestamp',
            'rvlimit': '1'
        }

    def test_get_page_text_cached(self):
        # Check page.text() caching
        self.page.revisions = mock.Mock(return_value=iter([]))
        self.page.text()
        self.page.text()
        # When cache is hit, revisions is not, so call_count should be 1
        assert self.page.revisions.call_count == 1
        self.page.text(cache=False)
        # With cache explicitly disabled, we should hit revisions
        assert self.page.revisions.call_count == 2

    def test_get_section_text(self):
        # Check that the 'rvsection' parameter is sent to the API
        self.page.text(section=0)
        args = self.get_last_api_call_args(http_method='GET')

        assert args['rvsection'] == '0'

    def test_get_text_expanded(self):
        # Check that the 'rvexpandtemplates' parameter is sent to the API
        self.page.text(expandtemplates=True)
        args = self.get_last_api_call_args(http_method='GET')

        assert args['rvexpandtemplates'] == '1'

    def test_assertuser_true(self):
        # Check that assert=user is sent when force_login=True
        self.site.blocked = False
        self.site.rights = ['read', 'edit']
        self.site.logged_in = True
        self.site.force_login = True

        # Page.save() goes through site.post(), so that is the call whose
        # response has to be stubbed.
        self.site.post.return_value = {
            'edit': {'result': 'Ok'}
        }
        self.page.save('Some text')
        args = self.get_last_api_call_args()

        assert args['assert'] == 'user'

    def test_assertuser_false(self):
        # Check that assert=user is not sent when force_login=False
        self.site.blocked = False
        self.site.rights = ['read', 'edit']
        self.site.logged_in = False
        self.site.force_login = False

        # Page.save() goes through site.post(), so that is the call whose
        # response has to be stubbed.
        self.site.post.return_value = {
            'edit': {'result': 'Ok'}
        }
        self.page.save('Some text')
        args = self.get_last_api_call_args()

        assert 'assert' not in args

    def test_handle_edit_error_assertuserfailed(self):
        # Check that AssertUserFailedError is triggered
        api_error = APIError('assertuserfailed',
                             'Assertion that the user is logged in failed',
                             'See https://en.wikipedia.org/w/api.php for API usage')

        with pytest.raises(AssertUserFailedError):
            self.page.handle_edit_error(api_error, 'n/a')

    def test_handle_edit_error_protected(self):
        # Check that ProtectedPageError is triggered
        api_error = APIError('protectedpage',
                             'The "editprotected" right is required to edit this page',
                             'See https://en.wikipedia.org/w/api.php for API usage')

        with pytest.raises(ProtectedPageError) as pp_error:
            self.page.handle_edit_error(api_error, 'n/a')

        assert pp_error.value.code == 'protectedpage'
        assert str(pp_error.value) == 'The "editprotected" right is required to edit this page'
class Page(object):
    """A single wiki page, described by the info the API returns for it."""

    def __init__(self, site, name, info=None, extra_properties=None):
        """Load page metadata from `info` if given, else from the API.

        Args:
            site: Site object used for API calls.
            name: Page title, integer page id, or another object of exactly
                the same class, whose state is then copied.
            info (dict): Already-fetched page info; when empty or omitted
                a ``query`` request is made through ``site.get()``.
            extra_properties (dict): Maps extra ``prop`` names to iterables
                of additional positional arguments for ``site.get()``.
        """
        if type(name) is type(self):
            self.__dict__.update(name.__dict__)
            return
        self.site = site
        self.name = name
        self._textcache = {}

        if not info:
            if extra_properties:
                prop = 'info|' + '|'.join(six.iterkeys(extra_properties))
                extra_props = []
                for extra_prop in six.itervalues(extra_properties):
                    extra_props.extend(extra_prop)
            else:
                prop = 'info'
                extra_props = ()

            if type(name) is int:
                info = self.site.get('query', prop=prop, pageids=name,
                                     inprop='protection', *extra_props)
            else:
                info = self.site.get('query', prop=prop, titles=name,
                                     inprop='protection', *extra_props)
            # Use the first page entry of the response.
            info = six.next(six.itervalues(info['query']['pages']))
        self._info = info

        self.namespace = info.get('ns', 0)
        self.name = info.get('title', u'')
        if self.namespace:
            self.page_title = self.strip_namespace(self.name)
        else:
            self.page_title = self.name

        self.touched = parse_timestamp(info.get('touched'))
        self.revision = info.get('lastrevid', 0)
        self.exists = 'missing' not in info
        self.length = info.get('length')
        # Maps protection type -> (level, expiry)
        self.protection = {
            i['type']: (i['level'], i['expiry'])
            for i in info.get('protection', ())
            if i
        }
        self.redirect = 'redirect' in info
        self.pageid = info.get('pageid', None)
        self.contentmodel = info.get('contentmodel', None)
        self.pagelanguage = info.get('pagelanguage', None)
        self.restrictiontypes = info.get('restrictiontypes', None)

        self.last_rev_time = None
        self.edit_time = None

    def redirects_to(self):
        """ Returns the redirect target page, or None if the page is not a redirect page."""
        info = self.site.get('query', prop='pageprops', titles=self.name, redirects='')['query']
        for page in info.get('redirects', ()):
            if page['from'] == self.name:
                return Page(self.site, page['to'])
        return None

    def resolve_redirect(self):
        """ Returns the redirect target page, or the current page if it's not a redirect page."""
        target_page = self.redirects_to()
        return self if target_page is None else target_page

    def __repr__(self):
        name = self.name
        if not isinstance(name, str):
            # Python 2 ``unicode``: __repr__ has to return a byte string there.
            name = name.encode('utf-8')
        # Both placeholders are required; an empty format string raises
        # TypeError as soon as the object is printed.
        return "<Page object '%s' for %s>" % (name, self.site)

    def __unicode__(self):
        return self.name

    @staticmethod
    def strip_namespace(title):
        """Return `title` without its namespace part.

        A leading colon is dropped, then everything up to and including
        the first remaining colon. An empty title is returned unchanged.
        """
        if title.startswith(':'):
            title = title[1:]
        return title[title.find(':') + 1:]

    @staticmethod
    def normalize_title(title):
        """Return `title` stripped, without a leading colon, with its first
        character upper-cased and spaces replaced by underscores.

        An empty title yields an empty string.
        """
        # TODO: Make site dependent
        title = title.strip()
        if title.startswith(':'):
            title = title[1:]
        # Slicing instead of indexing keeps an empty title from raising.
        title = title[:1].upper() + title[1:]
        return title.replace(' ', '_')
108 | 109 | Example: 110 | >>> page.can('edit') 111 | True 112 | 113 | """ 114 | level = self.protection.get(action, (action, ))[0] 115 | if level == 'sysop': 116 | level = 'editprotected' 117 | 118 | return level in self.site.rights 119 | 120 | def get_token(self, type, force=False): 121 | return self.site.get_token(type, force, title=self.name) 122 | 123 | def edit(self, *args, **kwargs): 124 | """Deprecated. Use page.text() instead""" 125 | warnings.warn("page.edit() was deprecated in mwclient 0.7.0 " 126 | "and will be removed in 0.9.0, please use page.text() instead.", 127 | category=DeprecationWarning, stacklevel=2) 128 | return self.text(*args, **kwargs) 129 | 130 | def text(self, section=None, expandtemplates=False, cache=True): 131 | """Get the current wikitext of the page, or of a specific section. 132 | 133 | If the page does not exist, an empty string is returned. By 134 | default, results will be cached and if you call text() again 135 | with the same section and expandtemplates the result will come 136 | from the cache. The cache is stored on the instance, so it 137 | lives as long as the instance does. 
138 | 139 | Args: 140 | section (int): numbered section or `None` to get the whole page (default: `None`) 141 | expandtemplates (bool): set to `True` to expand templates (default: `False`) 142 | cache (bool): set to `False` to disable caching (default: `True`) 143 | """ 144 | 145 | if not self.can('read'): 146 | raise mwclient.errors.InsufficientPermission(self) 147 | if not self.exists: 148 | return u'' 149 | if section is not None: 150 | section = text_type(section) 151 | 152 | key = hash((section, expandtemplates)) 153 | if cache and key in self._textcache: 154 | return self._textcache[key] 155 | 156 | revs = self.revisions(prop='content|timestamp', limit=1, section=section, 157 | expandtemplates=expandtemplates) 158 | try: 159 | rev = next(revs) 160 | text = rev['*'] 161 | self.last_rev_time = rev['timestamp'] 162 | except StopIteration: 163 | text = u'' 164 | self.last_rev_time = None 165 | if not expandtemplates: 166 | self.edit_time = time.gmtime() 167 | 168 | if cache: 169 | self._textcache[key] = text 170 | return text 171 | 172 | def save(self, text, summary=u'', minor=False, bot=True, section=None, **kwargs): 173 | """Update the text of a section or the whole page by performing an edit operation. 
174 | """ 175 | if not self.site.logged_in and self.site.force_login: 176 | raise mwclient.errors.AssertUserFailedError() 177 | if self.site.blocked: 178 | raise mwclient.errors.UserBlocked(self.site.blocked) 179 | if not self.can('edit'): 180 | raise mwclient.errors.ProtectedPageError(self) 181 | 182 | if not self.site.writeapi: 183 | raise mwclient.errors.NoWriteApi(self) 184 | 185 | data = {} 186 | if minor: 187 | data['minor'] = '1' 188 | if not minor: 189 | data['notminor'] = '1' 190 | if self.last_rev_time: 191 | data['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', self.last_rev_time) 192 | if self.edit_time: 193 | data['starttimestamp'] = time.strftime('%Y%m%d%H%M%S', self.edit_time) 194 | if bot: 195 | data['bot'] = '1' 196 | if section: 197 | data['section'] = section 198 | 199 | data.update(kwargs) 200 | 201 | if self.site.force_login: 202 | data['assert'] = 'user' 203 | 204 | def do_edit(): 205 | result = self.site.post('edit', title=self.name, text=text, 206 | summary=summary, token=self.get_token('edit'), 207 | **data) 208 | if result['edit'].get('result').lower() == 'failure': 209 | raise mwclient.errors.EditError(self, result['edit']) 210 | return result 211 | try: 212 | result = do_edit() 213 | except mwclient.errors.APIError as e: 214 | if e.code == 'badtoken': 215 | # Retry, but only once to avoid an infinite loop 216 | self.get_token('edit', force=True) 217 | try: 218 | result = do_edit() 219 | except mwclient.errors.APIError as e: 220 | self.handle_edit_error(e, summary) 221 | else: 222 | self.handle_edit_error(e, summary) 223 | 224 | # 'newtimestamp' is not included if no change was made 225 | if 'newtimestamp' in result['edit'].keys(): 226 | self.last_rev_time = parse_timestamp(result['edit'].get('newtimestamp')) 227 | 228 | # clear the page text cache 229 | self._textcache = {} 230 | return result['edit'] 231 | 232 | def handle_edit_error(self, e, summary): 233 | if e.code == 'editconflict': 234 | raise mwclient.errors.EditError(self, 
summary, e.info) 235 | elif e.code in {'protectedtitle', 'cantcreate', 'cantcreate-anon', 236 | 'noimageredirect-anon', 'noimageredirect', 'noedit-anon', 237 | 'noedit', 'protectedpage', 'cascadeprotected', 238 | 'customcssjsprotected', 239 | 'protectednamespace-interface', 'protectednamespace'}: 240 | raise mwclient.errors.ProtectedPageError(self, e.code, e.info) 241 | elif e.code == 'assertuserfailed': 242 | raise mwclient.errors.AssertUserFailedError() 243 | else: 244 | raise e 245 | 246 | def move(self, new_title, reason='', move_talk=True, no_redirect=False): 247 | """Move (rename) page to new_title. 248 | 249 | If user account is an administrator, specify no_redirect as True to not 250 | leave a redirect. 251 | 252 | If user does not have permission to move page, an InsufficientPermission 253 | exception is raised. 254 | 255 | """ 256 | if not self.can('move'): 257 | raise mwclient.errors.InsufficientPermission(self) 258 | 259 | if not self.site.writeapi: 260 | raise mwclient.errors.NoWriteApi(self) 261 | 262 | data = {} 263 | if move_talk: 264 | data['movetalk'] = '1' 265 | if no_redirect: 266 | data['noredirect'] = '1' 267 | result = self.site.post('move', ('from', self.name), to=new_title, 268 | token=self.get_token('move'), reason=reason, **data) 269 | return result['move'] 270 | 271 | def delete(self, reason='', watch=False, unwatch=False, oldimage=False): 272 | """Delete page. 273 | 274 | If user does not have permission to delete page, an InsufficientPermission 275 | exception is raised. 
def purge(self):
    """Ask the server to purge its cached copy of this page.

    Purging re-renders templates and other dynamic content. The request
    is sent as a POST to the ``purge`` API action for this page's name;
    the API response is discarded.
    """
    self.site.post('purge', titles=self.name)

# def watch: requires 1.14

# Properties
def backlinks(self, namespace=None, filterredir='all', redirect=False,
              limit=None, generator=True):
    """List pages linking to this page, similar to Special:Whatlinkshere.

    API doc: https://www.mediawiki.org/wiki/API:Backlinks

    Args:
        namespace: Forwarded to ``List.generate_kwargs`` as ``namespace``.
        filterredir: Forwarded to ``List.generate_kwargs`` as
            ``filterredir`` (default ``'all'``).
        redirect (bool): When true, the ``<prefix>redirect`` parameter is
            set to ``'1'``.
        limit: Forwarded to the listing object as ``limit``.
        generator (bool): Selects both the parameter prefix and the
            listing class (see ``List.get_prefix`` / ``List.get_list``).

    Returns:
        The listing object built by the class ``List.get_list(generator)``
        returns.
    """
    listing_cls = mwclient.listing.List
    prefix = listing_cls.get_prefix('bl', generator)
    params = dict(listing_cls.generate_kwargs(
        prefix, namespace=namespace, filterredir=filterredir))
    if redirect:
        params[prefix + 'redirect'] = '1'
    params[prefix + 'title'] = self.name

    make_listing = listing_cls.get_list(generator)
    return make_listing(self.site, 'backlinks', 'bl', limit=limit,
                        return_values='title', **params)

def categories(self, generator=True):
    """List categories used on this page.

    API doc: https://www.mediawiki.org/wiki/API:Categories

    Args:
        generator (bool): When true a ``PagePropertyGenerator`` is
            returned; otherwise a ``PageProperty`` yielding titles.
    """
    if not generator:
        # TODO: return sortkey if wanted
        return mwclient.listing.PageProperty(self, 'categories', 'cl',
                                             return_values='title')
    return mwclient.listing.PagePropertyGenerator(self, 'categories', 'cl')

def embeddedin(self, namespace=None, filterredir='all', limit=None, generator=True):
    """List pages that transclude this page.

    API doc: https://www.mediawiki.org/wiki/API:Embeddedin

    Args:
        namespace (int): Restricts search to a given namespace (Default: None)
        filterredir (str): How to filter redirects, either 'all' (default),
            'redirects' or 'nonredirects'.
        limit (int): Maximum amount of pages to return per request
        generator (bool): Use generator

    Returns:
        mwclient.listing.List: Page iterator
    """
    listing_cls = mwclient.listing.List
    prefix = listing_cls.get_prefix('ei', generator)
    params = dict(listing_cls.generate_kwargs(
        prefix, namespace=namespace, filterredir=filterredir))
    params[prefix + 'title'] = self.name

    make_listing = listing_cls.get_list(generator)
    return make_listing(self.site, 'embeddedin', 'ei', limit=limit,
                        return_values='title', **params)

def extlinks(self):
    """List external links found on this page.

    API doc: https://www.mediawiki.org/wiki/API:Extlinks
    """
    property_cls = mwclient.listing.PageProperty
    return property_cls(self, 'extlinks', 'el', return_values='*')

def images(self, generator=True):
    """List files/images embedded in this page.

    API doc: https://www.mediawiki.org/wiki/API:Images

    Args:
        generator (bool): When true a ``PagePropertyGenerator`` is
            returned; otherwise a ``PageProperty`` yielding titles.
    """
    if not generator:
        return mwclient.listing.PageProperty(self, 'images', '',
                                             return_values='title')
    return mwclient.listing.PagePropertyGenerator(self, 'images', '')

def iwlinks(self):
    """List interwiki links found on this page.

    API doc: https://www.mediawiki.org/wiki/API:Iwlinks

    Each item is built from the ``prefix`` and ``*`` fields of the result.
    """
    fields = ('prefix', '*')
    return mwclient.listing.PageProperty(self, 'iwlinks', 'iw',
                                         return_values=fields)

def langlinks(self, **kwargs):
    """List interlanguage links found on this page.

    API doc: https://www.mediawiki.org/wiki/API:Langlinks

    Any keyword arguments are forwarded unchanged to ``PageProperty``.
    Each item is built from the ``lang`` and ``*`` fields of the result.
    """
    fields = ('lang', '*')
    return mwclient.listing.PageProperty(self, 'langlinks', 'll',
                                         return_values=fields, **kwargs)

def links(self, namespace=None, generator=True, redirects=False):
    """List links from this page to other pages.

    API doc: https://www.mediawiki.org/wiki/API:Links

    Args:
        namespace: Forwarded to ``List.generate_kwargs`` as ``namespace``.
        generator (bool): When true a ``PagePropertyGenerator`` is
            returned; otherwise a ``PageProperty`` yielding titles.
        redirects (bool): When true, ``redirects='1'`` is added to the
            request parameters.
    """
    prefix = mwclient.listing.List.get_prefix('pl', generator)
    params = dict(mwclient.listing.List.generate_kwargs(prefix, namespace=namespace))
    if redirects:
        params['redirects'] = '1'

    if not generator:
        return mwclient.listing.PageProperty(self, 'links', 'pl',
                                             return_values='title', **params)
    return mwclient.listing.PagePropertyGenerator(self, 'links', 'pl', **params)

def revisions(self, startid=None, endid=None, start=None, end=None,
              dir='older', user=None, excludeuser=None, limit=50,
              prop='ids|timestamp|flags|comment|user',
              expandtemplates=False, section=None,
              diffto=None):
    """List revisions of this page.

    API doc: https://www.mediawiki.org/wiki/API:Revisions

    Args:
        startid (int): Revision ID to start listing from.
        endid (int): Revision ID to stop listing at.
        start (str): Timestamp to start listing from.
        end (str): Timestamp to end listing at.
        dir (str): Direction to list in: 'older' (default) or 'newer'.
        user (str): Only list revisions made by this user.
        excludeuser (str): Exclude revisions made by this user.
        limit (int): The maximum number of revisions to return per request.
        prop (str): Which properties to get for each revision,
            default: 'ids|timestamp|flags|comment|user'
        expandtemplates (bool): Expand templates in rvprop=content output
        section (int): If rvprop=content is set, only retrieve the
            contents of this section.
        diffto (str): Revision ID to diff each revision to. Use "prev",
            "next" and "cur" for the previous, next and current
            revision respectively.

    Returns:
        mwclient.listing.RevisionsIterator: Revision iterator
    """
    params = dict(mwclient.listing.List.generate_kwargs(
        'rv', startid=startid, endid=endid, start=start, end=end,
        user=user, excludeuser=excludeuser, diffto=diffto))
    params['rvdir'] = dir
    params['rvprop'] = prop
    if expandtemplates:
        params['rvexpandtemplates'] = '1'
    # 0 is a meaningful section number, so compare against None explicitly.
    if section is not None:
        params['rvsection'] = section

    return mwclient.listing.RevisionsIterator(self, 'revisions', 'rv',
                                              limit=limit, **params)

def templates(self, namespace=None, generator=True):
    """List templates used on this page.

    API doc: https://www.mediawiki.org/wiki/API:Templates

    Args:
        namespace: Forwarded to ``List.generate_kwargs`` as ``namespace``.
        generator (bool): When true a ``PagePropertyGenerator`` is
            returned; otherwise a ``PageProperty`` yielding titles.
    """
    prefix = mwclient.listing.List.get_prefix('tl', generator)
    params = dict(mwclient.listing.List.generate_kwargs(prefix, namespace=namespace))

    if not generator:
        return mwclient.listing.PageProperty(self, 'templates', prefix,
                                             return_values='title', **params)
    return mwclient.listing.PagePropertyGenerator(self, 'templates', prefix,
                                                  **params)
Consult the README") 24 | print(" (section 'Contributing') for advice on running tests.") 25 | print() 26 | 27 | logging.basicConfig(level=logging.DEBUG) 28 | 29 | 30 | class TestCase(unittest.TestCase): 31 | 32 | def metaResponse(self, **kwargs): 33 | tpl = '{"query":{"general":{"generator":"MediaWiki %(version)s"},"namespaces":{"-1":{"*":"Special","canonical":"Special","case":"first-letter","id":-1},"-2":{"*":"Media","canonical":"Media","case":"first-letter","id":-2},"0":{"*":"","case":"first-letter","content":"","id":0},"1":{"*":"Talk","canonical":"Talk","case":"first-letter","id":1,"subpages":""},"10":{"*":"Template","canonical":"Template","case":"first-letter","id":10,"subpages":""},"100":{"*":"Test namespace 1","canonical":"Test namespace 1","case":"first-letter","id":100,"subpages":""},"101":{"*":"Test namespace 1 talk","canonical":"Test namespace 1 talk","case":"first-letter","id":101,"subpages":""},"102":{"*":"Test namespace 2","canonical":"Test namespace 2","case":"first-letter","id":102,"subpages":""},"103":{"*":"Test namespace 2 talk","canonical":"Test namespace 2 talk","case":"first-letter","id":103,"subpages":""},"11":{"*":"Template talk","canonical":"Template talk","case":"first-letter","id":11,"subpages":""},"1198":{"*":"Translations","canonical":"Translations","case":"first-letter","id":1198,"subpages":""},"1199":{"*":"Translations talk","canonical":"Translations talk","case":"first-letter","id":1199,"subpages":""},"12":{"*":"Help","canonical":"Help","case":"first-letter","id":12,"subpages":""},"13":{"*":"Help talk","canonical":"Help talk","case":"first-letter","id":13,"subpages":""},"14":{"*":"Category","canonical":"Category","case":"first-letter","id":14},"15":{"*":"Category talk","canonical":"Category talk","case":"first-letter","id":15,"subpages":""},"2":{"*":"User","canonical":"User","case":"first-letter","id":2,"subpages":""},"2500":{"*":"VisualEditor","canonical":"VisualEditor","case":"first-letter","id":2500},"2501":{"*":"VisualEditor 
talk","canonical":"VisualEditor talk","case":"first-letter","id":2501},"2600":{"*":"Topic","canonical":"Topic","case":"first-letter","defaultcontentmodel":"flow-board","id":2600},"3":{"*":"User talk","canonical":"User talk","case":"first-letter","id":3,"subpages":""},"4":{"*":"Wikipedia","canonical":"Project","case":"first-letter","id":4,"subpages":""},"460":{"*":"Campaign","canonical":"Campaign","case":"case-sensitive","defaultcontentmodel":"Campaign","id":460},"461":{"*":"Campaign talk","canonical":"Campaign talk","case":"case-sensitive","id":461},"5":{"*":"Wikipedia talk","canonical":"Project talk","case":"first-letter","id":5,"subpages":""},"6":{"*":"File","canonical":"File","case":"first-letter","id":6},"7":{"*":"File talk","canonical":"File talk","case":"first-letter","id":7,"subpages":""},"710":{"*":"TimedText","canonical":"TimedText","case":"first-letter","id":710},"711":{"*":"TimedText talk","canonical":"TimedText talk","case":"first-letter","id":711},"8":{"*":"MediaWiki","canonical":"MediaWiki","case":"first-letter","id":8,"subpages":""},"828":{"*":"Module","canonical":"Module","case":"first-letter","id":828,"subpages":""},"829":{"*":"Module talk","canonical":"Module talk","case":"first-letter","id":829,"subpages":""},"866":{"*":"CNBanner","canonical":"CNBanner","case":"first-letter","id":866},"867":{"*":"CNBanner talk","canonical":"CNBanner talk","case":"first-letter","id":867,"subpages":""},"9":{"*":"MediaWiki talk","canonical":"MediaWiki talk","case":"first-letter","id":9,"subpages":""},"90":{"*":"Thread","canonical":"Thread","case":"first-letter","id":90},"91":{"*":"Thread talk","canonical":"Thread talk","case":"first-letter","id":91},"92":{"*":"Summary","canonical":"Summary","case":"first-letter","id":92},"93":{"*":"Summary talk","canonical":"Summary talk","case":"first-letter","id":93}},"userinfo":{"anon":"","groups":["*"],"id":0,"name":"127.0.0.1","rights": %(rights)s}}}' 34 | tpl = tpl % {'version': kwargs.get('version', '1.24wmf17'), 35 | 
def metaResponseAsJson(self, **kwargs):
    """Return ``self.metaResponse(**kwargs)`` serialized as a JSON string."""
    meta = self.metaResponse(**kwargs)
    return json.dumps(meta)

def httpShouldReturn(self, body=None, callback=None, scheme='https', host='test.wikipedia.org', path='/w/',
                     script='api', headers=None, status=200, method='GET'):
    """Register a mocked HTTP endpoint with the ``responses`` library.

    The URL is ``<scheme>://<host><path><script>.php``. When ``body`` is
    given a static JSON response is registered (with ``headers`` and
    ``status``); when it is None, ``callback`` is registered instead.
    Any ``method`` other than ``'GET'`` is registered as POST.
    """
    url = '{}://{}{}{}.php'.format(scheme, host, path, script)
    http_verb = responses.POST if method != 'GET' else responses.GET

    if body is not None:
        responses.add(http_verb, url, body=body, content_type='application/json',
                      headers=headers, status=status)
        return

    responses.add_callback(http_verb, url, callback=callback)

def stdSetup(self):
    """Build a ``Site`` against a mocked meta response, then clear the mocks.

    The registered responses are reset before returning so that the
    calling test starts without any registered responses.
    """
    self.httpShouldReturn(self.metaResponseAsJson())
    client = mwclient.Site('test.wikipedia.org')
    responses.reset()
    return client

def makePageResponse(self, title='Dummy.jpg', **kwargs):
    """Return a JSON string for a dummy single-page query response.

    The page is stored under the key ``"9"``; ``kwargs`` add to or
    override the default page fields.
    """
    page = {
        "contentmodel": "wikitext",
        "lastrevid": 112353797,
        "length": 389,
        "ns": 6,
        "pageid": 738154,
        "pagelanguage": "en",
        "protection": [],
        "title": title,
        "touched": "2014-09-10T20:37:25Z"
    }
    page.update(kwargs)

    return json.dumps({"query": {"pages": {"9": page}}})
@responses.activate
def test_https_as_default(self):
    # 'https' should be the default scheme

    self.httpShouldReturn(self.metaResponseAsJson(), scheme='https')

    mwclient.Site('test.wikipedia.org')

    assert len(responses.calls) == 1
    assert responses.calls[0].request.method == 'GET'

@responses.activate
def test_max_lag(self):
    # Client should wait and retry if lag exceeds max-lag

    def request_callback(request):
        # First call reports database lag; every later call succeeds.
        if len(responses.calls) == 0:
            return (200, {'x-database-lag': '0', 'retry-after': '0'}, '')
        else:
            return (200, {}, self.metaResponseAsJson())

    self.httpShouldReturn(callback=request_callback, scheme='https')

    mwclient.Site('test.wikipedia.org')

    assert len(responses.calls) == 2
    assert 'retry-after' in responses.calls[0].response.headers
    assert 'retry-after' not in responses.calls[1].response.headers

@responses.activate
def test_http_error(self):
    # Client should raise HTTPError

    self.httpShouldReturn('Uh oh', scheme='https', status=400)

    with pytest.raises(requests.exceptions.HTTPError):
        mwclient.Site('test.wikipedia.org')

@responses.activate
def test_force_http(self):
    # Setting http should work

    self.httpShouldReturn(self.metaResponseAsJson(), scheme='http')

    mwclient.Site(('http', 'test.wikipedia.org'))

    assert len(responses.calls) == 1

@responses.activate
def test_user_agent_is_sent(self):
    # User specified user agent should be sent to server

    self.httpShouldReturn(self.metaResponseAsJson())

    mwclient.Site('test.wikipedia.org', clients_useragent='MyFabulousClient')

    assert 'MyFabulousClient' in responses.calls[0].request.headers['user-agent']

@responses.activate
def test_custom_headers_are_sent(self):
    # Custom headers should be sent to the server

    self.httpShouldReturn(self.metaResponseAsJson())

    mwclient.Site('test.wikipedia.org', custom_headers={'X-Wikimedia-Debug': 'host=mw1099.eqiad.wmnet; log'})

    assert 'host=mw1099.eqiad.wmnet; log' in responses.calls[0].request.headers['X-Wikimedia-Debug']

@responses.activate
def test_basic_request(self):
    # Site initialization should query siteinfo and userinfo

    self.httpShouldReturn(self.metaResponseAsJson())

    mwclient.Site('test.wikipedia.org')

    assert 'action=query' in responses.calls[0].request.url
    assert 'meta=siteinfo%7Cuserinfo' in responses.calls[0].request.url

@responses.activate
def test_httpauth_defaults_to_basic_auth(self):
    # A (user, password) tuple should be turned into HTTP basic auth

    self.httpShouldReturn(self.metaResponseAsJson())

    site = mwclient.Site('test.wikipedia.org', httpauth=('me', 'verysecret'))

    assert isinstance(site.connection.auth, requests.auth.HTTPBasicAuth)

@responses.activate
def test_httpauth_raise_error_on_invalid_type(self):
    # Anything but a tuple/list, None or an AuthBase instance is rejected

    self.httpShouldReturn(self.metaResponseAsJson())

    with pytest.raises(RuntimeError):
        mwclient.Site('test.wikipedia.org', httpauth=1)

@responses.activate
def test_oauth(self):
    # Passing consumer/access credentials should select OAuth1

    self.httpShouldReturn(self.metaResponseAsJson())

    site = mwclient.Site('test.wikipedia.org',
                         consumer_token='a', consumer_secret='b',
                         access_token='c', access_secret='d')
    assert isinstance(site.connection.auth, OAuth1)

@responses.activate
def test_api_disabled(self):
    # Should raise APIDisabledError if API is not enabled

    self.httpShouldReturn('MediaWiki API is not enabled for this site.')

    with pytest.raises(mwclient.errors.APIDisabledError):
        mwclient.Site('test.wikipedia.org')

@responses.activate
def test_version(self):
    # Should parse the MediaWiki version number correctly

    self.httpShouldReturn(self.metaResponseAsJson(version='1.16'))

    site = mwclient.Site('test.wikipedia.org')

    assert site.initialized is True
    assert site.version == (1, 16)

@responses.activate
def test_min_version(self):
    # Should raise MediaWikiVersionError if API version is < 1.16

    self.httpShouldReturn(self.metaResponseAsJson(version='1.15'))

    with pytest.raises(mwclient.errors.MediaWikiVersionError):
        mwclient.Site('test.wikipedia.org')

@responses.activate
def test_private_wiki(self):
    # Should not raise error

    self.httpShouldReturn(json.dumps({
        'error': {
            'code': 'readapidenied',
            'info': 'You need read permission to use this module'
        }
    }))

    site = mwclient.Site('test.wikipedia.org')

    assert site.initialized is False

# ----- Use standard setup for rest

@responses.activate
def test_headers(self):
    # Content-type should be 'application/x-www-form-urlencoded' for POST requests

    site = self.stdSetup()

    self.httpShouldReturn('{}', method='POST')
    site.post('purge', title='Main Page')

    assert len(responses.calls) == 1
    assert 'content-type' in responses.calls[0].request.headers
    assert responses.calls[0].request.headers['content-type'] == 'application/x-www-form-urlencoded'

@responses.activate
def test_raw_index(self):
    # A raw index.php request should result in exactly one HTTP call

    site = self.stdSetup()

    self.httpShouldReturn('Some data', script='index')
    site.raw_index(action='purge', title='Main Page', http_method='GET')

    assert len(responses.calls) == 1

@responses.activate
def test_api_error_response(self):
    # Test that APIError is thrown on error response

    site = self.stdSetup()

    self.httpShouldReturn(json.dumps({
        'error': {
            'code': 'assertuserfailed',
            'info': 'Assertion that the user is logged in failed',
            '*': 'See https://en.wikipedia.org/w/api.php for API usage'
        }
    }), method='POST')
    with pytest.raises(mwclient.errors.APIError) as excinfo:
        site.api(action='edit', title='Wikipedia:Sandbox')

    assert excinfo.value.code == 'assertuserfailed'
    assert excinfo.value.info == 'Assertion that the user is logged in failed'
    assert len(responses.calls) == 1

@responses.activate
def test_smw_error_response(self):
    # Test that APIError is thrown on error response from SMW

    site = self.stdSetup()
    self.httpShouldReturn(json.dumps({
        'error': {
            'query': u'Certains « [[ » dans votre requête n’ont pas été clos par des « ]] » correspondants.'
        }
    }), method='GET')
    with pytest.raises(mwclient.errors.APIError) as excinfo:
        list(site.ask('test'))

    assert excinfo.value.code is None
    assert excinfo.value.info == u'Certains « [[ » dans votre requête n’ont pas été clos par des « ]] » correspondants.'
    assert len(responses.calls) == 1

@responses.activate
def test_repr(self):
    # repr() should show the host followed by the script path

    site = self.stdSetup()

    assert repr(site) == "<Site object 'test.wikipedia.org/w/'>"
class TestClientApiMethods(TestCase):
    """Tests for Site methods, with ``Site.api`` replaced by a mock."""

    def setUp(self):
        self.api = mock.patch('mwclient.client.Site.api').start()
        self.api.return_value = self.metaResponse()
        self.site = mwclient.Site('test.wikipedia.org')

    def tearDown(self):
        mock.patch.stopall()

    def test_revisions(self):
        # Site.revisions() should join the ids and yield one item per revision

        self.api.return_value = {
            'query': {'pages': {'1': {
                'pageid': 1,
                'title': 'Test page',
                'revisions': [{
                    'revid': 689697696,
                    'timestamp': '2015-11-08T21:52:46Z',
                    'comment': 'Test comment 1'
                }, {
                    'revid': 689816909,
                    'timestamp': '2015-11-09T16:09:28Z',
                    'comment': 'Test comment 2'
                }]
            }}}}

        revisions = list(self.site.revisions([689697696, 689816909], prop='content'))

        _, kwargs = self.api.call_args
        assert kwargs.get('revids') == '689697696|689816909'
        assert len(revisions) == 2
        assert revisions[0]['pageid'] == 1
        assert revisions[0]['pagetitle'] == 'Test page'
        assert revisions[0]['revid'] == 689697696
        assert revisions[0]['timestamp'] == time.strptime('2015-11-08T21:52:46Z', '%Y-%m-%dT%H:%M:%SZ')
        assert revisions[1]['revid'] == 689816909


class TestClientUploadArgs(TestCase):
    """Tests for Site.upload(), with ``Site.raw_call`` replaced by a mock."""

    def setUp(self):
        self.raw_call = mock.patch('mwclient.client.Site.raw_call').start()

    def configure(self, rights=None):
        """Create ``self.site`` and queue the responses an upload consumes.

        Args:
            rights (list): User rights reported by the mocked meta
                response. Defaults to ``['read', 'upload']``.
        """
        # Build the default per call instead of sharing one list object
        # between invocations through a mutable default argument.
        if rights is None:
            rights = ['read', 'upload']

        self.raw_call.side_effect = [self.metaResponseAsJson(rights=rights)]
        self.site = mwclient.Site('test.wikipedia.org')

        self.vars = {
            'fname': u'Some "ßeta" æøå.jpg',
            'comment': u'Some slightly complex comment\nπ ≈ 3, © Me.jpg',
            'token': u'abc+\\'
        }

        self.raw_call.side_effect = [

            # 1st response:
            self.makePageResponse(title='File:Test.jpg', imagerepository='local', imageinfo=[{
                "comment": "",
                "height": 1440,
                "metadata": [],
                "sha1": "69a764a9cf8307ea4130831a0aa0b9b7f9585726",
                "size": 123,
                "timestamp": "2013-12-22T07:11:07Z",
                "user": "TestUser",
                "width": 2160
            }]),

            # 2nd response:
            json.dumps({'query': {'tokens': {'csrftoken': self.vars['token']}}}),

            # 3rd response:
            json.dumps({
                "upload": {
                    "result": "Success",
                    "filename": self.vars['fname'],
                    "imageinfo": []
                }
            })
        ]

    def tearDown(self):
        mock.patch.stopall()

    def test_upload_args(self):
        # Test that methods are called, and arguments sent as expected
        self.configure()

        self.site.upload(file=StringIO('test'), filename=self.vars['fname'], comment=self.vars['comment'])

        args, _ = self.raw_call.call_args
        data = args[1]
        files = args[2]

        assert data.get('action') == 'upload'
        assert data.get('filename') == self.vars['fname']
        assert data.get('comment') == self.vars['comment']
        assert data.get('token') == self.vars['token']
        assert 'file' in files

    def test_upload_missing_filename(self):
        self.configure()

        with pytest.raises(TypeError):
            self.site.upload(file=StringIO('test'))

    def test_upload_ambigitious_args(self):
        self.configure()

        with pytest.raises(TypeError):
            self.site.upload(filename='Test', file=StringIO('test'), filekey='abc')

    def test_upload_missing_upload_permission(self):
        self.configure(rights=['read'])

        with pytest.raises(mwclient.errors.InsufficientPermission):
            self.site.upload(filename='Test', file=StringIO('test'))


class TestClientGetTokens(TestCase):
    """Tests for Site.get_token(), with ``Site.raw_call`` replaced by a mock."""

    def setUp(self):
        self.raw_call = mock.patch('mwclient.client.Site.raw_call').start()

    def configure(self, version='1.24'):
        """Create ``self.site`` against a meta response for ``version``."""
        self.raw_call.return_value = self.metaResponseAsJson(version=version)
        self.site = mwclient.Site('test.wikipedia.org')
        responses.reset()

    def tearDown(self):
        mock.patch.stopall()

    def test_token_new_system(self):
        # Test get_token for MW >= 1.24
        self.configure(version='1.24')

        self.raw_call.return_value = json.dumps({
            'query': {'tokens': {'csrftoken': 'sometoken'}}
        })
        self.site.get_token('edit')

        args, _ = self.raw_call.call_args
        data = args[1]

        assert 'intoken' not in data
        assert data.get('type') == 'csrf'
        assert 'csrf' in self.site.tokens
        assert self.site.tokens['csrf'] == 'sometoken'
        assert 'edit' not in self.site.tokens

    def test_token_old_system_without_specifying_title(self):
        # Test get_token for MW < 1.24
        self.configure(version='1.23')

        self.raw_call.return_value = self.makePageResponse(edittoken='sometoken', title='Test')
        self.site.get_token('edit')

        args, _ = self.raw_call.call_args
        data = args[1]

        assert 'type' not in data
        assert data.get('intoken') == 'edit'
        assert 'edit' in self.site.tokens
        assert self.site.tokens['edit'] == 'sometoken'
        assert 'csrf' not in self.site.tokens

    def test_token_old_system_with_specifying_title(self):
        # Test get_token for MW < 1.24
        self.configure(version='1.23')

        self.raw_call.return_value = self.makePageResponse(edittoken='sometoken', title='Some page')
        self.site.get_token('edit', title='Some page')

        assert self.site.tokens['edit'] == 'sometoken'

if __name__ == '__main__':
    unittest.main()
api_limit = 500

def __init__(self, host, path='/w/', ext='.php', pool=None, retry_timeout=30,
             max_retries=25, wait_callback=lambda *x: None, clients_useragent=None,
             max_lag=3, compress=True, force_login=True, do_init=True, httpauth=None,
             reqs=None, consumer_token=None, consumer_secret=None, access_token=None,
             access_secret=None, client_certificate=None, custom_headers=None):
    """Set up the connection and, unless ``do_init`` is false, fetch site info.

    Args:
        host: Host identifying the site, stored as ``self.host``.
        path (str): Script path, stored as ``self.path``.
        ext (str): Script extension, stored as ``self.ext``.
        pool: Existing connection object to use as ``self.connection``.
            When given, no session is created and ``httpauth``/OAuth,
            ``client_certificate``, ``clients_useragent`` and
            ``custom_headers`` are not applied to it.
        retry_timeout, max_retries, wait_callback: Passed to ``Sleepers``.
        clients_useragent (str): Prefix for the User-Agent header.
        max_lag: Stored as text in ``self.max_lag``.
        compress, force_login: Stored on the instance.
        do_init (bool): Call ``site_init()`` at the end of construction.
        httpauth: A ``(user, password)`` list/tuple (turned into HTTP
            basic auth), an ``AuthBase`` instance, or None.
        reqs (dict): Stored as ``self.requests`` (``{}`` when falsy).
        consumer_token, consumer_secret, access_token, access_secret:
            OAuth1 credentials; used in preference to ``httpauth``
            whenever ``consumer_token`` is not None.
        client_certificate: Assigned to the session's ``cert``.
        custom_headers (dict): Extra headers merged into the session.

    Raises:
        RuntimeError: ``httpauth`` has an unsupported type.
        errors.OAuthAuthorizationError: Initialization failed with the
            API error ``mwoauth-invalid-authorization``.
        errors.APIError: Initialization failed with any API error other
            than ``unknown_action`` / ``readapidenied``.
    """
    # Setup member variables
    self.host = host
    self.path = path
    self.ext = ext
    self.credentials = None
    self.compress = compress
    self.max_lag = text_type(max_lag)
    self.force_login = force_login
    self.requests = reqs or {}

    if consumer_token is not None:
        auth = OAuth1(consumer_token, consumer_secret, access_token, access_secret)
    elif isinstance(httpauth, (list, tuple)):
        auth = HTTPBasicAuth(*httpauth)
    elif httpauth is None or isinstance(httpauth, (AuthBase,)):
        auth = httpauth
    else:
        raise RuntimeError('Authentication is not a tuple or an instance of AuthBase')

    self.sleepers = Sleepers(max_retries, retry_timeout, wait_callback)

    # Site properties
    self.blocked = False  # Whether current user is blocked
    self.hasmsg = False  # Whether current user has new messages
    self.groups = []  # Groups current user belongs to
    self.rights = []  # Rights current user has
    self.tokens = {}  # Edit tokens of the current user
    self.version = None

    self.namespaces = self.default_namespaces
    self.writeapi = False

    # Setup connection
    if pool is None:
        self.connection = requests.Session()
        self.connection.auth = auth
        if client_certificate:
            self.connection.cert = client_certificate

        prefix = '{} - '.format(clients_useragent) if clients_useragent else ''
        self.connection.headers['User-Agent'] = (
            '{prefix}MwClient/{ver} ({url})'.format(
                prefix=prefix,
                ver=__ver__,
                url='https://github.com/mwclient/mwclient'
            )
        )
        if custom_headers:
            self.connection.headers.update(custom_headers)
    else:
        self.connection = pool

    # Page generators
    self.pages = listing.PageList(self)
    self.categories = listing.PageList(self, namespace=14)
    self.images = listing.PageList(self, namespace=6)

    # Compat page generators
    self.Pages = self.pages
    self.Categories = self.categories
    self.Images = self.images

    # Initialization status
    self.initialized = False

    if do_init:
        try:
            self.site_init()
        except errors.APIError as e:
            if e.args[0] == 'mwoauth-invalid-authorization':
                raise errors.OAuthAuthorizationError(e.code, e.info)

            # Private wiki, do init after login
            if e.args[0] not in {u'unknown_action', u'readapidenied'}:
                raise

def site_init(self):
    """Load site and user information from the API.

    On an already initialized site only the user information (name,
    groups, rights) is refreshed and the cached tokens are cleared.
    Otherwise site info and user info are fetched in a single request
    (without retrying on error), the MediaWiki version is parsed and
    checked to be at least 1.16, and ``self.initialized`` is set.
    """
    if self.initialized:
        info = self.get('query', meta='userinfo', uiprop='groups|rights')
        userinfo = info['query']['userinfo']
        self.username = userinfo['name']
        self.groups = userinfo.get('groups', [])
        self.rights = userinfo.get('rights', [])
        self.tokens = {}
        return

    meta = self.get('query', meta='siteinfo|userinfo',
                    siprop='general|namespaces', uiprop='groups|rights',
                    retry_on_error=False)

    # Extract site info
    self.site = meta['query']['general']
    self.namespaces = {
        namespace['id']: namespace.get('*', '')
        for namespace in six.itervalues(meta['query']['namespaces'])
    }
    self.writeapi = 'writeapi' in self.site

    self.version = self.version_tuple_from_generator(self.site['generator'])

    # Require MediaWiki version >= 1.16
    self.require(1, 16)

    # User info
    userinfo = meta['query']['userinfo']
    self.username = userinfo['name']
    self.groups = userinfo.get('groups', [])
    self.rights = userinfo.get('rights', [])
    self.initialized = True

@staticmethod
def version_tuple_from_generator(string, prefix='MediaWiki '):
    """Return a version tuple from a MediaWiki Generator string.

    Example:
        "MediaWiki 1.5.1" -> (1, 5, 1)
        "MediaWiki 1.24wmf17" -> (1, 24, 'wmf17')

    Args:
        string (str): The generator string reported by the site.
        prefix (str): The expected prefix of the string

    Returns:
        tuple: For each dot-separated component, its leading digits as
        an int followed, if present, by the remaining text as a string.

    Raises:
        errors.MediaWikiVersionError: The string does not start with
            ``prefix``, a component does not start with a digit, or
            fewer than two items could be extracted.
    """
    if not string.startswith(prefix):
        raise errors.MediaWikiVersionError('Unknown generator {}'.format(string))

    version = string[len(prefix):].split('.')

    def split_num(s):
        """Split the string on the first non-digit character.

        Returns:
            A tuple of the digit part as int and, if available,
            the rest of the string.
        """
        i = 0
        while i < len(s) and '0' <= s[i] <= '9':
            i += 1
        if i == 0:
            # No leading digits: int('') would otherwise escape as a bare
            # ValueError instead of the version error used elsewhere here.
            raise errors.MediaWikiVersionError('Unknown MediaWiki {}'
                                               .format('.'.join(version)))
        if s[i:]:
            return (int(s[:i]), s[i:], )
        return (int(s[:i]), )

    version_tuple = sum((split_num(s) for s in version), ())

    if len(version_tuple) < 2:
        raise errors.MediaWikiVersionError('Unknown MediaWiki {}'
                                           .format('.'.join(version)))

    return version_tuple

# Namespace names assigned to self.namespaces until site_init() replaces
# them with the names reported by the site.
default_namespaces = {
    0: u'', 1: u'Talk', 2: u'User', 3: u'User talk', 4: u'Project',
    5: u'Project talk', 6: u'Image', 7: u'Image talk', 8: u'MediaWiki',
    9: u'MediaWiki talk', 10: u'Template', 11: u'Template talk', 12: u'Help',
    13: u'Help talk', 14: u'Category', 15: u'Category talk',
    -1: u'Special', -2: u'Media'
}

def __repr__(self):
    """Return a short description built from the host and script path."""
    # The format string needs one placeholder per argument; an empty
    # format string makes the % operator raise TypeError.
    return "<Site object '%s%s'>" % (self.host, self.path)

def get(self, action, *args, **kwargs):
    """Perform a generic API call using GET.

    This is just a shorthand for calling api() with http_method='GET'.
    All arguments will be passed on.

    Returns:
        The raw response from the API call, as a dictionary.
    """
    return self.api(action, 'GET', *args, **kwargs)

def post(self, action, *args, **kwargs):
    """Perform a generic API call using POST.

    This is just a shorthand for calling api() with http_method='POST'.
    All arguments will be passed on.

    Returns:
        The raw response from the API call, as a dictionary.
    """
    return self.api(action, 'POST', *args, **kwargs)

def api(self, action, http_method='POST', *args, **kwargs):
    """Perform a generic API call and handle errors.

    All arguments will be passed on. For ``action='query'`` an empty
    ``continue`` parameter is added unless one was given, ``userinfo`` is
    appended to ``meta`` and ``blockinfo|hasmsg`` to ``uiprop``. The call
    is repeated until ``handle_api_result`` accepts the response.

    Example:
        To get coordinates from the GeoData MediaWiki extension at English Wikipedia:

        >>> site = Site('en.wikipedia.org')
        >>> result = site.api('query', prop='coordinates', titles='Oslo|Copenhagen')
        >>> for page in result['query']['pages'].values():
        ...     if 'coordinates' in page:
        ...         print('{} {} {}'.format(page['title'],
        ...             page['coordinates'][0]['lat'],
        ...             page['coordinates'][0]['lon']))
        Oslo 59.95 10.75
        Copenhagen 55.6761 12.5683

    Returns:
        The raw response from the API call, as a dictionary.
    """
    kwargs.update(args)

    if action == 'query':
        if 'continue' not in kwargs:
            kwargs['continue'] = ''
        if 'meta' in kwargs:
            kwargs['meta'] += '|userinfo'
        else:
            kwargs['meta'] = 'userinfo'
        if 'uiprop' in kwargs:
            kwargs['uiprop'] += '|blockinfo|hasmsg'
        else:
            kwargs['uiprop'] = 'blockinfo|hasmsg'

    sleeper = self.sleepers.make()

    while True:
        info = self.raw_api(action, http_method, **kwargs)
        if not info:
            info = {}
        if self.handle_api_result(info, sleeper=sleeper):
            return info
raise errors.APIError(info['error']['code'], 323 | info['error']['info'], info['error']['*']) 324 | raise errors.APIError(info['error']['code'], 325 | info['error']['info'], kwargs) 326 | return True 327 | 328 | @staticmethod 329 | def _query_string(*args, **kwargs): 330 | kwargs.update(args) 331 | qs1 = [(k, v) for k, v in six.iteritems(kwargs) if k not in {'wpEditToken', 'token'}] 332 | qs2 = [(k, v) for k, v in six.iteritems(kwargs) if k in {'wpEditToken', 'token'}] 333 | return OrderedDict(qs1 + qs2) 334 | 335 | def raw_call(self, script, data, files=None, retry_on_error=True, http_method='POST'): 336 | """ 337 | Perform a generic request and return the raw text. 338 | 339 | In the event of a network problem, or a HTTP response with status code 5XX, 340 | we'll wait and retry the configured number of times before giving up 341 | if `retry_on_error` is True. 342 | 343 | `requests.exceptions.HTTPError` is still raised directly for 344 | HTTP responses with status codes in the 4XX range, and invalid 345 | HTTP responses. 346 | 347 | Args: 348 | script (str): Script name, usually 'api'. 349 | data (dict): Post data 350 | files (dict): Files to upload 351 | retry_on_error (bool): Retry on connection error 352 | 353 | Returns: 354 | The raw text response. 
355 | """ 356 | headers = {} 357 | if self.compress and gzip: 358 | headers['Accept-Encoding'] = 'gzip' 359 | sleeper = self.sleepers.make((script, data)) 360 | 361 | scheme = 'https' 362 | host = self.host 363 | if isinstance(host, (list, tuple)): 364 | scheme, host = host 365 | 366 | url = '{scheme}://{host}{path}{script}{ext}'.format(scheme=scheme, host=host, 367 | path=self.path, script=script, 368 | ext=self.ext) 369 | 370 | while True: 371 | try: 372 | if http_method == 'GET': 373 | stream = self.connection.get(url, params=data, files=files, 374 | headers=headers, **self.requests) 375 | else: 376 | stream = self.connection.post(url, data=data, files=files, 377 | headers=headers, **self.requests) 378 | if stream.headers.get('x-database-lag'): 379 | wait_time = int(stream.headers.get('retry-after')) 380 | log.warning('Database lag exceeds max lag. ' 381 | 'Waiting for {} seconds'.format(wait_time)) 382 | sleeper.sleep(wait_time) 383 | elif stream.status_code == 200: 384 | return stream.text 385 | elif stream.status_code < 500 or stream.status_code > 599: 386 | stream.raise_for_status() 387 | else: 388 | if not retry_on_error: 389 | stream.raise_for_status() 390 | log.warning('Received {status} response: {text}. ' 391 | 'Retrying in a moment.' 392 | .format(status=stream.status_code, 393 | text=stream.text)) 394 | sleeper.sleep() 395 | 396 | except requests.exceptions.ConnectionError: 397 | # In the event of a network problem 398 | # (e.g. DNS failure, refused connection, etc), 399 | # Requests will raise a ConnectionError exception. 400 | if not retry_on_error: 401 | raise 402 | log.warning('Connection error. 
Retrying in a moment.') 403 | sleeper.sleep() 404 | 405 | def raw_api(self, action, http_method='POST', *args, **kwargs): 406 | """Send a call to the API.""" 407 | try: 408 | retry_on_error = kwargs.pop('retry_on_error') 409 | except KeyError: 410 | retry_on_error = True 411 | kwargs['action'] = action 412 | kwargs['format'] = 'json' 413 | data = self._query_string(*args, **kwargs) 414 | res = self.raw_call('api', data, retry_on_error=retry_on_error, 415 | http_method=http_method) 416 | 417 | try: 418 | return json.loads(res, object_pairs_hook=OrderedDict) 419 | except ValueError: 420 | if res.startswith('MediaWiki API is not enabled for this site.'): 421 | raise errors.APIDisabledError 422 | raise errors.InvalidResponse(res) 423 | 424 | def raw_index(self, action, http_method='POST', *args, **kwargs): 425 | """Sends a call to index.php rather than the API.""" 426 | kwargs['action'] = action 427 | kwargs['maxlag'] = self.max_lag 428 | data = self._query_string(*args, **kwargs) 429 | return self.raw_call('index', data, http_method=http_method) 430 | 431 | def require(self, major, minor, revision=None, raise_error=True): 432 | if self.version is None: 433 | if raise_error is None: 434 | return 435 | raise RuntimeError('Site %s has not yet been initialized' % repr(self)) 436 | 437 | if revision is None: 438 | if self.version[:2] >= (major, minor): 439 | return True 440 | elif raise_error: 441 | raise errors.MediaWikiVersionError( 442 | 'Requires version {required[0]}.{required[1]}, ' 443 | 'current version is {current[0]}.{current[1]}' 444 | .format(required=(major, minor), 445 | current=(self.version[:2])) 446 | ) 447 | else: 448 | return False 449 | else: 450 | raise NotImplementedError 451 | 452 | # Actions 453 | def email(self, user, text, subject, cc=False): 454 | """ 455 | Send email to a specified user on the wiki. 456 | 457 | >>> try: 458 | ... site.email('SomeUser', 'Some message', 'Some subject') 459 | ... 
except mwclient.errors.NoSpecifiedEmailError as e: 460 | ... print 'The user does not accept email, or has not specified an email address.' 461 | 462 | Args: 463 | user (str): User name of the recipient 464 | text (str): Body of the email 465 | subject (str): Subject of the email 466 | cc (bool): True to send a copy of the email to yourself (default is False) 467 | 468 | Returns: 469 | Dictionary of the JSON response 470 | 471 | Raises: 472 | NoSpecifiedEmailError (mwclient.errors.NoSpecifiedEmailError): if recipient does not accept email 473 | EmailError (mwclient.errors.EmailError): on other errors 474 | """ 475 | 476 | token = self.get_token('email') 477 | 478 | try: 479 | info = self.post('emailuser', target=user, subject=subject, 480 | text=text, ccme=cc, token=token) 481 | except errors.APIError as e: 482 | if e.args[0] == u'noemail': 483 | raise errors.NoSpecifiedEmail(user, e.args[1]) 484 | raise errors.EmailError(*e) 485 | 486 | return info 487 | 488 | def login(self, username=None, password=None, cookies=None, domain=None): 489 | """Login to the wiki.""" 490 | 491 | if username and password: 492 | self.credentials = (username, password, domain) 493 | if cookies: 494 | self.connection.cookies.update(cookies) 495 | 496 | if self.credentials: 497 | sleeper = self.sleepers.make() 498 | kwargs = { 499 | 'lgname': self.credentials[0], 500 | 'lgpassword': self.credentials[1] 501 | } 502 | if self.credentials[2]: 503 | kwargs['lgdomain'] = self.credentials[2] 504 | 505 | # Try to login using the scheme for MW 1.27+. If the wiki is read protected, 506 | # it is not possible to get the wiki version upfront using the API, so we just 507 | # have to try. If the attempt fails, we try the old method. 
508 | try: 509 | kwargs['lgtoken'] = self.get_token('login') 510 | except (errors.APIError, KeyError): 511 | log.debug('Failed to get login token, MediaWiki is older than 1.27.') 512 | 513 | while True: 514 | login = self.post('login', **kwargs) 515 | 516 | if login['login']['result'] == 'Success': 517 | break 518 | elif login['login']['result'] == 'NeedToken': 519 | kwargs['lgtoken'] = login['login']['token'] 520 | elif login['login']['result'] == 'Throttled': 521 | sleeper.sleep(int(login['login'].get('wait', 5))) 522 | else: 523 | raise errors.LoginError(self, login['login']) 524 | 525 | self.site_init() 526 | 527 | def get_token(self, type, force=False, title=None): 528 | 529 | if self.version is None or self.version[:2] >= (1, 24): 530 | # The 'csrf' (cross-site request forgery) token introduced in 1.24 replaces 531 | # the majority of older tokens, like edittoken and movetoken. 532 | if type not in {'watch', 'patrol', 'rollback', 'userrights', 'login'}: 533 | type = 'csrf' 534 | 535 | if type not in self.tokens: 536 | self.tokens[type] = '0' 537 | 538 | if self.tokens.get(type, '0') == '0' or force: 539 | 540 | if self.version is None or self.version[:2] >= (1, 24): 541 | # We use raw_api() rather than api() because api() is adding "userinfo" 542 | # to the query and this raises an readapideniederror if the wiki is read 543 | # protected and we're trying to fetch a login token. 544 | info = self.raw_api('query', 'GET', meta='tokens', type=type) 545 | 546 | self.handle_api_result(info) 547 | 548 | # Note that for read protected wikis, we don't know the version when 549 | # fetching the login token. If it's < 1.27, the request below will 550 | # raise a KeyError that we should catch. 
551 | self.tokens[type] = info['query']['tokens']['%stoken' % type] 552 | 553 | else: 554 | if title is None: 555 | # Some dummy title was needed to get a token prior to 1.24 556 | title = 'Test' 557 | info = self.post('query', titles=title, 558 | prop='info', intoken=type) 559 | for i in six.itervalues(info['query']['pages']): 560 | if i['title'] == title: 561 | self.tokens[type] = i['%stoken' % type] 562 | 563 | return self.tokens[type] 564 | 565 | def upload(self, file=None, filename=None, description='', ignore=False, 566 | file_size=None, url=None, filekey=None, comment=None): 567 | """Upload a file to the site. 568 | 569 | Note that one of `file`, `filekey` and `url` must be specified, but not 570 | more than one. For normal uploads, you specify `file`. 571 | 572 | Args: 573 | file (str): File object or stream to upload. 574 | filename (str): Destination filename, don't include namespace 575 | prefix like 'File:' 576 | description (str): Wikitext for the file description page. 577 | ignore (bool): True to upload despite any warnings. 578 | file_size (int): Deprecated in mwclient 0.7 579 | url (str): URL to fetch the file from. 580 | filekey (str): Key that identifies a previous upload that was 581 | stashed temporarily. 582 | comment (str): Upload comment. Also used as the initial page text 583 | for new files if `description` is not specified. 584 | 585 | Example: 586 | 587 | >>> client.upload(open('somefile', 'rb'), filename='somefile.jpg', 588 | description='Some description') 589 | 590 | Returns: 591 | JSON result from the API. 
592 | 593 | Raises: 594 | errors.InsufficientPermission 595 | requests.exceptions.HTTPError 596 | """ 597 | 598 | if file_size is not None: 599 | # Note that DeprecationWarning is hidden by default since Python 2.7 600 | warnings.warn( 601 | 'file_size is deprecated since mwclient 0.7', 602 | DeprecationWarning 603 | ) 604 | 605 | if filename is None: 606 | raise TypeError('filename must be specified') 607 | 608 | if len([x for x in [file, filekey, url] if x is not None]) != 1: 609 | raise TypeError("exactly one of 'file', 'filekey' and 'url' must be specified") 610 | 611 | image = self.Images[filename] 612 | if not image.can('upload'): 613 | raise errors.InsufficientPermission(filename) 614 | 615 | predata = {} 616 | 617 | if comment is None: 618 | predata['comment'] = description 619 | else: 620 | predata['comment'] = comment 621 | predata['text'] = description 622 | 623 | if ignore: 624 | predata['ignorewarnings'] = 'true' 625 | predata['token'] = image.get_token('edit') 626 | predata['action'] = 'upload' 627 | predata['format'] = 'json' 628 | predata['filename'] = filename 629 | if url: 630 | predata['url'] = url 631 | 632 | # sessionkey was renamed to filekey in MediaWiki 1.18 633 | # https://phabricator.wikimedia.org/rMW5f13517e36b45342f228f3de4298bb0fe186995d 634 | if self.version[:2] < (1, 18): 635 | predata['sessionkey'] = filekey 636 | else: 637 | predata['filekey'] = filekey 638 | 639 | postdata = predata 640 | files = None 641 | if file is not None: 642 | 643 | # Workaround for https://github.com/mwclient/mwclient/issues/65 644 | # ---------------------------------------------------------------- 645 | # Since the filename in Content-Disposition is not interpreted, 646 | # we can send some ascii-only dummy name rather than the real 647 | # filename, which might contain non-ascii. 
648 | file = ('fake-filename', file) 649 | # End of workaround 650 | # ---------------------------------------------------------------- 651 | 652 | files = {'file': file} 653 | 654 | sleeper = self.sleepers.make() 655 | while True: 656 | data = self.raw_call('api', postdata, files) 657 | info = json.loads(data) 658 | if not info: 659 | info = {} 660 | if self.handle_api_result(info, kwargs=predata, sleeper=sleeper): 661 | return info.get('upload', {}) 662 | 663 | def parse(self, text=None, title=None, page=None, prop=None, 664 | redirects=False, mobileformat=False): 665 | kwargs = {} 666 | if text is not None: 667 | kwargs['text'] = text 668 | if title is not None: 669 | kwargs['title'] = title 670 | if page is not None: 671 | kwargs['page'] = page 672 | if prop is not None: 673 | kwargs['prop'] = prop 674 | if redirects: 675 | kwargs['redirects'] = '1' 676 | if mobileformat: 677 | kwargs['mobileformat'] = '1' 678 | result = self.post('parse', **kwargs) 679 | return result['parse'] 680 | 681 | # def block(self): TODO? 682 | # def unblock: TODO? 683 | # def patrol: TODO? 684 | # def import: TODO? 
# Lists
def allpages(self, start=None, prefix=None, namespace='0', filterredir='all',
             minsize=None, maxsize=None, prtype=None, prlevel=None,
             limit=None, dir='ascending', filterlanglinks='all', generator=True,
             end=None):
    """Retrieve all pages on the wiki as a generator."""

    pfx = listing.List.get_prefix('ap', generator)
    kwargs = dict(listing.List.generate_kwargs(
        pfx, ('from', start), ('to', end), prefix=prefix,
        minsize=minsize, maxsize=maxsize, prtype=prtype, prlevel=prlevel,
        namespace=namespace, filterredir=filterredir, dir=dir,
        filterlanglinks=filterlanglinks,
    ))
    return listing.List.get_list(generator)(self, 'allpages', 'ap',
                                            limit=limit, return_values='title',
                                            **kwargs)


def allimages(self, start=None, prefix=None, minsize=None, maxsize=None, limit=None,
              dir='ascending', sha1=None, sha1base36=None, generator=True, end=None):
    """Retrieve all images on the wiki as a generator."""

    pfx = listing.List.get_prefix('ai', generator)
    kwargs = dict(listing.List.generate_kwargs(
        pfx, ('from', start), ('to', end), prefix=prefix,
        minsize=minsize, maxsize=maxsize,
        dir=dir, sha1=sha1, sha1base36=sha1base36,
    ))
    return listing.List.get_list(generator)(self, 'allimages', 'ai', limit=limit,
                                            return_values='timestamp|url',
                                            **kwargs)


def alllinks(self, start=None, prefix=None, unique=False, prop='title',
             namespace='0', limit=None, generator=True, end=None):
    """Retrieve a list of all links on the wiki as a generator."""

    pfx = listing.List.get_prefix('al', generator)
    kwargs = dict(listing.List.generate_kwargs(pfx, ('from', start), ('to', end),
                                               prefix=prefix,
                                               prop=prop, namespace=namespace))
    if unique:
        kwargs[pfx + 'unique'] = '1'
    return listing.List.get_list(generator)(self, 'alllinks', 'al', limit=limit,
                                            return_values='title', **kwargs)


def allcategories(self, start=None, prefix=None, dir='ascending', limit=None,
                  generator=True, end=None):
    """Retrieve all categories on the wiki as a generator."""

    pfx = listing.List.get_prefix('ac', generator)
    kwargs = dict(listing.List.generate_kwargs(pfx, ('from', start), ('to', end),
                                               prefix=prefix, dir=dir))
    return listing.List.get_list(generator)(self, 'allcategories', 'ac', limit=limit,
                                            **kwargs)


def allusers(self, start=None, prefix=None, group=None, prop=None, limit=None,
             witheditsonly=False, activeusers=False, rights=None, end=None):
    """Retrieve all users on the wiki as a generator."""

    kwargs = dict(listing.List.generate_kwargs('au', ('from', start), ('to', end),
                                               prefix=prefix,
                                               group=group, prop=prop,
                                               rights=rights,
                                               witheditsonly=witheditsonly,
                                               activeusers=activeusers))
    return listing.List(self, 'allusers', 'au', limit=limit, **kwargs)


def blocks(self, start=None, end=None, dir='older', ids=None, users=None, limit=None,
           prop='id|user|by|timestamp|expiry|reason|flags'):
    """Retrieve blocks as a generator.

    Each block is a dictionary containing:

    - user: the username or IP address of the user
    - id: the ID of the block
    - timestamp: when the block was added
    - expiry: when the block runs out (infinity for indefinite blocks)
    - reason: the reason they are blocked
    - allowusertalk: key is present (empty string) if the user is allowed to
      edit their user talk page
    - by: the administrator who blocked the user
    - nocreate: key is present (empty string) if the user's ability to create
      accounts has been disabled.

    """

    # TODO: Fix. Fix what?
    kwargs = dict(listing.List.generate_kwargs('bk', start=start, end=end, dir=dir,
                                               ids=ids, users=users, prop=prop))
    return listing.List(self, 'blocks', 'bk', limit=limit, **kwargs)


def deletedrevisions(self, start=None, end=None, dir='older', namespace=None,
                     limit=None, prop='user|comment'):
    """Retrieve deleted revisions (the `deletedrevs` list module) as a generator."""
    # TODO: Fix

    kwargs = dict(listing.List.generate_kwargs('dr', start=start, end=end, dir=dir,
                                               namespace=namespace, prop=prop))
    return listing.List(self, 'deletedrevs', 'dr', limit=limit, **kwargs)


def exturlusage(self, query, prop=None, protocol='http', namespace=None, limit=None):
    r"""Retrieve the list of pages that link to a particular domain or URL, as a generator.

    This API call mirrors the Special:LinkSearch function on-wiki.

    Query can be a domain like 'bbc.co.uk'.
    Wildcards can be used, e.g. '\*.bbc.co.uk'.
    Alternatively, a query can contain a full domain name and some or all of a URL:
    e.g. '\*.wikipedia.org/wiki/\*'

    See <https://meta.wikimedia.org/wiki/Help:Linksearch> for details.

    The generator returns dictionaries containing four keys:
        - url: the URL linked to.
        - ns: namespace of the wiki page
        - pageid: the ID of the wiki page
        - title: the page title.

    """

    kwargs = dict(listing.List.generate_kwargs('eu', query=query, prop=prop,
                                               protocol=protocol, namespace=namespace))
    return listing.List(self, 'exturlusage', 'eu', limit=limit, **kwargs)


def logevents(self, type=None, prop=None, start=None, end=None,
              dir='older', user=None, title=None, limit=None, action=None):
    """Retrieve logevents as a generator."""
    kwargs = dict(listing.List.generate_kwargs('le', prop=prop, type=type, start=start,
                                               end=end, dir=dir, user=user,
                                               title=title, action=action))
    return listing.List(self, 'logevents', 'le', limit=limit, **kwargs)


def checkuserlog(self, user=None, target=None, limit=10, dir='older',
                 start=None, end=None):
    """Retrieve checkuserlog items as a generator."""

    kwargs = dict(listing.List.generate_kwargs('cul', target=target, start=start,
                                               end=end, dir=dir, user=user))
    return listing.NestedList('entries', self, 'checkuserlog', 'cul',
                              limit=limit, **kwargs)


# def protectedtitles requires 1.15
def random(self, namespace, limit=20):
    """Retrieve a generator of random pages from a particular namespace.

    limit specifies the number of random articles retrieved.
    namespace is a namespace identifier integer.

    Generator contains dictionary with namespace, page ID and title.

    """

    kwargs = dict(listing.List.generate_kwargs('rn', namespace=namespace))
    return listing.List(self, 'random', 'rn', limit=limit, **kwargs)


def recentchanges(self, start=None, end=None, dir='older', namespace=None,
                  prop=None, show=None, limit=None, type=None, toponly=None):
    """List recent changes to the wiki, à la Special:Recentchanges."""
    kwargs = dict(listing.List.generate_kwargs('rc', start=start, end=end, dir=dir,
                                               namespace=namespace, prop=prop,
                                               show=show, type=type,
                                               toponly='1' if toponly else None))
    return listing.List(self, 'recentchanges', 'rc', limit=limit, **kwargs)


def revisions(self, revids, prop='ids|timestamp|flags|comment|user',
              expandtemplates=False, diffto='prev'):
    """Get data about a list of revisions.

    See also the `Page.revisions()` method.

    API doc: https://www.mediawiki.org/wiki/API:Revisions

    Example: Get revision text for two revisions:

        >>> for revision in site.revisions([689697696, 689816909], prop='content'):
        ...     print(revision['*'])

    Args:
        revids (list): A list of (max 50) revisions.
        prop (str): Which properties to get for each revision.
        expandtemplates (bool): Expand templates in `rvprop=content` output.
        diffto (str): Revision ID to diff each revision to. Use "prev",
                      "next" and "cur" for the previous, next and current
                      revision respectively.

    Returns:
        A list of revisions. Each revision dictionary is extended with the
        'pageid' and 'pagetitle' of its page; a 'timestamp' entry, when the
        API returned one, is converted with `parse_timestamp`.
    """
    kwargs = {
        'prop': 'revisions',
        'rvprop': prop,
        'revids': '|'.join(map(text_type, revids))
    }
    if expandtemplates:
        kwargs['rvexpandtemplates'] = '1'
    if diffto:
        kwargs['rvdiffto'] = diffto

    revisions = []
    pages = self.get('query', **kwargs).get('query', {}).get('pages', {}).values()
    for page in pages:
        for revision in page.get('revisions', ()):
            revision['pageid'] = page.get('pageid')
            revision['pagetitle'] = page.get('title')
            # The timestamp is only present when `prop` requests it (the
            # docstring example above does not), so don't assume the key exists.
            if 'timestamp' in revision:
                revision['timestamp'] = parse_timestamp(revision['timestamp'])
            revisions.append(revision)
    return revisions


def search(self, search, namespace='0', what=None, redirects=False, limit=None):
    """Perform a full text search.

    API doc: https://www.mediawiki.org/wiki/API:Search

    Example:
        >>> for result in site.search('prefix:Template:Citation/'):
        ...     print(result.get('title'))

    Args:
        search (str): The query string
        namespace (int): The namespace to search (default: 0)
        what (str): Search scope: 'text' for fulltext, or 'title' for titles only.
                    Depending on the search backend,
                    both options may not be available.
                    For instance
                    `CirrusSearch <https://www.mediawiki.org/wiki/Help:CirrusSearch>`_
                    doesn't support 'title', but instead provides an "intitle:"
                    query string filter.
        redirects (bool): Include redirect pages in the search
                          (option removed in MediaWiki 1.23).
        limit (int): Passed on to the listing as its `limit` argument.

    Returns:
        mwclient.listing.List: Search results iterator
    """
    kwargs = dict(listing.List.generate_kwargs('sr', search=search,
                                               namespace=namespace, what=what))
    if redirects:
        kwargs['srredirects'] = '1'
    return listing.List(self, 'search', 'sr', limit=limit, **kwargs)


def usercontributions(self, user, start=None, end=None, dir='older', namespace=None,
                      prop=None, show=None, limit=None):
    """
    List the contributions made by a given user to the wiki, à la Special:Contributions.

    API doc: https://www.mediawiki.org/wiki/API:Usercontribs
    """
    kwargs = dict(listing.List.generate_kwargs('uc', user=user, start=start, end=end,
                                               dir=dir, namespace=namespace,
                                               prop=prop, show=show))
    return listing.List(self, 'usercontribs', 'uc', limit=limit, **kwargs)


def users(self, users, prop='blockinfo|groups|editcount'):
    """
    Get information about a list of users.

    API doc: https://www.mediawiki.org/wiki/API:Users
    """

    return listing.List(self, 'users', 'us', ususers='|'.join(users), usprop=prop)


def watchlist(self, allrev=False, start=None, end=None, namespace=None, dir='older',
              prop=None, show=None, limit=None):
    """
    List the pages on the current user's watchlist.

    API doc: https://www.mediawiki.org/wiki/API:Watchlist
    """

    kwargs = dict(listing.List.generate_kwargs('wl', start=start, end=end,
                                               namespace=namespace, dir=dir,
                                               prop=prop, show=show))
    if allrev:
        kwargs['wlallrev'] = '1'
    return listing.List(self, 'watchlist', 'wl', limit=limit, **kwargs)


def expandtemplates(self, text, title=None, generatexml=False):
    """
    Takes wikitext (text) and expands templates.

    API doc: https://www.mediawiki.org/wiki/API:Expandtemplates

    Args:
        text (str): Wikitext to expand.
        title (str): Optional page title, sent as the `title` parameter
            only when given.
        generatexml (bool): True to also request the XML parse tree.

    Returns:
        The expanded wikitext, or a tuple (expanded wikitext, parse tree)
        if `generatexml` is True.
    """

    kwargs = {}
    # Only forward the title when the caller actually supplied one.
    if title is not None:
        kwargs['title'] = title
    if generatexml:
        kwargs['generatexml'] = '1'

    result = self.get('expandtemplates', text=text, **kwargs)

    if generatexml:
        return result['expandtemplates']['*'], result['parsetree']['*']
    else:
        return result['expandtemplates']['*']


def ask(self, query, title=None):
    """
    Ask a query against Semantic MediaWiki.

    API doc: https://semantic-mediawiki.org/wiki/Ask_API

    Args:
        query (str): The ask query.
        title (str): Optional title, sent as the `title` parameter only
            when given.

    Returns:
        Generator for retrieving all search results, with each answer as a dictionary.
        If the query is invalid, an APIError is raised. A valid query with zero
        results will not raise any error.

    Examples:

        >>> query = "[[Category:my cat]]|[[Has name::a name]]|?Has property"
        >>> for answer in site.ask(query):
        ...     for title, data in answer.items():
        ...         print(title)
        ...         print(data)
    """
    kwargs = {}
    # Only forward the title when the caller actually supplied one.
    if title is not None:
        kwargs['title'] = title

    # Keep requesting result pages until the response no longer carries a
    # 'query-continue-offset' value.
    offset = 0
    while offset is not None:
        results = self.raw_api(
            'ask',
            query=u'{query}|offset={offset}'.format(query=query, offset=offset),
            http_method='GET', **kwargs)
        self.handle_api_result(results)  # raises APIError on error
        offset = results.get('query-continue-offset')
        # 'results' may be missing or empty; treat both as "no answers".
        answers = results['query'].get('results') or {}
        for key, value in answers.items():
            yield {key: value}