├── __init__.py ├── queries ├── captions.txt ├── duos.txt ├── shift_ranks.txt ├── fake_references.txt ├── missing_descriptions.txt ├── fake_references_url.txt ├── commons_labels.txt ├── ask_externalid_props.txt ├── external-ids.txt ├── dupes.txt ├── qualifiers.txt ├── redirects.txt ├── mixed_claims.txt ├── unmerged_dates.txt ├── units.txt └── duplicate_dates.txt ├── README.md ├── query_store.py ├── deferred.py ├── .gitignore ├── cswiki ├── klementinum.py ├── heritage_lists_diff.py ├── sync_heritage_lists.py ├── sync_tree_lists.py ├── pageviews.py └── iucn.py ├── fix_commons_labels.py ├── wikidata_cleanup.py ├── error_reporting.py ├── import_displaytitle.py ├── wikidata └── list_of_wikis.py ├── wikidata.py ├── importdata.py ├── tools.py ├── lua_formatter.py ├── shift_ranks.py ├── update_deathdate.py ├── wikitext.py ├── nounit.py ├── cleanup_redirects.py ├── fix_qualifiers.py ├── captiontoimage.py ├── check_disambigs.py ├── split_names_and_titles.py ├── wikidata_fix_redirects.py ├── clean_commonscat.py ├── split_claims.py ├── import_descriptions.py ├── merger.py ├── slice_externalids.py ├── list_typos.py ├── fake_references.py ├── typos.py ├── cleanup_dates.py ├── typoloader.py ├── clean_dupes.py ├── connect.py ├── manage_duos.py └── checkwiki.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /queries/captions.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item WHERE { ?item wdt:%(prop)s [] } -------------------------------------------------------------------------------- /queries/duos.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item WHERE { 2 | ?item wdt:P31/wdt:P279* wd:%(class)s . 3 | MINUS { ?item wdt:P527 [] } . 4 | } -------------------------------------------------------------------------------- /queries/shift_ranks.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item { 2 | ?statement wikibase:rank wikibase:DeprecatedRank; pq:%(prop)s [] . 3 | ?item ?p ?statement; wikibase:sitelinks [] . 4 | } LIMIT %(limit)i -------------------------------------------------------------------------------- /queries/fake_references.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item { 2 | ?item ?p [ prov:wasDerivedFrom/pr:%(prop)s ?target; ?ps ?target ] . 3 | ?ps ^wikibase:statementProperty [] . 4 | ?target ?p [ ?ps ?item ] . 5 | } LIMIT %(limit)i -------------------------------------------------------------------------------- /queries/missing_descriptions.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item { 2 | ?item ^schema:about [ 3 | schema:isPartOf ; schema:name ?title 4 | ]; wdt:P31 wd:Q5 . 5 | MINUS { ?item schema:description ?desc FILTER( LANG( ?desc ) = '%(lang)s' ) } . 6 | } -------------------------------------------------------------------------------- /queries/fake_references_url.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item { 2 | VALUES ?host { } . 3 | ?item ?p [ prov:wasDerivedFrom/pr:%(prop)s ?url ] . 4 | FILTER( STRSTARTS( STR( ?url ), STR( ?host ) ) ) . 
5 | } LIMIT %(limit)i -------------------------------------------------------------------------------- /queries/commons_labels.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item { 2 | ?item ^schema:about [ 3 | schema:isPartOf/^wdt:P856 wd:Q565; schema:name ?name 4 | ]; rdfs:label ?label FILTER( LANG( ?label ) = 'en' ) . 5 | FILTER( STRSTARTS( STR( ?name ), 'Category:' ) ) . 6 | FILTER( STRSTARTS( STR( ?label ), 'Category:' ) ) . 7 | MINUS { ?item wdt:P31/wdt:P279* wd:Q4167836 } . 8 | } LIMIT %(limit)s -------------------------------------------------------------------------------- /queries/ask_externalid_props.txt: -------------------------------------------------------------------------------- 1 | ASK { 2 | { 3 | SELECT * { 4 | ?prop wikibase:propertyType wikibase:ExternalId; 5 | wikibase:directClaim []; 6 | wdt:P1630 [] . 7 | FILTER( ?prop NOT IN ( wd:%(blacklist)s ) ) . 8 | } 9 | ORDER BY xsd:integer( STRAFTER( STR( ?prop ), STR( wd:P ) ) ) 10 | OFFSET %(offset)i LIMIT %(limit)i 11 | } 12 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pywikibot-scripts 2 | Own pywikibot scripts (for Wikimedia projects) 3 | 4 | ## Requirements 5 | Python 3.6.1 or newer. 6 | Pywikibot version [4d6e674](https://github.com/wikimedia/pywikibot/commit/4d6e674bf1385961a27b3ddf9acc16bcb32373b0). 7 | 8 | ## Usage 9 | Checkout or download to "myscripts" directory inside "core/scripts/userscripts". 10 | Then add to your `user-config.py`: 11 | ``` 12 | user_script_paths = ['scripts.userscripts.myscripts'] 13 | ``` -------------------------------------------------------------------------------- /queries/external-ids.txt: -------------------------------------------------------------------------------- 1 | SELECT ?item WITH { 2 | SELECT DISTINCT ?wdt { 3 | ?prop wikibase:propertyType wikibase:ExternalId; 4 | wikibase:directClaim ?wdt; 5 | wdt:P1630 [] . 6 | FILTER( ?prop NOT IN ( wd:%(blacklist)s ) ) . 7 | } 8 | ORDER BY xsd:integer( STRAFTER( STR( ?prop ), STR( wd:P ) ) ) 9 | OFFSET %(offset)i LIMIT %(limit)i 10 | } AS %%predicates WHERE { 11 | INCLUDE %%predicates . 12 | ?item ?wdt ?value . 13 | FILTER( STRSTARTS( ?value, 'http' ) ) . 14 | } -------------------------------------------------------------------------------- /queries/dupes.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item WHERE { 2 | VALUES ?dupe { wd:%(dupe)s } . 3 | ?item p:P31 ?statement . 4 | ?statement ps:P31 ?dupe . 5 | { 6 | ?statement (pq:P460|pq:P642) ?target . 7 | } UNION { 8 | ?item wdt:P460 ?target . 9 | } . 10 | MINUS { ?item wdt:P1889|^wdt:P1889 ?target } . 11 | MINUS { 12 | ?target wdt:P31/wdt:P279* wd:Q16521 . 13 | ?item wikibase:sitelinks 0 . 14 | } . 15 | ?item schema:dateModified ?mod . 16 | } ORDER BY ?mod OFFSET %(offset)i -------------------------------------------------------------------------------- /queries/qualifiers.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item WHERE { 2 | ?prop wikibase:propertyType [] . 3 | { 4 | ?prop p:P31/ps:P31 wd:%(item)s . 5 | MINUS { ?prop wikibase:propertyType wikibase:ExternalId } . 6 | } UNION { 7 | FILTER( ?prop IN ( wd:%(good)s ) ) . 8 | } . 9 | FILTER( ?prop NOT IN ( wd:%(bad)s ) ) . 10 | MINUS { ?prop p:P31/ps:P31 wd:Q18608359 } . 11 | ?prop wikibase:reference ?pr . 
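  # now pick up statements whose references use such a property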
12 | ?ref ?pr ?value . 13 | ?statement prov:wasDerivedFrom ?ref . 14 | ?item ?p ?statement . 15 | [] wikibase:claim ?p . 16 | } -------------------------------------------------------------------------------- /queries/redirects.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item { 2 | ?item owl:sameAs ?target; schema:dateModified ?date . 3 | { 4 | ?entity ?p [ ?pred ?item; wikibase:rank [] ] . 5 | } UNION { 6 | ?entity ?p1 [ ?predv [ wikibase:quantityUnit ?item ]; wikibase:rank [] ] . 7 | } UNION { 8 | ?ref ?pr ?item . 9 | ?st2 prov:wasDerivedFrom ?ref . 10 | ?entity ?p2 ?st2 . 11 | } UNION { 12 | ?ref1 ?prv [ wikibase:quantityUnit ?item ] . 13 | ?st3 prov:wasDerivedFrom ?ref . 14 | ?entity ?p3 ?st3 . 15 | } . 16 | FILTER( NOW() - ?date > %(days)d ) . 17 | } ORDER BY ?date -------------------------------------------------------------------------------- /queries/mixed_claims.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item { 2 | { ?st pq:P580 ?date1, ?date2 } UNION { ?st pq:P582 ?date1, ?date2 } . 3 | MINUS { ?st prov:wasDerivedFrom [] } . 4 | MINUS { 5 | ?st ?pq [] . 6 | FILTER( ?pq NOT IN ( pq:P580, pq:P582 ) ) . 7 | ?pq ^wikibase:qualifier [] . 8 | } . 9 | MINUS { ?item ?p [ pq:P580|pq:P582 ?date ] . FILTER( YEAR( ?date ) < 1 ) } . 10 | FILTER( !ISBLANK( ?date1 ) && !ISBLANK( ?date2 ) ) . 11 | MINUS { ?date1 a/a owl:Class } . 12 | MINUS { ?date2 a/a owl:Class } . 13 | FILTER( ?date1 < ?date2 ) . 14 | ?item ?p ?st . 15 | } LIMIT %(limit)i -------------------------------------------------------------------------------- /queries/unmerged_dates.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item WHERE { 2 | ?item schema:dateModified ?dateModified . hint:Prior hint:rangeSafe true . 3 | FILTER( ?dateModified > "%(date)s"^^xsd:dateTime ) . 4 | ?item p:%(prop)s ?statement1, ?statement2 . 5 | FILTER( ?statement1 != ?statement2 ) . 6 | FILTER( STR( ?statement1 ) < STR( ?statement2 ) ) . 7 | VALUES (?prec1 ?prec2) { (9 9) (10 10) } . 8 | ?statement1 psv:%(prop)s ?node1 . 9 | ?node1 wikibase:timeValue ?val1 . hint:Prior hint:rangeSafe true . 10 | ?node1 wikibase:timePrecision ?prec1 . 11 | ?statement2 psv:%(prop)s ?node2 . 12 | ?node2 wikibase:timeValue ?val2 . hint:Prior hint:rangeSafe true . 13 | ?node2 wikibase:timePrecision ?prec2 . 14 | FILTER( ?val1 = ?val2 ) . 
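  # i.e. two statements whose time values are identical at the same precision
  # (9 = year, 10 = month), so they are candidates for merging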
15 | } 16 | -------------------------------------------------------------------------------- /query_store.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class QueryStore: 5 | 6 | '''Interface for loading SPARQL queries from text files''' 7 | 8 | def __init__(self, path=None): 9 | if path is None: 10 | dirname = os.path.dirname(os.path.realpath(__file__)) 11 | path = os.path.join(dirname, 'queries') 12 | self.path = path 13 | 14 | def get_query(self, name): 15 | with open('%s.txt' % os.path.join(self.path, name), 'r', 16 | encoding='utf-8') as file: 17 | file.seek(0) 18 | return file.read() 19 | 20 | def build_query(self, name, **params): 21 | return self.get_query(name) % params 22 | 23 | 24 | if __name__ == '__main__': 25 | print('This script is not runnable from command line.') 26 | -------------------------------------------------------------------------------- /queries/units.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item WHERE { 2 | { 3 | ?pst rdf:type wdno:P2237 . 4 | } UNION { 5 | ?pst ps:P2237 wd:%(good)s . 6 | } . 7 | ?prop p:P2237 ?pst; 8 | wikibase:claim ?p; 9 | wikibase:statementValue ?psv; 10 | wikibase:qualifierValue ?pqv; 11 | wikibase:referenceValue ?prv . 12 | FILTER( ?prop != wd:P1092 ) . 13 | { 14 | ?statement ?psv ?value . 15 | ?value wikibase:quantityUnit ?unit . 16 | FILTER( ?unit != wd:Q199 ) . 17 | ?item ?p ?statement . 18 | } UNION { 19 | ?statement1 ?pqv ?value . 20 | ?value wikibase:quantityUnit ?unit . 21 | FILTER( ?unit != wd:Q199 ) . 22 | ?item ?claim1 ?statement1 . 23 | } UNION { 24 | ?ref ?prv ?value . 25 | ?value wikibase:quantityUnit ?unit . 26 | FILTER( ?unit != wd:Q199 ) . 27 | ?statement2 prov:wasDerivedFrom ?ref . 28 | ?item ?claim2 ?statement2 . 29 | } . 30 | } -------------------------------------------------------------------------------- /queries/duplicate_dates.txt: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT ?item { 2 | ?item schema:dateModified ?dateModified . hint:Prior hint:rangeSafe true . 3 | FILTER( ?dateModified > "%(date)s"^^xsd:dateTime ) . 4 | ?item p:%(prop)s ?statement1, ?statement2 FILTER( ?statement2 != ?statement1 ) . 5 | VALUES (?prec1 ?prec2) { 6 | (9 10) (9 11) 7 | } . 8 | ?statement1 psv:%(prop)s ?node1 . 9 | ?node1 wikibase:timeValue ?val1 . hint:Prior hint:rangeSafe true . 10 | ?node1 wikibase:timePrecision ?prec1 . 11 | ?statement1 wikibase:rank ?rank1 . 12 | ?statement2 psv:%(prop)s ?node2 . 13 | ?node2 wikibase:timeValue ?val2 . hint:Prior hint:rangeSafe true . 14 | ?node2 wikibase:timePrecision ?prec2 . 15 | ?statement2 wikibase:rank ?rank2 . 16 | FILTER( YEAR( ?val1 ) = YEAR( ?val2 ) ) . 17 | FILTER( ?rank1 = ?rank2 || ?rank2 = wikibase:NormalRank ) . 18 | MINUS { ?statement1 prov:wasDerivedFrom/!(pr:P143|pr:P813|pr:P4656) [] } . 19 | ?statement2 prov:wasDerivedFrom/!(pr:P143|pr:P813|pr:P4656) [] . 
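  # keep only pairs where ?statement1 has no substantive reference but ?statement2 does
  # (references consisting solely of pr:P143 / pr:P813 / pr:P4656 do not count as substantive)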
20 | } 21 | -------------------------------------------------------------------------------- /deferred.py: -------------------------------------------------------------------------------- 1 | import pywikibot 2 | 3 | from pywikibot.bot import BaseBot 4 | 5 | 6 | class DeferredCallbacksBot(BaseBot): 7 | 8 | ''' 9 | Bot deferring callbacks like purging pages 10 | ''' 11 | 12 | def __init__(self, **kwargs): 13 | super().__init__(**kwargs) 14 | self.callbacks = [] 15 | 16 | def addCallback(self, func, *data, **kwargs): 17 | callback = lambda *_, **__: func(*data, **kwargs) 18 | self.callbacks.append(callback) 19 | 20 | def queueLen(self): 21 | return len(self.callbacks) 22 | 23 | def hasCallbacks(self): 24 | return self.queueLen() > 0 25 | 26 | def doWithCallback(self, func, *data, **kwargs): 27 | if self.hasCallbacks(): 28 | kwargs['callback'] = self.callbacks.pop(0) 29 | return func(*data, **kwargs) 30 | 31 | def exit(self): 32 | pywikibot.info(f'Executing remaining deferred callbacks: {self.queueLen()} left') 33 | try: 34 | while self.hasCallbacks(): 35 | callback = self.callbacks.pop(0) 36 | callback() 37 | finally: 38 | super().exit() 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /cswiki/klementinum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import json 3 | import re 4 | 5 | from collections import OrderedDict 6 | 7 | import mwparserfromhell as parser 8 | import pywikibot 9 | import requests 10 | 11 | 12 | def get_single_year(year): 13 | return year.rpartition(', ')[2] 14 | 15 | 16 | def format_number(val): 17 | return re.sub(r'(\d+),(\d+)', r'\1.\2', str(val)) 18 | 19 | 20 | def main(): 21 | pywikibot.handle_args() 22 | site = pywikibot.Site('cs', 'wikipedia') 23 | url_pattern = 'https://www.chmi.cz/files/portal/docs/meteo/ok/klementinum/extrklem{:02d}_cs.html' 24 | 25 | data = OrderedDict() 26 | sources = [] 27 | for i in range(1, 13): 28 | url = url_pattern.format(i) 29 | response = requests.get(url) 30 | code = parser.parse(response.text) 31 | 32 | sources.append(url) 33 | data[str(i)] = month = OrderedDict() 34 | trs = (tr for tr in code.ifilter_tags() if tr.tag == 'tr') 35 | next(trs) # skip headline 36 | for day, tr in enumerate(trs, start=1): 37 | tags = tr.contents.filter_tags() 38 | if len(tags) != 6: 39 | break 40 | _, avg, mx, mx_year, mn, mn_year = [tag.contents for tag in tags] 41 | month[str(day)] = OrderedDict([ 42 | ('avg', format_number(avg)), 43 | ('max', format_number(mx)), 44 | ('max_year', get_single_year(mx_year)), 45 | ('min', format_number(mn)), 46 | ('min_year', get_single_year(mn_year)), 47 | ]) 48 | 49 | text = json.dumps({ 50 | '@metadata': { 51 | 'sources': sources, 52 | }, 53 | 'data': data, 54 | }) 55 | page = pywikibot.Page(site, 'Šablona:Klementinum/data.json') 56 | page.put(text, summary='aktualizace dat pro šablonu Klementinum', 57 | minor=False, bot=False, apply_cosmetic_changes=False) 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | -------------------------------------------------------------------------------- /fix_commons_labels.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pywikibot 3 | 4 | from pywikibot import pagegenerators 5 | 6 | from query_store import QueryStore 7 | from wikidata import WikidataEntityBot 8 | 9 | 10 | class LabelsFixingBot(WikidataEntityBot): 11 | 12 | use_from_page = False 13 | 14 | def __init__(self, generator, **kwargs): 15 | self.available_options.update({ 16 | 'always': True, 17 | 'limit': 50, 18 | }) 19 | super().__init__(**kwargs) 20 | self.store = QueryStore() 21 | self._generator = generator or self.custom_generator() 22 | self.summary = 'remove prefix from [en] 
label' 23 | 24 | @property 25 | def generator(self): 26 | return pagegenerators.PreloadingEntityGenerator(self._generator) 27 | 28 | def custom_generator(self): 29 | query = self.store.build_query('commons_labels', 30 | limit=self.opt['limit']) 31 | return pagegenerators.WikidataSPARQLPageGenerator(query, site=self.repo) 32 | 33 | def treat_page_and_item(self, page, item): 34 | if any(cl.target_equals('Q4167836') for cl in item.claims.get('P31', [])): 35 | return 36 | if item.getSitelink('commonswiki').startswith('Category:'): 37 | if item.labels['en'].startswith('Category:'): 38 | data = {'en': item.labels['en'].removeprefix('Category:')} 39 | self.user_edit_entity(item, {'labels': data}, 40 | summary=self.summary) 41 | 42 | 43 | def main(*args): 44 | options = {} 45 | local_args = pywikibot.handle_args(args) 46 | site = pywikibot.Site() 47 | genFactory = pagegenerators.GeneratorFactory(site=site) 48 | for arg in genFactory.handle_args(local_args): 49 | if arg.startswith('-'): 50 | arg, sep, value = arg.partition(':') 51 | if value != '': 52 | options[arg[1:]] = value if not value.isdigit() else int(value) 53 | else: 54 | options[arg[1:]] = True 55 | 56 | generator = genFactory.getCombinedGenerator() 57 | bot = LabelsFixingBot(generator=generator, site=site, **options) 58 | bot.run() 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /wikidata_cleanup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pywikibot 3 | 4 | from pywikibot import pagegenerators 5 | 6 | from wikidata import WikidataEntityBot 7 | from wikidata_cleanup_toolkit import WikidataCleanupToolkit 8 | 9 | 10 | class WikidataCleanupBot(WikidataEntityBot): 11 | 12 | use_from_page = False 13 | 14 | def __init__(self, generator, fix, **kwargs): 15 | super().__init__(**kwargs) 16 | self._generator = generator 17 | self.fix = fix 18 | self.my_kit = WikidataCleanupToolkit([self.fix]) 19 | 20 | @property 21 | def generator(self): 22 | return pagegenerators.PreloadingEntityGenerator(self._generator) 23 | 24 | @property 25 | def summary(self): 26 | return { 27 | 'add_missing_labels': 'import labels from sitelinks', 28 | 'cleanup_labels': 'strip labels', 29 | 'deduplicate_aliases': 'remove duplicate aliases', 30 | 'deduplicate_claims': 'merge duplicate claims', 31 | 'deduplicate_references': 'remove duplicate references', 32 | 'fix_HTML': 'resolve HTML entities', 33 | 'fix_languages': 'resolve invalid languages', 34 | 'fix_quantities': 'remove explicit bounds', 35 | 'replace_invisible': 'replace invisible characters', 36 | }[self.fix] 37 | 38 | def treat_page_and_item(self, page, item): 39 | data = None # seems to work more reliably than empty dict 40 | if self.my_kit.cleanup(item, data): 41 | self.user_edit_entity(item, data, summary=self.summary) 42 | 43 | 44 | def main(*args): 45 | options = {} 46 | local_args = pywikibot.handle_args(args) 47 | site = pywikibot.Site() 48 | genFactory = pagegenerators.GeneratorFactory(site=site) 49 | for arg in genFactory.handle_args(local_args): 50 | if arg.startswith('-'): 51 | arg, sep, value = arg.partition(':') 52 | if value != '': 53 | options[arg[1:]] = value if not value.isdigit() else int(value) 54 | else: 55 | options[arg[1:]] = True 56 | 57 | generator = genFactory.getCombinedGenerator() 58 | bot = WikidataCleanupBot(generator=generator, site=site, **options) 59 | bot.run() 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | 
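# A possible invocation (sketch only; combine with any standard pagegenerators option):
#
#     python pwb.py wikidata_cleanup -fix:deduplicate_aliases <generator options>
#
# The -fix value has to be one of the keys of WikidataCleanupBot.summary above; it is
# wrapped in WikidataCleanupToolkit([fix]) and the matching summary is used for the edit.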
-------------------------------------------------------------------------------- /error_reporting.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from contextlib import suppress 4 | from threading import Lock, Timer 5 | 6 | import pywikibot 7 | 8 | from pywikibot.bot import BaseBot 9 | from pywikibot.exceptions import NoPageError 10 | 11 | 12 | class ErrorReportingBot(BaseBot): 13 | 14 | file_name = None 15 | page_pattern = None 16 | 17 | def __init__(self, **kwargs): 18 | self.available_options.update({ 19 | 'clearonly': False, 20 | 'interval': 5 * 60, 21 | }) 22 | super().__init__(**kwargs) 23 | self.timer = None 24 | self.file_lock = Lock() 25 | self.timer_lock = Lock() 26 | 27 | def run(self): 28 | self.open() 29 | self.save_file() 30 | if not self.opt['clearonly']: 31 | super().run() 32 | 33 | def open(self): 34 | with suppress(OSError): 35 | f = open(os.path.join('..', self.file_name), 'x') 36 | f.close() 37 | 38 | @property 39 | def log_page(self): 40 | log_page = pywikibot.Page( 41 | self.repo, self.page_pattern % self.repo.username()) 42 | try: 43 | log_page.get() 44 | except NoPageError: 45 | log_page.text = '' 46 | return log_page 47 | 48 | def append(self, text): 49 | with ( 50 | self.file_lock, 51 | open(os.path.join('..', self.file_name), 'a', encoding='utf-8') as f 52 | ): 53 | f.write(text) 54 | 55 | def save_file(self): 56 | with ( 57 | self.file_lock, 58 | open(os.path.join('..', self.file_name), 'r+', encoding='utf-8') as f 59 | ): 60 | f.seek(0) # jump to the beginning 61 | text = '\n'.join(f.read().splitlines()) # multi-platform 62 | if text: 63 | log_page = self.log_page 64 | log_page.text += text 65 | log_page.save(summary='update') 66 | f.seek(0) # jump to the beginning 67 | f.truncate() # and delete everything 68 | with self.timer_lock: 69 | self.timer = Timer(self.opt['interval'], self.save_file) 70 | self.timer.start() 71 | 72 | def teardown(self): 73 | with self.timer_lock: 74 | if self.timer: 75 | self.timer.cancel() 76 | super().teardown() 77 | -------------------------------------------------------------------------------- /import_displaytitle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pywikibot 3 | 4 | from pywikibot.pagegenerators import ( 5 | page_with_property_generator, 6 | GeneratorFactory, 7 | NamespaceFilterPageGenerator, 8 | ) 9 | from pywikibot.tools import first_lower 10 | 11 | from wikidata import WikidataEntityBot 12 | 13 | 14 | class LabelSettingBot(WikidataEntityBot): 15 | 16 | def __init__(self, **kwargs): 17 | self.available_options.update({ 18 | 'create': False, 19 | }) 20 | super().__init__(**kwargs) 21 | self.create_missing_item = self.opt['create'] is True 22 | 23 | def stripped(self, title): 24 | if title.endswith(')'): 25 | return title.partition(' (')[0] 26 | else: 27 | return title 28 | 29 | def treat_page_and_item(self, page, item): 30 | title = page.properties().get('displaytitle') 31 | if not title: 32 | return 33 | page_title = page.title() 34 | if first_lower(page_title) != title: 35 | return 36 | lang = page.site.lang 37 | label = item.labels.get(lang) 38 | if not label or self.stripped(label) == self.stripped(page_title): 39 | item.labels[lang] = first_lower(label) if label else title 40 | link = page.title(as_link=True, insite=item.site) 41 | summary = f'importing [{lang}] label from displaytitle in {link}' 42 | self.user_edit_entity(item, summary=summary) 43 | 44 | 45 | def main(*args): 46 | 
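    # turn the remaining command-line arguments of the form -name or -name:value into
    # bot options; generator-related arguments were already consumed by the factory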
options = {} 47 | local_args = pywikibot.handle_args(args) 48 | site = pywikibot.Site() 49 | genFactory = GeneratorFactory(site=site) 50 | for arg in genFactory.handle_args(local_args): 51 | if arg.startswith('-'): 52 | arg, sep, value = arg.partition(':') 53 | if value != '': 54 | options[arg[1:]] = value if not value.isdigit() else int(value) 55 | else: 56 | options[arg[1:]] = True 57 | 58 | generator = genFactory.getCombinedGenerator() 59 | if not generator: 60 | generator = page_with_property_generator('displaytitle', site=site) 61 | if genFactory.namespaces: 62 | generator = NamespaceFilterPageGenerator( 63 | generator, genFactory.namespaces, site=site) 64 | 65 | bot = LabelSettingBot(generator=generator, site=site, **options) 66 | bot.run() 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /wikidata/list_of_wikis.py: -------------------------------------------------------------------------------- 1 | #!/bin/python3 2 | import json 3 | 4 | import pywikibot 5 | 6 | from pywikibot.data.sparql import SparqlQuery 7 | from pywikibot.exceptions import SiteDefinitionError, UnknownFamilyError 8 | from tqdm import tqdm 9 | 10 | 11 | pywikibot.handle_args() 12 | 13 | repo = pywikibot.Site('wikidata') 14 | page = pywikibot.Page(repo, 'Wikidata:List of wikis/python') 15 | data = json.loads(page.text) 16 | 17 | endpoint = SparqlQuery(repo=repo) 18 | query = '''SELECT * WHERE { ?item wdt:P1800 ?dbname } ORDER BY ?dbname''' 19 | missing_families = set() 20 | added = set() 21 | 22 | out = {} 23 | for entry in tqdm(endpoint.select(query, full_data=True)): 24 | item = entry['item'].getID() 25 | dbname = entry['dbname'].value 26 | code, sep, right = dbname.rpartition('wik') 27 | if not sep: 28 | pywikibot.output(f'dbname not recognized: {dbname}') 29 | continue 30 | 31 | if dbname == 'sourceswiki': 32 | code, family = 'mul', 'wikisource' 33 | else: 34 | family = sep + right 35 | if family == 'wiki': 36 | if code in data: # commons, etc. 
37 | family = code 38 | else: 39 | family = 'wikipedia' 40 | 41 | if family in missing_families: 42 | continue 43 | 44 | replace_hyphen = False 45 | if '_' in code: 46 | code = code.replace('_', '-') 47 | replace_hyphen = True 48 | 49 | try: 50 | site = pywikibot.Site(code, family) 51 | except UnknownFamilyError as e: 52 | missing_families.add(family) 53 | pywikibot.log(e.unicode) 54 | continue 55 | except SiteDefinitionError as e: 56 | pywikibot.log(e.unicode) 57 | continue 58 | 59 | if replace_hyphen: 60 | code = code.replace('-', '_') 61 | 62 | if code in out.setdefault(family, {}): 63 | pywikibot.warning(f'Duplicate {code}.{family} entry for {dbname}') 64 | continue 65 | 66 | out[family][code] = item 67 | if code not in data.get(family, {}): 68 | added.add(dbname) 69 | 70 | if added: 71 | total = sum(map(len, out.values())) 72 | summary = f'Updating list of wikis: {total} wikis; added: ' + ( 73 | ', '.join(sorted(added))) 74 | text = json.dumps(out, sort_keys=True, indent=4) 75 | pywikibot.showDiff(page.text, text) 76 | page.text = text 77 | pywikibot.output(f'Edit summary: {summary}') 78 | page.save(summary=summary, minor=False, bot=False) 79 | else: 80 | pywikibot.output('No wikis to be added') 81 | -------------------------------------------------------------------------------- /wikidata.py: -------------------------------------------------------------------------------- 1 | from contextlib import suppress 2 | import random 3 | 4 | import pywikibot 5 | 6 | from pywikibot.bot import WikidataBot 7 | from pywikibot.exceptions import NoPageError, IsRedirectPageError 8 | 9 | from wikidata_cleanup_toolkit import WikidataCleanupToolkit 10 | 11 | 12 | class WikidataEntityBot(WikidataBot): 13 | 14 | use_redirects = False 15 | 16 | ''' 17 | Bot editing Wikidata entities 18 | Features: 19 | * Caches properties so that iterating claims can be faster 20 | * Wraps around the WikidataBot class. 21 | * Item cleanup like missing labels, redundant data etc. 
22 | ''' 23 | 24 | def __init__(self, **kwargs): 25 | self.available_options.update({ 26 | 'nocleanup': False, 27 | }) 28 | self.bad_cache = set(kwargs.pop('bad_cache', [])) 29 | self.good_cache = set(kwargs.pop('good_cache', [])) 30 | self.kit = WikidataCleanupToolkit() 31 | super().__init__(**kwargs) 32 | 33 | def init_page(self, item): 34 | with suppress(NoPageError, IsRedirectPageError): 35 | item.get() 36 | return super().init_page(item) 37 | 38 | def checkProperty(self, prop): 39 | if prop in self.good_cache: 40 | return True 41 | if prop in self.bad_cache: 42 | return False 43 | 44 | self.cacheProperty(prop) 45 | return self.checkProperty(prop) 46 | 47 | def cacheProperty(self, prop): 48 | prop_page = pywikibot.PropertyPage(self.repo, prop) 49 | if self.filterProperty(prop_page): 50 | self.good_cache.add(prop) 51 | else: 52 | self.bad_cache.add(prop) 53 | 54 | def filterProperty(self, prop_page): 55 | raise NotImplementedError( 56 | f'{self.__class__.__name__}.filterProperty needs ' 57 | 'overriding in a subclass') 58 | 59 | def new_editgroups_summary(self): 60 | # https://www.wikidata.org/wiki/Wikidata:Edit_groups/Adding_a_tool 61 | n = random.randrange(0, 2**48) 62 | return f'[[:toollabs:editgroups/b/CB/{n:x}|details]]' 63 | 64 | def user_edit_entity(self, item, data=None, *, cleanup=None, **kwargs): 65 | # todo: support stub items 66 | if item.exists() and not (cleanup is False or ( 67 | self.opt['nocleanup'] and cleanup is not True)): 68 | if self.kit.cleanup(item, data): 69 | if kwargs.get('summary'): 70 | kwargs['summary'] += '; cleanup' 71 | else: 72 | kwargs['summary'] = 'cleanup' 73 | kwargs.setdefault('show_diff', not self.opt['always']) 74 | return super().user_edit_entity(item, data, **kwargs) 75 | -------------------------------------------------------------------------------- /importdata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from datetime import datetime 3 | 4 | import pywikibot 5 | 6 | pywikibot.handle_args() 7 | 8 | site = pywikibot.Site('wikidata', 'wikidata') 9 | repo = site.data_repository() 10 | 11 | path = pywikibot.input('Path to file: ') 12 | date = pywikibot.WbTime(year=2025, month=1, day=1, site=repo) 13 | 14 | ref_item = 'Q134497819' 15 | 16 | with open(path, 'r', encoding='utf-8') as file_data: 17 | next(file_data) # header 18 | for line in file_data: 19 | if not line: 20 | continue 21 | split = line.split('\t') 22 | item = pywikibot.ItemPage(repo, split[0]) 23 | hasNewClaim = False 24 | upToDateClaims = [] 25 | count = int(split[1]) 26 | for claim in item.claims.get('P1082', []): 27 | if claim.getRank() == 'preferred': 28 | claim.setRank('normal') 29 | upToDateClaims.append(claim) 30 | if (claim.qualifiers.get('P585') 31 | and claim.qualifiers['P585'][0].target_equals(date)): 32 | hasNewClaim = True 33 | break 34 | 35 | if hasNewClaim is True: 36 | continue 37 | 38 | newClaim = pywikibot.Claim(repo, 'P1082') 39 | newClaim.setTarget(pywikibot.WbQuantity(count, site=repo)) 40 | newClaim.setRank('preferred') 41 | 42 | newClaim_date = pywikibot.Claim(repo, 'P585', is_qualifier=True) 43 | newClaim_date.setTarget(date) 44 | newClaim.addQualifier(newClaim_date) 45 | 46 | newClaim_criter = pywikibot.Claim(repo, 'P1013', is_qualifier=True) 47 | newClaim_criter.setTarget(pywikibot.ItemPage(repo, 'Q2641256')) 48 | newClaim.addQualifier(newClaim_criter) 49 | 50 | newClaim_men = pywikibot.Claim(repo, 'P1540', is_qualifier=True) 51 | newClaim_men.setTarget(pywikibot.WbQuantity(int(split[2]), 
site=repo)) 52 | newClaim.addQualifier(newClaim_men) 53 | 54 | newClaim_women = pywikibot.Claim(repo, 'P1539', is_qualifier=True) 55 | newClaim_women.setTarget(pywikibot.WbQuantity(int(split[3]), site=repo)) 56 | newClaim.addQualifier(newClaim_women) 57 | 58 | ref = pywikibot.Claim(repo, 'P248', is_reference=True) 59 | ref.setTarget(pywikibot.ItemPage(repo, ref_item)) 60 | 61 | now = datetime.now() 62 | access_date = pywikibot.Claim(repo, 'P813', is_reference=True) 63 | access_date.setTarget(pywikibot.WbTime(year=now.year, month=now.month, 64 | day=now.day, site=repo)) 65 | newClaim.addSources([ref, access_date]) 66 | 67 | data = {'claims':[newClaim.toJSON()]} 68 | for upToDateClaim in upToDateClaims: 69 | data['claims'].append(upToDateClaim.toJSON()) 70 | 71 | item.editEntity( 72 | data, asynchronous=True, 73 | summary=f'Adding [[Property:P1082]]: {count} per data from ' 74 | f'[[Q3504917]], see [[{ref_item}]]') 75 | -------------------------------------------------------------------------------- /tools.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pywikibot 4 | from pywikibot.tools.chars import url2string 5 | 6 | FULL_ARTICLE_REGEX = r'\A[\s\S]*\Z' 7 | 8 | 9 | class FileRegexHolder: 10 | 11 | replaceR = None 12 | FLOAT_PATTERN = r'\d+(?:\.\d+)?' 13 | 14 | @classmethod 15 | def get_regex(cls, site): 16 | if not cls.replaceR: 17 | magic = ['img_baseline', 'img_border', 'img_bottom', 'img_center', 18 | 'img_class', 'img_framed', 'img_frameless', 'img_left', 19 | 'img_middle', 'img_none', 'img_right', 'img_sub', 20 | 'img_super', 'img_text_bottom', 'img_text_top', 21 | 'img_thumbnail', 'img_top'] 22 | words = [] 23 | for magicword in magic: 24 | words.extend(site.getmagicwords(magicword)) 25 | replace = '|'.join(map(re.escape, words)) 26 | for magicword in site.getmagicwords('img_manualthumb'): 27 | replace += '|' + magicword.replace('$1', cls.FLOAT_PATTERN) 28 | for magicword in site.getmagicwords('img_upright'): 29 | replace += '|' + magicword.replace('$1', cls.FLOAT_PATTERN) 30 | for magicword in site.getmagicwords('img_width'): 31 | replace += '|' + magicword.replace('$1', r'\d+') 32 | cls.replaceR = re.compile(replace) 33 | return cls.replaceR 34 | 35 | 36 | def deduplicate(arg): 37 | # todo: merge with filter_unique? 
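    # removes later duplicates in place, keeping first occurrences; for example
    # deduplicate(lst) turns lst == ['a', 'b', 'a', 'c', 'b'] into ['a', 'b', 'c']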
38 | for index, member in enumerate(arg, start=1): 39 | while member in arg[index:]: 40 | arg.pop(arg.index(member, index)) 41 | 42 | 43 | def parse_image(text, site): 44 | # TODO: merge with .migrate_infobox.InfoboxMigratingBot.handle_image 45 | image = caption = None 46 | imgR = re.compile(r'\[\[\s*(?:%s) *:' % '|'.join(site.namespaces[6]), 47 | flags=re.I) 48 | if imgR.match(text): 49 | split = text.rstrip()[:-2].split('|') 50 | matchR = FileRegexHolder.get_regex(site) 51 | while split[1:]: 52 | tmp = split.pop().strip() 53 | if not matchR.fullmatch(tmp): 54 | caption = tmp 55 | break 56 | if caption: 57 | while caption.count('[') != caption.count(']'): 58 | caption = split.pop() + '|' + caption 59 | caption = caption.rstrip('.').strip() 60 | image = split[0].partition(':')[2].rstrip(']') 61 | image = url2string(image) 62 | image = re.sub('[ _]+', ' ', image).strip() 63 | 64 | return image, caption 65 | 66 | 67 | def get_best_statements(statements): 68 | best = [] 69 | best_rank = 'normal' 70 | for st in statements: 71 | if st.rank == best_rank: 72 | best.append(st) 73 | elif st.rank == 'preferred': 74 | best[:] = [st] 75 | best_rank = st.rank 76 | return best 77 | 78 | 79 | def iter_all_snaks(data): 80 | for claims in data.values(): 81 | for claim in claims: 82 | yield claim 83 | for snaks in claim.qualifiers.values(): 84 | yield from snaks 85 | for ref in claim.sources: 86 | for snaks in ref.values(): 87 | yield from snaks 88 | -------------------------------------------------------------------------------- /lua_formatter.py: -------------------------------------------------------------------------------- 1 | """This module is deprecated. Using JSON is more practical.""" 2 | 3 | __all__ = ( 4 | 'format_dictionary', 5 | 'format_list', 6 | 'QUOTES_SINGLE', 7 | 'QUOTES_DOUBLE', 8 | ) 9 | 10 | QUOTES_SINGLE = 1 11 | QUOTES_DOUBLE = 2 12 | 13 | 14 | def _indent(level, **kwargs): 15 | if kwargs.get('use_tabs') is True: 16 | return (level + 1) * '\t' 17 | else: 18 | return ((level + 1) * 4) * ' ' 19 | 20 | 21 | def _wrap_quotes(text, quote): 22 | if quote in text: 23 | text = text.replace(quote, '\\' + quote) 24 | return f'{quote}{text}{quote}' 25 | 26 | 27 | def _format_string(text, **kwargs): 28 | opt = kwargs.get('force_quotes', 0) 29 | if opt == QUOTES_SINGLE: 30 | text = _wrap_quotes(text, "'") 31 | elif opt == QUOTES_DOUBLE: 32 | text = _wrap_quotes(text, '"') 33 | else: 34 | assert opt == 0 35 | text = (_wrap_quotes(text, '"') 36 | if "'" in text else _wrap_quotes(text, "'")) 37 | return text 38 | 39 | 40 | def _format_key(key, **kwargs): 41 | if key is None: 42 | return '[nil]' 43 | elif isinstance(key, (int, float)): 44 | key = str(key) 45 | return f'[{key.lower()}]' # lower for booleans (which are ints) 46 | else: 47 | assert isinstance(key, str) 48 | if not key.isalnum() or kwargs.get('quotes_always') is True: 49 | return '[%s]' % _format_string(key, **kwargs) 50 | else: 51 | return key 52 | 53 | 54 | def _format_value(value, level, **kwargs): 55 | if isinstance(value, dict): 56 | return _format_dictionary(value, level + 1, **kwargs) 57 | elif isinstance(value, (list, tuple)): 58 | return _format_list(value, level + 1, **kwargs) 59 | elif isinstance(value, (int, float)): 60 | return str(value).lower() # lower for booleans (which are ints) 61 | else: 62 | return _format_string(value, **kwargs) 63 | 64 | 65 | def _format_pair(key, value, level, **kwargs): 66 | return '%s = %s,' % (_format_key(key, **kwargs), 67 | _format_value(value, level, **kwargs)) 68 | 69 | 70 | def 
_format_list(data, level, **kwargs): 71 | init = '\n' + _indent(level, **kwargs) 72 | string = '' 73 | if kwargs.get('show_keys') is True: 74 | for i, item in enumerate(data, start=1): 75 | string += init + _format_pair(i, item, **kwargs) 76 | else: 77 | for item in data: 78 | string += init + _format_value(item, level, **kwargs) + ',' 79 | return '{' + string + '\n' + _indent(level-1, **kwargs) + '}' 80 | 81 | 82 | def _format_dictionary(data, level, **kwargs): 83 | init = '\n' + _indent(level, **kwargs) 84 | string = '' 85 | keys = data.keys() 86 | if kwargs.get('sort_keys') is True: 87 | keys = sorted(keys) 88 | for key in keys: 89 | string += init + _format_pair(key, data[key], level, **kwargs) 90 | return '{' + string + '\n' + _indent(level-1, **kwargs) + '}' 91 | 92 | 93 | def format_list(data, level=0, **kwargs): 94 | assert isinstance(data, (list, tuple)) 95 | return _format_list(data, level, **kwargs) 96 | 97 | 98 | def format_dictionary(data, level=0, **kwargs): 99 | assert isinstance(data, dict) 100 | return _format_dictionary(data, level, **kwargs) 101 | -------------------------------------------------------------------------------- /shift_ranks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pywikibot 3 | 4 | from pywikibot import pagegenerators 5 | 6 | from query_store import QueryStore 7 | from wikidata import WikidataEntityBot 8 | 9 | 10 | class RanksShiftingBot(WikidataEntityBot): 11 | 12 | end_prop = 'P582' 13 | reason_prop = 'P2241' 14 | use_from_page = False 15 | 16 | def __init__(self, generator, **kwargs): 17 | self.available_options.update({ 18 | 'limit': 500, 19 | }) 20 | super().__init__(**kwargs) 21 | self.store = QueryStore() 22 | self._generator = generator or self.custom_generator() 23 | 24 | def custom_generator(self): 25 | query = self.store.build_query( 26 | 'shift_ranks', 27 | limit=self.opt['limit'], 28 | prop=self.end_prop 29 | ) 30 | return pagegenerators.WikidataSPARQLPageGenerator(query, site=self.repo) 31 | 32 | @property 33 | def generator(self): 34 | return pagegenerators.PreloadingEntityGenerator(self._generator) 35 | 36 | @property 37 | def summary(self): 38 | return ('undeprecate claims and shift other ranks, see ' 39 | '[[Special:MyLanguage/Help:Ranking|Help:Ranking]]') 40 | 41 | def treat_page_and_item(self, page, item): 42 | changed = False 43 | for claims in item.claims.values(): 44 | by_rank = { 45 | 'preferred': [], 46 | 'normal': [], 47 | 'deprecated': [], 48 | } 49 | ok = False 50 | for claim in claims: 51 | by_rank[claim.rank].append(claim) 52 | if claim.rank == 'preferred': 53 | if claim.qualifiers.get(self.end_prop): 54 | ok = False 55 | break 56 | elif claim.rank == 'deprecated': 57 | if claim.qualifiers.get(self.reason_prop): 58 | ok = False 59 | break 60 | if not ok: 61 | ok = bool(claim.qualifiers.get(self.end_prop)) 62 | if not ok: 63 | continue 64 | for claim in by_rank['deprecated']: 65 | if claim.qualifiers.get(self.end_prop): 66 | claim.setRank('normal') 67 | changed = True 68 | if not by_rank['preferred']: 69 | for claim in by_rank['normal']: 70 | if not claim.qualifiers.get(self.end_prop): 71 | claim.setRank('preferred') 72 | changed = True 73 | if changed: 74 | self.user_edit_entity(item, summary=self.summary) 75 | 76 | 77 | def main(*args): 78 | options = {} 79 | local_args = pywikibot.handle_args(args) 80 | site = pywikibot.Site() 81 | genFactory = pagegenerators.GeneratorFactory(site=site) 82 | for arg in genFactory.handle_args(local_args): 83 | if 
arg.startswith('-'): 84 | arg, sep, value = arg.partition(':') 85 | if value != '': 86 | options[arg[1:]] = int(value) if value.isdigit() else value 87 | else: 88 | options[arg[1:]] = True 89 | 90 | generator = genFactory.getCombinedGenerator() 91 | bot = RanksShiftingBot(generator=generator, site=site, **options) 92 | bot.run() 93 | 94 | 95 | if __name__ == '__main__': 96 | main() 97 | -------------------------------------------------------------------------------- /update_deathdate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import re 3 | 4 | from datetime import datetime 5 | from itertools import chain 6 | 7 | import pywikibot 8 | 9 | from pywikibot import i18n, textlib 10 | from pywikibot.bot import ExistingPageBot, SingleSiteBot 11 | from pywikibot.pagegenerators import PreloadingGenerator 12 | 13 | birth = { 14 | 'wikipedia': { 15 | 'cs': r'Narození v roce (\d+)', 16 | }, 17 | } 18 | 19 | death = { 20 | 'wikipedia': { 21 | 'cs': 'Úmrtí v roce %d', 22 | }, 23 | } 24 | 25 | replace_pattern = '[[{inside}]] ({left}{year1}{right}–{left}{year2}{right})' 26 | 27 | 28 | class DeathDateUpdatingBot(SingleSiteBot, ExistingPageBot): 29 | 30 | use_redirects = False 31 | 32 | def __init__(self, **kwargs): 33 | self.available_options.update({ 34 | 'year': datetime.today().year, 35 | }) 36 | super().__init__(**kwargs) 37 | self.categoryR = re.compile(i18n.translate(self.site, birth)) 38 | self.year = self.opt['year'] 39 | 40 | @property 41 | def generator(self): 42 | while True: 43 | category = pywikibot.Category( 44 | self.site, i18n.translate(self.site, death) % self.year) 45 | yield from category.articles(content=True, namespaces=[0]) 46 | self.year -= 1 47 | 48 | def treat_page(self): 49 | page = self.current_page 50 | categories = textlib.getCategoryLinks(page.text, site=self.site) 51 | titles = (cat.title(with_ns=False, with_section=False, 52 | allow_interwiki=False, insite=self.site) 53 | for cat in categories) 54 | matches = [match for match in map(self.categoryR.fullmatch, titles) 55 | if match] 56 | if not matches: 57 | pywikibot.info('No birthdate category found') 58 | return 59 | fullmatch = matches.pop() 60 | if matches: 61 | pywikibot.info('Multiple birthdate categories found') 62 | return 63 | birth_date = fullmatch[1] 64 | search_query = f'linksto:"{page.title()}"' # todo: sanitize? 
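        # together with the linksto: part above, the fragments below form one CirrusSearch
        # query, e.g. (hypothetical title and birth year):
        #   linksto:"John Doe" insource:/\[\[[^\[\]]+\]\] +\(\* *\[*1900\]*\)/ -intitle:"Seznam"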
65 | search_query += r' insource:/\[\[[^\[\]]+\]\]' 66 | search_query += fr' +\(\* *\[*{birth_date}\]*\)/' 67 | search_query += ' -intitle:"Seznam"' 68 | pattern = r'\[\[((?:%s)(?:\|[^\[\]]+)?)\]\]' % '|'.join( 69 | re.escape(p.title()) for p in chain([page], page.backlinks( 70 | follow_redirects=False, filter_redirects=True, namespaces=[0]))) 71 | pattern += fr' +\(\* *(\[\[)?({birth_date})(\]\])?\)' 72 | regex = re.compile(pattern) 73 | for ref_page in PreloadingGenerator( 74 | page.site.search(search_query, namespaces=[0])): 75 | new_text, num = regex.subn(self.replace_callback, ref_page.text) 76 | if num: 77 | self.userPut(ref_page, ref_page.text, new_text, 78 | summary='doplnění data úmrtí') 79 | 80 | def replace_callback(self, match): 81 | inside, left, year1, right = match.groups('') 82 | return replace_pattern.format( 83 | inside=inside, left=left, right=right, year1=year1, 84 | year2=self.year) 85 | 86 | 87 | def main(*args): 88 | options = {} 89 | for arg in pywikibot.handle_args(args): 90 | if arg.startswith('-'): 91 | arg, sep, value = arg.partition(':') 92 | if value != '': 93 | options[arg[1:]] = value if not value.isdigit() else int(value) 94 | else: 95 | options[arg[1:]] = True 96 | 97 | bot = DeathDateUpdatingBot(**options) 98 | bot.run() 99 | 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /wikitext.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from itertools import chain 3 | from operator import methodcaller 4 | 5 | import pywikibot 6 | 7 | from pywikibot import pagegenerators 8 | from pywikibot.bot import SingleSiteBot, ExistingPageBot 9 | 10 | from custome_fixes import all_fixes 11 | 12 | 13 | class WikitextFixingBot(SingleSiteBot, ExistingPageBot): 14 | 15 | use_redirects = False 16 | 17 | ''' 18 | Class for bots that save wikitext. It uses all demanded fixes from 19 | custome_fixes.py and applies them before cosmetic changes are 20 | executed. 21 | 22 | You can enable each fix by using its name as a command line argument 23 | or all fixes using -all (then, each used fix is excluded). 
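    A hypothetical invocation, assuming custome_fixes.py defines a fix named
    'typos':

        python pwb.py wikitext -typos <generator options>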
24 | ''' 25 | 26 | def __init__(self, **kwargs): 27 | do_all = kwargs.pop('all', False) is True 28 | self.fixes = [] 29 | for fix, cls in all_fixes.items(): 30 | if do_all: 31 | demand = fix not in kwargs 32 | kwargs.pop(fix, None) 33 | else: 34 | demand = bool(kwargs.pop(fix, False)) 35 | if demand: 36 | options = {} 37 | for opt in cls.options.keys(): 38 | if opt in kwargs: 39 | options[opt] = kwargs.pop(opt) 40 | self.fixes.append(cls(**options)) 41 | 42 | self.fixes.sort(key=lambda fix: fix.order) 43 | 44 | super().__init__(**kwargs) 45 | for fix in self.fixes: 46 | fix.site = self.site 47 | if not self.generator: 48 | pywikibot.info('No generator provided, making own generator...') 49 | self.generator = pagegenerators.PreloadingGenerator( 50 | chain.from_iterable(map(methodcaller('generator'), self.fixes))) 51 | 52 | def treat_page(self): 53 | summaries = [] 54 | page = self.current_page 55 | old_text = page.text 56 | callbacks = self.applyFixes(page, summaries) 57 | if len(summaries) < 1: 58 | pywikibot.info('No replacements worth saving') 59 | return 60 | pywikibot.showDiff(old_text, page.text) 61 | # todo: method 62 | callback = lambda _, exc: [cb() for cb in callbacks if not exc] 63 | # todo: put_current 64 | self._save_page(page, page.save, callback=callback, 65 | summary='; '.join(summaries)) 66 | 67 | def applyFixes(self, page, summaries=[]): 68 | callbacks = [] 69 | for fix in self.fixes: 70 | fix.apply(page, summaries, callbacks) 71 | return callbacks 72 | 73 | def userPut(self, page, oldtext, newtext, **kwargs): 74 | if oldtext.rstrip() == newtext.rstrip(): 75 | pywikibot.info( 76 | f'No changes were needed on {page.title(as_link=True)}') 77 | return 78 | 79 | self.current_page = page 80 | 81 | show_diff = kwargs.pop('show_diff', not self.opt['always']) 82 | 83 | if show_diff: 84 | pywikibot.showDiff(oldtext, newtext) 85 | 86 | if 'summary' in kwargs: 87 | pywikibot.info(f"Edit summary: {kwargs['summary']}") 88 | 89 | page.text = newtext 90 | return self._save_page(page, self.fix_wikitext, page, **kwargs) 91 | 92 | def fix_wikitext(self, page, *args, **kwargs): 93 | summaries = [kwargs['summary']] 94 | callbacks = self.applyFixes(page, summaries) 95 | 96 | kwargs['summary'] = '; '.join(summaries) 97 | # todo: method 98 | kwargs['callback'] = lambda _, exc: [cb() for cb in callbacks 99 | if not exc] 100 | page.save(*args, **kwargs) 101 | 102 | 103 | def main(*args): 104 | options = {} 105 | local_args = pywikibot.handle_args(args) 106 | genFactory = pagegenerators.GeneratorFactory() 107 | for arg in genFactory.handle_args(local_args): 108 | if arg.startswith('-'): 109 | arg, sep, value = arg.partition(':') 110 | if value != '': 111 | options[arg[1:]] = value if not value.isdigit() else int(value) 112 | else: 113 | options[arg[1:]] = True 114 | 115 | generator = genFactory.getCombinedGenerator(preload=True) 116 | bot = WikitextFixingBot(generator=generator, **options) 117 | bot.run() 118 | 119 | 120 | if __name__ == '__main__': 121 | main() 122 | -------------------------------------------------------------------------------- /nounit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """This script is obsolete!""" 3 | import pywikibot 4 | 5 | from pywikibot import pagegenerators 6 | 7 | from query_store import QueryStore 8 | from wikidata import WikidataEntityBot 9 | 10 | 11 | class UnitsFixingBot(WikidataEntityBot): 12 | 13 | good_item = 'Q21027105' 14 | use_from_page = False 15 | 16 | def __init__(self, **kwargs): 
17 | super().__init__(**kwargs) 18 | self.store = QueryStore() 19 | 20 | @property 21 | def generator(self): 22 | query = self.store.build_query('units', good=self.good_item) 23 | return pagegenerators.PreloadingEntityGenerator( 24 | pagegenerators.WikidataSPARQLPageGenerator(query, site=self.repo)) 25 | 26 | def filterProperty(self, prop_page): 27 | if prop_page.type != 'quantity': 28 | return False 29 | prop_page.get() 30 | if 'P2237' not in prop_page.claims: 31 | return False 32 | for claim in prop_page.claims['P2237']: 33 | if claim.snaktype == 'novalue': 34 | continue 35 | if (claim.snaktype == 'value' and 36 | claim.target_equals(self.good_item)): 37 | continue 38 | return False 39 | return True 40 | 41 | def treat_page_and_item(self, page, item): 42 | for prop, claims in item.claims.items(): 43 | for claim in claims: 44 | if claim.type == 'quantity': 45 | if self.checkProperty(prop): 46 | target = claim.getTarget() 47 | if self.change_target(target): 48 | pywikibot.output('Removing unit for property %s' % prop) 49 | self._save_page( 50 | item, self._save_entity, claim.changeTarget, 51 | target, summary='removing invalid unit, see ' 52 | "[[P:%s#P2237|property's page]]" % prop) 53 | else: 54 | self.bad_cache.add(prop) 55 | 56 | json = claim.toJSON() 57 | changed = False 58 | for qprop, snaks in claim.qualifiers.items(): 59 | if not self.checkProperty(qprop): 60 | continue 61 | new_snaks = snaks.copy() 62 | if self.handle_snaks(new_snaks): 63 | changed = True 64 | json['qualifiers'][qprop] = new_snaks 65 | #pywikibot.output("Removing unit for qualifier %s of %s" % (qprop, prop)) 66 | 67 | for i, source in enumerate(claim.sources): 68 | for ref_prop, snaks in source.items(): 69 | if not self.checkProperty(ref_prop): 70 | continue 71 | new_snaks = snaks.copy() 72 | if self.handle_snaks(new_snaks): 73 | changed = True 74 | json['references'][i]['snaks'][ref_prop] = new_snaks 75 | #pywikibot.output("Removing unit for reference %s of %s" % (ref_prop, prop)) 76 | 77 | if changed is True: 78 | data = {'claims': [json]} 79 | self.user_edit_entity(item, data, summary='removing invalid unit(s)') 80 | 81 | def change_target(self, target): 82 | if target is None or target._unit == '1': 83 | return False 84 | 85 | target._unit = '1' 86 | return True 87 | 88 | def handle_snaks(self, snaks): 89 | changed = False 90 | for snak in snaks: 91 | target = snak.getTarget() 92 | if self.change_target(target): 93 | changed = True 94 | snak.setTarget(target) 95 | return changed 96 | 97 | 98 | def main(*args): 99 | options = {} 100 | for arg in pywikibot.handle_args(args): 101 | if arg.startswith('-'): 102 | arg, sep, value = arg.partition(':') 103 | if value != '': 104 | options[arg[1:]] = value if not value.isdigit() else int(value) 105 | else: 106 | options[arg[1:]] = True 107 | 108 | site = pywikibot.Site('wikidata', 'wikidata') 109 | bot = UnitsFixingBot(site=site, **options) 110 | bot.run() 111 | 112 | 113 | if __name__ == '__main__': 114 | main() 115 | -------------------------------------------------------------------------------- /cleanup_redirects.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import csv 3 | import re 4 | import urllib 5 | 6 | from operator import methodcaller 7 | from urllib.request import urlopen 8 | 9 | import pywikibot 10 | 11 | from pywikibot.bot import WikidataBot 12 | from pywikibot.exceptions import NoPageError 13 | 14 | from merger import Merger 15 | 16 | 17 | class WikidataRedirectsBot(WikidataBot): 18 | 19 | 
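    # Works through Alphos' redirect-conflict reports on Toolforge (labs_url and
    # sub_directory below) and merges the item of each redirect page with the item of
    # its target (the merge direction is decided by Merger.sort_for_merge), or just
    # moves the sitelink when the target has no item yet.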
labs_url = 'https://tools.wmflabs.org' 20 | sub_directory = 'wikidata-redirects-conflicts-reports/reports' 21 | namespaces = {0, 10, 14} 22 | ignore = {'ignore_save_related_errors': True, 23 | 'ignore_server_errors': True, 24 | } 25 | treat_missing_item = False 26 | use_redirects = True 27 | 28 | def __init__(self, **kwargs): 29 | self.available_options.update({ 30 | 'always': False, 31 | 'date': None, 32 | 'force': False, 33 | 'skip': [], 34 | 'start': None, 35 | 'touch': False, 36 | }) 37 | super().__init__(**kwargs) 38 | 39 | @property 40 | def generator(self): 41 | if not self.opt['date']: 42 | self.options['date'] = pywikibot.input( 43 | 'Enter the date when the reports were created') 44 | 45 | url = f"{self.labs_url}/{self.sub_directory}/{self.opt['date']}/" 46 | response = urlopen(url) 47 | regex = re.compile('href="([^"]+)"') 48 | not_yet = bool(self.opt['start']) 49 | for match in regex.finditer(response.read().decode()): 50 | file_name = match[1] 51 | dbname = file_name.partition('-')[0] 52 | if not_yet: 53 | if dbname == self.opt['start']: 54 | not_yet = False 55 | else: 56 | continue 57 | 58 | if dbname in self.opt['skip']: 59 | continue 60 | 61 | try: 62 | site = pywikibot.site.APISite.fromDBName(dbname) 63 | except ValueError as e: 64 | pywikibot.exception(e) 65 | continue 66 | 67 | pywikibot.info(f"Working on '{dbname}'") 68 | resp = urlopen(url + file_name) 69 | lines = resp.readlines() 70 | if not lines: 71 | continue 72 | lines.pop(0) 73 | f = map(methodcaller('decode', 'utf-8'), lines) 74 | for row in csv.reader(f, delimiter='\t'): 75 | if len(set(row[1:3])) > 1: 76 | continue 77 | if int(row[1]) not in self.namespaces: 78 | continue 79 | if '#' in row[4]: 80 | continue 81 | 82 | yield pywikibot.Page(site, row[3], ns=int(row[1])) 83 | 84 | @property 85 | def summary(self): 86 | return (f"based on [[toollabs:{self.sub_directory}/{self.opt['date']}/" 87 | "|Alphos' reports]]") 88 | 89 | def user_confirm(self, *args): 90 | return True 91 | 92 | def treat_page_and_item(self, page, item): 93 | items = [item] 94 | 95 | target = page.getRedirectTarget() 96 | try: 97 | items.append(target.data_item()) 98 | target.get() 99 | except NoPageError: 100 | self._save_page(items[0], items[0].setSitelink, target, 101 | **self.ignore) # todo: summary 102 | return 103 | 104 | Merger.sort_for_merge(items, key=['sitelinks', 'id']) 105 | if not self._save_page(items[1], Merger.clean_merge, items[1], items[0], 106 | safe=not self.opt['force'], 107 | ignore_conflicts=['description'], 108 | summary=self.summary, **self.ignore): 109 | return 110 | 111 | if self.opt['touch'] is True: 112 | self._save_page(target, target.touch, **self.ignore) 113 | 114 | 115 | def main(*args): 116 | options = {} 117 | skip = [] 118 | for arg in pywikibot.handle_args(args): 119 | if arg.startswith('-skip:'): 120 | skip.append(arg.partition(':')[2]) 121 | continue 122 | if arg.startswith('-'): 123 | arg, sep, value = arg.partition(':') 124 | if value != '': 125 | options[arg[1:]] = value if not value.isdigit() else int(value) 126 | else: 127 | options[arg[1:]] = True 128 | 129 | bot = WikidataRedirectsBot(skip=skip, **options) 130 | bot.run() 131 | 132 | 133 | if __name__ == '__main__': 134 | main() 135 | -------------------------------------------------------------------------------- /fix_qualifiers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """This script is obsolete!""" 3 | import pywikibot 4 | 5 | from pywikibot import pagegenerators 6 | 7 | from 
query_store import QueryStore 8 | from wikidata import WikidataEntityBot 9 | 10 | 11 | class QualifiersFixingBot(WikidataEntityBot): 12 | 13 | blacklist = frozenset(['P143', 'P248', 'P459', 'P518', 'P577', 'P805', 14 | 'P972', 'P1065', 'P1135', 'P1480', 'P1545', 'P1932', 15 | 'P2315', 'P2701', 'P3274', ]) 16 | whitelist = frozenset(['P17', 'P21', 'P39', 'P155', 'P156', 'P281', 'P580', 17 | 'P582', 'P585', 'P669', 'P708', 'P969', 'P1355', 18 | 'P1356', ]) 19 | good_item = 'Q15720608' 20 | use_from_page = False 21 | 22 | def __init__(self, **kwargs): 23 | kwargs.update({ 24 | 'bad_cache': kwargs.get('bad_cache', []) + list(self.blacklist), 25 | 'good_cache': kwargs.get('good_cache', []) + list(self.whitelist), 26 | }) 27 | super().__init__(**kwargs) 28 | self.store = QueryStore() 29 | 30 | def filterProperty(self, prop_page): 31 | if prop_page.type == 'external-id': 32 | return False 33 | 34 | prop_page.get() 35 | if 'P31' not in prop_page.claims: 36 | pywikibot.warning('%s is not classified' % prop_page.getID()) 37 | return False 38 | 39 | for claim in prop_page.claims['P31']: 40 | if claim.target_equals(self.good_item): 41 | return True 42 | 43 | return False 44 | 45 | @property 46 | def generator(self): 47 | query = self.store.build_query( 48 | 'qualifiers', item=self.good_item, 49 | good=', wd:'.join(self.whitelist), 50 | bad=', wd:'.join(self.blacklist)) 51 | return pagegenerators.PreloadingItemGenerator( 52 | pagegenerators.WikidataSPARQLPageGenerator(query, site=self.repo)) 53 | 54 | def treat_page_and_item(self, page, item): 55 | for prop in item.claims.keys(): 56 | for claim in item.claims[prop]: 57 | moved = set() 58 | json = claim.toJSON() 59 | i = -1 60 | for source in claim.sources: 61 | i += 1 62 | for ref_prop in filter(self.checkProperty, source.keys()): 63 | for snak in source[ref_prop]: 64 | json.setdefault('qualifiers', {}).setdefault(ref_prop, []) 65 | for qual in (pywikibot.Claim.qualifierFromJSON(self.repo, q) 66 | for q in json['qualifiers'][ref_prop]): 67 | if qual.target_equals(snak.getTarget()): 68 | break 69 | else: 70 | snak.isReference = False 71 | snak.isQualifier = True 72 | json['qualifiers'][ref_prop].append(snak.toJSON()) 73 | json['references'][i]['snaks'][ref_prop].pop(0) 74 | if len(json['references'][i]['snaks'][ref_prop]) == 0: 75 | json['references'][i]['snaks'].pop(ref_prop) 76 | if len(json['references'][i]['snaks']) == 0: 77 | json['references'].pop(i) 78 | i -= 1 79 | moved.add(ref_prop) 80 | 81 | if len(moved) > 0: 82 | data = {'claims': [json]} 83 | self.user_edit_entity(item, data, summary=self.makeSummary(prop, moved), 84 | asynchronous=True) 85 | 86 | def makeSummary(self, prop, props): 87 | props = ['[[Property:P%s]]' % pid for pid in sorted( 88 | int(pid[1:]) for pid in props)] 89 | return '[[Property:%s]]: moving misplaced reference%s %s to qualifiers' % ( 90 | prop, 's' if len(props) > 1 else '', '%s and %s' % ( 91 | ', '.join(props[:-1]), props[-1]) if len(props) > 1 else props[0]) 92 | 93 | 94 | def main(*args): 95 | options = {} 96 | for arg in pywikibot.handle_args(args): 97 | if arg.startswith('-'): 98 | arg, sep, value = arg.partition(':') 99 | if value != '': 100 | options[arg[1:]] = value if not value.isdigit() else int(value) 101 | else: 102 | options[arg[1:]] = True 103 | 104 | site = pywikibot.Site('wikidata', 'wikidata') 105 | bot = QualifiersFixingBot(site=site, **options) 106 | bot.run() 107 | 108 | 109 | if __name__ == '__main__': 110 | main() 111 | 
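For orientation, a minimal sketch of driving the bot above from Python instead of the command line; it mirrors what `main()` in fix_qualifiers.py does. The import path is an assumption about how the script is deployed, and `always=True` is just the standard pywikibot option for saving without prompting.
```
import pywikibot

# Hypothetical import path; adjust to wherever fix_qualifiers.py is installed.
from fix_qualifiers import QualifiersFixingBot

# Same wiring as main() above: a Wikidata site plus keyword options.
site = pywikibot.Site('wikidata', 'wikidata')
bot = QualifiersFixingBot(site=site, always=True)
bot.run()
```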
-------------------------------------------------------------------------------- /captiontoimage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pywikibot 3 | 4 | from pywikibot import pagegenerators 5 | 6 | from query_store import QueryStore 7 | from wikidata import WikidataEntityBot 8 | 9 | 10 | class CaptionToImageBot(WikidataEntityBot): 11 | 12 | ''' 13 | Bot re-adding file captions as qualifiers to the files on Wikidata 14 | 15 | Supported parameters: 16 | * -removeall - if a caption cannot be reused, remove it as well 17 | ''' 18 | 19 | caption_property = 'P2096' 20 | image_property = 'P18' 21 | use_from_page = False 22 | 23 | def __init__(self, generator, **kwargs): 24 | self.available_options.update({ 25 | 'removeall': False 26 | }) 27 | kwargs.setdefault('bad_cache', []).append(self.caption_property) 28 | super().__init__(**kwargs) 29 | self.store = QueryStore() 30 | self._generator = generator or self.custom_generator() 31 | 32 | def custom_generator(self): 33 | query = self.store.build_query('captions', prop=self.caption_property) 34 | return pagegenerators.WikidataSPARQLPageGenerator(query, site=self.repo) 35 | 36 | @property 37 | def generator(self): 38 | return pagegenerators.PreloadingEntityGenerator(self._generator) 39 | 40 | def filterProperty(self, prop_page): 41 | return prop_page.type == 'commonsMedia' 42 | 43 | def skip_page(self, item): 44 | return super().skip_page(item) or ( 45 | self.caption_property not in item.claims) 46 | 47 | def _save_entity(self, func, *args, **kwargs): 48 | # fixme upstream 49 | if 'asynchronous' in kwargs: 50 | kwargs.pop('asynchronous') 51 | return func(*args, **kwargs) 52 | 53 | def treat_page_and_item(self, page, item): 54 | our_prop = self.image_property 55 | if our_prop not in item.claims: 56 | our_prop = None 57 | for prop in item.claims: 58 | if self.checkProperty(prop): 59 | if our_prop is None: 60 | our_prop = prop 61 | else: 62 | pywikibot.info('More than one media property used') 63 | return 64 | 65 | remove_claims = [] 66 | remove_all = self.opt['removeall'] is True 67 | if our_prop is None: 68 | pywikibot.info('No media property found') 69 | if remove_all: 70 | remove_claims.extend(item.claims[self.caption_property]) 71 | self._save_page(item, self._save_entity, item.removeClaims, 72 | remove_claims, summary='removing redundant property') 73 | return 74 | 75 | media_claim = item.claims[our_prop][0] 76 | if len(item.claims[our_prop]) > 1: 77 | pywikibot.info(f'Property {our_prop} has more than one value') 78 | return 79 | 80 | for caption in item.claims[self.caption_property]: 81 | if self.caption_property in media_claim.qualifiers: 82 | language = caption.getTarget().language 83 | has_same_lang = any( 84 | claim.getTarget().language == language 85 | for claim in media_claim.qualifiers[self.caption_property]) 86 | if has_same_lang: 87 | pywikibot.info(f'Property {our_prop} already has ' 88 | f'a caption in language {language}') 89 | if remove_all: 90 | remove_claims.append(caption) 91 | continue 92 | 93 | qualifier = caption.copy() 94 | qualifier.isQualifier = True 95 | if self._save_page(item, self._save_entity, media_claim.addQualifier, 96 | qualifier): 97 | remove_claims.append(caption) 98 | 99 | if remove_claims: 100 | self._save_page(item, self._save_entity, item.removeClaims, 101 | remove_claims, summary='removing redundant property') 102 | 103 | 104 | def main(*args): 105 | options = {} 106 | local_args = pywikibot.handle_args(args) 107 | site = 
pywikibot.Site() 108 | genFactory = pagegenerators.GeneratorFactory(site=site) 109 | for arg in genFactory.handle_args(local_args): 110 | if arg.startswith('-'): 111 | arg, sep, value = arg.partition(':') 112 | if value != '': 113 | options[arg[1:]] = value if not value.isdigit() else int(value) 114 | else: 115 | options[arg[1:]] = True 116 | 117 | generator = genFactory.getCombinedGenerator() 118 | bot = CaptionToImageBot(generator=generator, site=site, **options) 119 | bot.run() 120 | 121 | 122 | if __name__ == '__main__': 123 | main() 124 | -------------------------------------------------------------------------------- /check_disambigs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pywikibot 3 | 4 | from pywikibot import pagegenerators 5 | from pywikibot.exceptions import NoPageError 6 | 7 | from error_reporting import ErrorReportingBot 8 | from wikidata import WikidataEntityBot 9 | 10 | 11 | class DisambigsCheckingBot(WikidataEntityBot, ErrorReportingBot): 12 | 13 | disambig_items = {'Q4167410', 'Q22808320', 'Q61996773'} 14 | file_name = 'log_disambigs.txt' 15 | page_pattern = 'User:%s/Disambig_errors' 16 | skip = { 17 | 'brwiki', 18 | 'enwiki', 19 | 'hakwiki', 20 | 'igwiki', 21 | 'mkwiki', 22 | 'mznwiki', 23 | 'specieswiki', 24 | 'towiki', 25 | } 26 | use_from_page = False 27 | 28 | def __init__(self, generator=None, **kwargs): 29 | self.available_options.update({ 30 | 'limit': 1000, 31 | 'min_sitelinks': 1, 32 | 'offset': 0, 33 | #'only': None, todo 34 | }) 35 | super().__init__(**kwargs) 36 | self.generator = pagegenerators.PreloadingEntityGenerator( 37 | generator or self.custom_generator() 38 | ) 39 | 40 | def skip_page(self, item): 41 | return super().skip_page(item) or ( 42 | item.title(as_link=True, insite=self.repo) in self.log_page.text 43 | or not self.is_disambig(item)) 44 | 45 | def is_disambig(self, item): 46 | for claim in item.claims.get('P31', []): 47 | if any(claim.target_equals(cls) for cls in self.disambig_items): 48 | return True 49 | return False 50 | 51 | def custom_generator(self): 52 | # todo: move to store 53 | QUERY = '''SELECT ?item WITH { 54 | SELECT DISTINCT ?item { 55 | ?item wdt:P31 wd:%s; wikibase:sitelinks ?links . 56 | FILTER( ?links >= %i ) . 57 | MINUS { ?item wdt:P31 wd:Q101352 } . 58 | } OFFSET %i LIMIT %i 59 | } AS %%disambig WHERE { 60 | INCLUDE %%disambig . 61 | BIND( MD5( CONCAT( STR( ?item ), STR( RAND() ) ) ) AS ?hash ) . 
62 | } ORDER BY ?hash''' % (self.disambig_item, self.opt['min_sitelinks'], 63 | self.opt['offset'], self.opt['limit']) 64 | 65 | return pagegenerators.WikidataSPARQLPageGenerator( 66 | QUERY, site=self.repo, result_type=list) 67 | 68 | def treat_page_and_item(self, page, item): 69 | append_text = '' 70 | count = len(item.sitelinks) 71 | if count == 0: 72 | append_text += '\n** no sitelinks' 73 | for dbname in item.sitelinks: 74 | if dbname in self.skip: 75 | continue 76 | page = pywikibot.Page(item.sitelinks[dbname]) 77 | if not page.exists(): 78 | append_text += "\n** {} – {} – doesn't exist".format( 79 | dbname, page.title(as_link=True, insite=self.repo)) 80 | continue 81 | if page.isRedirectPage(): 82 | target = page.getRedirectTarget() 83 | try: 84 | target_item = target.data_item() 85 | except NoPageError: 86 | link = "''no item''" 87 | else: 88 | link = target_item.title(as_link=True, insite=self.repo) 89 | if not target.isDisambig(): 90 | link += ', not a disambiguation' 91 | append_text += '\n** {} – {} – redirects to {} ({})'.format( 92 | dbname, page.title(as_link=True, insite=self.repo), 93 | target.title(as_link=True, insite=self.repo), link) 94 | continue 95 | if not page.isDisambig(): 96 | append_text += '\n** {} – {} – not a disambiguation'.format( 97 | dbname, page.title(as_link=True, insite=self.repo)) 98 | 99 | if append_text: 100 | prep = '\n* %s' % item.title(as_link=True, insite=self.repo) 101 | if count > 0: 102 | prep += f' ({count} sitelink' + ('s' if count > 1 else '') + ')' 103 | append_text = prep + append_text 104 | self.append(append_text) 105 | 106 | 107 | def main(*args): 108 | options = {} 109 | local_args = pywikibot.handle_args(args) 110 | site = pywikibot.Site() 111 | genFactory = pagegenerators.GeneratorFactory(site=site) 112 | for arg in genFactory.handle_args(local_args): 113 | if arg.startswith('-'): 114 | arg, sep, value = arg.partition(':') 115 | if value != '': 116 | options[arg[1:]] = value if not value.isdigit() else int(value) 117 | else: 118 | options[arg[1:]] = True 119 | 120 | generator = genFactory.getCombinedGenerator() 121 | 122 | bot = DisambigsCheckingBot(site=site, generator=generator, **options) 123 | bot.run() 124 | 125 | 126 | if __name__ == '__main__': 127 | main() 128 | -------------------------------------------------------------------------------- /cswiki/heritage_lists_diff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import math 3 | from collections import defaultdict 4 | 5 | import mwparserfromhell 6 | import pywikibot 7 | 8 | from pywikibot import Coordinate, pagegenerators 9 | from pywikibot.textlib import removeDisabledParts 10 | from pywikibot.data.sparql import * 11 | from tqdm import tqdm 12 | 13 | from tools import get_best_statements 14 | 15 | 16 | def tidy(value) -> str: 17 | return removeDisabledParts(str(value), site=site).strip() 18 | 19 | 20 | def distance(coord1: Coordinate, coord2: Coordinate): 21 | lat1, lon1 = coord1.lat, coord1.lon 22 | lat2, lon2 = coord2.lat, coord2.lon 23 | radius = 6372.795 24 | 25 | cosValue = \ 26 | math.sin(math.radians(lat1)) * math.sin(math.radians(lat2)) \ 27 | + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.cos(math.radians(lon2 - lon1)) 28 | 29 | if cosValue > 1: 30 | return 0 31 | elif cosValue < -1: 32 | return radius * math.pi 33 | else: 34 | return radius * math.acos(cosValue) 35 | 36 | 37 | args = pywikibot.handle_args() 38 | 39 | site = pywikibot.Site('cs', 'wikipedia') 40 | repo = 
site.data_repository() 41 | image_repo = site.image_repository() 42 | 43 | genFactory = pagegenerators.GeneratorFactory(site=site) 44 | genFactory.handle_arg('-ns:0') 45 | genFactory.handle_args(args) 46 | generator = genFactory.getCombinedGenerator(preload=True) 47 | if not generator: 48 | genFactory.handle_arg('-ref:Template:Památky v Česku') 49 | generator = genFactory.getCombinedGenerator(preload=True) 50 | 51 | ignore_images = {'Noimage 2-1.png'} 52 | 53 | pywikibot.info('Loading all identifiers...') 54 | 55 | query = 'SELECT * WHERE { ?item wdt:P762 ?id }' 56 | obj = SparqlQuery(repo=repo) 57 | result = obj.select(query, full_data=True) 58 | id_to_items = defaultdict(set) 59 | for entry in result: 60 | item = entry['item'].getID() 61 | id_ = entry['id'].value 62 | id_to_items[id_].add(item) 63 | del result 64 | 65 | entries = [] 66 | 67 | for page in tqdm(generator): 68 | code = mwparserfromhell.parse(page.text) 69 | for template in code.ifilter_templates( 70 | matches=lambda t: t.name.matches('Památky v Česku')): 71 | item = None 72 | id_ = None 73 | if template.has('Wikidata', ignore_empty=True): 74 | linked_item = tidy(template.get('Wikidata').value) 75 | else: 76 | linked_item = None 77 | 78 | if not linked_item and template.has('Id_objektu', ignore_empty=True): 79 | id_ = tidy(template.get('Id_objektu').value) 80 | items = id_to_items[id_] 81 | if len(items) == 1: 82 | item_id = items.pop() 83 | item = pywikibot.ItemPage(repo, item_id) 84 | items.add(item_id) 85 | elif linked_item: 86 | item = pywikibot.ItemPage(repo, linked_item) 87 | 88 | if not item: 89 | continue 90 | 91 | item.get(get_redirect=True) 92 | while item.isRedirectPage(): 93 | item = item.getRedirectTarget() 94 | item.get(get_redirect=True) 95 | 96 | if template.has('Zeměpisná_šířka', ignore_empty=True) \ 97 | or template.has('Zeměpisná_délka', ignore_empty=True): 98 | best = get_best_statements(item.claims.get('P625', [])) 99 | if best and best[0].getTarget(): 100 | coord_wd = best[0].getTarget() 101 | coord_list = Coordinate( 102 | lat=float(str(template.get('Zeměpisná_šířka').value)), 103 | lon=float(str(template.get('Zeměpisná_délka').value)), 104 | site=repo) 105 | dist = distance(coord_list, coord_wd) 106 | if dist > 0.05: 107 | entries.append(( 108 | page.title(), 109 | item.getID(), 110 | coord_list, 111 | coord_wd, 112 | dist, 113 | )) 114 | 115 | entries.sort(key=lambda t: t[-1], reverse=True) 116 | 117 | text = '{| class="wikitable"' 118 | text += '\n! Seznam' 119 | text += ' !! Položka na WD' 120 | text += ' !! Souřadnice v seznamu' 121 | text += ' !! Souřadnice na WD' 122 | text += ' !! 
Vzdálenost [km]' 123 | for title, item_id, coord_list, coord_wd, dist in entries: 124 | text += '\n|-' 125 | text += f"\n| [[{title}|{title.removeprefix('Seznam kulturních památek ')}]]" 126 | text += f'\n| [[d:{item_id}|{item_id}]]' 127 | text += '\n| {{Souřadnice|%f|%f}}' % (coord_list.lat, coord_list.lon) 128 | text += '\n| {{Souřadnice|%f|%f}}' % (coord_wd.lat, coord_wd.lon) 129 | text += f'\n| {dist:.4f}' 130 | text += '\n|}' 131 | 132 | out_page = pywikibot.Page(site, 'Matěj Suchánek/Reports/Souřadnice', ns=2) 133 | out_page.text = text 134 | out_page.save(summary='seznam', bot=False, minor=False) 135 | -------------------------------------------------------------------------------- /split_names_and_titles.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import re 3 | 4 | import pywikibot 5 | 6 | from pywikibot import pagegenerators, textlib 7 | from pywikibot.tools import first_upper 8 | from pywikibot.textlib import mwparserfromhell 9 | 10 | try: 11 | from wikitext import WikitextFixingBot 12 | except ImportError: 13 | from pywikibot.bot import SingleSiteBot, ExistingPageBot 14 | 15 | class WikitextFixingBot(SingleSiteBot, ExistingPageBot): 16 | use_redirects = False 17 | 18 | 19 | class TitlesMovingBot(WikitextFixingBot): 20 | 21 | param = 'jméno' 22 | param_before = 'titul před' 23 | param_after = 'titul za' 24 | 25 | summary = 'přesun titulů do vlastních parametrů' 26 | 27 | def __init__(self, template, offset=0, **kwargs): 28 | self.template = self.normalize(template) 29 | self.start_offset = offset 30 | self.offset = 0 31 | super().__init__(**kwargs) 32 | 33 | def normalize(self, template): 34 | return first_upper(template 35 | .partition(' {target}') 64 | backlinks = item.backlinks(follow_redirects=False, 65 | filter_redirects=None, 66 | namespaces=[0, 120]) 67 | summary = self.summary.format( 68 | item.title(with_ns=True), target.title(with_ns=True)) 69 | if self.opt.editgroups: 70 | summary += f' ({self.new_editgroups_summary()})' 71 | if target != item.getRedirectTarget(): 72 | item.set_redirect_target(target, summary=summary) 73 | for entity in PreloadingEntityGenerator(backlinks): 74 | if entity == target: 75 | continue 76 | if entity.isRedirectPage(): 77 | entity.set_redirect_target(target, summary=summary) 78 | continue 79 | callbacks = [] 80 | update = [] 81 | for claim in chain.from_iterable(entity.claims.values()): 82 | changed = False 83 | if self.update_snak(claim, item, target): 84 | changed = True 85 | callbacks.append(self._make_callback( 86 | claim.changeTarget, claim.target, summary=summary)) 87 | for snak in chain.from_iterable(claim.qualifiers.values()): 88 | if self.update_snak(snak, item, target): 89 | changed = True 90 | callbacks.append(self._make_callback( 91 | claim.repo.editQualifier, claim, snak, 92 | summary=summary)) 93 | for source in claim.sources: 94 | source_changed = False 95 | snaks = list(chain.from_iterable(source.values())) 96 | for snak in snaks: 97 | if self.update_snak(snak, item, target): 98 | source_changed = True 99 | if source_changed: 100 | changed = True 101 | callbacks.append(self._make_callback( 102 | claim.repo.editSource, claim, snaks, 103 | summary=summary)) 104 | if changed: 105 | update.append(claim) 106 | if len(callbacks) > 1: 107 | data = {'claims': [c.toJSON() for c in update]} 108 | self.user_edit_entity( 109 | entity, data, cleanup=False, summary=summary) 110 | elif len(callbacks) == 1: 111 | callbacks[0]() 112 | 113 | 114 | def main(*args): 115 | options 
= {} 116 | local_args = pywikibot.handle_args(args) 117 | site = pywikibot.Site() 118 | genFactory = GeneratorFactory(site=site) 119 | for arg in genFactory.handle_args(local_args): 120 | if arg.startswith('-'): 121 | arg, sep, value = arg.partition(':') 122 | if value != '': 123 | options[arg[1:]] = value if not value.isdigit() else int(value) 124 | else: 125 | options[arg[1:]] = True 126 | 127 | generator = genFactory.getCombinedGenerator() 128 | bot = WikidataRedirectsFixingBot(generator=generator, site=site, **options) 129 | bot.run() 130 | 131 | 132 | if __name__ == '__main__': 133 | main() 134 | -------------------------------------------------------------------------------- /cswiki/sync_heritage_lists.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from collections import defaultdict 3 | 4 | import mwparserfromhell 5 | import pywikibot 6 | 7 | from pywikibot import pagegenerators 8 | from pywikibot.textlib import removeDisabledParts 9 | from pywikibot.data.sparql import * 10 | 11 | from tools import get_best_statements 12 | 13 | 14 | def get_sources(page): 15 | wiki = pywikibot.Claim(repo, 'P143', is_reference=True) 16 | wiki.setTarget(pywikibot.ItemPage(repo, 'Q191168')) 17 | url = pywikibot.Claim(repo, 'P4656', is_reference=True) 18 | url.setTarget('https:' + page.permalink()) 19 | return [wiki, url] 20 | 21 | 22 | def tidy(value) -> str: 23 | return removeDisabledParts(str(value), site=site).strip() 24 | 25 | 26 | args = pywikibot.handle_args() 27 | 28 | site = pywikibot.Site('cs', 'wikipedia') 29 | repo = site.data_repository() 30 | image_repo = site.image_repository() 31 | 32 | genFactory = pagegenerators.GeneratorFactory(site=site) 33 | genFactory.handle_arg('-ns:0') 34 | genFactory.handle_args(args) 35 | generator = genFactory.getCombinedGenerator(preload=True) 36 | if not generator: 37 | genFactory.handle_arg('-ref:Template:Památky v Česku') 38 | generator = genFactory.getCombinedGenerator(preload=True) 39 | 40 | ignore_images = {'Noimage 2-1.png'} 41 | 42 | pywikibot.info('Loading all identifiers...') 43 | 44 | query = 'SELECT * WHERE { ?item wdt:P762 ?id }' 45 | obj = SparqlQuery(repo=repo) 46 | result = obj.select(query, full_data=True) 47 | #item_to_ids = defaultdict(set) 48 | id_to_items = defaultdict(set) 49 | for entry in result: 50 | item = entry['item'].getID() 51 | id_ = entry['id'].value 52 | #item_to_ids[item].add(id_) 53 | id_to_items[id_].add(item) 54 | del result 55 | 56 | for page in generator: 57 | pywikibot.info(page) 58 | code = mwparserfromhell.parse(page.text) 59 | change = False 60 | for template in code.ifilter_templates( 61 | matches=lambda t: t.name.matches('Památky v Česku')): 62 | item = None 63 | if template.has('Wikidata', ignore_empty=True): 64 | linked_item = tidy(template.get('Wikidata').value) 65 | else: 66 | linked_item = None 67 | if not linked_item and template.has('Id_objektu', ignore_empty=True): 68 | id_ = tidy(template.get('Id_objektu').value) 69 | items = id_to_items[id_] 70 | if len(items) == 1: 71 | item_id = items.pop() 72 | item = pywikibot.ItemPage(repo, item_id) 73 | items.add(item_id) 74 | elif linked_item: 75 | item = pywikibot.ItemPage(repo, linked_item) 76 | if not item: 77 | continue 78 | 79 | item.get(get_redirect=True) 80 | while item.isRedirectPage(): 81 | item = item.getRedirectTarget() 82 | item.get(get_redirect=True) 83 | 84 | if item.exists(): 85 | if item.getID() != linked_item: 86 | template.add('Wikidata', item.getID()) 87 | change = True 88 | ## else: 
89 | ## template.add('Wikidata', '') 90 | ## change = change or bool(linked_item) 91 | ## item = None 92 | 93 | if item and not template.has('Commons', ignore_empty=True): 94 | ccat = None 95 | best = get_best_statements(item.claims.get('P373', [])) 96 | if best: 97 | ccat = best[0].getTarget() 98 | if not ccat: 99 | link = item.sitelinks.get('commonswiki') 100 | if link and link.namespace == 14: 101 | ccat = link.title 102 | if ccat: 103 | template.add('Commons', ccat) 104 | change = True 105 | del best 106 | 107 | if item and not template.has('Článek', ignore_empty=True): 108 | article = item.sitelinks.get('cswiki') 109 | if article: 110 | template.add('Článek', article.ns_title()) 111 | change = True 112 | 113 | if item and not ( 114 | template.has('Zeměpisná_šířka', ignore_empty=True) 115 | and template.has('Zeměpisná_délka', ignore_empty=True) 116 | ): 117 | coord = None 118 | best = get_best_statements(item.claims.get('P625', [])) 119 | if best: 120 | coord = best[0].getTarget() 121 | if coord: 122 | template.add('Zeměpisná_šířka', str(coord.lat)) 123 | template.add('Zeměpisná_délka', str(coord.lon)) 124 | change = True 125 | del best 126 | 127 | if item and template.has('Obrázek', ignore_empty=True): 128 | image = pywikibot.FilePage( 129 | image_repo, tidy(template.get('Obrázek').value)) 130 | if ( 131 | image.exists() and not image.isRedirectPage() 132 | and image.title(with_ns=False) not in ignore_images 133 | and not item.claims.get('P18') 134 | ): 135 | # todo: check unique 136 | claim = pywikibot.Claim(repo, 'P18') 137 | claim.setTarget(image) 138 | claim.addSources(get_sources(page)) 139 | item.addClaim(claim, asynchronous=True) 140 | 141 | if change: 142 | page.text = str(code) 143 | page.save(summary='synchronizace s údaji na Wikidatech', 144 | asynchronous=True) 145 | -------------------------------------------------------------------------------- /cswiki/sync_tree_lists.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import re 3 | 4 | import mwparserfromhell 5 | import pywikibot 6 | 7 | from pywikibot import pagegenerators 8 | from pywikibot.textlib import FILE_LINK_REGEX 9 | from pywikibot.tools import first_upper 10 | 11 | 12 | def get_sources(page): 13 | wiki = pywikibot.Claim(repo, 'P143', is_reference=True) 14 | wiki.setTarget(pywikibot.ItemPage(repo, 'Q191168')) 15 | url = pywikibot.Claim(repo, 'P4656', is_reference=True) 16 | url.setTarget('https:' + page.permalink()) 17 | return [wiki, url] 18 | 19 | 20 | args = pywikibot.handle_args() 21 | 22 | site = pywikibot.Site('cs', 'wikipedia') 23 | repo = site.data_repository() 24 | image_repo = site.image_repository() 25 | 26 | genFactory = pagegenerators.GeneratorFactory(site=site) 27 | genFactory.handle_arg('-ns:0') 28 | genFactory.handle_args(args) 29 | generator = genFactory.getCombinedGenerator(preload=True) 30 | if not generator: 31 | genFactory.handle_arg('-cat:Seznamy památných stromů v Česku podle okresů') 32 | generator = genFactory.getCombinedGenerator(preload=True) 33 | 34 | ignore_images = {'Noimage 2-1.png'} 35 | 36 | # todo: cache all in a single query 37 | query = '''SELECT DISTINCT ?item { 38 | { ?item wdt:P3296 "%s" } UNION { ?item wdt:P677 "%s" } 39 | } LIMIT 2''' 40 | 41 | titleR = re.compile(r'(\s*)([^[|\]<>]+?)((?: *†| *\(x\))?\s*)') 42 | fileR = re.compile(FILE_LINK_REGEX % '|'.join(site.namespaces[6]), re.VERBOSE) 43 | 44 | for page in generator: 45 | pywikibot.info(page) 46 | code = mwparserfromhell.parse(page.text) 47 | change = False 
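    # The table loop below works per wikitable: it reads the header row to
    # find the "název" (name), "obrázek" (image) and "kód" (code) columns,
    # resolves the Wikidata item for each row from the {{Pstrom}} template
    # parameters via the P3296/P677 query above, writes the item ID into the
    # template's third parameter, turns the name cell into a wikilink to the
    # local article, and adds the row's image as P18 when the item lacks one.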
48 | for table in code.ifilter_tags(matches=lambda t: t.tag == 'table'): 49 | rows = table.contents.ifilter_tags(matches=lambda t: t.tag == 'tr') 50 | first = next(rows) 51 | index = dict.fromkeys(('název', 'obrázek', 'kód'), None) 52 | for i, cell in enumerate(first.contents.ifilter_tags( 53 | matches=lambda t: t.tag == 'th')): 54 | for key, value in index.items(): 55 | if value is None and key in str(cell.contents).lower(): 56 | index[key] = i 57 | break 58 | 59 | for key, value in index.items(): 60 | if value is None: 61 | pywikibot.info(f"Couldn't determine column for '{key}'") 62 | if index['kód'] is None: 63 | continue 64 | 65 | for row in rows: 66 | cells = row.contents.filter_tags(matches=lambda t: t.tag == 'td') 67 | if not cells: 68 | continue 69 | code_cell = cells[index['kód']] 70 | templates = code_cell.contents.filter_templates( 71 | matches=lambda t: t.name.matches('Pstrom')) 72 | if len(templates) != 1: 73 | continue 74 | template = templates[0] 75 | params = [] 76 | for i in (1, 2, 3): 77 | if template.has_param(i, ignore_empty=True): 78 | params.append(str(template.get(i)).strip()) 79 | else: 80 | params.append('') 81 | items = list(pagegenerators.WikidataSPARQLPageGenerator( 82 | query % tuple(params[:2]), site=repo)) 83 | if len(items) != 1: 84 | pywikibot.info( 85 | f"Couldn't determine the item for values " 86 | f'{params[0]}/{params[1]} ({len(items)} items)') 87 | continue 88 | 89 | item = items.pop() 90 | if params[2] != item.getID(): # 3rd param is index 2 91 | template.add(3, item.getID()) 92 | change = True 93 | 94 | if index['název'] is not None: 95 | title_cell = cells[index['název']] 96 | nodes = title_cell.contents.nodes 97 | # fixme: ignore   98 | #wikilinks = title_cell.contents.filter_wikilinks() 99 | #if not wikilinks: 100 | if len(nodes) == 1: 101 | match = titleR.fullmatch(str(nodes[0])) 102 | link = item.sitelinks.get(page.site) 103 | if link and match: 104 | groups = match.groups() 105 | if first_upper(groups[1]) == link.title: 106 | new = '{}[[{}]]{}'.format(*groups) 107 | else: 108 | new = '{1}[[{0}|{2}]]{3}'.format( 109 | link.title, *groups) 110 | title_cell.contents.replace(nodes[0], new) 111 | change = True 112 | 113 | if index['obrázek'] is not None: 114 | match = fileR.search(str(cells[index['obrázek']])) 115 | if match: 116 | image = pywikibot.FilePage(image_repo, match['filename']) 117 | if ( 118 | image.exists() and not image.isRedirectPage() 119 | and image.title(with_ns=False) not in ignore_images 120 | and not item.claims.get('P18') 121 | ): 122 | # todo: check unique 123 | claim = pywikibot.Claim(repo, 'P18') 124 | claim.setTarget(image) 125 | claim.addSources(get_sources(page)) 126 | item.addClaim(claim, asynchronous=True) 127 | 128 | if change: 129 | page.text = str(code) 130 | page.save(summary='doplnění článků a/nebo položek na Wikidatech', 131 | asynchronous=True) 132 | -------------------------------------------------------------------------------- /clean_commonscat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import itertools 3 | import re 4 | 5 | import pywikibot 6 | 7 | from pywikibot import i18n, pagegenerators, textlib 8 | from pywikibot.exceptions import UnknownExtensionError 9 | 10 | from deferred import DeferredCallbacksBot 11 | from wikidata import WikidataEntityBot 12 | from wikitext import WikitextFixingBot 13 | 14 | 15 | save_summary = { 16 | 'cs': 'odstranění odkazu na neexistující kategorii na Commons', 17 | 'en': 'removed link to a non-existing 
Commons category', 18 | } 19 | 20 | 21 | class CommonscatCleaningBot(WikitextFixingBot, WikidataEntityBot, DeferredCallbacksBot): 22 | 23 | def __init__(self, **kwargs): 24 | self.available_options.update({ 25 | 'createnew': False, 26 | 'noclean': False, 27 | 'noimport': False, 28 | }) 29 | super().__init__(**kwargs) 30 | self.commons = pywikibot.Site('commons', 'commons') 31 | 32 | def setup(self): 33 | super().setup() 34 | self.cacheSources() 35 | # todo: l10n etc. 36 | templates = itertools.chain( 37 | map(re.escape, self.site.getmagicwords('defaultsort')), 38 | ('[Pp]ahýl', '[Pp]osloupnost', '[Aa]utoritní data', '[Pp]ortály')) 39 | templates = '|'.join(templates) 40 | ns = '|'.join(self.site.namespaces[14]) 41 | self.empty_sectionR = re.compile( 42 | r'\s*\n==+ *Externí odkazy *==+ *\n\s*' 43 | r'^(==|\{\{(?:%s)|\[\[(?:%s):)' % (templates, ns), 44 | flags=re.M) 45 | 46 | def treat_page(self): # todo: treat_page_and_item 47 | page = self.current_page 48 | item = page.data_item() 49 | if 'P373' in item.claims: 50 | self.addCallback(page.touch) 51 | pywikibot.info('Already has a category on Commons') 52 | return 53 | 54 | cat_name = None 55 | has_param = False 56 | for template, fielddict in page.raw_extracted_templates: 57 | # todo: l10n 58 | if template.lower() in ['commonscat', 'commons category']: 59 | cat_name = page.title(with_ns=False) 60 | value = fielddict.get('1', '').strip() 61 | if value: 62 | has_param = True 63 | cat_name = value 64 | break 65 | 66 | if cat_name is None: 67 | pywikibot.warning('Template not found') 68 | return 69 | 70 | commons_cat = pywikibot.Category(self.commons, cat_name) 71 | exists = commons_cat.exists() 72 | if not exists and not commons_cat.isEmptyCategory(): 73 | if self.opt['createnew'] is not True: 74 | pywikibot.warning(f'{commons_cat.title()} is not empty') 75 | return 76 | 77 | exists = self.doWithCallback( 78 | self.userPut, commons_cat, '', '{{Uncategorized}}', 79 | asynchronous=False) 80 | 81 | if not exists: 82 | if self.opt['noclean'] is True: 83 | pywikibot.info( 84 | "Category doesn't exist on Commons, cleanup restricted") 85 | return 86 | regex = r'(?:\n?|^)(?:\* *)?\{\{ *[Cc]ommons(?:cat|[_ ]?category)' 87 | if has_param: 88 | regex += r' *\| *' + re.escape(cat_name) 89 | regex += r' *\}\}' 90 | page_replaced_text = re.sub( 91 | regex, '', page.text, flags=re.M, count=1) 92 | if page_replaced_text != page.text: 93 | page_replaced_text = self.empty_sectionR.sub( 94 | r'\n\n\1', page_replaced_text, count=1) 95 | 96 | # fixme 97 | self.doWithCallback( 98 | self.put_current, page_replaced_text, 99 | summary=i18n.translate(page.site, save_summary)) 100 | else: 101 | if self.opt['noimport'] is True: 102 | pywikibot.info('Category exists on Commons, import restricted') 103 | return 104 | claim = pywikibot.Claim(self.repo, 'P373') 105 | claim.setTarget(cat_name) 106 | pywikibot.info('Category missing on Wikidata') 107 | self.user_add_claim(item, claim, page.site, asynchronous=True) 108 | self.addCallback(page.touch) 109 | 110 | 111 | def main(*args): 112 | options = {} 113 | local_args = pywikibot.handle_args(args) 114 | genFactory = pagegenerators.GeneratorFactory() 115 | for arg in genFactory.handle_args(local_args): 116 | if arg.startswith('-'): 117 | arg, sep, value = arg.partition(':') 118 | if value != '': 119 | options[arg[1:]] = value if not value.isdigit() else int(value) 120 | else: 121 | options[arg[1:]] = True 122 | 123 | generator = genFactory.getCombinedGenerator(preload=True) 124 | site = pywikibot.Site() 125 | if not generator: 
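        # No page generator was given on the command line: fall back to the
        # category bound to Q11925744 on this wiki (looked up through the
        # repository) and work through its articles and subcategories instead.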
126 | try: 127 | category = site.page_from_repository('Q11925744') 128 | except (NotImplementedError, UnknownExtensionError) as e: 129 | pywikibot.error(e) 130 | return 131 | 132 | if not category: 133 | pywikibot.info(f"{site} doesn't have an appropriate category") 134 | return 135 | 136 | generator = itertools.chain( 137 | category.articles(namespaces=0), 138 | category.subcategories()) 139 | 140 | generator = pagegenerators.WikibaseItemFilterPageGenerator(generator) 141 | bot = CommonscatCleaningBot(generator=generator, site=site, **options) 142 | bot.run() 143 | 144 | 145 | if __name__ == '__main__': 146 | main() 147 | -------------------------------------------------------------------------------- /split_claims.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pywikibot 3 | 4 | from pywikibot import pagegenerators 5 | 6 | from query_store import QueryStore 7 | from wikidata import WikidataEntityBot 8 | 9 | 10 | class ClaimsSplittingBot(WikidataEntityBot): 11 | 12 | start_prop = 'P580' 13 | end_prop = 'P582' 14 | use_from_page = False 15 | 16 | def __init__(self, generator, **kwargs): 17 | self.available_options.update({ 18 | 'limit': 500, 19 | }) 20 | super().__init__(**kwargs) 21 | self.store = QueryStore() 22 | self._generator = generator or self.custom_generator() 23 | 24 | def custom_generator(self): 25 | query = self.store.build_query( 26 | 'mixed_claims', limit=self.opt['limit']) 27 | return pagegenerators.WikidataSPARQLPageGenerator(query, site=self.repo) 28 | 29 | @property 30 | def generator(self): 31 | return pagegenerators.PreloadingEntityGenerator(self._generator) 32 | 33 | def has_multiple(self, claim): 34 | return (len(claim.qualifiers.get(self.start_prop, [])) > 1 35 | or len(claim.qualifiers.get(self.end_prop, [])) > 1) 36 | 37 | def can_divide(self, claim): 38 | qualifiers = (claim.qualifiers.get(self.start_prop, []) 39 | + claim.qualifiers.get(self.end_prop, [])) 40 | return (not claim.sources 41 | and set(claim.qualifiers.keys()) == { 42 | self.start_prop, self.end_prop} 43 | and all(qual.snaktype == 'value' for qual in qualifiers)) 44 | 45 | def sort_key(self, claim): 46 | return claim.target.toTimestamp() 47 | #{self.start_prop: 1, self.end_prop: 0}.get(claim.id) 48 | 49 | def get_qualifier_pairs(self, claim): 50 | qualifiers = (claim.qualifiers.get(self.start_prop, []) 51 | + claim.qualifiers.get(self.end_prop, [])) 52 | qualifiers.sort(key=self.sort_key) 53 | pairs = [] 54 | i = 0 55 | any_previous_finished = False 56 | while i < len(qualifiers): 57 | qual = qualifiers[i] 58 | if qual.id == self.start_prop: 59 | next_end = None 60 | if i + 1 < len(qualifiers): 61 | if qualifiers[i+1].id == self.end_prop: 62 | pairs.append( 63 | (qual, qualifiers[i+1]) 64 | ) 65 | i += 2 66 | any_previous_finished = True 67 | continue 68 | elif qualifiers[i+1].id == self.start_prop: 69 | next_end = pywikibot.Claim(self.repo, self.end_prop) 70 | next_end.setSnakType('somevalue') 71 | any_previous_finished = True 72 | pairs.append( 73 | (qual, next_end) 74 | ) 75 | elif qual.id == self.end_prop: 76 | next_start = None 77 | if any_previous_finished: 78 | next_start = pywikibot.Claim(self.repo, self.start_prop) 79 | next_start.setSnakType('somevalue') 80 | pairs.append( 81 | (next_start, qual) 82 | ) 83 | any_previous_finished = True 84 | i += 1 85 | return pairs 86 | 87 | def treat_page_and_item(self, page, item): 88 | to_remove = [] 89 | for claims in item.claims.values(): 90 | for claim in claims: 91 | if 
self.has_multiple(claim) and self.can_divide(claim): 92 | assert not claim.sources # todo 93 | to_remove.append(claim) 94 | pairs = self.get_qualifier_pairs(claim) 95 | for start, end in pairs: 96 | new_claim = pywikibot.Claim(self.repo, claim.id) 97 | if claim.target: 98 | new_claim.setTarget(claim.target) 99 | else: 100 | new_claim.setSnakType(claim.snaktype) 101 | new_claim.setRank(claim.rank) 102 | if start: 103 | start.hash = None 104 | new_claim.addQualifier(start) 105 | if end: 106 | end.hash = None 107 | new_claim.addQualifier(end) 108 | for ref in claim.sources: 109 | sources = [] 110 | for snaks in ref.values(): 111 | sources.extend(snaks) 112 | new_claim.addSources(sources) 113 | if not self.user_add_claim( 114 | item, new_claim, summary='split claim'): 115 | break 116 | if to_remove: 117 | data = {'claims': [ 118 | {'id': cl.toJSON()['id'], 'remove': ''} for cl in to_remove]} 119 | self.user_edit_entity( 120 | item, data, summary='remove splitted claim(s)') 121 | 122 | 123 | def main(*args): 124 | options = {} 125 | local_args = pywikibot.handle_args(args) 126 | site = pywikibot.Site() 127 | genFactory = pagegenerators.GeneratorFactory(site=site) 128 | for arg in genFactory.handle_args(local_args): 129 | if arg.startswith('-'): 130 | arg, sep, value = arg.partition(':') 131 | if value != '': 132 | options[arg[1:]] = int(value) if value.isdigit() else value 133 | else: 134 | options[arg[1:]] = True 135 | 136 | generator = genFactory.getCombinedGenerator() 137 | bot = ClaimsSplittingBot(generator=generator, site=site, **options) 138 | bot.run() 139 | 140 | 141 | if __name__ == '__main__': 142 | main() 143 | -------------------------------------------------------------------------------- /import_descriptions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import re 3 | 4 | import pywikibot 5 | 6 | from pywikibot import textlib 7 | from pywikibot.pagegenerators import ( 8 | GeneratorFactory, 9 | PreloadingEntityGenerator, 10 | PreloadingGenerator, 11 | SearchPageGenerator, 12 | WikidataSPARQLPageGenerator, 13 | ) 14 | 15 | from query_store import QueryStore 16 | from wikidata import WikidataEntityBot 17 | 18 | 19 | class BaseDescriptionBot(WikidataEntityBot): 20 | 21 | def __init__(self, **kwargs): 22 | self.available_options.update({ 23 | 'min_words': 2, 24 | }) 25 | super().__init__(**kwargs) 26 | self.FORMATTING_REGEX = re.compile("'{5}|'{2,3}") 27 | self.REF_REGEX = re.compile(r'.*?') 28 | 29 | def get_regex_for_title(self, escaped_title): 30 | pattern = fr'^\*+ *\[\[({escaped_title})(?:\|[^][]+)?\]\]' 31 | pattern += r' *(?:\([^)]+\))?' 
32 | pattern += '(?:,| [-–]) *(.*)$' 33 | return re.compile(pattern, re.M) 34 | 35 | @staticmethod 36 | def handle_link(match): 37 | text = match[2] 38 | if text: 39 | return text.lstrip('|').strip() 40 | else: 41 | return match['title'].strip() 42 | 43 | def validate_description(self, desc): 44 | return (bool(desc) and len(desc.split()) >= self.opt['min_words']) 45 | 46 | def parse_description(self, text): 47 | desc = textlib.removeDisabledParts( 48 | text, 49 | ['comment', 'file', 'nowiki', 'template', self.FORMATTING_REGEX, 50 | self.REF_REGEX]) 51 | desc = LINK_REGEX.sub(self.handle_link, desc) 52 | desc = desc.replace(' ', ' ').strip() 53 | desc = re.sub(r' *\([^)]+\)$', '', desc) 54 | desc = desc.partition(';')[0] 55 | desc = re.sub(r'^.*\) [-–] +', '', desc) 56 | desc = re.sub(r'^\([^)]+\) +', '', desc) 57 | while ' ' * 2 in desc: 58 | desc = desc.replace(' ' * 2, ' ') 59 | if re.search(r'[^IVX]\.$', desc) or desc.endswith(tuple(',:')): 60 | desc = desc[:-1].rstrip() 61 | if desc.startswith(('a ', 'an ')): 62 | desc = desc.partition(' ')[2] 63 | return desc 64 | 65 | def get_summary(self, page, desc): 66 | link = page.title(as_link=True, insite=self.repo) 67 | return f'importing [{page.site.lang}] description "{desc}" from {link}' 68 | 69 | 70 | class MissingDescriptionBot(BaseDescriptionBot): 71 | 72 | use_from_page = False 73 | 74 | def __init__(self, **kwargs): 75 | self.available_options.update({ 76 | 'allpages': False, 77 | }) 78 | super().__init__(**kwargs) 79 | self.store = QueryStore() 80 | 81 | @property 82 | def generator(self): 83 | query = self.store.build_query( 84 | 'missing_descriptions', 85 | hostname=self.site.hostname(), 86 | lang=self.site.lang) 87 | return PreloadingEntityGenerator( 88 | WikidataSPARQLPageGenerator(query, site=self.repo)) 89 | 90 | def treat_page_and_item(self, page, item): 91 | if self.site.lang in item.descriptions: 92 | return 93 | title = item.getSitelink(self.site) 94 | link_start = re.escape('[[' + title) 95 | search_query = fr'linksto:"{title}" insource:/\* *{link_start}/' 96 | regex = self.get_regex_for_title(re.escape(title)) 97 | for ref_page in PreloadingGenerator( 98 | SearchPageGenerator(search_query, namespaces=[0])): 99 | # todo: first polish text 100 | match = regex.search(ref_page.text) 101 | if not match: 102 | continue 103 | if not self.opt['allpages'] and not ref_page.isDisambig(): 104 | continue 105 | desc = self.parse_description(match[2]) 106 | if not self.validate_description(desc): 107 | continue 108 | summary = self.get_summary(ref_page, desc) 109 | item.descriptions[self.site.lang] = desc.strip() 110 | if self.user_edit_entity(item, summary=summary): 111 | break 112 | 113 | 114 | class MappingDescriptionBot(BaseDescriptionBot): 115 | 116 | def __init__(self, **kwargs): 117 | super().__init__(**kwargs) 118 | self.regex = self.get_regex_for_title(r'[^\[\|\]]+') 119 | 120 | def get_pages_with_descriptions(self, text): 121 | data = {} 122 | for match in self.regex.finditer(text): 123 | title, desc = match.groups() 124 | page = pywikibot.Page(self.site, title) 125 | data[page] = self.parse_description(desc) 126 | return data 127 | 128 | def treat_page(self): 129 | page = self.current_page 130 | descriptions = self.get_pages_with_descriptions(page.text) 131 | for item in PreloadingEntityGenerator(descriptions.keys()): 132 | if self.site.lang in item.descriptions: 133 | continue 134 | target = pywikibot.Page(item.sitelinks[self.site]) 135 | desc = descriptions.get(target) 136 | if not self.validate_description(desc): 137 | 
continue 138 | summary = self.get_summary(page, desc) 139 | item.descriptions[self.site.lang] = desc.strip() 140 | self.current_page = item 141 | self.user_edit_entity(item, summary=summary) 142 | 143 | 144 | def main(*args): 145 | options = {} 146 | local_args = pywikibot.handle_args(args) 147 | site = pywikibot.Site() 148 | genFactory = GeneratorFactory(site=site) 149 | for arg in genFactory.handle_args(local_args): 150 | if arg.startswith('-'): 151 | arg, sep, value = arg.partition(':') 152 | if value != '': 153 | options[arg[1:]] = int(value) if value.isdigit() else value 154 | else: 155 | options[arg[1:]] = True 156 | 157 | generator = genFactory.getCombinedGenerator(preload=True) 158 | if generator: 159 | bot = MappingDescriptionBot(generator=generator, site=site, **options) 160 | else: 161 | bot = MissingDescriptionBot(site=site, **options) 162 | bot.run() 163 | 164 | 165 | if __name__ == '__main__': 166 | main() 167 | -------------------------------------------------------------------------------- /merger.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | 4 | from operator import attrgetter 5 | 6 | import pywikibot 7 | 8 | from pywikibot.exceptions import APIError, OtherPageSaveError 9 | from pywikibot.data.sparql import SparqlQuery 10 | 11 | 12 | class Merger: 13 | 14 | strategies = { 15 | 'id': '_sort_by_id', 16 | 'claims': '_sort_by_claims', 17 | 'revisions': '_sort_by_revisions', 18 | 'sitelinks': '_sort_by_sitelinks', 19 | } 20 | no_conflict_props = {'P17', 'P21', 'P105', 'P170', 'P171', 'P225', 'P271', 21 | 'P296', 'P495', 'P569', 'P570', 'P734', 'P856'} 22 | no_conflict_trees = { 23 | 'P19': 'P131', 24 | 'P31': 'P279', 25 | 'P131': 'P131', 26 | 'P279': 'P279', 27 | } 28 | no_conflict_types = ['external-id'] 29 | 30 | @classmethod 31 | def merge(cls, item_from, item_to, **kwargs): 32 | try: 33 | item_from.mergeInto(item_to, **kwargs) 34 | except APIError as e: 35 | raise OtherPageSaveError(item_from, e) 36 | 37 | @classmethod 38 | def clean_merge(cls, item_from, item_to, safe=False, quick=True, **kwargs): 39 | kwargs.pop('asynchronous', None) # fixme 40 | if safe and not cls.can_merge(item_from, item_to, quick=quick): 41 | raise OtherPageSaveError( 42 | item_from, f'Cannot merge {item_from} with {item_to}') 43 | 44 | cls.merge(item_from, item_to, **kwargs) 45 | if not item_from.isRedirectPage(): 46 | try: 47 | item_from.editEntity( 48 | {}, clear=True, summary='Clearing item to prepare for redirect') 49 | except APIError as e: 50 | raise OtherPageSaveError(item_from, e) 51 | 52 | cls.merge(item_from, item_to) 53 | 54 | @classmethod 55 | def _conflicts(cls, data1, data2): 56 | set1 = {repr(x.target) for x in data1} # hack 57 | set2 = {repr(x.target) for x in data2} # hack 58 | return not bool(set1 & set2) 59 | 60 | @classmethod 61 | def _has_dtype(cls, dtype, claims): 62 | for cl in claims: 63 | if cl.type == dtype: 64 | return True 65 | return False 66 | 67 | @classmethod 68 | def _same_tree(cls, prop, data1, data2): 69 | sparql = SparqlQuery() # fixme: dependencies 70 | pattern = ('ASK { VALUES ?x1 { wd:%s } . VALUES ?x2 { wd:%s } . 
' 71 | '?x1 wdt:%s* ?x2 }') 72 | item1 = ' wd:'.join(map(attrgetter('target.id'), data1)) 73 | item2 = ' wd:'.join(map(attrgetter('target.id'), data2)) 74 | tries = 3 75 | for ask in (pattern % (item1, item2, prop), 76 | pattern % (item2, item1, prop)): 77 | res = False 78 | while True: 79 | try: 80 | res = sparql.ask(ask) 81 | except requests.exceptions.ConnectionError: 82 | tries -= 1 83 | if tries == 0: 84 | raise 85 | time.sleep(1) 86 | continue 87 | else: 88 | break 89 | if res: 90 | return True 91 | 92 | return False 93 | 94 | @classmethod 95 | def can_merge(cls, item1, item2, quick=True): 96 | props = list(cls.no_conflict_props) 97 | if quick: 98 | props.extend(cls.no_conflict_trees.keys()) 99 | 100 | for prop in props: 101 | item1.get() 102 | data1 = item1.claims.get(prop, []) 103 | if not data1: 104 | continue 105 | item2.get() 106 | data2 = item2.claims.get(prop, []) 107 | if not data2: 108 | continue 109 | if cls._conflicts(data1, data2): 110 | return False 111 | 112 | key = lambda claims: claims[0].id 113 | for dtype in cls.no_conflict_types: 114 | callback = lambda claims: claims[0].type == dtype 115 | item1.get() 116 | keys1 = set(map(key, filter(callback, item1.claims.values()))) 117 | if not keys1: 118 | continue 119 | item2.get() 120 | keys2 = set(map(key, filter(callback, item2.claims.values()))) 121 | if not keys2: 122 | continue 123 | for prop in keys1 & keys2: 124 | if cls._conflicts(item1.claims[prop], item2.claims[prop]): 125 | return False 126 | 127 | if not quick: 128 | for prop in cls.no_conflict_trees: 129 | item1.get() 130 | data1 = item1.claims.get(prop, []) 131 | if not data1: 132 | continue 133 | item2.get() 134 | data2 = item2.claims.get(prop, []) 135 | if not data2: 136 | continue 137 | if not cls._same_tree(cls.no_conflict_trees[prop], data1, data2): 138 | return False 139 | 140 | return True 141 | 142 | @classmethod 143 | def _sort_by_id(cls, item1, item2): 144 | id1, id2 = item1.getID(numeric=True), item2.getID(numeric=True) 145 | return (id1 < id2) - (id1 > id2) 146 | 147 | @classmethod 148 | def _sort_by_revisions(cls, item1, item2): 149 | len1, len2 = map( 150 | lambda item: len(list(item.revisions())), [item1, item2]) 151 | return (len1 > len2) - (len1 < len2) 152 | 153 | @classmethod 154 | def _sort_by_claims(cls, item1, item2): 155 | callback = lambda item: sum(map(len, item.claims.values())) 156 | count1, count2 = map(callback, [item1, item2]) 157 | return (count1 > count2) - (count1 < count2) 158 | 159 | @classmethod 160 | def _sort_by_sitelinks(cls, item1, item2): 161 | len1, len2 = map(lambda item: len(item.sitelinks), [item1, item2]) 162 | return (len1 > len2) - (len1 < len2) 163 | 164 | @classmethod 165 | def sort_for_merge(cls, items, key=['id']): 166 | for strategy in key: 167 | if strategy not in cls.strategies: 168 | continue 169 | callback = getattr(cls, cls.strategies[strategy]) 170 | res = callback(*items) 171 | if res == 0: 172 | continue 173 | if res == -1: 174 | items[:] = items[::-1] 175 | break 176 | target_item, from_item = items 177 | return target_item, from_item 178 | -------------------------------------------------------------------------------- /slice_externalids.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import re 3 | 4 | import pywikibot 5 | 6 | from pywikibot.data.sparql import SparqlQuery 7 | from pywikibot.pagegenerators import ( 8 | PreloadingEntityGenerator, 9 | WikidataSPARQLPageGenerator, 10 | ) 11 | 12 | from query_store import QueryStore 13 | 
from wikidata import WikidataEntityBot 14 | 15 | 16 | class ExternalIdSlicingBot(WikidataEntityBot): 17 | 18 | blacklist = {'P2013'} 19 | use_from_page = False 20 | 21 | def __init__(self, **options): 22 | self.available_options.update({ 23 | 'step': 10, 24 | 'offset': 0, 25 | }) 26 | super().__init__(**options) 27 | self.cache = {} 28 | self.failed = {} 29 | self.sparql = SparqlQuery(repo=self.repo) 30 | self.store = QueryStore() 31 | 32 | @property 33 | def generator(self): 34 | step = self.opt['step'] 35 | opts = { 36 | # fixme: don't use this word 37 | 'blacklist': ' wd:'.join(self.blacklist), 38 | 'limit': step, 39 | } 40 | offset = self.opt['offset'] 41 | while True: 42 | pywikibot.info(f'\nLoading items (offset {offset})...') 43 | opts['offset'] = offset 44 | ask = self.store.build_query('ask_externalid_props', **opts) 45 | if not self.sparql.ask(ask): 46 | break 47 | query = self.store.build_query('external-ids', **opts) 48 | gen = PreloadingEntityGenerator( 49 | WikidataSPARQLPageGenerator(query, site=self.repo)) 50 | yield from gen 51 | offset += step 52 | 53 | def treat_page_and_item(self, page, item): 54 | for prop, claims in item.claims.items(): 55 | if prop in self.blacklist: 56 | continue 57 | if claims[0].type != 'external-id': 58 | continue 59 | for cl in claims: 60 | if not cl.target or not cl.target.startswith('http'): 61 | continue 62 | formatter, regex = self.get_formatter_and_regex(prop) 63 | if not formatter: 64 | pywikibot.info(f"{prop} doesn't have a formatter") 65 | break 66 | value = self.find_value(cl.target, formatter) 67 | if not value: 68 | pywikibot.info( 69 | f'Value not found in "{cl.target}" for property {prop}') 70 | self.failed.setdefault(prop, set()).add(item) 71 | continue 72 | if regex: 73 | try: 74 | match = re.match(f'({regex})', value) 75 | except re.error: 76 | pywikibot.info(f'Couldn\'t apply regex "{regex}"') 77 | break 78 | if not match: 79 | pywikibot.info( 80 | f'Value "{value}" not matched by regex "{regex}"') 81 | self.failed.setdefault(prop, set()).add(item) 82 | continue 83 | value = match.group() 84 | summary = 'harvested the identifier based on [[Property:P1630]]' 85 | if regex: 86 | summary += ' and [[Property:P1793]]' 87 | cl.changeTarget(value, summary=summary) 88 | 89 | def get_formatter_and_regex(self, prop): 90 | if prop not in self.cache: 91 | formatter = regex = None 92 | ppage = pywikibot.PropertyPage(self.repo, prop) 93 | if 'P1630' in ppage.claims: 94 | if len(ppage.claims['P1630']) > 1: 95 | preferred = [cl for cl in ppage.claims['P1630'] 96 | if cl.rank == 'preferred'] 97 | if len(preferred) == 1: 98 | formatter = preferred[0].target 99 | else: 100 | formatter = ppage.claims['P1630'][0].target 101 | 102 | if 'P1793' in ppage.claims: 103 | if len(ppage.claims['P1793']) > 1: 104 | preferred = [cl for cl in ppage.claims['P1793'] 105 | if cl.rank == 'preferred'] 106 | if len(preferred) == 1: 107 | regex = preferred[0].target 108 | else: 109 | regex = ppage.claims['P1793'][0].target 110 | 111 | self.cache[prop] = (formatter, regex) 112 | 113 | return self.cache[prop] 114 | 115 | def strip_init_stuff(self, string): 116 | if string.startswith(('http://', 'https://')): 117 | string = string.partition('//')[2] 118 | if string.startswith('www.'): 119 | string = string[4:] 120 | return string 121 | 122 | def find_value(self, url, formatter): 123 | url = self.strip_init_stuff(url) 124 | formatter = self.strip_init_stuff(formatter) 125 | value = pywikibot.page.url2unicode(url) 126 | split = formatter.split('$1') 127 | if not 
value.startswith(split[0]): 128 | return None 129 | if not split[1]: 130 | return value[len(split[0]):].rstrip('/') 131 | 132 | value = value[:-len(split[-1])] 133 | 134 | try: 135 | index = value.index(split[1], len(split[0])) 136 | except ValueError: 137 | return None 138 | else: 139 | return value[len(split[0]):index].rstrip('/') 140 | 141 | def exit(self): # fixme: teardown 142 | if self.failed: 143 | text = '' 144 | for prop in sorted(self.failed): 145 | text += f'* [[Property:{prop}]]:\n' 146 | for item in sorted(self.failed[prop]): 147 | text += f'** [[{item.title()}]]\n' 148 | username = self.repo.username() 149 | page = pywikibot.Page( 150 | self.repo, f'User:{username}/Wrong external ids') 151 | page.put(text, summary='update') 152 | super().exit() 153 | 154 | 155 | def main(*args): 156 | options = {} 157 | for arg in pywikibot.handle_args(args): 158 | if arg.startswith('-'): 159 | arg, sep, value = arg.partition(':') 160 | if value != '': 161 | options[arg[1:]] = int(value) if value.isdigit() else value 162 | else: 163 | options[arg[1:]] = True 164 | 165 | site = pywikibot.Site('wikidata', 'wikidata') 166 | bot = ExternalIdSlicingBot(site=site, **options) 167 | bot.run() 168 | 169 | 170 | if __name__ == '__main__': 171 | main() 172 | -------------------------------------------------------------------------------- /list_typos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import re 3 | 4 | from collections import defaultdict 5 | 6 | import pywikibot 7 | 8 | from pywikibot import textlib 9 | from pywikibot.bot import SingleSiteBot, ExistingPageBot 10 | from pywikibot.pagegenerators import PreloadingGenerator 11 | from pywikibot.tools.itertools import itergroup 12 | 13 | from typoloader import TypoRule, TyposLoader 14 | 15 | 16 | class TypoReportBot(SingleSiteBot): 17 | 18 | pattern = '# {} \u2013 {}' 19 | 20 | def __init__(self, **kwargs): 21 | self.available_options.update({ 22 | 'always': True, 23 | 'anything': False, 24 | 'outputpage': None, 25 | 'typospage': None, 26 | 'whitelistpage': None, 27 | 'false_positives': None, 28 | }) 29 | super().__init__(**kwargs) 30 | self.loader = TyposLoader( 31 | self.site, allrules=True, typospage=self.opt.typospage, 32 | whitelistpage=self.opt.whitelistpage) 33 | self.false_positives = set() 34 | 35 | def setup(self): 36 | super().setup() 37 | self.typoRules = self.loader.loadTypos() 38 | #self.fp_page = self.loader.getWhitelistPage() 39 | self.whitelist = self.loader.loadWhitelist() 40 | self.data = defaultdict(list) 41 | self.order = [] # remove when dictionaries are ordered 42 | self.load_false_positives() 43 | 44 | def load_false_positives(self): 45 | if not self.opt.false_positives: 46 | return 47 | page = pywikibot.Page(self.site, self.opt.false_positives) 48 | fps = self.false_positives 49 | for line in page.text.splitlines(): 50 | if line.startswith(('#', '*')): 51 | fps.add(line.lstrip('#* ')) 52 | 53 | @property 54 | def generator(self): 55 | for rule in self.typoRules: 56 | if rule.query is None: 57 | continue 58 | 59 | pywikibot.info(f'Query: "{rule.query}"') 60 | self.current_rule = rule 61 | yield from PreloadingGenerator( 62 | self.site.search(rule.query, namespaces=[0])) 63 | 64 | def skip_page(self, page): 65 | # TODO: better terminology 66 | if page.title() in self.whitelist: 67 | pywikibot.warning(f'Skipped {page} because it is whitelisted') 68 | return True 69 | 70 | if self.current_rule.find.search(page.title()): 71 | pywikibot.warning( 72 | f'Skipped {page} 
because the rule matches the title') 73 | return True 74 | 75 | return super().skip_page(page) 76 | 77 | def remove_disabled_parts(self, text): 78 | return textlib.removeDisabledParts( 79 | text, TypoRule.exceptions, site=self.site) 80 | 81 | def treat(self, page): 82 | match = self.current_rule.find.search(page.text) 83 | if not match: 84 | return 85 | text = self.remove_disabled_parts(page.text) 86 | found = set() 87 | for match in self.current_rule.find.finditer(text): 88 | match_text = match[0] 89 | if match_text in found: 90 | continue 91 | found.add(match_text) 92 | link = page.title(as_link=True) 93 | put_text = self.pattern.format(link, match_text) 94 | if put_text[2:] not in self.false_positives: 95 | pywikibot.stdout(put_text) 96 | if not self.data.get(link): 97 | self.order.append(link) 98 | self.data[link].append(match_text) 99 | 100 | def teardown(self): 101 | outputpage = self.opt.outputpage 102 | if (self.generator_completed or self.opt.anything) and outputpage: 103 | put = [] 104 | for link in self.order: 105 | for match in self.data[link]: 106 | put.append(self.pattern.format(link, match)) 107 | page = pywikibot.Page(self.site, outputpage) 108 | page.text = '\n'.join(put) 109 | page.save(summary='aktualizace seznamu překlepů', minor=False, 110 | bot=False, apply_cosmetic_changes=False) 111 | super().teardown() 112 | 113 | 114 | class PurgeTypoReportBot(SingleSiteBot, ExistingPageBot): 115 | 116 | def __init__(self, **kwargs): 117 | self.helper = TypoReportBot(**kwargs) 118 | super().__init__(site=self.helper.site) 119 | self.put = [] 120 | self.cache = defaultdict(list) 121 | 122 | def setup(self): 123 | super().setup() 124 | self.whitelist = self.helper.loader.loadWhitelist() 125 | self.generator = [pywikibot.Page(self.site, self.helper.opt.outputpage)] 126 | self.helper.load_false_positives() 127 | 128 | def line_iterator(self, text): 129 | regex = re.compile(self.helper.pattern.format( 130 | r'\[\[([^]]+)\]\]', '(.+)')) 131 | for line in text.splitlines(): 132 | match = regex.fullmatch(line) 133 | if match: 134 | title, text = match.groups() 135 | entry = pywikibot.Page(self.site, title) 136 | self.cache[entry.title()].append(text) 137 | yield entry 138 | else: 139 | self.put.append(line) 140 | 141 | def treat(self, page): 142 | pattern = self.helper.pattern 143 | for entry in PreloadingGenerator(self.line_iterator(page.text)): 144 | key = title = entry.title() 145 | if not entry.exists(): 146 | self.cache.pop(key) 147 | continue 148 | while entry.isRedirectPage(): 149 | entry = entry.getRedirectTarget() 150 | title = entry.title() 151 | text = self.helper.remove_disabled_parts(entry.text) 152 | for string in self.cache.pop(key): 153 | if string not in text: 154 | continue 155 | put_text = pattern.format(f'[[{title}]]', string) 156 | if put_text[2:] in self.helper.false_positives: 157 | continue 158 | self.put.append(put_text) 159 | 160 | page.text = '\n'.join(self.put) 161 | page.save(summary='odstranění vyřešených překlepů', minor=True, 162 | bot=True, apply_cosmetic_changes=False) 163 | 164 | 165 | def main(*args): 166 | options = {} 167 | cls = TypoReportBot 168 | for arg in pywikibot.handle_args(args): 169 | if arg == 'purge': 170 | cls = PurgeTypoReportBot 171 | elif arg.startswith('-'): 172 | arg, sep, value = arg.partition(':') 173 | if value != '': 174 | options[arg[1:]] = int(value) if value.isdigit() else value 175 | else: 176 | options[arg[1:]] = True 177 | 178 | bot = cls(**options) 179 | bot.run() 180 | 181 | 182 | if __name__ == '__main__': 183 | main() 
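A minimal, illustrative way to drive the report script above, assuming a standard pywikibot installation with this scripts directory importable; the page titles below are hypothetical placeholders, not pages referenced by the repository:

```python
# Sketch only -- the "User:ExampleBot/..." titles are hypothetical placeholders.
# Command-line use through pywikibot's wrapper would look like:
#   python pwb.py list_typos -outputpage:"User:ExampleBot/Typo report"
#   python pwb.py list_typos purge -outputpage:"User:ExampleBot/Typo report"
# The same options can be passed to main() directly:
from list_typos import main

main('-outputpage:User:ExampleBot/Typo report',
     '-false_positives:User:ExampleBot/False positives')
```

The bare `purge` argument switches to `PurgeTypoReportBot`, which re-reads an existing report page and drops entries that no longer match or that have been whitelisted as false positives.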
184 | -------------------------------------------------------------------------------- /cswiki/pageviews.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | import json 3 | import os.path as osp 4 | from collections import defaultdict 5 | from datetime import date, datetime, timedelta 6 | 7 | import pywikibot 8 | import requests 9 | from pywikibot.comms.http import user_agent 10 | from pywikibot.pagegenerators import PreloadingGenerator 11 | 12 | pywikibot.handle_args() 13 | 14 | site = pywikibot.Site() 15 | 16 | headers = {'User-Agent': user_agent()} 17 | hostname = site.hostname() 18 | prefix = 'https://wikimedia.org/api/rest_v1/metrics/pageviews' 19 | pattern = f'{prefix}/top/{hostname}/all-access/%Y/%m/%d' 20 | 21 | check_templates = { 22 | 'Aktualizovat', 'Celkově zpochybněno', 'Globalizovat', 'Neověřeno', 'NPOV', 23 | 'Pahýl', 'Pravopis', 'Reklama', 'Sloh', 'Upravit', 'Vlastní výzkum', 24 | 'Vyhýbavá slova', 25 | } 26 | check_categories = { 27 | 'Wikipedie:Polozamčené stránky', 28 | 'Wikipedie:Rozšířeně polozamčené stránky', 29 | 'Wikipedie:Dlouhodobě zamčené stránky', 30 | 'Wikipedie:Dobré články', 31 | 'Wikipedie:Nejlepší články', 32 | 'Žijící lidé', 33 | } 34 | 35 | top = 100 36 | days = 7 37 | gamma = 0.85 38 | weights = [pow(gamma, i) for i in range(days)] 39 | 40 | today = date.today() 41 | this = today - timedelta(days=1) 42 | first = today - timedelta(days=days) 43 | min_per_day = [] 44 | 45 | check_categories.add(f'Úmrtí v roce {this.year}') 46 | check_categories.add(f'Úmrtí v roce {this.year - 1}') 47 | 48 | aggregate_url = '{}/aggregate/{}/all-access/user/daily/{}/{}'.format( 49 | prefix, 50 | hostname, 51 | first.strftime('%Y%m%d'), 52 | this.strftime('%Y%m%d') 53 | ) 54 | resp = requests.get(aggregate_url, headers=headers) 55 | data = resp.json() 56 | daily = [entry['views'] for entry in data['items']] 57 | 58 | index = defaultdict(lambda: [None] * days) 59 | for diff in range(days): 60 | the_day = this - timedelta(days=diff) 61 | resp = requests.get(the_day.strftime(pattern), headers=headers) 62 | data = resp.json() 63 | 64 | array = [] 65 | for info in data['items'][0]['articles']: 66 | page = info['article'] 67 | views = info['views'] 68 | index[page][diff] = views 69 | array.append(views) 70 | min_per_day.append(min(array)) 71 | del data 72 | 73 | done_heap = [] 74 | stack = [] 75 | 76 | for page, values in index.items(): 77 | if page.startswith('Speciální:'): 78 | continue 79 | complete = True 80 | total = 0 81 | for views, at_most in zip(values, min_per_day): 82 | if views is None: 83 | complete = False 84 | total += at_most 85 | else: 86 | total += views 87 | 88 | if complete: 89 | done_heap.append((total, page, values)) 90 | else: 91 | stack.append((total, page, values)) 92 | 93 | done_heap.sort() 94 | del done_heap[:-top] 95 | stack.sort() 96 | 97 | while True: 98 | possible, page, values = stack.pop() 99 | lowest = done_heap[0][0] 100 | if possible < lowest: 101 | break 102 | 103 | present = [i for i, val in enumerate(values) if val is None] 104 | 105 | start = this - timedelta(days=max(present)) 106 | end = this - timedelta(days=min(present)) 107 | 108 | url = f'{prefix}/per-article/{hostname}/all-access/user/' 109 | url += page.replace('/', '%2F') + '/daily/' 110 | url += start.strftime('%Y%m%d00') + '/' + end.strftime('%Y%m%d00') 111 | resp = requests.get(url, headers=headers) 112 | if resp.ok: 113 | data = resp.json() 114 | for entry in data['items']: 115 | dt = datetime.strptime(entry['timestamp'], 
'%Y%m%d%H') 116 | delta = this - dt.date() 117 | values[delta.days] = entry['views'] 118 | 119 | for i in range(days): 120 | if values[i] is None: 121 | values[i] = 0 122 | 123 | total = sum(values) 124 | assert total <= possible 125 | if total >= lowest: 126 | heapq.heappushpop(done_heap, (total, page, values)) 127 | 128 | done_heap.sort(reverse=True) 129 | 130 | lines = [] 131 | lines.append( 132 | f"Nejčtenější stránky za období {first.day}. {first.month}. {first.year}" 133 | f" – {this.day}. {this.month}. {this.year}." 134 | ) 135 | lines.append('') 136 | lines.append('{| class="wikitable sortable"') 137 | lines.append('! Pořadí') 138 | lines.append('! Stránka') 139 | lines.append('! Celkový
<br>počet návštěv') 140 | lines.append('! Vážený<br>
počet návštěv') 141 | lines.append('! Koeficient') 142 | lines.append('! Problémy') 143 | lines.append('! Příznaky') 144 | lines.append('! class="unsortable" | Graf') 145 | 146 | aggregate = sum(daily) 147 | weighted = sum(v * w for v, w in zip(daily, weights)) 148 | coef = weighted / aggregate 149 | 150 | lines.append('|-') 151 | lines.append('|') 152 | lines.append("| ''vše''") 153 | lines.append(f'| {aggregate}') 154 | lines.append(f'| {weighted:.0f}') 155 | lines.append('| %s' % f'{coef:.3f}'.replace('.', ',', 1)) 156 | lines.append(f'|') 157 | lines.append(f'|') 158 | lines.append(f"| [https://pageviews.wmcloud.org/siteviews/?sites={hostname}" 159 | f"&agent=user&range=latest-20 [0]]") 160 | 161 | gen = (pywikibot.Page(site, title) for _, title, _ in done_heap) 162 | for rank, (page, (total, title, values)) in enumerate(zip( 163 | site.preloadpages(gen, templates=True, categories=True, content=False), 164 | done_heap 165 | ), start=1): 166 | weighted = sum(v * w for v, w in zip(values, weights)) 167 | coef = weighted / total 168 | link_title = title.replace('_', ' ') 169 | if link_title.startswith(('Soubor:', 'Kategorie:')): 170 | link_title = f':{link_title}' 171 | 172 | lines.append('|-') 173 | lines.append(f'| {rank}') 174 | lines.append(f'| [[{link_title}]]') 175 | lines.append(f'| {total}') 176 | lines.append(f'| {weighted:.0f}') 177 | lines.append('| %s' % f'{coef:.3f}'.replace('.', ',', 1)) 178 | 179 | show_templates = check_templates.intersection(map( 180 | lambda p: p.title(with_ns=False), page.templates())) 181 | show_categories = check_categories.intersection(map( 182 | lambda p: p.title(with_ns=False), page.categories())) 183 | 184 | if show_templates: 185 | lines.append('| ' + ('
<br>'.join( 186 |             f'[[Šablona:{t}|{t}]]' for t in sorted(show_templates)))) 187 |     else: 188 |         lines.append('|') 189 | 190 |     if show_categories: 191 |         lines.append('| ' + ('<br>
'.join( 192 | f"[[:Kategorie:{c}|{c.removeprefix('Wikipedie:')}]]" 193 | for c in sorted(show_categories)))) 194 | else: 195 | lines.append('|') 196 | 197 | lines.append(f"| [https://pageviews.wmcloud.org/pageviews/?project={hostname}" 198 | f"&agent=user&range=latest-20&pages={title}]") 199 | 200 | lines.append('|}') 201 | 202 | the_page = pywikibot.Page(site, f'{site.username()}/Návštěvy', ns=2) 203 | the_page.text = '\n'.join(lines) 204 | the_page.save(minor=False, bot=False, apply_cosmetic_changes=False, 205 | summary='aktualizace') 206 | -------------------------------------------------------------------------------- /fake_references.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from contextlib import suppress 3 | 4 | import pywikibot 5 | 6 | from pywikibot import pagegenerators 7 | 8 | from query_store import QueryStore 9 | from wikidata import WikidataEntityBot 10 | 11 | 12 | class FakeReferencesBot(WikidataEntityBot): 13 | 14 | item_ids = ['Q2013'] 15 | inferred_from = 'P3452' 16 | ref_props = ['P143', 'P248'] 17 | url_props = ['P854'] 18 | use_from_page = False 19 | whitelist_props = {'P813', 'P4656'} 20 | 21 | def __init__(self, generator, **kwargs): 22 | self.available_options.update({ 23 | 'limit': None, 24 | }) 25 | super().__init__(**kwargs) 26 | self.store = QueryStore() 27 | self._generator = generator or self.subgenerator() 28 | self.url_start = self.repo.base_url(self.repo.article_path) 29 | 30 | def subgenerator(self): 31 | limit = self.opt['limit'] 32 | for ident in self.item_ids: 33 | from_item = pywikibot.ItemPage(self.repo, ident) 34 | for item in pagegenerators.WikibaseItemGenerator( 35 | from_item.backlinks( 36 | total=limit, filterRedirects=False, namespaces=[0])): 37 | yield item 38 | if limit is not None: 39 | limit -= 1 40 | 41 | if limit == 0: 42 | return 43 | 44 | for prop in self.url_props: 45 | ok = True 46 | while ok and limit != 0: 47 | ok = False 48 | query = self.store.build_query( 49 | 'fake_references_url', 50 | limit=500 if limit is None else min(500, limit), 51 | prop=prop) 52 | for item in pagegenerators.WikidataSPARQLPageGenerator( 53 | query, site=self.repo): 54 | ok = True 55 | yield item 56 | if limit is not None: 57 | limit -= 1 58 | 59 | for prop in self.ref_props: 60 | ok = True 61 | while ok and limit != 0: 62 | ok = False 63 | query = self.store.build_query( 64 | 'fake_references', 65 | limit=100 if limit is None else min(100, limit), 66 | prop=prop) 67 | for item in pagegenerators.WikidataSPARQLPageGenerator( 68 | query, site=self.repo): 69 | ok = True 70 | yield item 71 | if limit is not None: 72 | limit -= 1 73 | 74 | @property 75 | def generator(self): 76 | return pagegenerators.PreloadingEntityGenerator(self._generator) 77 | 78 | @property 79 | def summary(self): 80 | return ('update reference per [[Wikidata:Requests for permissions/' 81 | 'Bot/MatSuBot 8|RfPB]]') 82 | 83 | def treat_page_and_item(self, page, item): 84 | changed = False 85 | for prop, claims in item.claims.items(): 86 | for claim in claims: 87 | if self.handle_claim(claim): 88 | changed = True 89 | if changed: 90 | self.user_edit_entity(item, summary=self.summary) 91 | 92 | def handle_claim(self, claim): 93 | ret = False 94 | if not claim.sources: 95 | return ret 96 | if claim.type == 'wikibase-item': 97 | if claim.id == 'P1343' and 'P805' in claim.qualifiers: 98 | target = claim.qualifiers['P805'][0].getTarget() 99 | else: 100 | target = claim.getTarget() 101 | if target: 102 | for source in 
claim.sources: 103 | ret = self.handle_source_item(source, target) or ret 104 | for source in claim.sources: 105 | ret = self.handle_source_url(source) or ret 106 | return ret 107 | 108 | def handle_source_item(self, source, target): 109 | ret = False 110 | for prop in self.ref_props: 111 | keys = set(source.keys()) 112 | if prop not in keys: 113 | continue 114 | if keys - (self.whitelist_props | {prop}): 115 | continue 116 | if len(source[prop]) > 1: 117 | #continue? 118 | return ret 119 | 120 | fake = next(iter(source[prop])) 121 | items = list(self.item_ids) + [target] 122 | if any(fake.target_equals(tgt) for tgt in items): 123 | snak = pywikibot.Claim( 124 | self.repo, self.inferred_from, isReference=True) 125 | snak.setTarget(target) 126 | source.setdefault(self.inferred_from, []).append(snak) 127 | source.pop(prop) 128 | ret = True 129 | return ret 130 | 131 | def handle_source_url(self, source): 132 | ret = False 133 | for prop in self.url_props: 134 | keys = set(source.keys()) 135 | if prop not in keys: 136 | continue 137 | if keys - (self.whitelist_props | {prop}): 138 | continue 139 | if len(source[prop]) > 1: 140 | #continue? 141 | return ret 142 | 143 | snak = next(iter(source[prop])) 144 | url = snak.getTarget() 145 | if not url: 146 | continue 147 | target = None 148 | with suppress(pywikibot.InvalidTitle, ValueError): 149 | for prefix in [self.url_start, self.repo.concept_base_uri]: 150 | target_id = url.removeprefix(prefix) 151 | if target_id != url: 152 | target = pywikibot.ItemPage(self.repo, target_id) 153 | break 154 | if target: 155 | if target.isRedirectPage(): 156 | target = target.getRedirectTarget() 157 | if target != snak.on_item: 158 | snak = pywikibot.Claim( 159 | self.repo, self.inferred_from, isReference=True) 160 | snak.setTarget(target) 161 | source.setdefault(self.inferred_from, []).append(snak) 162 | source.pop(prop) 163 | ret = True 164 | return ret 165 | 166 | 167 | def main(*args): 168 | options = {} 169 | local_args = pywikibot.handle_args(args) 170 | site = pywikibot.Site() 171 | genFactory = pagegenerators.GeneratorFactory(site=site) 172 | for arg in genFactory.handle_args(local_args): 173 | if arg.startswith('-'): 174 | arg, sep, value = arg.partition(':') 175 | if value != '': 176 | options[arg[1:]] = value if not value.isdigit() else int(value) 177 | else: 178 | options[arg[1:]] = True 179 | 180 | generator = genFactory.getCombinedGenerator() 181 | bot = FakeReferencesBot(generator=generator, site=site, **options) 182 | bot.run() 183 | 184 | 185 | if __name__ == '__main__': 186 | main() 187 | -------------------------------------------------------------------------------- /typos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import time 3 | 4 | import pywikibot 5 | from pywikibot import pagegenerators 6 | 7 | from typoloader import TyposLoader 8 | from wikitext import WikitextFixingBot 9 | 10 | 11 | class TypoBot(WikitextFixingBot): 12 | 13 | ''' 14 | Bot for typo fixing 15 | 16 | Supported parameters: 17 | * -allrules - use if you want to load rules that need user's decision 18 | * -offset:# - what typo rule do you want to start from 19 | * -quick - use if you want the bot to focus on the current rule, 20 | ie. 
skip the page if the rule couldn't be applied 21 | * -threshold:# - skip rule when loaded/replaced ratio gets over # 22 | * -typospage: - what page do you want to load typo rules from 23 | * -whitelistpage: - what page holds pages which should be skipped 24 | ''' 25 | 26 | def __init__(self, generator, *, offset=0, **kwargs): 27 | self.available_options.update({ 28 | 'allrules': False, 29 | 'quick': False, 30 | 'threshold': 10, 31 | 'typospage': None, 32 | 'whitelistpage': None, 33 | }) 34 | kwargs['typos'] = False 35 | self.own_generator = not bool(generator) 36 | if self.own_generator: 37 | self.generator = self.make_generator() 38 | else: 39 | self.generator = generator 40 | 41 | super().__init__(**kwargs) 42 | self.offset = offset 43 | 44 | def setup(self): 45 | loader = TyposLoader( 46 | self.site, allrules=self.opt['allrules'], 47 | typospage=self.opt['typospage'], 48 | whitelistpage=self.opt['whitelistpage']) 49 | self.typoRules = loader.loadTypos() 50 | self.fp_page = loader.getWhitelistPage() 51 | self.whitelist = loader.loadWhitelist() 52 | 53 | @property 54 | def is_rule_accurate(self): 55 | threshold = self.opt['threshold'] 56 | result = (self.processed < threshold or 57 | self.processed / threshold < self.replaced) 58 | return result 59 | 60 | def make_generator(self): 61 | for i, rule in enumerate(self.typoRules[:]): 62 | if self.offset > i: 63 | continue 64 | if rule.query is None: 65 | continue 66 | 67 | # todo: if not allrules:... 68 | self.offset = i 69 | pywikibot.info(f'\nQuery: "{rule.query}"') 70 | old_max = rule.longest 71 | rule.longest = 0.0 72 | self.current_rule = rule 73 | self.skip_rule = False 74 | self.processed = self.replaced = 0 75 | for page in self.site.search(rule.query, namespaces=[0]): 76 | if self.skip_rule: 77 | break 78 | yield page 79 | if not self.is_rule_accurate: 80 | pywikibot.info( 81 | f'Skipped inefficient query "{rule.query}" ' 82 | f'({self.replaced}/{self.processed}') 83 | break 84 | else: 85 | if self.processed < 1: 86 | pywikibot.info(f'No results from query "{rule.query}"') 87 | else: 88 | percent = (self.replaced / self.processed) * 100 89 | pywikibot.info( 90 | f'{percent:.f}% accuracy of query "{rule.query}"') 91 | 92 | if self.processed > 0: 93 | pywikibot.info(f'Longest match: {rule.longest}s') 94 | rule.longest = max(old_max, rule.longest) 95 | 96 | def save_false_positive(self, page): 97 | link = page.title(as_link=True) 98 | self.fp_page.text += f'\n* {link}' 99 | self.fp_page.save(summary=link, asynchronous=True) 100 | self.whitelist.append(page.title()) 101 | 102 | def skip_page(self, page): 103 | if page.title() in self.whitelist: 104 | pywikibot.warning(f'Skipped {page} because it is whitelisted') 105 | return True 106 | 107 | if self.own_generator and self.current_rule.find.search(page.title()): 108 | pywikibot.warning( 109 | f'Skipped {page} because the rule matches the title') 110 | return True 111 | 112 | return super().skip_page(page) 113 | 114 | def init_page(self, page): 115 | out = super().init_page(page) 116 | if self.own_generator: 117 | self.processed += 1 118 | return out 119 | 120 | def treat_page(self): 121 | page = self.current_page 122 | text = page.text 123 | done_replacements = [] 124 | quickly = self.opt['quick'] is True 125 | start = time.time() 126 | if self.own_generator: 127 | text = self.current_rule.apply(page.text, done_replacements) 128 | if page.text == text: 129 | if quickly: 130 | pywikibot.info('Typo not found, not fixing another ' 131 | 'typos in quick mode') 132 | return 133 | else: 134 | 
self.replaced += 1 135 | 136 | for rule in self.typoRules: 137 | if self.own_generator and rule == self.current_rule: # __eq__ 138 | continue 139 | if rule.find.search(page.title()): 140 | continue 141 | if quickly and rule.needs_decision(): 142 | continue 143 | 144 | text = rule.apply(text, done_replacements) 145 | stop = time.time() 146 | if quickly and stop - start > 15: 147 | pywikibot.warning('Other typos exceeded 15s, skipping') 148 | break 149 | 150 | self.put_current( 151 | text, summary='oprava překlepů: %s' % ', '.join(done_replacements)) 152 | 153 | def user_confirm(self, question): 154 | if self.opt['always']: 155 | return True 156 | 157 | options = [('yes', 'y'), ('no', 'n'), ('all', 'a')] 158 | if self.fp_page.exists(): 159 | options.append(('false positive', 'f')) 160 | if self.own_generator: 161 | options.append(('skip rule', 's')) 162 | options += [('open in browser', 'b'), ('quit', 'q')] 163 | 164 | choice = pywikibot.input_choice(question, options, default='N', 165 | automatic_quit=False) 166 | 167 | if choice == 'n': 168 | return False 169 | 170 | if choice == 's': 171 | self.skip_rule = True 172 | return False 173 | 174 | if choice == 'b': 175 | pywikibot.bot.open_webbrowser(self.current_page) 176 | return False 177 | 178 | if choice == 'f': 179 | self.save_false_positive(self.current_page) 180 | return False 181 | 182 | if choice == 'q': 183 | self.quit() 184 | 185 | if choice == 'a': 186 | self.options['always'] = True 187 | 188 | return True 189 | 190 | def teardown(self): 191 | rules = sorted( 192 | (rule for rule in self.typoRules if not rule.needs_decision()), 193 | key=lambda rule: rule.longest, reverse=True)[:3] 194 | pywikibot.info('\nSlowest autonomous rules:') 195 | for i, rule in enumerate(rules, start=1): 196 | pywikibot.info(f'{i}. 
"{rule.find.pattern}" - {rule.longest}') 197 | if self.own_generator: 198 | pywikibot.info(f'\nCurrent offset: {self.offset}\n') 199 | super().teardown() 200 | 201 | 202 | def main(*args): 203 | options = {} 204 | local_args = pywikibot.handle_args(args) 205 | genFactory = pagegenerators.GeneratorFactory() 206 | genFactory.handle_arg('-ns:0') 207 | for arg in genFactory.handle_args(local_args): 208 | if arg.startswith('-'): 209 | arg, sep, value = arg.partition(':') 210 | if value != '': 211 | options[arg[1:]] = value if not value.isdigit() else int(value) 212 | else: 213 | options[arg[1:]] = True 214 | 215 | generator = genFactory.getCombinedGenerator(preload=True) 216 | bot = TypoBot(generator, **options) 217 | bot.run() 218 | 219 | 220 | if __name__ == '__main__': 221 | main() 222 | -------------------------------------------------------------------------------- /cswiki/iucn.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | from datetime import datetime 4 | 5 | import mwparserfromhell as parser 6 | import pywikibot 7 | import pywikibot.pagegenerators as pg 8 | from pywikibot.exceptions import NoWikibaseEntityError 9 | from pywikibot.page import PropertyPage 10 | 11 | def get_revision_wrapper(item, rev_id: int): 12 | # https://github.com/matejsuchanek/wikidata-constraints/blob/11602b4050e4623c9f1e4e0b279cf2f6c14b2a53/retrieval.py#L131-L164 13 | cls = type(item) 14 | repo = item.repo 15 | entity_id = item.getID() 16 | 17 | rev = cls(repo, entity_id) 18 | data = json.loads(item.getOldVersion(rev_id)) 19 | for key, val in data.items(): 20 | # handle old serialization 21 | if val == []: 22 | data[key] = {} 23 | 24 | rev._content = data 25 | while True: 26 | try: 27 | rev.get() 28 | except (KeyError, NoWikibaseEntityError) as exc: 29 | # handle deleted properties 30 | if isinstance(exc, NoWikibaseEntityError): 31 | key = exc.entity.id 32 | else: 33 | key = exc.args[0] 34 | # in theory, this isn't needed 35 | if not PropertyPage.is_valid_id(key): 36 | raise 37 | 38 | if key.lower() in data['claims']: 39 | data['claims'].pop(key.lower()) 40 | elif key.upper() in data['claims']: 41 | data['claims'].pop(key.upper()) 42 | else: 43 | raise 44 | else: 45 | return rev 46 | 47 | 48 | def get_best_statements(statements): 49 | best = [] 50 | best_rank = 'normal' 51 | for st in statements: 52 | if st.rank == best_rank: 53 | best.append(st) 54 | elif st.rank == 'preferred': 55 | best[:] = [st] 56 | best_rank = st.rank 57 | return best 58 | 59 | 60 | def is_different(old, new): 61 | if old == new: 62 | return False 63 | 64 | if old.getID() == 'Q11394' and new.getID() == 'Q96377276': 65 | return False 66 | 67 | return True 68 | 69 | 70 | args = pywikibot.handle_args() 71 | 72 | site = pywikibot.Site('cs', 'wikipedia') 73 | repo = pywikibot.Site('wikidata', 'wikidata') 74 | 75 | needle = re.compile(r'\b[Pp]141\b') 76 | 77 | editions = { 78 | #'2012.1': '20120619', 79 | '2012.2': '20121017', 80 | '2013.1': '20130702', 81 | '2013.2': '20131126', 82 | '2014.1': '20140612', 83 | '2014.2': '20140724', 84 | '2014.3': '20141117', 85 | '2015.1': '20150603', 86 | '2015.2': '20150623', 87 | '2015.4': '20151119', 88 | '2016.2': '20160904', 89 | '2016.3': '20161208', 90 | '2017.2': '20170914', 91 | '2017.3': '20171205', 92 | '2018.1': '20180705', 93 | '2019.2': '20190718', 94 | '2019.3': '20191210', 95 | '2020.2': '20200709', 96 | '2020.3': '20201210', 97 | '2021.1': '20210325', 98 | '2021.2': '20210904', 99 | '2021.3': '20211209', 100 | '2022.1': 
'20220101', 101 |     '2022.2': '20221209', 102 |     '2023.1': '20231211', 103 |     '2025.2': '20251010', 104 | } 105 | stat_to_label = { 106 |     'Q719675': 'téměř ohrožený', 107 |     'Q211005': 'málo dotčený', 108 |     'Q219127': 'kriticky ohrožený druh', 109 |     'Q237350': 'vyhynulý', 110 |     'Q239509': 'vyhynulý v přírodě', 111 |     'Q278113': 'zranitelný', 112 |     'Q719675': 'téměř ohrožený', 113 |     'Q3245245': 'chybí údaje', 114 |     'Q123509': 'vymírání', 115 |     'Q11394': 'ohrožený', 116 |     'Q96377276': 'ohrožený', 117 | } 118 | links = { 119 |     pywikibot.Page(site, 'Kriticky_ohrožený_taxon'), 120 |     pywikibot.Page(site, 'Málo_dotčený_taxon'), 121 |     pywikibot.Page(site, 'O_taxonu_chybí_údaje'), 122 |     pywikibot.Page(site, 'Nevyhodnocený_taxon'), 123 |     pywikibot.Page(site, 'Ohrožený_taxon'), 124 |     pywikibot.Page(site, 'Téměř_ohrožený_taxon'), 125 |     pywikibot.Page(site, 'Zranitelný_taxon'), 126 |     pywikibot.Page(site, 'Taxon vyhynulý v přírodě'), 127 |     pywikibot.Page(site, 'Vyhynulý_taxon'), 128 | } 129 | 130 | lines = [ 131 |     '<div>
', 132 | '{| class="wikitable sortable"', 133 | '! Č.', 134 | '! Taxon', 135 | '! class="unsortable" | Wikidata', 136 | '! Naposled', 137 | '! class="unsortable" | Odkazuje na', 138 | ] 139 | lines.extend(f'! class="unsortable" | {ed}' for ed in editions) 140 | 141 | i = 0 142 | 143 | sparql = '''SELECT ?item WHERE { 144 | ?article schema:about ?item; schema:isPartOf . 145 | ?item wdt:P141 ?iucn . 146 | } ORDER BY ?item''' 147 | 148 | gen = pg.PreloadingEntityGenerator( 149 | pg.WikidataSPARQLPageGenerator(sparql, site=repo) 150 | ) 151 | 152 | for item in gen: 153 | best = get_best_statements(item.claims.get('P141', [])) 154 | if not best: 155 | continue 156 | 157 | ts_to_status = {} 158 | cur = None 159 | 160 | for rev in item.revisions(reverse=True, content=False): 161 | if not rev.parentid: 162 | continue 163 | 164 | if not needle.search(rev.comment): 165 | continue 166 | 167 | if rev.comment.startswith('/* wbsetreference-set:'): 168 | continue 169 | 170 | if 'mw-reverted' in rev.tags: 171 | continue 172 | 173 | this = get_revision_wrapper(item, rev.revid) 174 | claims = get_best_statements(this.claims.get('P141', [])) 175 | if claims: 176 | new = claims[0].getTarget() 177 | if cur is None or is_different(cur, new): 178 | key = rev.timestamp.strftime('%Y%m%d%H%M%S') 179 | ts_to_status[key] = new.getID() 180 | cur = new 181 | 182 | if len(ts_to_status) < 2: 183 | continue 184 | 185 | last_change = max(ts_to_status) 186 | 187 | new = best[0].getTarget() 188 | if cur is None or is_different(cur, new): 189 | key = item.latest_revision.timestamp.strftime('%Y%m%d%H%M%S') 190 | ts_to_status[key] = new.getID() 191 | 192 | link = item.sitelinks[site] 193 | page = pywikibot.Page(link) 194 | created = page.oldest_revision.timestamp 195 | if created > datetime.strptime(last_change, '%Y%m%d%H%M%S'): 196 | continue 197 | 198 | per_edition = {} 199 | for ts, stat in ts_to_status.items(): # asc 200 | last_release_date = max( 201 | (date for date in editions.values() if date < ts), 202 | default=0 203 | ) 204 | for ed, date in editions.items(): 205 | if last_release_date <= date: 206 | per_edition[ed] = stat 207 | 208 | links_to = [ 209 | other.title(as_link=True) 210 | for other in page.linkedPages( 211 | namespaces=0, 212 | content=False, 213 | follow_redirects=True 214 | ) 215 | if other in links 216 | ] 217 | 218 | i += 1 219 | ymd = f'{last_change[:4]}-{last_change[4:6]}-{last_change[6:8]}' 220 | 221 | lines.append('|-') 222 | lines.append(f'| {i}') 223 | lines.append(f'| {link.astext()}') 224 | lines.append(f'| [[d:{item.getID()}|{item.getID()}]]') 225 | lines.append(f'| data-sort-value="{last_change}" | {ymd}') 226 | lines.append('| ' + ('
<br>'.join(sorted(links_to)))) 227 | 228 |     last = '?' 229 |     streak = 0 230 |     for ed in editions: # asc 231 |         stat = per_edition.get(ed, '?') 232 |         if stat == last: 233 |             streak += 1 234 |             continue 235 | 236 |         if streak > 1: 237 |             lines.append( 238 |                 f'| colspan="{streak}" align="center" | {stat_to_label.get(last, last)}' 239 |             ) 240 |         elif streak == 1: 241 |             lines.append(f'| {stat_to_label.get(last, last)}') 242 | 243 |         last = stat 244 |         streak = 1 245 | 246 |     if streak > 1: 247 |         lines.append( 248 |             f'| colspan="{streak}" align="center" | {stat_to_label.get(last, last)}' 249 |         ) 250 |     elif streak == 1: 251 |         lines.append(f'| {stat_to_label.get(last, last)}') 252 | 253 | lines.append('|}') 254 | lines.append('</div>
') 255 | 256 | new_text = '\n'.join(lines) 257 | 258 | site.login() 259 | 260 | output_page = pywikibot.Page(site, 'Wikipedie:WikiProjekt_Biologie/Status_ohrožení/vše') 261 | code = parser.parse(output_page.text) 262 | for old in code.ifilter_tags(matches='div'): 263 | code.replace(old, new_text) 264 | output_page.text = str(code) 265 | break 266 | else: 267 | output_page.text = new_text 268 | 269 | output_page.save( 270 | summary='tabulka', apply_cosmetic_changes=False, bot=False, minor=False 271 | ) 272 | -------------------------------------------------------------------------------- /cleanup_dates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from contextlib import suppress 3 | from datetime import datetime, timedelta 4 | from itertools import chain, combinations 5 | 6 | import pywikibot 7 | 8 | from pywikibot import Claim 9 | from pywikibot.exceptions import APIError 10 | from pywikibot.pagegenerators import ( 11 | GeneratorFactory, 12 | PreloadingEntityGenerator, 13 | WikidataSPARQLPageGenerator, 14 | ) 15 | 16 | from query_store import QueryStore 17 | from wikidata import WikidataEntityBot 18 | 19 | 20 | class DuplicateDatesBot(WikidataEntityBot): 21 | 22 | invalid_refs = {'P143', 'P813', 'P3452', 'P4656'} 23 | use_from_page = False 24 | 25 | def __init__(self, generator, **kwargs): 26 | self.available_options.update({ 27 | 'days': 30, 28 | 'props': ['P569', 'P570', 'P2031', 'P2032'], 29 | }) 30 | super().__init__(**kwargs) 31 | self.store = QueryStore() 32 | self._generator = generator or self.custom_generator() 33 | 34 | def custom_generator(self): 35 | for prop in self.opt['props']: 36 | for key in ('duplicate_dates', 'unmerged_dates'): 37 | time = datetime.now() - timedelta(days=self.opt['days']) 38 | query = self.store.build_query( 39 | key, prop=prop, date=time.isoformat(timespec='seconds')) 40 | yield from WikidataSPARQLPageGenerator(query, site=self.repo) 41 | 42 | @property 43 | def generator(self): 44 | return PreloadingEntityGenerator(self._generator) 45 | 46 | @property 47 | def summary(self): 48 | return ('remove redundant and less precise unsourced claim(s), ' 49 | '[[Wikidata:Requests for permissions/Bot/MatSuBot 7|see RfPB]]') 50 | 51 | @staticmethod 52 | def first_inside_second(first, second): 53 | if first.precision > second.precision: 54 | if second.precision in {9, 10}: 55 | if first.year == second.year: 56 | if second.precision == 9: 57 | return True 58 | elif second.precision == 10: 59 | return first.month == second.month 60 | return False 61 | 62 | @staticmethod 63 | def first_same_as_second(first, second): 64 | if first == second: 65 | return True 66 | if first.precision == second.precision: 67 | if first.precision in {9, 10} and first.year == second.year: 68 | if first.precision == 10: 69 | return first.month == second.month 70 | else: 71 | return True 72 | return False 73 | 74 | @classmethod 75 | def is_valid_source(cls, source): 76 | return bool(set(source) - cls.invalid_refs) 77 | 78 | @classmethod 79 | def number_of_sources(cls, claim): 80 | number = 0 81 | for source in claim.sources: 82 | number += cls.is_valid_source(source) 83 | return number 84 | 85 | @classmethod 86 | def is_sourced(cls, claim): 87 | return cls.number_of_sources(claim) > 0 88 | 89 | @classmethod 90 | def can_merge_claims(cls, claim1, claim2): 91 | if claim1.getSnakType() != claim2.getSnakType(): 92 | return False 93 | 94 | if ( 95 | claim1.getSnakType() == 'value' 96 | and not cls.first_same_as_second( 97 | 
claim1.getTarget(), 98 | claim2.getTarget() 99 | ) 100 | ): 101 | return False 102 | 103 | if ( 104 | claim1.qualifiers != claim2.qualifiers 105 | and not ( 106 | claim1.rank != 'deprecated' 107 | and claim2.rank == 'normal' 108 | and not claim2.qualifiers 109 | and not cls.is_sourced(claim2) 110 | ) 111 | and not ( 112 | claim2.rank != 'deprecated' 113 | and claim1.rank == 'normal' 114 | and not claim1.qualifiers 115 | and not cls.is_sourced(claim1) 116 | ) 117 | ): 118 | return False 119 | 120 | return True 121 | 122 | def treat_page_and_item(self, page, item): 123 | redundant = [] 124 | unmerged = [] 125 | for prop in self.opt['props']: 126 | claims = item.claims.get(prop, []) 127 | if len(claims) < 2: 128 | continue 129 | 130 | already = set() 131 | for claim1, claim2 in combinations(claims, 2): 132 | if claim1.snak in already or claim2.snak in already: 133 | continue 134 | 135 | if (claim1.rank, claim2.rank) in ( 136 | ('preferred', 'deprecated'), 137 | ('deprecated', 'preferred'), 138 | ): 139 | # this would need manual intervention 140 | continue 141 | 142 | if self.can_merge_claims(claim1, claim2): 143 | # never remove preferred/deprecated claim 144 | # if either is normal 145 | if claim1.rank != claim2.rank: 146 | if claim1.rank == 'normal': 147 | claim1, claim2 = claim2, claim1 148 | elif claim2.qualifiers and not claim1.qualifiers: 149 | claim1, claim2 = claim2, claim1 150 | elif ( 151 | self.number_of_sources(claim2) > 152 | self.number_of_sources(claim1) 153 | ): 154 | claim1, claim2 = claim2, claim1 155 | 156 | for source in claim2.sources: 157 | if not self.is_valid_source(source): 158 | continue 159 | sources_copy = [ 160 | c.copy() for c in chain(*source.values())] 161 | with suppress(APIError): # duplicate reference present 162 | claim1.addSources(sources_copy) 163 | 164 | unmerged.append(claim2) 165 | already.add(claim2.snak) 166 | continue 167 | 168 | if not (claim1.getSnakType() == 'value' == claim2.getSnakType()): 169 | continue 170 | 171 | pairs = [(claim1, claim2), (claim2, claim1)] 172 | for first, second in pairs: 173 | if self.is_sourced(second): 174 | continue 175 | # never remove preferred/deprecated claim 176 | # if either is normal 177 | if first.rank != second.rank and second.rank != 'normal': 178 | continue 179 | 180 | if ( 181 | first.qualifiers != second.qualifiers 182 | and not ( 183 | first.rank == 'preferred' 184 | and second.rank == 'normal' 185 | and not second.qualifiers 186 | ) 187 | ): 188 | continue 189 | 190 | if self.first_inside_second( 191 | first.getTarget(), 192 | second.getTarget() 193 | ): 194 | redundant.append(second) 195 | already.add(second.snak) 196 | break 197 | 198 | if redundant or unmerged: 199 | if redundant: 200 | summary = self.summary 201 | else: 202 | summary = 'remove redundant claim(s)' 203 | item.removeClaims(redundant + unmerged, summary=summary) 204 | 205 | 206 | def main(*args): 207 | options = {} 208 | local_args = pywikibot.handle_args(args) 209 | site = pywikibot.Site() 210 | genFactory = GeneratorFactory(site=site) 211 | for arg in genFactory.handle_args(local_args): 212 | if arg.startswith('-'): 213 | arg, sep, value = arg.partition(':') 214 | if arg == '-prop': 215 | options.setdefault('props', []).append( 216 | value or pywikibot.input('Which property should be treated?')) 217 | elif value: 218 | options[arg[1:]] = int(value) if value.isdigit() else value 219 | else: 220 | options[arg[1:]] = True 221 | 222 | generator = genFactory.getCombinedGenerator() 223 | bot = DuplicateDatesBot(generator=generator, 
site=site, **options) 224 | bot.run() 225 | 226 | 227 | if __name__ == '__main__': 228 | main() 229 | -------------------------------------------------------------------------------- /typoloader.py: -------------------------------------------------------------------------------- 1 | import re 2 | import time 3 | 4 | import pywikibot 5 | 6 | from pywikibot import textlib 7 | 8 | 9 | class IncompleteTypoRuleException(Exception): 10 | 11 | '''Exception raised when constructing a typo rule from incomplete data''' 12 | 13 | def __init__(self, message): 14 | self.message = message 15 | 16 | 17 | class InvalidExpressionException(Exception): 18 | 19 | '''Exception raised when an expression has invalid syntax''' 20 | 21 | def __init__(self, error, aspect='regular expression'): 22 | self.message = error.msg 23 | self.aspect = aspect 24 | 25 | 26 | class TypoRule: 27 | 28 | '''Class representing one typo rule''' 29 | 30 | exceptions = [ 31 | 'category', 'comment', 'header', 'hyperlink', 'interwiki', 'invoke', 32 | 'property', 'template', 33 | 34 | # tags 35 | 'blockquote', 'code', 'gallery', 'graph', 'imagemap', 'kbd', 36 | 'mapframe', 'maplink', 'math', 'nowiki', 'poem', 'pre', 'score', 37 | 'section', 'syntaxhighlight', 'timeline', 'tt', 'var', 38 | 39 | # "target-part" of a wikilink 40 | re.compile(r'\[\[([^][|]+)(\]\]\w*|([^][|]+\|)+)'), 41 | 42 | re.compile('<[a-z]+ [^<>]+>|'), # HTML tag 43 | re.compile(r'„[^\n"„“]+["“]|(?') 49 | 50 | def __init__(self, find, replacements, auto=False, query=None): 51 | self.find = find 52 | self.replacements = replacements 53 | self.auto = auto 54 | self.query = query 55 | self.longest = 0 56 | 57 | def __eq__(self, other): 58 | if isinstance(other, self.__class__): 59 | return self.id == other.id 60 | else: 61 | return False 62 | 63 | def __ne__(self, other): 64 | return not self.__eq__(other) 65 | 66 | def __repr__(self): 67 | return ( 68 | f'{self.__class__.name}({self.find!r}, {self.replacements!r}, ' 69 | f'auto={self.auto!r}, query={self.query!r})' 70 | ) 71 | 72 | def needs_decision(self): 73 | return not self.auto or len(self.replacements) > 1 74 | 75 | @classmethod 76 | def newFromParameters(cls, parameters): 77 | if '1' not in parameters: 78 | raise IncompleteTypoRuleException('Missing find expression') 79 | 80 | find = cls.nowikiR.sub('', parameters['1']) 81 | try: 82 | find = re.compile(find, re.M) 83 | except re.error as exc: 84 | raise InvalidExpressionException(exc) 85 | 86 | replacements = [] 87 | for key in '23456': 88 | if key in parameters: 89 | replacement = re.sub(r'\$([1-9])', r'\\\1', cls.nowikiR.sub( 90 | '', parameters[key])) 91 | replacements.append(replacement) 92 | 93 | if not replacements: 94 | raise IncompleteTypoRuleException( 95 | f'No replacements found for rule "{find.pattern}"') 96 | 97 | query = None 98 | if parameters.get('hledat'): 99 | part = parameters['hledat'].replace('{{!}}', '|') 100 | if parameters.get('insource') == 'ne': 101 | query = part 102 | else: 103 | try: 104 | re.compile(part) 105 | query = f'insource:/{part}/' 106 | except re.error as exc: 107 | raise InvalidExpressionException(exc, 'query') 108 | 109 | auto = parameters.get('auto') == 'ano' 110 | 111 | return cls(find, replacements, auto, query) 112 | 113 | def summary_hook(self, match, replaced): 114 | def underscores(string): 115 | if string.startswith(' '): 116 | string = '_' + string[1:] 117 | if string.endswith(' '): 118 | string = string[:-1] + '_' 119 | return string 120 | 121 | new = old = match.group() 122 | if self.needs_decision(): 123 | 
options = [('keep', 'k')] 124 | replacements = [] 125 | for i, repl in enumerate(self.replacements, start=1): 126 | replacement = match.expand(repl) 127 | replacements.append(replacement) 128 | options.append((f'{i} {underscores(replacement)}', str(i))) 129 | text = match.string 130 | pre = text[max(0, match.start() - 30):match.start()].rpartition('\n')[2] 131 | post = text[match.end():match.end() + 30].partition('\n')[0] 132 | pywikibot.info(f'{pre}<>{old}<>{pos}') 133 | choice = pywikibot.input_choice('Choose the best replacement', 134 | options, automatic_quit=False, 135 | default='k') 136 | if choice != 'k': 137 | new = replacements[int(choice) - 1] 138 | else: 139 | new = match.expand(self.replacements[0]) 140 | if old == new: 141 | pywikibot.warning(f'No replacement done in string "{old}"') 142 | 143 | if old != new: 144 | old_str = underscores(old.replace('\n', '\\n')) 145 | new_str = underscores(new.replace('\n', '\\n')) 146 | fragment = f'{old_str} → {new_str}' 147 | if fragment.lower() not in map(str.lower, replaced): 148 | replaced.append(fragment) 149 | return new 150 | 151 | def apply(self, text, replaced=None): 152 | if replaced is None: 153 | replaced = [] 154 | hook = lambda match: self.summary_hook(match, replaced) 155 | start = time.clock() 156 | text = textlib.replaceExcept( 157 | text, self.find, hook, self.exceptions, site=self.site) 158 | finish = time.clock() 159 | delta = finish - start 160 | self.longest = max(delta, self.longest) 161 | if delta > 5: 162 | pywikibot.warning(f'Slow typo rule "{self.find.pattern}" ({delta})') 163 | return text 164 | 165 | 166 | class TyposLoader: 167 | 168 | top_id = 0 169 | 170 | '''Class loading and holding typo rules''' 171 | 172 | def __init__(self, site, *, allrules=False, typospage=None, 173 | whitelistpage=None): 174 | self.site = site 175 | self.load_all = allrules 176 | self.typos_page_name = typospage 177 | self.whitelist_page_name = whitelistpage 178 | 179 | def getWhitelistPage(self): 180 | if self.whitelist_page_name is None: 181 | self.whitelist_page_name = 'Wikipedie:WPCleaner/Typo/False' 182 | 183 | return pywikibot.Page(self.site, self.whitelist_page_name) 184 | 185 | def loadTypos(self): 186 | pywikibot.info('Loading typo rules...') 187 | self.typoRules = [] 188 | 189 | if self.typos_page_name is None: 190 | self.typos_page_name = 'Wikipedie:WPCleaner/Typo' 191 | typos_page = pywikibot.Page(self.site, self.typos_page_name) 192 | if not typos_page.exists(): 193 | # todo: feedback 194 | return 195 | 196 | text = textlib.removeDisabledParts( 197 | typos_page.text, include=['nowiki'], site=self.site) 198 | load_all = self.load_all is True 199 | for template, fielddict in textlib.extract_templates_and_params( 200 | text, remove_disabled_parts=False, strip=False): 201 | if template.lower() == 'typo': 202 | try: 203 | rule = TypoRule.newFromParameters(fielddict) 204 | except IncompleteTypoRuleException as exc: 205 | pywikibot.warning(exc.message) # pwb.exception? 206 | except InvalidExpressionException as exc: 207 | if 'fixed-width' not in exc.message: 208 | pywikibot.warning('Invalid {} {}: {}'.format( 209 | exc.aspect, fielddict['1'], exc.message)) 210 | else: 211 | rule.id = self.top_id 212 | # fixme: cvar or ivar? 
213 | self.top_id += 1 214 | if load_all or not rule.needs_decision(): 215 | self.typoRules.append(rule) 216 | 217 | pywikibot.info(f'{len(self.typoRules)} typo rules loaded') 218 | return self.typoRules 219 | 220 | def loadWhitelist(self): 221 | self.whitelist = [] 222 | self.fp_page = self.getWhitelistPage() 223 | if self.fp_page.exists(): 224 | for match in re.finditer(r'\[\[([^]|]+)\]\]', self.fp_page.text): 225 | self.whitelist.append(match[1].strip()) 226 | return self.whitelist 227 | -------------------------------------------------------------------------------- /clean_dupes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from queue import Queue 3 | from threading import Lock, Thread 4 | 5 | import pywikibot 6 | 7 | from pywikibot.exceptions import NoPageError 8 | from pywikibot.pagegenerators import ( 9 | GeneratorFactory, 10 | PreloadingEntityGenerator, 11 | WikidataSPARQLPageGenerator, 12 | ) 13 | 14 | from merger import Merger 15 | from query_store import QueryStore 16 | from wikidata import WikidataEntityBot 17 | from scripts.revertbot import BaseRevertBot 18 | 19 | 20 | class DupesMergingBot(WikidataEntityBot): 21 | 22 | dupe_items = {'Q1263068', 'Q17362920', 'Q21528878'} 23 | use_from_page = False 24 | 25 | def __init__(self, generator, offset=0, **kwargs): 26 | self.available_options.update({ 27 | 'threads': 1, # unstable 28 | }) 29 | super().__init__(**kwargs) 30 | self.offset = offset 31 | self.store = QueryStore() 32 | self._generator = generator or self.custom_generator() 33 | self.save_lock = Lock() 34 | self.access_lock = Lock() 35 | self.site_locks = {} 36 | 37 | @property 38 | def generator(self): 39 | return PreloadingEntityGenerator(self._generator) 40 | 41 | def custom_generator(self): 42 | query = self.store.build_query( 43 | 'dupes', dupe=' wd:'.join(self.dupe_items), offset=self.offset) 44 | return WikidataSPARQLPageGenerator(query, site=self.repo, 45 | result_type=list) 46 | 47 | def setup(self): 48 | super().setup() 49 | count = self.opt['threads'] 50 | self.workers = [] 51 | if count > 1: 52 | self.queue = Queue(count) 53 | for i in range(count): 54 | thread = Thread(target=self.work) 55 | thread.start() 56 | self.workers.append(thread) 57 | 58 | def get_lock_for(self, site): 59 | with self.access_lock: 60 | return self.site_locks.setdefault(site, Lock()) 61 | 62 | def work(self): 63 | while True: 64 | item = self.queue.get() 65 | if item is None: 66 | break 67 | self.process_item(item) 68 | self.queue.task_done() 69 | 70 | def init_page(self, item): 71 | self.offset += 1 72 | return super().init_page(item) 73 | 74 | def skip_page(self, item): 75 | return 'P31' not in item.claims or super().skip_page(item) 76 | 77 | def treat_page_and_item(self, page, item): 78 | if self.opt['threads'] > 1: 79 | self.queue.put(item) 80 | else: 81 | self.process_item(item) 82 | 83 | def process_item(self, item): 84 | claims = [] 85 | targets = set() 86 | for claim in item.claims['P31']: 87 | if claim.snaktype != 'value': 88 | continue 89 | if claim.target.id not in self.dupe_items: 90 | continue 91 | claims.append(claim) 92 | for snak in claim.qualifiers.get('P460', []): 93 | if snak.snaktype == 'value': 94 | targets.add(snak.getTarget()) 95 | 96 | for claim in item.claims.get('P460', []): 97 | if claim.snaktype == 'value': 98 | claims.append(claim) 99 | targets.add(claim.getTarget()) 100 | 101 | sitelinks = [] 102 | if not targets: 103 | for page in item.iterlinks(): 104 | site = page.site 105 | with 
self.get_lock_for(site): 106 | if not page.exists(): 107 | sitelinks.append(site) 108 | continue 109 | if page.isRedirectPage(): 110 | try: 111 | target = page.getRedirectTarget().data_item() 112 | except NoPageError: 113 | pass 114 | else: 115 | targets.add(target) 116 | 117 | if not targets: 118 | pywikibot.info('No target found') 119 | return 120 | 121 | target = targets.pop() 122 | if targets: 123 | pywikibot.info('Multiple targets found') 124 | return 125 | 126 | while target.isRedirectPage(): 127 | pywikibot.warning(f'Target {target.getID()} is redirect') 128 | target = target.getRedirectTarget() 129 | 130 | if item == target: 131 | self._save_page(item, self._save_entity, item.removeClaims, claims) 132 | return 133 | 134 | target_sitelinks = [] 135 | for dbname in item.sitelinks: 136 | if dbname not in target.sitelinks: 137 | continue 138 | 139 | link = item.sitelinks[dbname] 140 | site = link.site 141 | with self.get_lock_for(site): 142 | page = pywikibot.Page(link) 143 | if not page.exists(): 144 | sitelinks.append(site) 145 | continue 146 | 147 | target_link = target.sitelinks[dbname] 148 | target_page = pywikibot.Page(target_link) 149 | if not target_page.exists(): 150 | target_sitelinks.append(site) 151 | continue 152 | 153 | if self.redirectsTo(page, target_page): 154 | if link.badges: 155 | sitelinks.append(site) 156 | continue 157 | 158 | if self.redirectsTo(target_page, page): 159 | if target_link.badges: 160 | target_sitelinks.append(site) 161 | continue 162 | 163 | pywikibot.info(f'Target has a conflicting sitelink: {dbname}') 164 | return 165 | 166 | target_claims = [] 167 | for claim in target.claims.get('P460', []): 168 | if claim.snaktype != 'value': 169 | continue 170 | if claim.target_equals(item): 171 | target_claims.append(claim) 172 | 173 | for claim in target.claims.get('P31', []): 174 | if claim.snaktype != 'value': 175 | continue 176 | if claim.target.id not in self.dupe_items: 177 | continue 178 | for snak in claim.qualifiers.get('P460', []): 179 | if snak.snaktype == 'value' and snak.target_equals(item): 180 | target_claims.append(claim) 181 | 182 | if sitelinks: 183 | self._save_page( 184 | item, self._save_entity, item.removeSitelinks, sitelinks, 185 | summary='removing sitelink(s) to non-existing / redirected page(s)') 186 | if claims: 187 | self._save_page(item, self._save_entity, item.removeClaims, claims) 188 | if target_sitelinks: 189 | self._save_page( 190 | target, self._save_entity, target.removeSitelinks, target_sitelinks, 191 | summary='removing sitelink(s) to non-existing / redirected page(s)') 192 | if target_claims: 193 | self._save_page( 194 | target, self._save_entity, target.removeClaims, target_claims) 195 | 196 | target, item = Merger.sort_for_merge( 197 | [item, target], key=['sitelinks', 'claims', 'id']) 198 | 199 | if not self._save_page( 200 | item, self._save_entity, Merger.clean_merge, item, target, 201 | ignore_conflicts=['description']): 202 | pywikibot.info('Reverting changes...') 203 | bot = BaseRevertBot(self.site) # todo: integrate to Merger 204 | comment = 'Error occurred when attempting to merge with %s' 205 | bot.comment = comment % target.title(as_link=True) 206 | bot.revert({'title': item.title()}) 207 | bot.comment = comment % item.title(as_link=True) 208 | bot.revert({'title': target.title()}) 209 | return 210 | 211 | self.offset -= 1 212 | 213 | def redirectsTo(self, page, target): 214 | return page.isRedirectPage() and page.getRedirectTarget() == target 215 | 216 | def _save_entity(self, callback, *args, **kwargs): 
217 | with self.save_lock: 218 | if 'asynchronous' in kwargs: 219 | kwargs.pop('asynchronous') 220 | return callback(*args, **kwargs) 221 | 222 | def teardown(self): 223 | count = len(self.workers) 224 | for i in range(count): 225 | self.queue.put(None) 226 | for worker in self.workers: 227 | worker.join() 228 | super().teardown() 229 | 230 | def exit(self): 231 | super().exit() 232 | bound = self.offset - self.offset % 50 233 | pywikibot.info(f'\nCurrent offset: {self.offset} (use {bound})\n') 234 | 235 | 236 | def main(*args): 237 | options = {} 238 | local_args = pywikibot.handle_args(args) 239 | site = pywikibot.Site() 240 | genFactory = GeneratorFactory(site=site) 241 | for arg in genFactory.handle_args(local_args): 242 | if arg.startswith('-'): 243 | arg, sep, value = arg.partition(':') 244 | if value != '': 245 | options[arg[1:]] = value if not value.isdigit() else int(value) 246 | else: 247 | options[arg[1:]] = True 248 | 249 | generator = genFactory.getCombinedGenerator() 250 | bot = DupesMergingBot(generator=generator, site=site, **options) 251 | bot.run() 252 | 253 | 254 | if __name__ == '__main__': 255 | main() 256 | -------------------------------------------------------------------------------- /connect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import datetime 3 | 4 | import pywikibot 5 | 6 | from pywikibot import pagegenerators, textlib 7 | from pywikibot.exceptions import APIError, NoPageError 8 | from pywikibot.tools import first_lower 9 | 10 | pywikibot.handle_args() 11 | 12 | start = datetime.datetime.now() 13 | 14 | do_only = [] 15 | dont_do = [] 16 | 17 | tp_map = { 18 | 'cs|wikipedia': { 19 | 'commons': { 20 | '1': { 21 | 'lang': 'commons', 22 | 'family': 'commons' 23 | }, 24 | }, 25 | 'commonscat': { 26 | '1': { 27 | 'lang': 'commons', 28 | 'family': 'commons', 29 | 'pattern': 'Category:%s', 30 | 'namespaces': [14] 31 | }, 32 | }, 33 | 'wikicitáty': { 34 | 'dílo': { 35 | 'family': 'wikiquote', 36 | 'pattern': 'Dílo:%s' 37 | }, 38 | 'kategorie': { 39 | 'family': 'wikiquote', 40 | 'pattern': 'Kategorie:%s' 41 | }, 42 | 'osoba': 'wikiquote', 43 | 'téma': 'wikiquote' 44 | }, 45 | 'wikizdroje': { 46 | 'dílo': 'wikisource', 47 | 'autor': { 48 | 'family': 'wikisource', 49 | 'pattern': 'Autor:%s' 50 | }, 51 | 'kategorie': { 52 | 'family': 'wikiquote', 53 | 'pattern': 'Kategorie:%s' 54 | }, 55 | }, 56 | 'wikidruhy': { 57 | 'taxon': { 58 | 'family': 'species', 59 | 'lang': 'species', 60 | }, 61 | }, 62 | }, 63 | 'cs|wikiquote': { 64 | 'commons': { 65 | 'galerie': { 66 | 'lang': 'commons', 67 | 'family': 'commons' 68 | }, 69 | 'kategorie': { 70 | 'lang': 'commons', 71 | 'family': 'commons', 72 | 'pattern': 'Category:%s', 73 | 'namespaces': [14] 74 | }, 75 | }, 76 | 'wikipedie': { 77 | 'článek': 'wikipedia' 78 | }, 79 | }, 80 | 'cs|wikisource': { 81 | 'commons': { 82 | 'galerie': { 83 | 'lang': 'commons', 84 | 'family': 'commons' 85 | }, 86 | 'kategorie': { 87 | 'lang': 'commons', 88 | 'family': 'commons', 89 | 'pattern': 'Category:%s', 90 | 'namespaces': [14] 91 | }, 92 | }, 93 | 'autorinfo': { 94 | 'BiografieWiki': 'wikipedia', 95 | 'WikiquoteCS': 'wikiquote' 96 | }, 97 | }, 98 | 'de|wikiquote': { 99 | 'wikipedia': { 100 | '1': 'wikipedia' 101 | }, 102 | }, 103 | 'es|wikiquote': { 104 | 'wikipedia': { 105 | '1': 'wikipedia' 106 | }, 107 | }, 108 | 'fi|wikiquote': { 109 | 'wikipedia': { 110 | '1': 'wikipedia' 111 | }, 112 | }, 113 | 'fr|wikiquote': { 114 | 'autres projets': { 115 | 'w': 'wikipedia', 116 | 's': 
'wikisource', 117 | 'species': { 118 | 'family': 'species', 119 | 'lang': 'species' 120 | }, 121 | 'wikispecies': { 122 | 'family': 'species', 123 | 'lang': 'species' 124 | }, 125 | 'commons': { 126 | 'lang': 'commons', 127 | 'family': 'commons' 128 | }, 129 | '1': { 130 | 'lang': 'commons', 131 | 'family': 'commons' 132 | }, 133 | }, 134 | }, 135 | 'fr|wikiquote': { 136 | 'wikipedia': { 137 | '1': 'wikipedia' 138 | }, 139 | }, 140 | 'id|wikiquote': { 141 | 'wikipedia': { 142 | '1': 'wikipedia' 143 | }, 144 | }, 145 | 'pl|wikiquote': { 146 | 'commons': { 147 | '1': { 148 | 'lang': 'commons', 149 | 'family': 'commons' 150 | } 151 | }, 152 | 'wikinews': {str(i): 'wikinews' for i in range(1, 10)}, 153 | 'wikipediakat': { 154 | '1': { 155 | 'lang': 'pl', 156 | 'family': 'wikipedia', 157 | 'pattern': 'Category:%s', 158 | 'namespaces': [14], 159 | }, 160 | }, 161 | 'wikisource': {}, # todo 162 | }, 163 | 'pt|wikiquote': { 164 | 'autor': { 165 | 'Wikinoticias': 'wikinews', 166 | 'Wikipedia': 'wikipedia', 167 | 'Wikisource': 'wikisource' 168 | }, 169 | 'wikipédia': { 170 | '1': 'wikipedia' 171 | }, 172 | 'wikisource': { 173 | '1': 'wikisource' 174 | }, 175 | }, 176 | 'ru|wikiquote': { 177 | 'википедия': { 178 | '1': 'wikipedia' 179 | }, 180 | 'wikipedia': { 181 | '1': 'wikipedia' 182 | }, 183 | 'навигация': { 184 | 'Википедия': 'wikipedia', 185 | 'Викитека': 'wikisource', 186 | 'Викивиды': { 187 | 'family': 'species', 188 | 'lang': 'species' 189 | }, 190 | 'Викисклад': { 191 | 'lang': 'commons', 192 | 'family': 'commons' 193 | }, 194 | 'Викигид': 'wikivoyage', 195 | }, 196 | }, 197 | 'sk|wikiquote': { 198 | 'wikipedia': { 199 | '1': 'wikipedia' 200 | }, 201 | }, 202 | 'sv|wikiquote': { 203 | 'wikipedia': { 204 | '1': 'wikipedia' 205 | }, 206 | }, 207 | } 208 | 209 | for project in tp_map.keys(): 210 | lang, family = project.split('|', 1) 211 | if len(do_only) > 0 and lang + family not in do_only and family not in do_only: 212 | continue 213 | if lang + family in dont_do or family in dont_do: 214 | continue 215 | 216 | site = pywikibot.Site(lang, family) 217 | pywikibot.info(f'Doing {lang}{family}') 218 | site.login() 219 | 220 | genFactory = pagegenerators.GeneratorFactory(site=site) 221 | for ns in (0, 14, 100): 222 | if family != 'wikisource' and ns == 100: # fixme: cswikiquote 223 | continue 224 | if family == 'wikisource' and ns == 0: 225 | continue 226 | genFactory.handle_arg(f'-ns:{ns}') 227 | genFactory.handle_arg('-unconnectedpages') 228 | generator = genFactory.getCombinedGenerator(preload=True) 229 | 230 | for page in generator: 231 | if page.namespace() != 14 and page.isDisambig(): 232 | continue 233 | 234 | for template, fields in textlib.extract_templates_and_params(page.text): 235 | if first_lower(template) not in tp_map[project]: 236 | continue 237 | 238 | params = tp_map[project][first_lower(template)] 239 | for key in fields: 240 | if key not in params: 241 | continue 242 | 243 | title = fields[key].strip() 244 | if not title: 245 | continue 246 | 247 | target_lang = lang 248 | target_family = family 249 | if isinstance(params[key], dict): 250 | if params[key].get('namespaces', []) and page.namespace() not in params[key]['namespaces']: 251 | continue 252 | if 'pattern' in params[key].keys(): 253 | title = params[key]['pattern'] % title 254 | if 'family' in params[key].keys(): 255 | target_family = params[key]['family'] 256 | if 'lang' in params[key].keys(): 257 | target_lang = params[key]['lang'] 258 | else: 259 | target_family = params[key] 260 | 261 | target_site = 
pywikibot.Site(target_lang, target_family) 262 | if '{{' in title: 263 | title = site.expand_text(title, page.title()) 264 | target_page = pywikibot.Page(target_site, title) 265 | if not target_page.exists(): 266 | pywikibot.info(f"{target_page} doesn't exist") 267 | continue 268 | while target_page.isRedirectPage(): 269 | target_page = target_page.getRedirectTarget() 270 | if target_page.isDisambig(): 271 | pywikibot.info(f'{target_page} is a disambiguation') 272 | continue 273 | 274 | try: 275 | item = target_page.data_item() 276 | except NoPageError: 277 | repo = site.data_repository() 278 | # fixme: unused return value 279 | data = repo.linkTitles(page, target_page) 280 | pywikibot.info('Item created') 281 | pywikibot.info(data) # todo 282 | break 283 | if site.dbName() in item.sitelinks: 284 | pywikibot.info(page) 285 | pywikibot.info('%s already has sitelink to %s%s' % ( 286 | item, lang, family)) 287 | continue 288 | 289 | try: 290 | item.setSitelink( 291 | page, summary='Adding sitelink %s' % page.title( 292 | asLink=True, insite=item.site)) 293 | except APIError: 294 | pass 295 | else: 296 | page.purge() 297 | break 298 | 299 | end = datetime.datetime.now() 300 | 301 | pywikibot.info('Complete! Took %d seconds' % (end - start).total_seconds()) 302 | -------------------------------------------------------------------------------- /manage_duos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pywikibot 3 | 4 | from pywikibot import pagegenerators 5 | from pywikibot.data.sparql import SparqlQuery 6 | 7 | from query_store import QueryStore 8 | from wikidata import WikidataEntityBot 9 | 10 | 11 | class DuosManagingBot(WikidataEntityBot): 12 | 13 | conj = { 14 | 'af': ' en ', 15 | 'az': ' və ', 16 | 'be': ' і ', 17 | 'be-tarask': ' і ', 18 | 'bg': ' и ', 19 | 'br': ' ha ', 20 | 'ca': ' i ', 21 | 'cs': ' a ', 22 | 'cy': ' a ', 23 | 'da': ' og ', 24 | 'de': ' und ', 25 | 'de-at': ' und ', 26 | 'el': ' και ', 27 | 'eo': ' kaj ', 28 | 'es': ' y ', 29 | 'et': ' ja ', 30 | 'eu': ' eta ', 31 | 'fi': ' ja ', 32 | 'fr': ' et ', 33 | 'fy': ' en ', 34 | 'gl': ' e ', 35 | 'hr': ' i ', 36 | 'hu': ' és ', 37 | 'id': ' dan ', 38 | 'it': ' e ', 39 | 'ka': ' და ', 40 | 'la': ' et ', 41 | 'lt': ' ir ', 42 | 'lv': ' un ', 43 | 'ms': ' dan ', 44 | 'nb': ' og ', 45 | 'nl': ' en ', 46 | 'nn': ' og ', 47 | 'oc': ' e ', 48 | 'pl': ' i ', 49 | 'pt': ' e ', 50 | 'ro': ' și ', 51 | 'ru': ' и ', 52 | 'sk': ' a ', 53 | 'sl': ' in ', 54 | 'sr': ' и ', 55 | 'sr-ec': ' и ', 56 | 'sr-el': ' i ', 57 | 'sv': ' och ', 58 | 'sw': ' na ', 59 | 'tr': ' ve ', 60 | 'uk': ' і ', 61 | 'vi': ' và ', 62 | 'war': ' ngan ', 63 | } 64 | distribute_properties = [ 65 | 'P22', 'P25', 'P27', 'P40', 'P53', 'P106', 'P1412', 66 | ] 67 | class_to_relation = [ 68 | ('Q132776479', 'twin-sisters'), 69 | ('Q132776456', 'twin-brothers'), 70 | ('Q14756018', 'twin'), 71 | ('Q14073567', 'sibling'), 72 | ('Q3046146', 'spouse'), 73 | ('Q106925878', 'father-son'), 74 | ('Q1313923', 'relative'), 75 | # TODO: ('Q1141470', 'comedians'), not a "relation by blood" 76 | ] 77 | relation_map = { 78 | #'comedians': 'P1327', 79 | #'father-son': '', we don't know who is who 80 | # TODO: 'partner': 'P451', 81 | 'relative': 'P1038', 82 | 'sibling': 'P3373', 83 | 'spouse': 'P26', 84 | 'twin': 'P3373/P1039/Q131440579', 85 | 'twin-brothers': 'P3373/P1039/Q108714555', 86 | 'twin-sisters': 'P3373/P1039/Q108714611', 87 | } 88 | use_from_page = False 89 | 90 | def __init__(self, generator, **kwargs): 91 | 
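        # Bot options set below: 'class' is the Wikidata class whose instances
        # the 'duos' query collects (see custom_generator), 'min_labels' is the
        # minimum number of language labels that must split cleanly before the
        # duo is taken apart (see treat_page_and_item), and 'always' saves
        # edits without per-page confirmation.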
self.available_options.update({ 92 | 'always': True, 93 | 'class': 'Q10648343', 94 | 'min_labels': 1, 95 | }) 96 | super().__init__(**kwargs) 97 | self.store = QueryStore() 98 | self.sparql = SparqlQuery(repo=self.repo) 99 | self._generator = generator or self.custom_generator() 100 | 101 | def skip_page(self, item): 102 | if super().skip_page(item): 103 | return True 104 | if 'P31' not in item.claims: 105 | pywikibot.info(f'{item} is missing P31 property') 106 | return True 107 | if 'P527' in item.claims: 108 | pywikibot.info(f'{item} already has P527 property') 109 | return True 110 | return False 111 | 112 | def custom_generator(self): 113 | kwargs = {'class': self.opt['class']} 114 | query = self.store.build_query('duos', **kwargs) 115 | return pagegenerators.WikidataSPARQLPageGenerator(query, site=self.repo) 116 | 117 | @property 118 | def generator(self): 119 | return pagegenerators.PreloadingEntityGenerator(self._generator) 120 | 121 | def get_relation(self, item): 122 | ask_pattern = 'ASK { wd:%s wdt:P31/wdt:P279* wd:%%s }' % item.id 123 | for key, rel in self.class_to_relation: 124 | if self.sparql.ask(ask_pattern % key): 125 | return rel 126 | return None 127 | 128 | def get_labels(self, item, relation): 129 | labels = [{}, {}] 130 | for lang, value in item.labels.items(): 131 | delim = [] 132 | if lang in self.conj: 133 | delim.append(self.conj[lang]) 134 | delim.append(' and ') 135 | delim.append(' & ') 136 | for conj in delim: 137 | label = value.partition(' (')[0] 138 | if ', ' in label: 139 | continue 140 | split = label.split(conj) 141 | if len(split) != 2: 142 | continue 143 | split0 = split[0].split() 144 | split1 = split[1].split() 145 | if split1[0].islower(): 146 | continue 147 | # TODO: if len(split1) > 1 and split1[0][-1] == '.': 148 | if len(split1) > len(split0): 149 | if len(split1) > 2 and split1[-2].islower(): 150 | split1[-2:] = [' '.join(split1[-2:])] 151 | if len(split1) - len(split0) == 1: 152 | # if items are in a relation, then 153 | # they probably share their surname 154 | if relation: 155 | split[0] += ' %s' % split1[-1] 156 | split0.append(split1[-1]) 157 | if len(split0) > 1 or len(split1) == 1: 158 | labels[0][lang] = split[0] 159 | labels[1][lang] = split[1] 160 | break 161 | 162 | return labels 163 | 164 | def treat_page_and_item(self, page, item): 165 | relation = self.get_relation(item) 166 | labels = self.get_labels(item, relation) 167 | count = max(map(len, labels)) 168 | if count == 0: 169 | pywikibot.info('No labels, skipping...') 170 | return 171 | 172 | if count < self.opt['min_labels']: 173 | pywikibot.info(f'Too few labels ({count}), skipping...') 174 | return 175 | 176 | to_add = [] 177 | to_remove = [] 178 | if relation and relation.startswith('twin'): 179 | distribute = self.distribute_properties + ['P569', 'P19'] 180 | if relation.startswith('twin-'): 181 | distribute.append('P21') 182 | else: 183 | distribute = self.distribute_properties 184 | 185 | for prop in distribute: 186 | for claim in item.claims.get(prop, []): 187 | if claim.getTarget(): 188 | to_remove.append(claim) 189 | json = claim.toJSON() 190 | json.pop('id') 191 | to_add.append(json) 192 | 193 | items = [self.create_item(item, data, relation, to_add) 194 | for data in labels] 195 | if self.relation_map.get(relation): 196 | recipe = self.relation_map[relation].split('/') 197 | if len(recipe) == 3: 198 | prop, qprop, qval = recipe 199 | else: 200 | prop, qprop, qval = recipe[0], None, None 201 | for it, target in zip(items, reversed(items)): 202 | claim = 
pywikibot.Claim(self.repo, prop) 203 | claim.setTarget(target) 204 | if qprop: 205 | qualifier = pywikibot.Claim(self.repo, qprop, is_qualifier=True) 206 | qualifier.setTarget(pywikibot.ItemPage(self.repo, qval)) 207 | claim.addQualifier(qualifier) 208 | source = pywikibot.Claim(self.repo, 'P3452', is_reference=True) 209 | source.setTarget(item) 210 | claim.addSource(source) 211 | self.user_add_claim(it, claim, asynchronous=False) 212 | 213 | for it in items: 214 | claim = pywikibot.Claim(self.repo, 'P527') 215 | claim.setTarget(it) 216 | self.user_add_claim(item, claim, asynchronous=False) 217 | 218 | for claim in to_remove: 219 | pywikibot.info(f'Removing {claim.id} --> {claim.getTarget()}') 220 | json = claim.toJSON() 221 | json['remove'] = '' 222 | self.user_edit_entity( 223 | item, 224 | {'claims': [json]}, 225 | asynchronous=False, 226 | summary='moved [[Property:{}]] to {} & {}'.format( 227 | claim.id, 228 | items[0].title(as_link=True, insite=self.repo), 229 | items[1].title(as_link=True, insite=self.repo) 230 | ) 231 | ) 232 | 233 | def create_item(self, item, labels, relation, to_add): 234 | instance_of = pywikibot.Claim(self.repo, 'P31') 235 | instance_of.setTarget(pywikibot.ItemPage(self.repo, 'Q5')) 236 | part_of = pywikibot.Claim(self.repo, 'P361') 237 | part_of.setTarget(item) 238 | 239 | pywikibot.info(f'Creating item (relation "{relation}")...') 240 | new_item = pywikibot.ItemPage(self.repo) 241 | self.user_edit_entity( 242 | new_item, 243 | { 244 | 'labels': labels, 245 | 'claims': [instance_of.toJSON(), part_of.toJSON()] + to_add, 246 | }, 247 | asynchronous=False, 248 | summary='based on data in {}'.format( 249 | item.title(as_link=True, insite=self.repo) 250 | ) 251 | ) 252 | 253 | return new_item 254 | 255 | 256 | def main(*args): 257 | options = {} 258 | local_args = pywikibot.handle_args(args) 259 | site = pywikibot.Site() 260 | genFactory = pagegenerators.GeneratorFactory(site=site) 261 | for arg in genFactory.handle_args(local_args): 262 | if arg.startswith('-'): 263 | arg, sep, value = arg.partition(':') 264 | if value != '': 265 | options[arg[1:]] = value if not value.isdigit() else int(value) 266 | else: 267 | options[arg[1:]] = True 268 | 269 | generator = genFactory.getCombinedGenerator() 270 | bot = DuosManagingBot(generator=generator, site=site, **options) 271 | bot.run() 272 | 273 | 274 | if __name__ == '__main__': 275 | main() 276 | -------------------------------------------------------------------------------- /checkwiki.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import re 3 | 4 | import pywikibot 5 | import requests 6 | from pywikibot import pagegenerators 7 | from pywikibot.exceptions import UnknownExtension 8 | 9 | from checkwiki_errors import * 10 | from wikitext import WikitextFixingBot 11 | 12 | 13 | class CheckWikiSettings: 14 | 15 | prio_map = { 16 | '0': '', 17 | '1': 'high', 18 | '2': 'medium', 19 | '3': 'low' 20 | } 21 | 22 | def __init__(self, data): 23 | self.data = data 24 | 25 | def get_priority(self, error): 26 | return self.data[error]['priority'] 27 | 28 | def get_errors_by_priority(self, priority): 29 | for error, data in self.data.items(): 30 | if data['priority'] == priority: 31 | yield error 32 | 33 | @classmethod 34 | def new_from_text(cls, text, dbName): 35 | data = {} 36 | inside_setting = False 37 | setting = None 38 | setting_text = '' 39 | parsed_settings = {} 40 | for line in text.splitlines(): 41 | if inside_setting is False: 42 | match = re.match(' *([a-z0-9_]+) *=', 
line) 43 | if match is not None: 44 | setting = match[1] 45 | setting_text = '' 46 | inside_setting = True 47 | line = line[match.end():] 48 | 49 | if inside_setting is True: 50 | if 'END' in line: 51 | setting_text += line[:line.index('END')].strip() 52 | inside_setting = False 53 | parsed_settings[setting] = setting_text 54 | else: 55 | setting_text += line.strip() + '\n' 56 | 57 | project = parsed_settings.pop('project', dbName) 58 | for setting, text in parsed_settings.items(): 59 | split = setting.split('_') 60 | if len(split) != 4: 61 | continue 62 | if split[0] != 'error': 63 | continue 64 | if split[-1] != project: 65 | continue 66 | if not split[1].isdigit(): 67 | continue 68 | num = int(split[1]) 69 | if num > 500: 70 | continue 71 | data.setdefault(num, {}) 72 | if split[2] == 'prio': 73 | text = text.strip() 74 | if text in cls.prio_map.keys(): 75 | data[num]['priority'] = cls.prio_map[text] 76 | elif split[2] == 'whitelistpage': 77 | data[num].setdefault('whitelists', []).append(text) 78 | return cls(data) 79 | 80 | @classmethod 81 | def new_from_site(cls, site): 82 | try: 83 | page = site.page_from_repository('Q10784379') 84 | except (NotImplementedError, UnknownExtension) as e: 85 | pywikibot.error(e) 86 | return None 87 | return cls.new_from_text(page.text, site.dbName()) 88 | 89 | 90 | class CheckWikiErrorGenerator: 91 | 92 | def __init__(self, checkwiki, priorities=None, ids=None): 93 | self.checkwiki = checkwiki 94 | self.priorities = priorities or [] 95 | self.ids = ids or [] 96 | 97 | def __iter__(self): 98 | for error in self.ids: 99 | yield from self.checkwiki.iter_pages(error) 100 | already = set(self.ids) 101 | for prio in self.priorities: 102 | for error in self.checkwiki.settings.get_errors_by_priority(prio): 103 | if error not in already: 104 | yield from self.checkwiki.iter_pages(error) 105 | 106 | 107 | class CheckWiki: 108 | 109 | url = 'https://tools.wmflabs.org/checkwiki/cgi-bin/checkwiki_bots.cgi' 110 | 111 | errorMap = { 112 | 1: PrefixedTemplate, 113 | 2: BrokenHTMLTag, 114 | 7: LowHeadersLevel, 115 | 8: MissingEquation, 116 | 9: SingleLineCategories, 117 | #10: NoEndSquareBrackets, 118 | 11: HTMLEntity, 119 | 16: InvisibleChars, 120 | 17: DuplicateCategory, 121 | 18: LowerCaseCategory, 122 | 19: SingleEquationHeader, 123 | 20: Dagger, 124 | 21: EnglishCategory, 125 | 22: CategoryWithSpace, 126 | 25: HeaderHierarchy, 127 | 26: Bold, 128 | #27: Unicode, 129 | 32: MultiplePipes, 130 | 34: MagicWords, 131 | 38: Italics, 132 | 42: StrikedText, 133 | 44: BoldHeader, 134 | 48: SelfLink, 135 | 49: HTMLHeader, 136 | 50: EntitesAsDashes, 137 | 51: InterwikiBeforeHeader, 138 | 52: CategoriesBeforeHeader, 139 | 53: InterwikiBeforeCategory, 140 | 54: ListWithBreak, 141 | 57: HeaderWithColon, 142 | 59: ParameterWithBreak, 143 | 61: RefBeforePunctuation, 144 | 63: SmallInsideTags, 145 | #75: BadListStructure, 146 | #76: NoSpace, 147 | 80: BrokenExternalLink, 148 | 81: DuplicateReferences, 149 | 85: EmptyTag, 150 | 86: ExternalLinkLikeInternal, 151 | 88: DefaultsortSpace, 152 | 89: DefaultsortComma, 153 | 93: DoubleHttp, 154 | 101: Ordinals, 155 | 103: SuperfluousPipe, 156 | 104: ReferenceQuotes, 157 | } 158 | 159 | def __init__(self, site): 160 | self.site = site 161 | 162 | def purge(self): 163 | self.__cache = {} 164 | 165 | @property 166 | def site(self): 167 | return self._site 168 | 169 | @site.setter 170 | def site(self, value): 171 | self._site = value 172 | self.purge() 173 | self.load_settings() 174 | 175 | def load_settings(self): 176 | 
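        # The settings page loaded below is parsed by CheckWikiSettings.new_from_text:
        # a plain-text list of "key = value ... END" entries whose keys are suffixed
        # with the project name (dbName by default), e.g. (illustrative values only)
        #   error_054_prio_cswiki = 3 END
        #   error_054_whitelistpage_cswiki = Some/Whitelist_page END
        # where the numeric priority maps through prio_map ('1' high, '2' medium, '3' low).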
pywikibot.info('Loading CheckWiki settings...') 177 | self._settings = CheckWikiSettings.new_from_site(self.site) 178 | 179 | @property 180 | def settings(self): 181 | if not hasattr(self, '_settings'): 182 | self.load_settings() 183 | return self._settings 184 | 185 | def get_error(self, number): 186 | return self.__cache.setdefault(number, self.errorMap[number](self)) 187 | 188 | def iter_errors(self, numbers=None, only_for_fixes=False, priorities=None): 189 | for num in self.errorMap: 190 | if numbers and num not in numbers: 191 | continue 192 | if priorities and self.settings.get_priority(num) not in priorities: 193 | continue 194 | 195 | error = self.get_error(num) 196 | if only_for_fixes and not error.isForFixes(): 197 | continue 198 | 199 | yield error 200 | 201 | def apply(self, text, page, replaced=[], fixed=[], errors=[], **kwargs): 202 | # todo: use a graph algorithm 203 | errors = list(self.iter_errors(set(errors))) 204 | while errors: 205 | error = errors.pop(0) 206 | if error.needsDecision() or error.handledByCC(): # todo 207 | continue 208 | 209 | numbers = [err.number for err in errors] 210 | i = max([numbers.index(num) for num in error.needsFirst 211 | if num in numbers] + [0]) 212 | if i > 0: 213 | errors.insert(i, error) 214 | continue 215 | 216 | new_text = error.apply(text, page) 217 | if new_text != text: 218 | text = new_text 219 | summary = error.summary 220 | fixed.append(error.number) 221 | if summary not in replaced: 222 | replaced.append(summary) 223 | 224 | return text 225 | 226 | def iter_titles(self, num, **kwargs): 227 | data = { 228 | 'action': 'list', 229 | 'id': num, 230 | 'project': self.site.dbName(), 231 | } 232 | for line in self.get(data, **kwargs).iter_lines(): 233 | yield line.decode().replace('title=', '') # fixme: b/c 234 | 235 | def iter_pages(self, num, **kwargs): 236 | for title in self.iter_titles(num, **kwargs): 237 | yield pywikibot.Page(self.site, title) 238 | 239 | def get(self, data, **kwargs): 240 | return requests.get(self.url, data, **kwargs) 241 | 242 | def post(self, data, **kwargs): 243 | return requests.post(self.url, data, **kwargs) 244 | 245 | def mark_as_fixed(self, page, error): 246 | data = { 247 | 'action': 'mark', 248 | 'id': error, 249 | 'project': page.site.dbName(), 250 | 'title': page.title(), 251 | } 252 | return self.post(data) 253 | 254 | def mark_as_fixed_multiple(self, page, errors): 255 | for error in errors: 256 | self.mark_as_fixed(page, error) 257 | 258 | @staticmethod 259 | def parse_option(option): 260 | ids = [] 261 | priorities = [] 262 | for part in option.split(','): 263 | if part.isdigit(): 264 | ids.append(int(part)) 265 | elif part in CheckWikiSettings.prio_map.values(): 266 | priorities.append(part) 267 | return ids, priorities 268 | 269 | 270 | class CheckWikiBot(WikitextFixingBot): 271 | 272 | def __init__(self, checkwiki, numbers, **kwargs): 273 | kwargs['checkwiki'] = False 274 | super().__init__(**kwargs) 275 | self.checkwiki = checkwiki 276 | self.numbers = numbers 277 | 278 | def treat_page(self): 279 | page = self.current_page 280 | replaced = [] 281 | fixed = [] 282 | text = self.checkwiki.apply( 283 | page.text, page, replaced, fixed, self.numbers) 284 | summary = 'opravy dle [[WP:WCW|CheckWiki]]: %s' % ', '.join(replaced) 285 | self.put_current( 286 | text, summary=summary, 287 | callback=lambda *args: self.mark_as_fixed_on_success(fixed, *args)) 288 | 289 | def mark_as_fixed_on_success(self, numbers, page, exc=None): 290 | if exc is not None: 291 | return 292 | 
self.checkwiki.mark_as_fixed_multiple(page, numbers) 293 | 294 | 295 | def main(*args): 296 | options = {} 297 | local_args = pywikibot.handle_args(args) 298 | site = pywikibot.Site() 299 | checkwiki = CheckWiki(site) 300 | genFactory = pagegenerators.GeneratorFactory(site=site) 301 | numbers = [] 302 | gens = [] 303 | for arg in genFactory.handle_args(local_args): 304 | if arg.startswith('-checkwiki:'): 305 | ids, priorities = checkwiki.parse_option(arg.partition(':')[2]) 306 | gen = CheckWikiErrorGenerator( 307 | checkwiki, ids=ids, priorities=priorities) 308 | gens.append(gen) 309 | continue 310 | if arg.startswith('-'): 311 | arg, sep, value = arg.partition(':') 312 | if value != '': 313 | options[arg[1:]] = int(value) if value.isdigit() else value 314 | else: 315 | options[arg[1:]] = True 316 | else: 317 | numbers.extend(checkwiki.parse_option(arg)[0]) 318 | 319 | if gens: 320 | genFactory.gens.extend(gens) 321 | generator = genFactory.getCombinedGenerator(preload=True) 322 | if not generator: 323 | genFactory.gens.append(CheckWikiErrorGenerator(checkwiki, ids=numbers)) 324 | generator = genFactory.getCombinedGenerator(preload=True) 325 | 326 | bot = CheckWikiBot(checkwiki, numbers, generator=generator, 327 | site=site, **options) 328 | bot.run() 329 | 330 | 331 | if __name__ == '__main__': 332 | main() 333 | --------------------------------------------------------------------------------
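For reference, the `CheckWiki` helper in `checkwiki.py` can also be driven directly from Python. The sketch below is illustrative only (it assumes this scripts directory is importable and that the CheckWiki web service is reachable); it lists the pages flagged with error 81 (duplicate references) on the configured site:
```
#!/usr/bin/python
# Illustrative sketch, not part of the repository.
import pywikibot

from checkwiki import CheckWiki

site = pywikibot.Site()
checkwiki = CheckWiki(site)  # loads the per-wiki settings page on construction
for page in checkwiki.iter_pages(81):  # 81 maps to DuplicateReferences in errorMap
    pywikibot.info(page.title())
```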