├── .gitignore
├── LICENSE
├── LexData
    ├── __init__.py
    ├── claim.py
    ├── entity.py
    ├── form.py
    ├── language.py
    ├── languages.py
    ├── lexeme.py
    ├── sense.py
    ├── utils.py
    ├── version.py
    └── wikidatasession.py
├── README.md
├── docs
    ├── Makefile
    ├── make.bat
    └── source
    │   ├── Claim.rst
    │   ├── Entity.rst
    │   ├── Form.rst
    │   ├── Language.rst
    │   ├── LexData.rst
    │   ├── Lexeme.rst
    │   ├── Sense.rst
    │   ├── conf.py
    │   ├── index.rst
    │   ├── intro.rst
    │   └── modules.rst
├── example.py
├── setup.py
└── test_lexdata.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | 
3 | __pycache__
4 | *pyc
5 | .mypy_cache
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2018 The Python Packaging Authority
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/LexData/__init__.py:
--------------------------------------------------------------------------------
  1 | # -*-coding:utf-8-*
  2 | import json
  3 | import logging
  4 | from typing import List
  5 | 
  6 | from .claim import Claim
  7 | from .form import Form
  8 | from .language import Language
  9 | from .lexeme import Lexeme
 10 | from .sense import Sense
 11 | from .wikidatasession import WikidataSession
 12 | 
 13 | 
 14 | def get_or_create_lexeme(
 15 |     repo: WikidataSession, lemma: str, lang: Language, catLex: str
 16 | ) -> Lexeme:
 17 |     """Search for a lexeme in wikidata if not found, create it
 18 | 
 19 |     :param repo: Wikidata Session
 20 |     :type  repo: WikidataSession
 21 |     :param lemma: the lemma of the lexeme
 22 |     :type  lemma: str
 23 |     :param lang: language of the lexeme
 24 |     :type  lang: Language
 25 |     :param catLex: lexical Category of the lexeme
 26 |     :type  catLex: str
 27 |     :returns: Lexeme with the specified properties (created or found)
 28 |     :rtype: Lexeme
 29 | 
 30 |     """
 31 |     lexemes = search_lexemes(repo, lemma, lang, catLex)
 32 |     if len(lexemes) == 1:
 33 |         return lexemes[0]
 34 |     elif len(lexemes) > 1:
 35 |         logging.warning("Multiple lexemes found, using first one.")
 36 |         return lexemes[0]
 37 |     else:
 38 |         return create_lexeme(repo, lemma, lang, catLex)
 39 | 
 40 | 
 41 | def search_lexemes(
 42 |     repo: WikidataSession, lemma: str, lang: Language, catLex: str
 43 | ) -> List[Lexeme]:
 44 |     """
 45 |     Search for a lexeme by it's label, language and lexical category.
 46 | 
 47 |     :param repo: Wikidata Session
 48 |     :type  repo: WikidataSession
 49 |     :param lemma: the lemma of the lexeme
 50 |     :type  lemma: str
 51 |     :param lang: language of the lexeme
 52 |     :type  lang: Language
 53 |     :param catLex: lexical Category of the lexeme
 54 |     :type  catLex: str
 55 |     :returns: List of Lexemes with the specified properties
 56 |     :rtype: List[Lexeme]
 57 |     """
 58 |     # the language we specify in search is currently not used by the search
 59 |     # set it nevertheless, except if it is a Language without ISO code
 60 |     if lang.short[:3] == "mis":
 61 |         searchlang = "en"
 62 |     else:
 63 |         searchlang = lang.short
 64 | 
 65 |     PARAMS = {
 66 |         "action": "wbsearchentities",
 67 |         "language": searchlang,
 68 |         "type": "lexeme",
 69 |         "search": lemma,
 70 |         "format": "json",
 71 |         "limit": "10",
 72 |     }
 73 | 
 74 |     DATA = repo.get(PARAMS)
 75 | 
 76 |     if "error" in DATA:
 77 |         raise Exception(DATA["error"])
 78 | 
 79 |     # Iterate over all results and check for matches. Do not rely on
 80 |     # match-results, since they can differ for smaller languages – use them
 81 |     # however to avoid unnecessary queries.
 82 |     lexemes = []
 83 |     for item in DATA["search"]:
 84 |         if item["label"] == lemma:
 85 |             if "language" in item["match"]:
 86 |                 if item["match"]["language"] != lang.short and item["match"] != "und":
 87 |                     continue
 88 |             idLex = item["id"]
 89 |             lexeme = Lexeme(repo, idLex)
 90 |             if lexeme["language"] == lang.qid and lexeme["lexicalCategory"] == catLex:
 91 |                 logging.info("Found lexeme: %s", idLex)
 92 |                 lexemes.append(lexeme)
 93 |     return lexemes
 94 | 
 95 | 
 96 | def create_lexeme(
 97 |     repo: WikidataSession, lemma: str, lang: Language, catLex: str, claims=None
 98 | ) -> Lexeme:
 99 |     """Creates a lexeme
100 | 
101 |     :param repo: Wikidata Session
102 |     :type  repo: WikidataSession
103 |     :param lemma: value of the lexeme
104 |     :type  lemma: str
105 |     :param lang: language
106 |     :type  lang: Language
107 |     :param catLex: lexicographical category
108 |     :param claims: claims to add to the lexeme (Default value = None) -> Lexem)
109 |     :type  catLex: str
110 |     :returns: The created Lexeme
111 |     :rtype: Lexeme
112 | 
113 |     """
114 | 
115 |     # Create the json with the lexeme's data
116 |     data_lex = json.dumps(
117 |         {
118 |             "type": "lexeme",
119 |             "lemmas": {lang.short: {"value": lemma, "language": lang.short}},
120 |             "language": lang.qid,
121 |             "lexicalCategory": catLex,
122 |             "forms": [],
123 |         }
124 |     )
125 | 
126 |     # Send a post to edit a lexeme
127 |     PARAMS = {
128 |         "action": "wbeditentity",
129 |         "format": "json",
130 |         "bot": "1",
131 |         "new": "lexeme",
132 |         "token": "__AUTO__",
133 |         "data": data_lex,
134 |     }
135 | 
136 |     DATA = repo.post(PARAMS)
137 |     # Get the id of the new lexeme
138 |     idLex = DATA["entity"]["id"]
139 | 
140 |     logging.info("Created lexeme: %s", idLex)
141 |     lexeme = Lexeme(repo, idLex)
142 | 
143 |     if claims:
144 |         lexeme.createClaims(claims)
145 | 
146 |     return lexeme
147 | 


--------------------------------------------------------------------------------
/LexData/claim.py:
--------------------------------------------------------------------------------
  1 | from typing import Any, Dict, Optional, Tuple, Union
  2 | 
  3 | from .utils import buildSnak
  4 | 
  5 | 
  6 | class Claim(dict):
  7 |     """Wrapper around a dict to represent a Claim
  8 | 
  9 |     There are two types of Claim objects:
 10 | 
 11 |     * Claims that where received from an existing entity.
 12 |     * Claims that where created by the user by Claim(propertyId, value) and
 13 |       have not yet been uploaded to Wikidata. These are called 'Detached Claims',
 14 |       since they don't belong to any entity.  They don't have an id nor an hash.
 15 |       They can be added to an entity by the function Entity.addClaims().
 16 | 
 17 |     Currently modifications on both types of claims can't be uploaded, except
 18 |     by use of the low level API call Lexeme.update_from_json().
 19 |     """
 20 | 
 21 |     # Hack needed to define a property called property
 22 |     property_decorator = property
 23 | 
 24 |     def __init__(
 25 |         self,
 26 |         claim: Optional[Dict[str, Any]] = None,
 27 |         propertyId: Optional[str] = None,
 28 |         value: Optional[Any] = None,
 29 |     ):
 30 |         super().__init__()
 31 |         if isinstance(claim, dict) and not propertyId and not value:
 32 |             self.update(claim)
 33 |         elif claim is None and propertyId and value:
 34 |             self["mainsnak"] = buildSnak(propertyId, value)
 35 |             self["rank"] = "normal"
 36 |         else:
 37 |             raise TypeError(
 38 |                 "Claim() received an invalid combination of arguments expected one of:"
 39 |                 + " * (dict claimObject)"
 40 |                 + " * (str propertyId, value)"
 41 |             )
 42 | 
 43 |     @property_decorator
 44 |     def value(self) -> Dict[str, Any]:
 45 |         """
 46 |         Return the value of the claim. The type depends on the data type.
 47 |         """
 48 |         return self["mainsnak"]["datavalue"]["value"]
 49 | 
 50 |     @property_decorator
 51 |     def type(self) -> str:
 52 |         """
 53 |         Return the data type of the claim.
 54 | 
 55 |         :rtype: str
 56 |         """
 57 |         return self["mainsnak"]["datatype"]
 58 | 
 59 |     @property_decorator
 60 |     def property(self) -> str:
 61 |         """
 62 |         Return the id of the property of the claim.
 63 | 
 64 |         :rtype: str
 65 |         """
 66 |         return self["mainsnak"]["property"]
 67 | 
 68 |     @property_decorator
 69 |     def rank(self) -> str:
 70 |         """
 71 |         Return the rank of the claim.
 72 | 
 73 |         :rtype: str
 74 |         """
 75 |         return self["rank"]
 76 | 
 77 |     @property_decorator
 78 |     def numeric_rank(self) -> int:
 79 |         """
 80 |         Return the rank of the claim as integer.
 81 | 
 82 |         :rtype: int
 83 |         """
 84 |         if self.rank == "normal":
 85 |             return 0
 86 |         elif self.rank == "preferred":
 87 |             return 1
 88 |         elif self.rank == "deprecated":
 89 |             return -1
 90 |         raise NotImplementedError("Unknown or invalid rank {}".format(self.rank))
 91 | 
 92 |     @property_decorator
 93 |     def pure_value(self) -> Union[str, int, float, Tuple[float, float]]:
 94 |         """
 95 |         Return just the 'pure' value, what this is depends on the type of the value:
 96 | 
 97 |         * wikibase-entity: the id as string, including 'L/Q/P'-prefix
 98 |         * string: the string
 99 |         * manolingualtext: the text as string
100 |         * quantity: the amount as float
101 |         * time: the timestamp as string in format ISO 8601
102 |         * globecoordinate: tuple of latitude and longitude as floats
103 | 
104 |         Be aware that for most types this is not the full information stored in
105 |         the value.
106 |         """
107 |         value = self.value
108 |         vtype = self.type
109 |         if vtype == "wikibase-entityid":
110 |             return value["id"]
111 |         if vtype == "string":
112 |             return value
113 |         if vtype == "monolingualtext":
114 |             return value["text"]
115 |         if vtype == "quantity":
116 |             return float(value["amount"])
117 |         if vtype == "time":
118 |             return value["time"]
119 |         if vtype == "globecoordinate":
120 |             return (float(value["latitude"]), float(value["longitude"]))
121 |         raise NotImplementedError
122 | 
123 |     def __repr__(self) -> str:
124 |         if "id" in self:
125 |             return "<Claim '{}'>".format(repr(self.value))
126 |         else:
127 |             return "<Detached Claim '{}'>".format(repr(self.value))
128 | 


--------------------------------------------------------------------------------
/LexData/entity.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import logging
  3 | from typing import Dict, List, Union
  4 | 
  5 | from .claim import Claim
  6 | from .wikidatasession import WikidataSession
  7 | 
  8 | 
  9 | class Entity(dict):
 10 |     """
 11 |     Base class for all types of entities – currently: Lexeme, Form, Sense.
 12 |     Not yet implemented: Item, Property.
 13 |     """
 14 | 
 15 |     def __init__(self, repo: WikidataSession):
 16 |         super().__init__()
 17 |         self.repo = repo
 18 | 
 19 |     @property
 20 |     def claims(self) -> Dict[str, List[Claim]]:
 21 |         """
 22 |         All the claims of the Entity
 23 | 
 24 |         :rtype: Dict[str, List[Claim]]
 25 |         """
 26 |         if self.get("claims", {}) != []:
 27 |             return {k: [Claim(c) for c in v] for k, v in self.get("claims", {}).items()}
 28 |         else:
 29 |             return {}
 30 | 
 31 |     def addClaims(self, claims: Union[List[Claim], Dict[str, List[str]]]):
 32 |         """
 33 |         Add claims to the entity.
 34 | 
 35 |         :param claims: The claims to be added to the entity.
 36 | 
 37 |                        There are two possibilities for this:
 38 | 
 39 |                        - A list of Objects of type Claim
 40 | 
 41 |                          Example: ``[Claim(propertyId="P31", value="Q1")]``
 42 | 
 43 |                        - A dictionary with the property id as key and lists of
 44 |                          string formated entity ids as values.
 45 | 
 46 |                          Example: ``{"P31": ["Q1", "Q2"]}``
 47 | 
 48 |                        The first supports all datatypes, whereas the later
 49 |                        currently only supports datatypes of kind Entity.
 50 |         """
 51 |         if isinstance(claims, list):
 52 |             self.__setClaims__(claims)
 53 |         elif isinstance(claims, dict):
 54 |             self.__createClaims__(claims)
 55 |         else:
 56 |             raise TypeError("Invalid argument type:", type(claims))
 57 | 
 58 |     def __setClaims__(self, claims: List[Claim]):
 59 |         """
 60 |         Add prebuild claims to the entity
 61 | 
 62 |         :param claims: The list of claims to be added
 63 |         """
 64 |         for claim in claims:
 65 |             pid = claim.property
 66 |             self.__setClaim__(pid, claim)
 67 | 
 68 |     def __createClaims__(self, claims: Dict[str, List[str]]):
 69 |         """
 70 |         Create and add new claims to the entity.
 71 | 
 72 |         Only properties of some entity type are implemented:
 73 |         Item, Property, Lexeme, Form and Sense
 74 | 
 75 |         :param claims: The set of claims to be added
 76 |         """
 77 |         for cle, values in claims.items():
 78 |             for value in values:
 79 |                 self.__setEntityClaim__(cle, value)
 80 | 
 81 |     def __setEntityClaim__(self, idProp: str, idStr: str):
 82 |         """
 83 |         Add a claim of an entity-type to the entity.
 84 | 
 85 |         Supported types are Lexeme, Form, Sense, Item, Property.
 86 | 
 87 |         :param idProp: id of the property (example: "P31")
 88 |         :param idItem: id of the entity (example: "Q1")
 89 |         """
 90 |         entityId = int(idStr[1:])
 91 |         claim_value = json.dumps({"entity-type": "item", "numeric-id": entityId})
 92 |         self.__setClaim__(idProp, claim_value)
 93 | 
 94 |     def __setClaim__(self, idProp: str, claim_value):
 95 |         PARAMS = {
 96 |             "action": "wbcreateclaim",
 97 |             "format": "json",
 98 |             "entity": self.id,
 99 |             "snaktype": "value",
100 |             "bot": "1",
101 |             "property": idProp,
102 |             "value": claim_value,
103 |             "token": "__AUTO__",
104 |         }
105 | 
106 |         DATA = self.repo.post(PARAMS)
107 |         assert "claim" in DATA
108 |         addedclaim = DATA["claim"]
109 |         logging.info("Claim added")
110 | 
111 |         # Add the created claim to the local entity instance
112 |         if self.get("claims", []) == []:
113 |             self["claims"] = {idProp: addedclaim}
114 |         elif idProp in self.claims:
115 |             self.claims[idProp].append(addedclaim)
116 |         else:
117 |             self.claims[idProp] = [addedclaim]
118 | 
119 |     @property
120 |     def id(self) -> str:
121 |         EntityId = self.get("id")
122 |         assert isinstance(EntityId, str)
123 |         return EntityId
124 | 
125 |     def __str__(self) -> str:
126 |         return super().__repr__()
127 | 


--------------------------------------------------------------------------------
/LexData/form.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict
 2 | 
 3 | from .entity import Entity
 4 | from .wikidatasession import WikidataSession
 5 | 
 6 | 
 7 | class Form(Entity):
 8 |     """Wrapper around a dict to represent a From"""
 9 | 
10 |     def __init__(self, repo: WikidataSession, form: Dict):
11 |         super().__init__(repo)
12 |         self.update(form)
13 | 
14 |     @property
15 |     def form(self) -> str:
16 |         """
17 |         String of the form value ("representation")
18 | 
19 |         :rtype: str
20 |         """
21 |         return list(self["representations"].values())[0]["value"]
22 | 
23 |     def __repr__(self) -> str:
24 |         return "<Form '{}'>".format(self.form)
25 | 


--------------------------------------------------------------------------------
/LexData/language.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module simply contains a few common Languages with their language-codes
 3 | and QIDs for easier use.
 4 | """
 5 | from dataclasses import dataclass
 6 | 
 7 | 
@dataclass
class Language:
    """Dataclass representing a language

    Bundles the two identifiers of a language that belong together: its
    language code and the QID of its Wikidata item.
    """

    # Language code, e.g. "en"
    short: str
    # QID of the language, e.g. "Q1860"
    qid: str
14 | 
15 | 
# Predefined languages, each given as (language code, QID).
# Feel free to add more languages.
en = Language("en", "Q1860")
de = Language("de", "Q188")
fr = Language("fr", "Q150")
20 | 


--------------------------------------------------------------------------------
/LexData/languages.py:
--------------------------------------------------------------------------------
1 | import logging
2 | 
3 | from .language import *
4 | 
# This module only re-exports LexData.language under its old name.
# Warn on import so that users switch to the new module name.
logging.warning(
    "LexData.languages has been renamed to LexData.language. "
    "LexData.languages will be removed in future versions."
)
9 | 


--------------------------------------------------------------------------------
/LexData/lexeme.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import logging
  3 | from typing import Dict, List, Optional
  4 | 
  5 | from .claim import Claim
  6 | from .entity import Entity
  7 | from .form import Form
  8 | from .language import Language
  9 | from .sense import Sense
 10 | from .wikidatasession import WikidataSession
 11 | 
 12 | 
 13 | class Lexeme(Entity):
 14 |     """Wrapper around a dict to represent a Lexeme"""
 15 | 
 16 |     def __init__(self, repo: WikidataSession, idLex: str):
 17 |         super().__init__(repo)
 18 |         self.getLex(idLex)
 19 | 
 20 |     def getLex(self, idLex: str):
 21 |         """This function gets and returns the data of a lexeme for a given id.
 22 | 
 23 |         :param idLex: Lexeme identifier (example: "L2")
 24 |         :type  idLex: str
 25 |         :returns: Simplified object representation of Lexeme
 26 | 
 27 |         """
 28 | 
 29 |         PARAMS = {"action": "wbgetentities", "format": "json", "ids": idLex}
 30 | 
 31 |         DATA = self.repo.get(PARAMS)
 32 | 
 33 |         self.update(DATA["entities"][idLex])
 34 | 
 35 |     @property
 36 |     def lemma(self) -> str:
 37 |         """
 38 |         The lemma of the lexeme as string
 39 | 
 40 |         :rtype: str
 41 |         """
 42 |         return list(self["lemmas"].values())[0]["value"]
 43 | 
 44 |     @property
 45 |     def language(self) -> str:
 46 |         """
 47 |         The language code of the lexeme as string
 48 | 
 49 |         :rtype: str
 50 |         """
 51 |         return list(self["lemmas"].values())[0]["language"]
 52 | 
 53 |     @property
 54 |     def forms(self) -> List[Form]:
 55 |         """
 56 |         List of all forms
 57 | 
 58 |         :rtype: List[Form]
 59 |         """
 60 |         return [Form(self.repo, f) for f in super().get("forms", [])]
 61 | 
 62 |     @property
 63 |     def senses(self) -> List[Sense]:
 64 |         """
 65 |         List of all senses
 66 | 
 67 |         :rtype: List[Sense]
 68 |         """
 69 |         return [Sense(self.repo, s) for s in super().get("senses", [])]
 70 | 
 71 |     def createSense(
 72 |         self, glosses: Dict[str, str], claims: Optional[List[Claim]] = None
 73 |     ) -> str:
 74 |         """Create a sense for the lexeme.
 75 | 
 76 |         :param glosses: glosses for the sense
 77 |         :type  glosses: Dict[str, str]
 78 |         :param claims: claims to add to the new form
 79 |         :rtype: str
 80 |         """
 81 |         # Create the json with the sense's data
 82 |         data_sense: Dict[str, Dict[str, Dict[str, str]]] = {"glosses": {}}
 83 |         for lang, gloss in glosses.items():
 84 |             data_sense["glosses"][lang] = {"value": gloss, "language": lang}
 85 | 
 86 |         # send a post to add sense to lexeme
 87 |         PARAMS = {
 88 |             "action": "wbladdsense",
 89 |             "format": "json",
 90 |             "lexemeId": self.id,
 91 |             "token": "__AUTO__",
 92 |             "bot": "1",
 93 |             "data": json.dumps(data_sense),
 94 |         }
 95 |         DATA = self.repo.post(PARAMS)
 96 |         addedSense = Sense(self.repo, DATA["sense"])
 97 |         idSense = DATA["sense"]["id"]
 98 |         logging.info("Created sense: %s", idSense)
 99 | 
100 |         # Add the claims
101 |         if claims:
102 |             addedSense.addClaims(claims)
103 | 
104 |         # Add the created form to the local lexeme
105 |         self["senses"].append(addedSense)
106 | 
107 |         return idSense
108 | 
109 |     def createForm(
110 |         self,
111 |         form: str,
112 |         infosGram: List[str],
113 |         language: Optional[Language] = None,
114 |         claims: Optional[List[Claim]] = None,
115 |     ) -> str:
116 |         """Create a form for the lexeme.
117 | 
118 |         :param form: the new form to add
119 |         :type  form: str
120 |         :param infosGram: grammatical features
121 |         :type  infosGram: List[str]
122 |         :param language: the language of the form
123 |         :type  language: Optional[Language]
124 |         :param claims: claims to add to the new form
125 |         :returns: The id of the form
126 |         :rtype: str
127 | 
128 |         """
129 | 
130 |         if language is None:
131 |             languagename = self.language
132 |         else:
133 |             languagename = language.short
134 | 
135 |         # Create the json with the forms's data
136 |         data_form = json.dumps(
137 |             {
138 |                 "representations": {
139 |                     languagename: {"value": form, "language": languagename}
140 |                 },
141 |                 "grammaticalFeatures": infosGram,
142 |             }
143 |         )
144 | 
145 |         # send a post to add form to lexeme
146 |         PARAMS = {
147 |             "action": "wbladdform",
148 |             "format": "json",
149 |             "lexemeId": self.id,
150 |             "token": "__AUTO__",
151 |             "bot": "1",
152 |             "data": data_form,
153 |         }
154 |         DATA = self.repo.post(PARAMS)
155 |         addedForm = Form(self.repo, DATA["form"])
156 |         idForm = DATA["form"]["id"]
157 |         logging.info("Created form: %s", idForm)
158 | 
159 |         # Add the claims
160 |         if claims:
161 |             addedForm.addClaims(claims)
162 | 
163 |         # Add the created form to the local lexeme
164 |         self["forms"].append(addedForm)
165 | 
166 |         return idForm
167 | 
168 |     def createClaims(self, claims: Dict[str, List[str]]):
169 |         """Add claims to the Lexeme.
170 | 
171 |         createClaim() is deprecated and might be removed in future versions.
172 |         Use Entity.addClaims() instead.
173 | 
174 |         :param claims: The set of claims to be added
175 | 
176 |         """
177 |         logging.warning(
178 |             "createClaim() is deprecated and might be removed in future versions."
179 |             + " Use Entity.addClaims() instead"
180 |         )
181 |         self.__createClaims__(claims)
182 | 
183 |     def __repr__(self) -> str:
184 |         return "<Lexeme '{}'>".format(self.id)
185 | 
186 |     def update_from_json(self, data: str, overwrite=False):
187 |         """Update the lexeme from an json-string.
188 | 
189 |         This is a lower level function usable to save arbitrary modifications
190 |         on a lexeme. The data has to be supplied in the right format by the
191 |         user.
192 | 
193 |         :param data: Data update: See the API documentation about the format.
194 |         :param overwrite: If set the whole entity is replaced by the supplied data
195 |         """
196 |         PARAMS: Dict[str, str] = {
197 |             "action": "wbeditentity",
198 |             "format": "json",
199 |             "bot": "1",
200 |             "id": self.id,
201 |             "token": "__AUTO__",
202 |             "data": data,
203 |         }
204 |         if overwrite:
205 |             PARAMS["clear"] = "true"
206 |         DATA = self.repo.post(PARAMS)
207 |         if DATA.get("success") != 200:
208 |             raise ValueError(DATA)
209 |         logging.info("Updated from json data")
210 |         # Due to limitations of the API, the returned data cannot be used to
211 |         # update the instance. Therefore reload the lexeme.
212 |         self.getLex(self.id)
213 | 


--------------------------------------------------------------------------------
/LexData/sense.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict
 2 | 
 3 | from .entity import Entity
 4 | from .wikidatasession import WikidataSession
 5 | 
 6 | 
 7 | class Sense(Entity):
 8 |     """Wrapper around a dict to represent a Sense"""
 9 | 
10 |     def __init__(self, repo: WikidataSession, form: Dict):
11 |         super().__init__(repo)
12 |         self.update(form)
13 | 
14 |     def glosse(self, lang: str = "en") -> str:
15 |         """
16 |         The gloss of the text in the specified language is available, otherwise
17 |         in englisch, and if that's not set too in an arbitrary set language
18 | 
19 |         :param lang: language code of the wished language
20 |         :type  lang: str
21 |         :rtype: str
22 |         """
23 |         if lang not in self["glosses"]:
24 |             if "en" in self["glosses"]:
25 |                 lang = "en"
26 |             else:
27 |                 lang = list(self["glosses"].keys())[0]
28 |         return self["glosses"][lang]["value"]
29 | 
30 |     def __repr__(self) -> str:
31 |         return "<Sense '{}'>".format(self.glosse())
32 | 


--------------------------------------------------------------------------------
/LexData/utils.py:
--------------------------------------------------------------------------------
  1 | import functools
  2 | import json
  3 | from datetime import datetime
  4 | from typing import Any, Dict
  5 | 
  6 | from .wikidatasession import WikidataSession
  7 | 
  8 | 
  9 | @functools.lru_cache()
 10 | def getPropertyType(propertyId: str):
 11 |     repo = WikidataSession()
 12 |     query = {
 13 |         "action": "query",
 14 |         "format": "json",
 15 |         "prop": "revisions",
 16 |         "titles": "Property:" + propertyId,
 17 |         "rvprop": "content",
 18 |     }
 19 |     DATA = repo.get(query)
 20 |     jsonstr = list(DATA["query"]["pages"].values())[0]["revisions"][0]["*"]
 21 |     content = json.loads(jsonstr)
 22 |     return content["datatype"]
 23 | 
 24 | 
 25 | def buildDataValue(datatype: str, value):
 26 |     if datatype in [
 27 |         "wikibase-lexeme",
 28 |         "wikibase-form",
 29 |         "wikibase-sense",
 30 |         "wikibase-item",
 31 |         "wikibase-property",
 32 |     ]:
 33 |         if type(value) == dict:
 34 |             return {"value": value, "type": "wikibase-entity"}
 35 |         elif type(value) == str:
 36 |             value = {"entity-type": datatype[9:], "id": value}
 37 |             return {"value": value, "type": "wikibase-entity"}
 38 |         else:
 39 |             raise TypeError(
 40 |                 f"Can not convert type {type(value)} to datatype {datatype}"
 41 |             )
 42 |     elif datatype in [
 43 |         "string",
 44 |         "tabular-data",
 45 |         "geo-shape",
 46 |         "url",
 47 |         "musical-notation",
 48 |         "math",
 49 |         "commonsMedia",
 50 |     ]:
 51 |         if type(value) == dict:
 52 |             return {"value": value, "type": "string"}
 53 |         elif type(value) == str:
 54 |             return {"value": {"value": value}, "type": "string"}
 55 |         else:
 56 |             raise TypeError(
 57 |                 f"Can not convert type {type(value)} to datatype {datatype}"
 58 |             )
 59 |     elif datatype == "monolingualtext":
 60 |         if type(value) == dict:
 61 |             return {"value": value, "type": "monolingualtext"}
 62 |         else:
 63 |             raise TypeError(
 64 |                 f"Can not convert type {type(value)} to datatype {datatype}"
 65 |             )
 66 |     elif datatype == "globe-coordinate":
 67 |         if type(value) == dict:
 68 |             return {"value": value, "type": "globecoordinate"}
 69 |         else:
 70 |             raise TypeError(
 71 |                 f"Can not convert type {type(value)} to datatype {datatype}"
 72 |             )
 73 |     elif datatype == "quantity":
 74 |         if type(value) == dict:
 75 |             return {"value": value, "type": "quantity"}
 76 |         if type(value) in [int, float]:
 77 |             valueObj = {
 78 |                 "amount": "%+f" % value,
 79 |                 "unit": "1",
 80 |             }
 81 |             return {"value": valueObj, "type": "time"}
 82 |         else:
 83 |             raise TypeError(
 84 |                 f"Can not convert type {type(value)} to datatype {datatype}"
 85 |             )
 86 |     elif datatype == "time":
 87 |         if type(value) == dict:
 88 |             return {"value": value, "type": "time"}
 89 |         if type(value) == datetime:
 90 |             cleanedDateTime = value.replace(hour=0, minute=0, second=0, microsecond=0)
 91 |             valueObj: Dict[str, Any] = {
 92 |                 "time": "+" + cleanedDateTime.isoformat() + "Z",
 93 |                 "timezone": 0,
 94 |                 "before": 0,
 95 |                 "after": 0,
 96 |                 "precision": 11,
 97 |                 "calendarmodel": "http://www.wikidata.org/entity/Q1985727",
 98 |             }
 99 |             return {"value": valueObj, "type": "time"}
100 |         else:
101 |             raise TypeError(
102 |                 f"Can not convert type {type(value)} to datatype {datatype}"
103 |             )
104 |     else:
105 |         raise NotImplementedError(f"Datatype {datatype} not implemented")
106 | 
107 | 
def buildSnak(propertyId: str, value):
    """Assemble a value snak for the given property.

    The datatype of the property is looked up with ``getPropertyType`` and
    ``value`` is converted into a matching datavalue by ``buildDataValue``.

    :param propertyId: id of the property the snak is about
    :param value: value to store, handed on to ``buildDataValue``
    :returns: dict with the keys ``snaktype``, ``property``, ``datavalue``
              and ``datatype``
    """
    datatype = getPropertyType(propertyId)
    snak = {"snaktype": "value", "property": propertyId}
    snak["datavalue"] = buildDataValue(datatype, value)
    snak["datatype"] = datatype
    return snak
117 | 


--------------------------------------------------------------------------------
/LexData/version.py:
--------------------------------------------------------------------------------
# Package name and release number, combined into one "<name> <version>" string.
name = "LexData"
version = "0.2.0"
user_agent = f"{name} {version}"
4 | 


--------------------------------------------------------------------------------
/LexData/wikidatasession.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import time
  3 | from typing import Any, Dict, Optional
  4 | 
  5 | import requests
  6 | 
  7 | from .version import user_agent
  8 | 
  9 | 
 10 | class WikidataSession:
 11 |     """Wikidata network and authentication session. Needed for everything this
 12 |     framework does.
 13 |     """
 14 | 
 15 |     URL: str = "https://www.wikidata.org/w/api.php"
 16 |     assertUser: Optional[str] = None
 17 |     maxlag: int = 5
 18 | 
 19 |     def __init__(
 20 |         self,
 21 |         username: Optional[str] = None,
 22 |         password: Optional[str] = None,
 23 |         token: Optional[str] = None,
 24 |         auth: Optional[str] = None,
 25 |         user_agent: str = user_agent,
 26 |     ):
 27 |         """
 28 |         Create a wikidata session by login in and getting the token
 29 |         """
 30 |         self.username = username
 31 |         self.password = password
 32 |         self.auth = auth
 33 |         self.headers = {"User-Agent": user_agent}
 34 |         self.S = requests.Session()
 35 |         if username is not None and password is not None:
 36 |             # Since logins don't put load on the servers
 37 |             # we set maxlag higher for these requests.
 38 |             self.maxlag = 30
 39 |             self.login()
 40 |             self.maxlag = 5
 41 |         if token is not None:
 42 |             self.CSRF_TOKEN = token
 43 |         # After login enable 'assertUser'-feature of the Mediawiki-API to
 44 |         # make sure to never edit accidentally as IP
 45 |         if username is not None:
 46 |             # truncate bot name if a "bot password" is used
 47 |             self.assertUser = username.split("@")[0]
 48 | 
 49 |     def login(self):
 50 |         # Ask for a token
 51 |         PARAMS_1 = {
 52 |             "action": "query",
 53 |             "meta": "tokens",
 54 |             "type": "login",
 55 |             "format": "json",
 56 |         }
 57 |         DATA = self.get(PARAMS_1)
 58 |         LOGIN_TOKEN = DATA["query"]["tokens"]["logintoken"]
 59 | 
 60 |         # connexion request
 61 |         PARAMS_2 = {
 62 |             "action": "login",
 63 |             "lgname": self.username,
 64 |             "lgpassword": self.password,
 65 |             "format": "json",
 66 |             "lgtoken": LOGIN_TOKEN,
 67 |         }
 68 |         DATA = self.post(PARAMS_2)
 69 |         if DATA.get("login", []).get("result") != "Success":
 70 |             raise PermissionError("Login failed", DATA["login"]["reason"])
 71 |         logging.info("Log in succeeded")
 72 | 
 73 |         PARAMS_3 = {"action": "query", "meta": "tokens", "format": "json"}
 74 |         DATA = self.get(PARAMS_3)
 75 |         self.CSRF_TOKEN = DATA["query"]["tokens"]["csrftoken"]
 76 |         logging.info("Got CSRF token: %s", self.CSRF_TOKEN)
 77 | 
 78 |     def post(self, data: Dict[str, str]) -> Any:
 79 |         """Send data to wikidata by POST request. The CSRF token is automatically
 80 |         filled in if __AUTO__ is given instead.
 81 | 
 82 |         :param data: Parameters to send via POST
 83 |         :type  data: Dict[str, str])
 84 |         :returns: Answer form the server as Objekt
 85 |         :rtype: Any
 86 | 
 87 |         """
 88 |         if data.get("token") == "__AUTO__":
 89 |             data["token"] = self.CSRF_TOKEN
 90 |         if "assertuser" not in data and self.assertUser is not None:
 91 |             data["assertuser"] = self.assertUser
 92 |         data["maxlag"] = str(self.maxlag)
 93 |         R = self.S.post(self.URL, data=data, headers=self.headers, auth=self.auth)
 94 |         if R.status_code != 200:
 95 |             raise Exception(
 96 |                 "POST was unsuccessfull ({}): {}".format(R.status_code, R.text)
 97 |             )
 98 |         DATA = R.json()
 99 |         if "error" in DATA:
100 |             if DATA["error"]["code"] == "maxlag":
101 |                 sleepfor = float(R.headers.get("retry-after", 5))
102 |                 logging.info("Maxlag hit, waiting for %.1f seconds", sleepfor)
103 |                 time.sleep(sleepfor)
104 |                 return self.post(data)
105 |             else:
106 |                 raise PermissionError("API returned error: " + str(DATA["error"]))
107 |         logging.debug("Post request succeed")
108 |         return DATA
109 | 
110 |     def get(self, data: Dict[str, str]) -> Any:
111 |         """Send a GET request to wikidata
112 | 
113 |         :param data: Parameters to send via GET
114 |         :type  data: Dict[str, str]
115 |         :returns: Answer form the server as Objekt
116 |         :rtype: Any
117 | 
118 |         """
119 |         R = self.S.get(self.URL, params=data, headers=self.headers)
120 |         DATA = R.json()
121 |         if R.status_code != 200 or "error" in DATA:
122 |             # We do not set maxlag for GET requests – so this error can only
123 |             # occur if the users sets maxlag in the request data object
124 |             if DATA["error"]["code"] == "maxlag":
125 |                 sleepfor = float(R.headers.get("retry-after", 5))
126 |                 logging.info("Maxlag hit, waiting for %.1f seconds", sleepfor)
127 |                 time.sleep(sleepfor)
128 |                 return self.get(data)
129 |             else:
130 |                 raise Exception(
131 |                     "GET was unsuccessfull ({}): {}".format(R.status_code, R.text)
132 |                 )
133 |         logging.debug("Get request succeed")
134 |         return DATA
135 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Python bot framework for Lexemes on Wikidata
 2 | 
 3 | This is a small library to create bots, scripts and tools about Wikidata
 4 | Lexemes. Its philosophy is to have a transparent thin layer on top of the
 5 | internal data structures enriched with convenient functions without hiding the
 6 | power of the access to the internals.
 7 | 
 8 | LexData is still in beta phase and therefore some features are missing and
 9 | functions might be renamed in future.
10 | 
11 | The code of AitalvivemBot was used as a starting point, but probably there's not
12 | a single line of code that wasn't rewritten.
13 | 
14 | Install from pypi:
15 | ```
16 |  $ pip install LexData
17 | ```
18 | 
19 | Read the docs: [https://nudin.github.io/LexData/](https://nudin.github.io/LexData/)
20 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = source
# Relative path: when make is run from docs/ this resolves to a directory
# named LexData-docs next to the repository checkout, not inside it.
BUILDDIR    = ../../LexData-docs


# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
22 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

REM Fall back to "sphinx-build" unless SPHINXBUILD is already set
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
REM NOTE(review): the Makefile in this directory uses ../../LexData-docs as
REM BUILDDIR - confirm whether the two are meant to differ.
set BUILDDIR=build

REM Without a target show the Sphinx help
if "%1" == "" goto help

REM Start the command once, discarding its output, only to see whether it
REM can be run at all; the errorlevel check below reports a missing command.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Hand the requested target (%1) to Sphinx
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
36 | 


--------------------------------------------------------------------------------
/docs/source/Claim.rst:
--------------------------------------------------------------------------------
1 | Claim
2 | =====
3 | 
4 | .. autoclass:: LexData.Claim
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/Entity.rst:
--------------------------------------------------------------------------------
 1 | Entity
 2 | ======
 3 | 
 4 | .. toctree::
 5 |    :hidden:
 6 | 
 7 |    Lexeme
 8 |    Form
 9 |    Sense
10 | 
11 | 
12 | .. autoclass:: LexData.entity.Entity
13 |    :members:
14 |    :undoc-members:
15 |    :show-inheritance:
16 | 


--------------------------------------------------------------------------------
/docs/source/Form.rst:
--------------------------------------------------------------------------------
1 | Form
2 | ====
3 | 
4 | .. autoclass:: LexData.Form
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/Language.rst:
--------------------------------------------------------------------------------
 1 | LexData.language
 2 | ================
 3 | 
 4 | .. automodule:: LexData.language
 5 |    :members:
 6 |    :undoc-members:
 7 |    :show-inheritance:
 8 | 
 9 | .. literalinclude:: ../../LexData/language.py
10 |     :language: python
11 |     :lines: 8-
12 | 


--------------------------------------------------------------------------------
/docs/source/LexData.rst:
--------------------------------------------------------------------------------
 1 | LexData
 2 | =======
 3 | 
 4 | The central Class for working with LexData is the Class Lexeme, it is documented
 5 | on a separate page:
 6 | 
 7 | * :doc:`Lexeme`
 8 | 
 9 | Other Classes and Functions
10 | ---------------------------
11 | 
12 | .. automodule:: LexData
13 |    :members:
14 |    :undoc-members:
15 |    :show-inheritance:
16 |    :exclude-members: Lexeme
17 | 


--------------------------------------------------------------------------------
/docs/source/Lexeme.rst:
--------------------------------------------------------------------------------
1 | Lexeme
2 | ======
3 | 
4 | .. autoclass:: LexData.Lexeme
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/Sense.rst:
--------------------------------------------------------------------------------
1 | Sense
2 | =====
3 | 
4 | .. autoclass:: LexData.Sense
5 |    :members:
6 |    :undoc-members:
7 |    :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | 
import os
import sys

# Two levels above docs/source is the repository root, which holds the
# LexData package that autodoc has to import.
sys.path.insert(0, os.path.abspath("../.."))


# -- Project information -----------------------------------------------------

project = "LexData"
copyright = "2019, Michael F. Schönitzer"
author = "Michael F. Schönitzer"


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ["sphinx.ext.autodoc"]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = "alabaster"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

# html_sidebars = {
#    "**": ["about.html", "relations.html", "searchbox.html", "donate.html"]
# }

# Settings for the theme selected in html_theme above: the GitHub repository
# to link to, the description text and layout values.
html_theme_options = {
    "github_user": "Nudin",
    "github_repo": "LexData",
    "github_type": "star",
    "github_count": False,
    "description": "Edit Wikidatas Lexemes",
    "page_width": "60em",
    "fixed_sidebar": True,
}
67 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. LexData documentation master file, created by
 2 |    sphinx-quickstart on Tue Aug 27 21:50:47 2019.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to LexData's documentation!
 7 | ===================================
 8 | 
 9 | .. include:: intro.rst
10 | 
11 | A simple demonstration of how easy it is to edit Lexemes with LexData:
12 | 
13 | .. literalinclude:: ../../example.py
14 |     :language: python
15 | 
16 | .. toctree::
17 |    :maxdepth: 2
18 |    :caption: Contents:
19 | 
20 |    LexData
21 |    Entity
22 |    Claim
23 |    Language
24 | 
25 | Indices and tables
26 | ==================
27 | 
28 | * :ref:`genindex`
29 | * :ref:`modindex`
30 | * :ref:`search`
31 | 


--------------------------------------------------------------------------------
/docs/source/intro.rst:
--------------------------------------------------------------------------------
 1 | This is a small library to create bots, scripts and tools about Wikidata
 2 | Lexemes. Its philosophy is to have a transparent thin layer on top of the
 3 | internal data structures enriched with convenient functions without hiding the
 4 | power of the access to the internals.
 5 | 
 6 | LexData is still in beta phase and therefore some features are missing and
 7 | functions might be renamed in future.
 8 | 
 9 | The code of AitalvivemBot was used as a starting point, but probably there's not
10 | a single line of code that wasn't rewritten.
11 | 


--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
1 | LexData
2 | =======
3 | 
4 | .. toctree::
5 |    :maxdepth: 4
6 | 
7 |    LexData
8 | 


--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | import logging
 3 | 
 4 | import LexData
 5 | from LexData.language import en
 6 | 
 7 | logging.basicConfig(level=logging.INFO)
 8 | 
 9 | repo = LexData.WikidataSession("MichaelSchoenitzer", "foobar")
10 | 
11 | # Open a Lexeme
12 | L2 = LexData.Lexeme(repo, "L2")
13 | 
14 | # Access the claims
15 | print(L2.claims.keys())
16 | # and Forms
17 | print(len(L2.forms))
18 | F1 = L2.forms[0]
19 | print(F1.claims.keys())
20 | # and senses
21 | print(len(L2.senses))
22 | S1 = L2.senses[0]
23 | print(S1.claims.keys())
24 | 
25 | # Find or create a Lexeme by lemma, language and grammatical form
26 | L2 = LexData.get_or_create_lexeme(repo, "first", en, "Q1084")
27 | 
28 | # You can easily create forms…
29 | if len(L2.forms) == 0:
30 |     L2.createForm("firsts", ["Q146786"])
31 | 
32 | # …or senses, with or without additional claims
33 | if len(L2.senses) == 0:
34 |     L2.createSense(
35 |         {
36 |             "en": "Element in an ordered list which comes before all others according to the ordering",
37 |             "de": "einer Ordnung folgend das Element vor allen anderen",
38 |         },
39 |         claims={"P5137": ["Q19269277"]},
40 |     )
41 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | 
 3 | with open("README.md", "r") as fh:
 4 |     long_description = fh.read()
 5 | 
 6 | setuptools.setup(
 7 |     name="LexData",
 8 |     version="0.2.0",
 9 |     author="Michael F. Schoenitzer",
10 |     author_email="michael@schoenitzer.de",
11 |     description="A tiny package for editing Lexemes on Wikidata",
12 |     long_description=long_description,
13 |     long_description_content_type="text/markdown",
14 |     url="https://github.com/Nudin/LexData",
15 |     packages=setuptools.find_packages(),
16 |     classifiers=[
17 |         "Programming Language :: Python :: 3",
18 |         "License :: OSI Approved :: MIT License",
19 |         "Operating System :: OS Independent",
20 |     ],
21 |     install_requires=["requests"],
22 | )
23 | 


--------------------------------------------------------------------------------
/test_lexdata.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | from datetime import datetime
  3 | from pathlib import Path
  4 | 
  5 | import pytest
  6 | 
  7 | import LexData
  8 | 
  9 | 
 10 | @pytest.fixture
 11 | def credentials():
 12 |     with open(Path.home() / ".wikipass") as f:
 13 |         userename, password, *_ = f.read().split("\n")
 14 |     return (userename, password)
 15 | 
 16 | 
 17 | @pytest.fixture
 18 | def repo(credentials):
 19 |     username, password = credentials
 20 |     return LexData.WikidataSession(username, password)
 21 | 
 22 | 
 23 | @pytest.fixture
 24 | def repoTestWikidata():
 25 |     test = LexData.WikidataSession()
 26 |     test.URL = "https://test.wikidata.org/w/api.php"
 27 |     test.CSRF_TOKEN = "+\\"
 28 |     return test
 29 | 
 30 | 
 31 | def test_auth(credentials):
 32 |     with pytest.raises(Exception):
 33 |         assert LexData.WikidataSession("Username", "Password")
 34 |     anon = LexData.WikidataSession()
 35 |     LexData.Lexeme(anon, "L2")
 36 |     # anon.maxlag = 1
 37 |     # LexData.Lexeme(anon, "L2")
 38 | 
 39 | 
 40 | def test_lexeme(repo):
 41 |     L2 = LexData.Lexeme(repo, "L2")
 42 | 
 43 |     assert L2.lemma == "first"
 44 |     assert L2.language == "en"
 45 |     claims = L2.claims
 46 |     assert isinstance(claims, dict)
 47 | 
 48 |     examples = claims.get("P5831", [])
 49 |     assert len(examples) >= 1
 50 |     example = examples[0]
 51 |     assert isinstance(repr(example), str)
 52 |     assert example.property == "P5831"
 53 |     assert example.rank == "normal"
 54 |     assert example.numeric_rank == 0
 55 |     assert example.type == "monolingualtext"
 56 |     assert example.pure_value == "He was first in line."
 57 |     assert example.value["text"] == "He was first in line."
 58 |     assert example.value["language"] == "en"
 59 | 
 60 | 
 61 | def test_sense(repo):
 62 |     L2 = LexData.Lexeme(repo, "L2")
 63 |     assert str(L2)
 64 |     assert repr(L2)
 65 | 
 66 |     senses = L2.senses
 67 |     assert isinstance(senses, list)
 68 |     for sense in senses:
 69 |         assert isinstance(repr(sense), str)
 70 |         assert isinstance(sense.glosse(), str)
 71 |         assert isinstance(sense.glosse("de"), str)
 72 |         assert sense.glosse("en") == sense.glosse()
 73 |         assert sense.glosse("XX") == sense.glosse()
 74 |         del sense["glosses"]["en"]
 75 |         assert isinstance(sense.glosse("XX"), str)
 76 |         assert isinstance(sense.claims, dict)
 77 | 
 78 | 
 79 | def test_form(repo):
 80 |     L2 = LexData.Lexeme(repo, "L2")
 81 |     forms = L2.forms
 82 |     assert isinstance(forms, list)
 83 |     for form in forms:
 84 |         assert isinstance(repr(form), str)
 85 |         assert isinstance(form.form, str)
 86 |         assert isinstance(form.claims, dict)
 87 | 
 88 | 
 89 | def test_writes(repoTestWikidata):
 90 |     L123 = LexData.Lexeme(repoTestWikidata, "L123")
 91 | 
 92 |     L123.createClaims({"P7": ["Q100"]})
 93 |     L123.addClaims({"P7": ["Q100"]})
 94 | 
 95 |     L123.createForm("test", ["Q100"])
 96 | 
 97 |     L123.createSense({"de": "testtest", "en": "testtest"})
 98 |     L123.createSense({"de": "more tests", "en": "more tests"}, claims={})
 99 |     L123.createSense({"en": "even more tests"}, claims={"P7": ["Q100"]})
100 | 
101 | 
def test_search(repo):
    """Searching for and get-or-create of the lemma "first" must yield L2."""
    english = LexData.language.en

    hits = LexData.search_lexemes(repo, "first", english, "Q1084")
    assert len(hits) == 1
    assert hits[0].get("id") == "L2"

    found = LexData.get_or_create_lexeme(repo, "first", english, "Q1084")
    assert found["id"] == "L2"
109 | 
110 | 
def test_detatchedClaim(repo):
    """Create claims that are not attached to an entity.

    Covers one claim per supported value type and checks that values which
    do not fit the datatype of the property are rejected with a TypeError.
    """
    LexData.Claim(propertyId="P369", value="Q1")
    LexData.Claim(propertyId="P856", value="http://example.com/")
    # Raw string: in a normal literal "\f" would be a form feed character
    LexData.Claim(propertyId="P2534", value=r"\frac{1}{2}")
    quantity = LexData.Claim(propertyId="P2021", value=6)
    assert quantity.pure_value == 6
    date = LexData.Claim(propertyId="P580", value=datetime.now())
    assert type(date.pure_value) is str
    # One context per call: statements following the first raising one inside
    # a single pytest.raises block would never be executed.
    mismatched = [("P856", 1), ("P2021", "foo"), ("P580", 1), ("P580", "foo")]
    for property_id, bad_value in mismatched:
        with pytest.raises(TypeError):
            LexData.Claim(propertyId=property_id, value=bad_value)
    with pytest.raises(Exception):
        LexData.Claim(propertyId="P0", value="foo")
126 | 
127 | 
def test_createLexeme(repoTestWikidata):
    """Create a new English Lexeme "foobar" using the test session."""
    english = LexData.language.en
    LexData.create_lexeme(repoTestWikidata, "foobar", english, "Q100")
130 | 


--------------------------------------------------------------------------------