├── .gitignore ├── LICENSE ├── LexData ├── __init__.py ├── claim.py ├── entity.py ├── form.py ├── language.py ├── languages.py ├── lexeme.py ├── sense.py ├── utils.py ├── version.py └── wikidatasession.py ├── README.md ├── docs ├── Makefile ├── make.bat └── source │ ├── Claim.rst │ ├── Entity.rst │ ├── Form.rst │ ├── Language.rst │ ├── LexData.rst │ ├── Lexeme.rst │ ├── Sense.rst │ ├── conf.py │ ├── index.rst │ ├── intro.rst │ └── modules.rst ├── example.py ├── setup.py └── test_lexdata.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | 3 | __pycache__ 4 | *pyc 5 | .mypy_cache 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 The Python Packaging Authority 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /LexData/__init__.py: -------------------------------------------------------------------------------- 1 | # -*-coding:utf-8-* 2 | import json 3 | import logging 4 | from typing import List 5 | 6 | from .claim import Claim 7 | from .form import Form 8 | from .language import Language 9 | from .lexeme import Lexeme 10 | from .sense import Sense 11 | from .wikidatasession import WikidataSession 12 | 13 | 14 | def get_or_create_lexeme( 15 | repo: WikidataSession, lemma: str, lang: Language, catLex: str 16 | ) -> Lexeme: 17 | """Search for a lexeme in wikidata if not found, create it 18 | 19 | :param repo: Wikidata Session 20 | :type repo: WikidataSession 21 | :param lemma: the lemma of the lexeme 22 | :type lemma: str 23 | :param lang: language of the lexeme 24 | :type lang: Language 25 | :param catLex: lexical Category of the lexeme 26 | :type catLex: str 27 | :returns: Lexeme with the specified properties (created or found) 28 | :rtype: Lexeme 29 | 30 | """ 31 | lexemes = search_lexemes(repo, lemma, lang, catLex) 32 | if len(lexemes) == 1: 33 | return lexemes[0] 34 | elif len(lexemes) > 1: 35 | logging.warning("Multiple lexemes found, using first one.") 36 | return lexemes[0] 37 | else: 38 | return create_lexeme(repo, lemma, lang, catLex) 39 | 40 | 41 | def search_lexemes( 42 | repo: WikidataSession, lemma: str, lang: Language, catLex: str 43 | ) -> List[Lexeme]: 44 | """ 45 | Search for a lexeme by it's label, language and lexical category. 46 | 47 | :param repo: Wikidata Session 48 | :type repo: WikidataSession 49 | :param lemma: the lemma of the lexeme 50 | :type lemma: str 51 | :param lang: language of the lexeme 52 | :type lang: Language 53 | :param catLex: lexical Category of the lexeme 54 | :type catLex: str 55 | :returns: List of Lexemes with the specified properties 56 | :rtype: List[Lexeme] 57 | """ 58 | # the language we specify in search is currently not used by the search 59 | # set it nevertheless, except if it is a Language without ISO code 60 | if lang.short[:3] == "mis": 61 | searchlang = "en" 62 | else: 63 | searchlang = lang.short 64 | 65 | PARAMS = { 66 | "action": "wbsearchentities", 67 | "language": searchlang, 68 | "type": "lexeme", 69 | "search": lemma, 70 | "format": "json", 71 | "limit": "10", 72 | } 73 | 74 | DATA = repo.get(PARAMS) 75 | 76 | if "error" in DATA: 77 | raise Exception(DATA["error"]) 78 | 79 | # Iterate over all results and check for matches. Do not rely on 80 | # match-results, since they can differ for smaller languages – use them 81 | # however to avoid unnecessary queries. 82 | lexemes = [] 83 | for item in DATA["search"]: 84 | if item["label"] == lemma: 85 | if "language" in item["match"]: 86 | if item["match"]["language"] != lang.short and item["match"] != "und": 87 | continue 88 | idLex = item["id"] 89 | lexeme = Lexeme(repo, idLex) 90 | if lexeme["language"] == lang.qid and lexeme["lexicalCategory"] == catLex: 91 | logging.info("Found lexeme: %s", idLex) 92 | lexemes.append(lexeme) 93 | return lexemes 94 | 95 | 96 | def create_lexeme( 97 | repo: WikidataSession, lemma: str, lang: Language, catLex: str, claims=None 98 | ) -> Lexeme: 99 | """Creates a lexeme 100 | 101 | :param repo: Wikidata Session 102 | :type repo: WikidataSession 103 | :param lemma: value of the lexeme 104 | :type lemma: str 105 | :param lang: language 106 | :type lang: Language 107 | :param catLex: lexicographical category 108 | :param claims: claims to add to the lexeme (Default value = None) -> Lexem) 109 | :type catLex: str 110 | :returns: The created Lexeme 111 | :rtype: Lexeme 112 | 113 | """ 114 | 115 | # Create the json with the lexeme's data 116 | data_lex = json.dumps( 117 | { 118 | "type": "lexeme", 119 | "lemmas": {lang.short: {"value": lemma, "language": lang.short}}, 120 | "language": lang.qid, 121 | "lexicalCategory": catLex, 122 | "forms": [], 123 | } 124 | ) 125 | 126 | # Send a post to edit a lexeme 127 | PARAMS = { 128 | "action": "wbeditentity", 129 | "format": "json", 130 | "bot": "1", 131 | "new": "lexeme", 132 | "token": "__AUTO__", 133 | "data": data_lex, 134 | } 135 | 136 | DATA = repo.post(PARAMS) 137 | # Get the id of the new lexeme 138 | idLex = DATA["entity"]["id"] 139 | 140 | logging.info("Created lexeme: %s", idLex) 141 | lexeme = Lexeme(repo, idLex) 142 | 143 | if claims: 144 | lexeme.createClaims(claims) 145 | 146 | return lexeme 147 | -------------------------------------------------------------------------------- /LexData/claim.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Tuple, Union 2 | 3 | from .utils import buildSnak 4 | 5 | 6 | class Claim(dict): 7 | """Wrapper around a dict to represent a Claim 8 | 9 | There are two types of Claim objects: 10 | 11 | * Claims that where received from an existing entity. 12 | * Claims that where created by the user by Claim(propertyId, value) and 13 | have not yet been uploaded to Wikidata. These are called 'Detached Claims', 14 | since they don't belong to any entity. They don't have an id nor an hash. 15 | They can be added to an entity by the function Entity.addClaims(). 16 | 17 | Currently modifications on both types of claims can't be uploaded, except 18 | by use of the low level API call Lexeme.update_from_json(). 19 | """ 20 | 21 | # Hack needed to define a property called property 22 | property_decorator = property 23 | 24 | def __init__( 25 | self, 26 | claim: Optional[Dict[str, Any]] = None, 27 | propertyId: Optional[str] = None, 28 | value: Optional[Any] = None, 29 | ): 30 | super().__init__() 31 | if isinstance(claim, dict) and not propertyId and not value: 32 | self.update(claim) 33 | elif claim is None and propertyId and value: 34 | self["mainsnak"] = buildSnak(propertyId, value) 35 | self["rank"] = "normal" 36 | else: 37 | raise TypeError( 38 | "Claim() received an invalid combination of arguments expected one of:" 39 | + " * (dict claimObject)" 40 | + " * (str propertyId, value)" 41 | ) 42 | 43 | @property_decorator 44 | def value(self) -> Dict[str, Any]: 45 | """ 46 | Return the value of the claim. The type depends on the data type. 47 | """ 48 | return self["mainsnak"]["datavalue"]["value"] 49 | 50 | @property_decorator 51 | def type(self) -> str: 52 | """ 53 | Return the data type of the claim. 54 | 55 | :rtype: str 56 | """ 57 | return self["mainsnak"]["datatype"] 58 | 59 | @property_decorator 60 | def property(self) -> str: 61 | """ 62 | Return the id of the property of the claim. 63 | 64 | :rtype: str 65 | """ 66 | return self["mainsnak"]["property"] 67 | 68 | @property_decorator 69 | def rank(self) -> str: 70 | """ 71 | Return the rank of the claim. 72 | 73 | :rtype: str 74 | """ 75 | return self["rank"] 76 | 77 | @property_decorator 78 | def numeric_rank(self) -> int: 79 | """ 80 | Return the rank of the claim as integer. 81 | 82 | :rtype: int 83 | """ 84 | if self.rank == "normal": 85 | return 0 86 | elif self.rank == "preferred": 87 | return 1 88 | elif self.rank == "deprecated": 89 | return -1 90 | raise NotImplementedError("Unknown or invalid rank {}".format(self.rank)) 91 | 92 | @property_decorator 93 | def pure_value(self) -> Union[str, int, float, Tuple[float, float]]: 94 | """ 95 | Return just the 'pure' value, what this is depends on the type of the value: 96 | 97 | * wikibase-entity: the id as string, including 'L/Q/P'-prefix 98 | * string: the string 99 | * manolingualtext: the text as string 100 | * quantity: the amount as float 101 | * time: the timestamp as string in format ISO 8601 102 | * globecoordinate: tuple of latitude and longitude as floats 103 | 104 | Be aware that for most types this is not the full information stored in 105 | the value. 106 | """ 107 | value = self.value 108 | vtype = self.type 109 | if vtype == "wikibase-entityid": 110 | return value["id"] 111 | if vtype == "string": 112 | return value 113 | if vtype == "monolingualtext": 114 | return value["text"] 115 | if vtype == "quantity": 116 | return float(value["amount"]) 117 | if vtype == "time": 118 | return value["time"] 119 | if vtype == "globecoordinate": 120 | return (float(value["latitude"]), float(value["longitude"])) 121 | raise NotImplementedError 122 | 123 | def __repr__(self) -> str: 124 | if "id" in self: 125 | return "".format(repr(self.value)) 126 | else: 127 | return "".format(repr(self.value)) 128 | -------------------------------------------------------------------------------- /LexData/entity.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from typing import Dict, List, Union 4 | 5 | from .claim import Claim 6 | from .wikidatasession import WikidataSession 7 | 8 | 9 | class Entity(dict): 10 | """ 11 | Base class for all types of entities – currently: Lexeme, Form, Sense. 12 | Not yet implemented: Item, Property. 13 | """ 14 | 15 | def __init__(self, repo: WikidataSession): 16 | super().__init__() 17 | self.repo = repo 18 | 19 | @property 20 | def claims(self) -> Dict[str, List[Claim]]: 21 | """ 22 | All the claims of the Entity 23 | 24 | :rtype: Dict[str, List[Claim]] 25 | """ 26 | if self.get("claims", {}) != []: 27 | return {k: [Claim(c) for c in v] for k, v in self.get("claims", {}).items()} 28 | else: 29 | return {} 30 | 31 | def addClaims(self, claims: Union[List[Claim], Dict[str, List[str]]]): 32 | """ 33 | Add claims to the entity. 34 | 35 | :param claims: The claims to be added to the entity. 36 | 37 | There are two possibilities for this: 38 | 39 | - A list of Objects of type Claim 40 | 41 | Example: ``[Claim(propertyId="P31", value="Q1")]`` 42 | 43 | - A dictionary with the property id as key and lists of 44 | string formated entity ids as values. 45 | 46 | Example: ``{"P31": ["Q1", "Q2"]}`` 47 | 48 | The first supports all datatypes, whereas the later 49 | currently only supports datatypes of kind Entity. 50 | """ 51 | if isinstance(claims, list): 52 | self.__setClaims__(claims) 53 | elif isinstance(claims, dict): 54 | self.__createClaims__(claims) 55 | else: 56 | raise TypeError("Invalid argument type:", type(claims)) 57 | 58 | def __setClaims__(self, claims: List[Claim]): 59 | """ 60 | Add prebuild claims to the entity 61 | 62 | :param claims: The list of claims to be added 63 | """ 64 | for claim in claims: 65 | pid = claim.property 66 | self.__setClaim__(pid, claim) 67 | 68 | def __createClaims__(self, claims: Dict[str, List[str]]): 69 | """ 70 | Create and add new claims to the entity. 71 | 72 | Only properties of some entity type are implemented: 73 | Item, Property, Lexeme, Form and Sense 74 | 75 | :param claims: The set of claims to be added 76 | """ 77 | for cle, values in claims.items(): 78 | for value in values: 79 | self.__setEntityClaim__(cle, value) 80 | 81 | def __setEntityClaim__(self, idProp: str, idStr: str): 82 | """ 83 | Add a claim of an entity-type to the entity. 84 | 85 | Supported types are Lexeme, Form, Sense, Item, Property. 86 | 87 | :param idProp: id of the property (example: "P31") 88 | :param idItem: id of the entity (example: "Q1") 89 | """ 90 | entityId = int(idStr[1:]) 91 | claim_value = json.dumps({"entity-type": "item", "numeric-id": entityId}) 92 | self.__setClaim__(idProp, claim_value) 93 | 94 | def __setClaim__(self, idProp: str, claim_value): 95 | PARAMS = { 96 | "action": "wbcreateclaim", 97 | "format": "json", 98 | "entity": self.id, 99 | "snaktype": "value", 100 | "bot": "1", 101 | "property": idProp, 102 | "value": claim_value, 103 | "token": "__AUTO__", 104 | } 105 | 106 | DATA = self.repo.post(PARAMS) 107 | assert "claim" in DATA 108 | addedclaim = DATA["claim"] 109 | logging.info("Claim added") 110 | 111 | # Add the created claim to the local entity instance 112 | if self.get("claims", []) == []: 113 | self["claims"] = {idProp: addedclaim} 114 | elif idProp in self.claims: 115 | self.claims[idProp].append(addedclaim) 116 | else: 117 | self.claims[idProp] = [addedclaim] 118 | 119 | @property 120 | def id(self) -> str: 121 | EntityId = self.get("id") 122 | assert isinstance(EntityId, str) 123 | return EntityId 124 | 125 | def __str__(self) -> str: 126 | return super().__repr__() 127 | -------------------------------------------------------------------------------- /LexData/form.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from .entity import Entity 4 | from .wikidatasession import WikidataSession 5 | 6 | 7 | class Form(Entity): 8 | """Wrapper around a dict to represent a From""" 9 | 10 | def __init__(self, repo: WikidataSession, form: Dict): 11 | super().__init__(repo) 12 | self.update(form) 13 | 14 | @property 15 | def form(self) -> str: 16 | """ 17 | String of the form value ("representation") 18 | 19 | :rtype: str 20 | """ 21 | return list(self["representations"].values())[0]["value"] 22 | 23 | def __repr__(self) -> str: 24 | return "
".format(self.form) 25 | -------------------------------------------------------------------------------- /LexData/language.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module simply contains a few common Languages with their language-codes 3 | and QIDs for easier use. 4 | """ 5 | from dataclasses import dataclass 6 | 7 | 8 | @dataclass 9 | class Language: 10 | """Dataclass representing a language""" 11 | 12 | short: str 13 | qid: str 14 | 15 | 16 | # feel free to add more languages 17 | en = Language("en", "Q1860") 18 | de = Language("de", "Q188") 19 | fr = Language("fr", "Q150") 20 | -------------------------------------------------------------------------------- /LexData/languages.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .language import * 4 | 5 | logging.warning( 6 | "LexData.languages has been renamed to LexData.language. " 7 | + "LexData.languages will be removed in future versions." 8 | ) 9 | -------------------------------------------------------------------------------- /LexData/lexeme.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from typing import Dict, List, Optional 4 | 5 | from .claim import Claim 6 | from .entity import Entity 7 | from .form import Form 8 | from .language import Language 9 | from .sense import Sense 10 | from .wikidatasession import WikidataSession 11 | 12 | 13 | class Lexeme(Entity): 14 | """Wrapper around a dict to represent a Lexeme""" 15 | 16 | def __init__(self, repo: WikidataSession, idLex: str): 17 | super().__init__(repo) 18 | self.getLex(idLex) 19 | 20 | def getLex(self, idLex: str): 21 | """This function gets and returns the data of a lexeme for a given id. 22 | 23 | :param idLex: Lexeme identifier (example: "L2") 24 | :type idLex: str 25 | :returns: Simplified object representation of Lexeme 26 | 27 | """ 28 | 29 | PARAMS = {"action": "wbgetentities", "format": "json", "ids": idLex} 30 | 31 | DATA = self.repo.get(PARAMS) 32 | 33 | self.update(DATA["entities"][idLex]) 34 | 35 | @property 36 | def lemma(self) -> str: 37 | """ 38 | The lemma of the lexeme as string 39 | 40 | :rtype: str 41 | """ 42 | return list(self["lemmas"].values())[0]["value"] 43 | 44 | @property 45 | def language(self) -> str: 46 | """ 47 | The language code of the lexeme as string 48 | 49 | :rtype: str 50 | """ 51 | return list(self["lemmas"].values())[0]["language"] 52 | 53 | @property 54 | def forms(self) -> List[Form]: 55 | """ 56 | List of all forms 57 | 58 | :rtype: List[Form] 59 | """ 60 | return [Form(self.repo, f) for f in super().get("forms", [])] 61 | 62 | @property 63 | def senses(self) -> List[Sense]: 64 | """ 65 | List of all senses 66 | 67 | :rtype: List[Sense] 68 | """ 69 | return [Sense(self.repo, s) for s in super().get("senses", [])] 70 | 71 | def createSense( 72 | self, glosses: Dict[str, str], claims: Optional[List[Claim]] = None 73 | ) -> str: 74 | """Create a sense for the lexeme. 75 | 76 | :param glosses: glosses for the sense 77 | :type glosses: Dict[str, str] 78 | :param claims: claims to add to the new form 79 | :rtype: str 80 | """ 81 | # Create the json with the sense's data 82 | data_sense: Dict[str, Dict[str, Dict[str, str]]] = {"glosses": {}} 83 | for lang, gloss in glosses.items(): 84 | data_sense["glosses"][lang] = {"value": gloss, "language": lang} 85 | 86 | # send a post to add sense to lexeme 87 | PARAMS = { 88 | "action": "wbladdsense", 89 | "format": "json", 90 | "lexemeId": self.id, 91 | "token": "__AUTO__", 92 | "bot": "1", 93 | "data": json.dumps(data_sense), 94 | } 95 | DATA = self.repo.post(PARAMS) 96 | addedSense = Sense(self.repo, DATA["sense"]) 97 | idSense = DATA["sense"]["id"] 98 | logging.info("Created sense: %s", idSense) 99 | 100 | # Add the claims 101 | if claims: 102 | addedSense.addClaims(claims) 103 | 104 | # Add the created form to the local lexeme 105 | self["senses"].append(addedSense) 106 | 107 | return idSense 108 | 109 | def createForm( 110 | self, 111 | form: str, 112 | infosGram: List[str], 113 | language: Optional[Language] = None, 114 | claims: Optional[List[Claim]] = None, 115 | ) -> str: 116 | """Create a form for the lexeme. 117 | 118 | :param form: the new form to add 119 | :type form: str 120 | :param infosGram: grammatical features 121 | :type infosGram: List[str] 122 | :param language: the language of the form 123 | :type language: Optional[Language] 124 | :param claims: claims to add to the new form 125 | :returns: The id of the form 126 | :rtype: str 127 | 128 | """ 129 | 130 | if language is None: 131 | languagename = self.language 132 | else: 133 | languagename = language.short 134 | 135 | # Create the json with the forms's data 136 | data_form = json.dumps( 137 | { 138 | "representations": { 139 | languagename: {"value": form, "language": languagename} 140 | }, 141 | "grammaticalFeatures": infosGram, 142 | } 143 | ) 144 | 145 | # send a post to add form to lexeme 146 | PARAMS = { 147 | "action": "wbladdform", 148 | "format": "json", 149 | "lexemeId": self.id, 150 | "token": "__AUTO__", 151 | "bot": "1", 152 | "data": data_form, 153 | } 154 | DATA = self.repo.post(PARAMS) 155 | addedForm = Form(self.repo, DATA["form"]) 156 | idForm = DATA["form"]["id"] 157 | logging.info("Created form: %s", idForm) 158 | 159 | # Add the claims 160 | if claims: 161 | addedForm.addClaims(claims) 162 | 163 | # Add the created form to the local lexeme 164 | self["forms"].append(addedForm) 165 | 166 | return idForm 167 | 168 | def createClaims(self, claims: Dict[str, List[str]]): 169 | """Add claims to the Lexeme. 170 | 171 | createClaim() is deprecated and might be removed in future versions. 172 | Use Entity.addClaims() instead. 173 | 174 | :param claims: The set of claims to be added 175 | 176 | """ 177 | logging.warning( 178 | "createClaim() is deprecated and might be removed in future versions." 179 | + " Use Entity.addClaims() instead" 180 | ) 181 | self.__createClaims__(claims) 182 | 183 | def __repr__(self) -> str: 184 | return "".format(self.id) 185 | 186 | def update_from_json(self, data: str, overwrite=False): 187 | """Update the lexeme from an json-string. 188 | 189 | This is a lower level function usable to save arbitrary modifications 190 | on a lexeme. The data has to be supplied in the right format by the 191 | user. 192 | 193 | :param data: Data update: See the API documentation about the format. 194 | :param overwrite: If set the whole entity is replaced by the supplied data 195 | """ 196 | PARAMS: Dict[str, str] = { 197 | "action": "wbeditentity", 198 | "format": "json", 199 | "bot": "1", 200 | "id": self.id, 201 | "token": "__AUTO__", 202 | "data": data, 203 | } 204 | if overwrite: 205 | PARAMS["clear"] = "true" 206 | DATA = self.repo.post(PARAMS) 207 | if DATA.get("success") != 200: 208 | raise ValueError(DATA) 209 | logging.info("Updated from json data") 210 | # Due to limitations of the API, the returned data cannot be used to 211 | # update the instance. Therefore reload the lexeme. 212 | self.getLex(self.id) 213 | -------------------------------------------------------------------------------- /LexData/sense.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from .entity import Entity 4 | from .wikidatasession import WikidataSession 5 | 6 | 7 | class Sense(Entity): 8 | """Wrapper around a dict to represent a Sense""" 9 | 10 | def __init__(self, repo: WikidataSession, form: Dict): 11 | super().__init__(repo) 12 | self.update(form) 13 | 14 | def glosse(self, lang: str = "en") -> str: 15 | """ 16 | The gloss of the text in the specified language is available, otherwise 17 | in englisch, and if that's not set too in an arbitrary set language 18 | 19 | :param lang: language code of the wished language 20 | :type lang: str 21 | :rtype: str 22 | """ 23 | if lang not in self["glosses"]: 24 | if "en" in self["glosses"]: 25 | lang = "en" 26 | else: 27 | lang = list(self["glosses"].keys())[0] 28 | return self["glosses"][lang]["value"] 29 | 30 | def __repr__(self) -> str: 31 | return "".format(self.glosse()) 32 | -------------------------------------------------------------------------------- /LexData/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import json 3 | from datetime import datetime 4 | from typing import Any, Dict 5 | 6 | from .wikidatasession import WikidataSession 7 | 8 | 9 | @functools.lru_cache() 10 | def getPropertyType(propertyId: str): 11 | repo = WikidataSession() 12 | query = { 13 | "action": "query", 14 | "format": "json", 15 | "prop": "revisions", 16 | "titles": "Property:" + propertyId, 17 | "rvprop": "content", 18 | } 19 | DATA = repo.get(query) 20 | jsonstr = list(DATA["query"]["pages"].values())[0]["revisions"][0]["*"] 21 | content = json.loads(jsonstr) 22 | return content["datatype"] 23 | 24 | 25 | def buildDataValue(datatype: str, value): 26 | if datatype in [ 27 | "wikibase-lexeme", 28 | "wikibase-form", 29 | "wikibase-sense", 30 | "wikibase-item", 31 | "wikibase-property", 32 | ]: 33 | if type(value) == dict: 34 | return {"value": value, "type": "wikibase-entity"} 35 | elif type(value) == str: 36 | value = {"entity-type": datatype[9:], "id": value} 37 | return {"value": value, "type": "wikibase-entity"} 38 | else: 39 | raise TypeError( 40 | f"Can not convert type {type(value)} to datatype {datatype}" 41 | ) 42 | elif datatype in [ 43 | "string", 44 | "tabular-data", 45 | "geo-shape", 46 | "url", 47 | "musical-notation", 48 | "math", 49 | "commonsMedia", 50 | ]: 51 | if type(value) == dict: 52 | return {"value": value, "type": "string"} 53 | elif type(value) == str: 54 | return {"value": {"value": value}, "type": "string"} 55 | else: 56 | raise TypeError( 57 | f"Can not convert type {type(value)} to datatype {datatype}" 58 | ) 59 | elif datatype == "monolingualtext": 60 | if type(value) == dict: 61 | return {"value": value, "type": "monolingualtext"} 62 | else: 63 | raise TypeError( 64 | f"Can not convert type {type(value)} to datatype {datatype}" 65 | ) 66 | elif datatype == "globe-coordinate": 67 | if type(value) == dict: 68 | return {"value": value, "type": "globecoordinate"} 69 | else: 70 | raise TypeError( 71 | f"Can not convert type {type(value)} to datatype {datatype}" 72 | ) 73 | elif datatype == "quantity": 74 | if type(value) == dict: 75 | return {"value": value, "type": "quantity"} 76 | if type(value) in [int, float]: 77 | valueObj = { 78 | "amount": "%+f" % value, 79 | "unit": "1", 80 | } 81 | return {"value": valueObj, "type": "time"} 82 | else: 83 | raise TypeError( 84 | f"Can not convert type {type(value)} to datatype {datatype}" 85 | ) 86 | elif datatype == "time": 87 | if type(value) == dict: 88 | return {"value": value, "type": "time"} 89 | if type(value) == datetime: 90 | cleanedDateTime = value.replace(hour=0, minute=0, second=0, microsecond=0) 91 | valueObj: Dict[str, Any] = { 92 | "time": "+" + cleanedDateTime.isoformat() + "Z", 93 | "timezone": 0, 94 | "before": 0, 95 | "after": 0, 96 | "precision": 11, 97 | "calendarmodel": "http://www.wikidata.org/entity/Q1985727", 98 | } 99 | return {"value": valueObj, "type": "time"} 100 | else: 101 | raise TypeError( 102 | f"Can not convert type {type(value)} to datatype {datatype}" 103 | ) 104 | else: 105 | raise NotImplementedError(f"Datatype {datatype} not implemented") 106 | 107 | 108 | def buildSnak(propertyId: str, value): 109 | datatype = getPropertyType(propertyId) 110 | datavalue = buildDataValue(datatype, value) 111 | return { 112 | "snaktype": "value", 113 | "property": propertyId, 114 | "datavalue": datavalue, 115 | "datatype": datatype, 116 | } 117 | -------------------------------------------------------------------------------- /LexData/version.py: -------------------------------------------------------------------------------- 1 | name = "LexData" 2 | version = "0.2.0" 3 | user_agent = "%s %s" % (name, version) 4 | -------------------------------------------------------------------------------- /LexData/wikidatasession.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | from typing import Any, Dict, Optional 4 | 5 | import requests 6 | 7 | from .version import user_agent 8 | 9 | 10 | class WikidataSession: 11 | """Wikidata network and authentication session. Needed for everything this 12 | framework does. 13 | """ 14 | 15 | URL: str = "https://www.wikidata.org/w/api.php" 16 | assertUser: Optional[str] = None 17 | maxlag: int = 5 18 | 19 | def __init__( 20 | self, 21 | username: Optional[str] = None, 22 | password: Optional[str] = None, 23 | token: Optional[str] = None, 24 | auth: Optional[str] = None, 25 | user_agent: str = user_agent, 26 | ): 27 | """ 28 | Create a wikidata session by login in and getting the token 29 | """ 30 | self.username = username 31 | self.password = password 32 | self.auth = auth 33 | self.headers = {"User-Agent": user_agent} 34 | self.S = requests.Session() 35 | if username is not None and password is not None: 36 | # Since logins don't put load on the servers 37 | # we set maxlag higher for these requests. 38 | self.maxlag = 30 39 | self.login() 40 | self.maxlag = 5 41 | if token is not None: 42 | self.CSRF_TOKEN = token 43 | # After login enable 'assertUser'-feature of the Mediawiki-API to 44 | # make sure to never edit accidentally as IP 45 | if username is not None: 46 | # truncate bot name if a "bot password" is used 47 | self.assertUser = username.split("@")[0] 48 | 49 | def login(self): 50 | # Ask for a token 51 | PARAMS_1 = { 52 | "action": "query", 53 | "meta": "tokens", 54 | "type": "login", 55 | "format": "json", 56 | } 57 | DATA = self.get(PARAMS_1) 58 | LOGIN_TOKEN = DATA["query"]["tokens"]["logintoken"] 59 | 60 | # connexion request 61 | PARAMS_2 = { 62 | "action": "login", 63 | "lgname": self.username, 64 | "lgpassword": self.password, 65 | "format": "json", 66 | "lgtoken": LOGIN_TOKEN, 67 | } 68 | DATA = self.post(PARAMS_2) 69 | if DATA.get("login", []).get("result") != "Success": 70 | raise PermissionError("Login failed", DATA["login"]["reason"]) 71 | logging.info("Log in succeeded") 72 | 73 | PARAMS_3 = {"action": "query", "meta": "tokens", "format": "json"} 74 | DATA = self.get(PARAMS_3) 75 | self.CSRF_TOKEN = DATA["query"]["tokens"]["csrftoken"] 76 | logging.info("Got CSRF token: %s", self.CSRF_TOKEN) 77 | 78 | def post(self, data: Dict[str, str]) -> Any: 79 | """Send data to wikidata by POST request. The CSRF token is automatically 80 | filled in if __AUTO__ is given instead. 81 | 82 | :param data: Parameters to send via POST 83 | :type data: Dict[str, str]) 84 | :returns: Answer form the server as Objekt 85 | :rtype: Any 86 | 87 | """ 88 | if data.get("token") == "__AUTO__": 89 | data["token"] = self.CSRF_TOKEN 90 | if "assertuser" not in data and self.assertUser is not None: 91 | data["assertuser"] = self.assertUser 92 | data["maxlag"] = str(self.maxlag) 93 | R = self.S.post(self.URL, data=data, headers=self.headers, auth=self.auth) 94 | if R.status_code != 200: 95 | raise Exception( 96 | "POST was unsuccessfull ({}): {}".format(R.status_code, R.text) 97 | ) 98 | DATA = R.json() 99 | if "error" in DATA: 100 | if DATA["error"]["code"] == "maxlag": 101 | sleepfor = float(R.headers.get("retry-after", 5)) 102 | logging.info("Maxlag hit, waiting for %.1f seconds", sleepfor) 103 | time.sleep(sleepfor) 104 | return self.post(data) 105 | else: 106 | raise PermissionError("API returned error: " + str(DATA["error"])) 107 | logging.debug("Post request succeed") 108 | return DATA 109 | 110 | def get(self, data: Dict[str, str]) -> Any: 111 | """Send a GET request to wikidata 112 | 113 | :param data: Parameters to send via GET 114 | :type data: Dict[str, str] 115 | :returns: Answer form the server as Objekt 116 | :rtype: Any 117 | 118 | """ 119 | R = self.S.get(self.URL, params=data, headers=self.headers) 120 | DATA = R.json() 121 | if R.status_code != 200 or "error" in DATA: 122 | # We do not set maxlag for GET requests – so this error can only 123 | # occur if the users sets maxlag in the request data object 124 | if DATA["error"]["code"] == "maxlag": 125 | sleepfor = float(R.headers.get("retry-after", 5)) 126 | logging.info("Maxlag hit, waiting for %.1f seconds", sleepfor) 127 | time.sleep(sleepfor) 128 | return self.get(data) 129 | else: 130 | raise Exception( 131 | "GET was unsuccessfull ({}): {}".format(R.status_code, R.text) 132 | ) 133 | logging.debug("Get request succeed") 134 | return DATA 135 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python bot framework for Lexemes on Wikidata 2 | 3 | This is a small library to create bots, scripts and tools about Wikidata 4 | Lexemes. It's philosophy is to have a transparent thin layer on top of the 5 | internal datastuctures enriched with convenient functions without hiding the 6 | power of the access to the internals. 7 | 8 | LexData is still in beta phase and there fore some features are missing and 9 | functions might be renamed in future. 10 | 11 | The code of AitalvivemBot was used as a starting point, but probably theres not 12 | a single line of code that wasn't rewritten. 13 | 14 | Install from pypi: 15 | ``` 16 | $ pip install LexData 17 | ``` 18 | 19 | Read the docs: [https://nudin.github.io/LexData/](https://nudin.github.io/LexData/) 20 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = ../../LexData-docs 10 | 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | .PHONY: help Makefile 17 | 18 | # Catch-all target: route all unknown targets to Sphinx using the new 19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 20 | %: Makefile 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/Claim.rst: -------------------------------------------------------------------------------- 1 | Claim 2 | ===== 3 | 4 | .. autoclass:: LexData.Claim 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/Entity.rst: -------------------------------------------------------------------------------- 1 | Entity 2 | ====== 3 | 4 | .. toctree:: 5 | :hidden: 6 | 7 | Lexeme 8 | Form 9 | Sense 10 | 11 | 12 | .. autoclass:: LexData.entity.Entity 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | -------------------------------------------------------------------------------- /docs/source/Form.rst: -------------------------------------------------------------------------------- 1 | Form 2 | ==== 3 | 4 | .. autoclass:: LexData.Form 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/Language.rst: -------------------------------------------------------------------------------- 1 | LexData.language 2 | ================ 3 | 4 | .. automodule:: LexData.language 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. literalinclude:: ../../LexData/language.py 10 | :language: python 11 | :lines: 8- 12 | -------------------------------------------------------------------------------- /docs/source/LexData.rst: -------------------------------------------------------------------------------- 1 | LexData 2 | ======= 3 | 4 | The central Class for working with LexData is the Class Lexeme, it is documented 5 | on a separate page: 6 | 7 | * :doc:`Lexeme` 8 | 9 | Other Classes and Functions 10 | --------------------------- 11 | 12 | .. automodule:: LexData 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | :exclude-members: Lexeme 17 | -------------------------------------------------------------------------------- /docs/source/Lexeme.rst: -------------------------------------------------------------------------------- 1 | Lexeme 2 | ====== 3 | 4 | .. autoclass:: LexData.Lexeme 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/Sense.rst: -------------------------------------------------------------------------------- 1 | Sense 2 | ===== 3 | 4 | .. autoclass:: LexData.Sense 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath("../..")) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = "LexData" 22 | copyright = "2019, Michael F. Schönitzer" 23 | author = "Michael F. Schönitzer" 24 | 25 | 26 | # -- General configuration --------------------------------------------------- 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = ["sphinx.ext.autodoc"] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 34 | templates_path = ["_templates"] 35 | 36 | # List of patterns, relative to source directory, that match files and 37 | # directories to ignore when looking for source files. 38 | # This pattern also affects html_static_path and html_extra_path. 39 | exclude_patterns = [] 40 | 41 | 42 | # -- Options for HTML output ------------------------------------------------- 43 | 44 | # The theme to use for HTML and HTML Help pages. See the documentation for 45 | # a list of builtin themes. 46 | # 47 | html_theme = "alabaster" 48 | 49 | # Add any paths that contain custom static files (such as style sheets) here, 50 | # relative to this directory. They are copied after the builtin static files, 51 | # so a file named "default.css" will overwrite the builtin "default.css". 52 | html_static_path = ["_static"] 53 | 54 | # html_sidebars = { 55 | # "**": ["about.html", "relations.html", "searchbox.html", "donate.html"] 56 | # } 57 | 58 | html_theme_options = { 59 | "github_user": "Nudin", 60 | "github_repo": "LexData", 61 | "github_type": "star", 62 | "github_count": False, 63 | "description": "Edit Wikidatas Lexemes", 64 | "page_width": "60em", 65 | "fixed_sidebar": True, 66 | } 67 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. LexData documentation master file, created by 2 | sphinx-quickstart on Tue Aug 27 21:50:47 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to LexData's documentation! 7 | =================================== 8 | 9 | .. include:: intro.rst 10 | 11 | A simple demonstration about how easy it is to edit Lexems with LexData: 12 | 13 | .. literalinclude:: ../../example.py 14 | :language: python 15 | 16 | .. toctree:: 17 | :maxdepth: 2 18 | :caption: Contents: 19 | 20 | LexData 21 | Entity 22 | Claim 23 | Language 24 | 25 | Indices and tables 26 | ================== 27 | 28 | * :ref:`genindex` 29 | * :ref:`modindex` 30 | * :ref:`search` 31 | -------------------------------------------------------------------------------- /docs/source/intro.rst: -------------------------------------------------------------------------------- 1 | This is a small library to create bots, scripts and tools about Wikidata 2 | Lexemes. It's philosophy is to have a transparent thin layer on top of the 3 | internal datastuctures enriched with convenient functions without hiding the 4 | power of the access to the internals. 5 | 6 | LexData is still in beta phase and there fore some features are missing and 7 | functions might be renamed in future. 8 | 9 | The code of AitalvivemBot was used as a starting point, but probably theres not 10 | a single line of code that wasn't rewritten. 11 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | LexData 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | LexData 8 | -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import logging 3 | 4 | import LexData 5 | from LexData.language import en 6 | 7 | logging.basicConfig(level=logging.INFO) 8 | 9 | repo = LexData.WikidataSession("MichaelSchoenitzer", "foobar") 10 | 11 | # Open a Lexeme 12 | L2 = LexData.Lexeme(repo, "L2") 13 | 14 | # Access the claims 15 | print(L2.claims.keys()) 16 | # and Forms 17 | print(len(L2.forms)) 18 | F1 = L2.forms[0] 19 | print(F1.claims.keys()) 20 | # and senses 21 | print(len(L2.senses)) 22 | S1 = L2.senses[0] 23 | print(S1.claims.keys()) 24 | 25 | # Find or create a Lexeme by lemma, language and grammatical form 26 | L2 = LexData.get_or_create_lexeme(repo, "first", en, "Q1084") 27 | 28 | # You can easily create forms… 29 | if len(L2.forms) == 0: 30 | L2.createForm("firsts", ["Q146786"]) 31 | 32 | # …or senses, with or without additional claims 33 | if len(L2.senses) == 0: 34 | L2.createSense( 35 | { 36 | "en": "Element in an ordered list which comes before all others according to the ordering", 37 | "de": "einer Ordnung folgend das Element vor allen anderen", 38 | }, 39 | claims={"P5137": ["Q19269277"]}, 40 | ) 41 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="LexData", 8 | version="0.2.0", 9 | author="Michael F. Schoenitzer", 10 | author_email="michael@schoenitzer.de", 11 | description="A tiny package for editing Lexemes on Wikidata", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/Nudin/LexData", 15 | packages=setuptools.find_packages(), 16 | classifiers=[ 17 | "Programming Language :: Python :: 3", 18 | "License :: OSI Approved :: MIT License", 19 | "Operating System :: OS Independent", 20 | ], 21 | install_requires=["requests"], 22 | ) 23 | -------------------------------------------------------------------------------- /test_lexdata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from datetime import datetime 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | import LexData 8 | 9 | 10 | @pytest.fixture 11 | def credentials(): 12 | with open(Path.home() / ".wikipass") as f: 13 | userename, password, *_ = f.read().split("\n") 14 | return (userename, password) 15 | 16 | 17 | @pytest.fixture 18 | def repo(credentials): 19 | username, password = credentials 20 | return LexData.WikidataSession(username, password) 21 | 22 | 23 | @pytest.fixture 24 | def repoTestWikidata(): 25 | test = LexData.WikidataSession() 26 | test.URL = "https://test.wikidata.org/w/api.php" 27 | test.CSRF_TOKEN = "+\\" 28 | return test 29 | 30 | 31 | def test_auth(credentials): 32 | with pytest.raises(Exception): 33 | assert LexData.WikidataSession("Username", "Password") 34 | anon = LexData.WikidataSession() 35 | LexData.Lexeme(anon, "L2") 36 | # anon.maxlag = 1 37 | # LexData.Lexeme(anon, "L2") 38 | 39 | 40 | def test_lexeme(repo): 41 | L2 = LexData.Lexeme(repo, "L2") 42 | 43 | assert L2.lemma == "first" 44 | assert L2.language == "en" 45 | claims = L2.claims 46 | assert isinstance(claims, dict) 47 | 48 | examples = claims.get("P5831", []) 49 | assert len(examples) >= 1 50 | example = examples[0] 51 | assert isinstance(repr(example), str) 52 | assert example.property == "P5831" 53 | assert example.rank == "normal" 54 | assert example.numeric_rank == 0 55 | assert example.type == "monolingualtext" 56 | assert example.pure_value == "He was first in line." 57 | assert example.value["text"] == "He was first in line." 58 | assert example.value["language"] == "en" 59 | 60 | 61 | def test_sense(repo): 62 | L2 = LexData.Lexeme(repo, "L2") 63 | assert str(L2) 64 | assert repr(L2) 65 | 66 | senses = L2.senses 67 | assert isinstance(senses, list) 68 | for sense in senses: 69 | assert isinstance(repr(sense), str) 70 | assert isinstance(sense.glosse(), str) 71 | assert isinstance(sense.glosse("de"), str) 72 | assert sense.glosse("en") == sense.glosse() 73 | assert sense.glosse("XX") == sense.glosse() 74 | del sense["glosses"]["en"] 75 | assert isinstance(sense.glosse("XX"), str) 76 | assert isinstance(sense.claims, dict) 77 | 78 | 79 | def test_form(repo): 80 | L2 = LexData.Lexeme(repo, "L2") 81 | forms = L2.forms 82 | assert isinstance(forms, list) 83 | for form in forms: 84 | assert isinstance(repr(form), str) 85 | assert isinstance(form.form, str) 86 | assert isinstance(form.claims, dict) 87 | 88 | 89 | def test_writes(repoTestWikidata): 90 | L123 = LexData.Lexeme(repoTestWikidata, "L123") 91 | 92 | L123.createClaims({"P7": ["Q100"]}) 93 | L123.addClaims({"P7": ["Q100"]}) 94 | 95 | L123.createForm("test", ["Q100"]) 96 | 97 | L123.createSense({"de": "testtest", "en": "testtest"}) 98 | L123.createSense({"de": "more tests", "en": "more tests"}, claims={}) 99 | L123.createSense({"en": "even more tests"}, claims={"P7": ["Q100"]}) 100 | 101 | 102 | def test_search(repo): 103 | results = LexData.search_lexemes(repo, "first", LexData.language.en, "Q1084") 104 | assert len(results) == 1 105 | assert results[0].get("id") == "L2" 106 | 107 | result = LexData.get_or_create_lexeme(repo, "first", LexData.language.en, "Q1084") 108 | assert result["id"] == "L2" 109 | 110 | 111 | def test_detatchedClaim(repo): 112 | LexData.Claim(propertyId="P369", value="Q1") 113 | LexData.Claim(propertyId="P856", value="http://example.com/") 114 | LexData.Claim(propertyId="P2534", value="\frac{1}{2}") 115 | quantity = LexData.Claim(propertyId="P2021", value=6) 116 | assert quantity.pure_value == 6 117 | date = LexData.Claim(propertyId="P580", value=datetime.now()) 118 | assert type(date.pure_value) is str 119 | with pytest.raises(TypeError): 120 | LexData.Claim(propertyId="P856", value=1) 121 | LexData.Claim(propertyId="P2021", value="foo") 122 | LexData.Claim(propertyId="P580", value=1) 123 | LexData.Claim(propertyId="P580", value="foo") 124 | with pytest.raises(Exception): 125 | LexData.Claim(propertyId="P0", value="foo") 126 | 127 | 128 | def test_createLexeme(repoTestWikidata): 129 | LexData.create_lexeme(repoTestWikidata, "foobar", LexData.language.en, "Q100") 130 | --------------------------------------------------------------------------------