├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── CONDUCT.md ├── CRAN-RELEASE ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS ├── R ├── WikidataR.R ├── data.R ├── disambiguators.R ├── geo.R ├── gets.R ├── prints.R ├── queries.R ├── schol.R ├── sysdata.rda ├── utils.R ├── writes.R ├── writes_wikibase.R └── zzz.R ├── README.md ├── WikidataR.Rproj ├── cran-comments.md ├── inst └── extdata │ └── WD.globalvar.RDS ├── man ├── WD.globalvar.Rd ├── WikidataR.Rd ├── as_pid.Rd ├── as_qid.Rd ├── as_quot.Rd ├── as_sid.Rd ├── check_input.Rd ├── createrows.Rd ├── createrows.tidy.Rd ├── disambiguate_QIDs.Rd ├── extract_claims.Rd ├── extract_para.Rd ├── filter_qids.Rd ├── find_item.Rd ├── get_example.Rd ├── get_geo_box.Rd ├── get_geo_entity.Rd ├── get_item.Rd ├── get_names_from_properties.Rd ├── get_random.Rd ├── identifier_from_identifier.Rd ├── initials.Rd ├── list_properties.Rd ├── print.find_item.Rd ├── print.find_property.Rd ├── print.wikidata.Rd ├── qid_from_DOI.Rd ├── qid_from_ORCID.Rd ├── qid_from_identifier.Rd ├── qid_from_name.Rd ├── query_wikidata.Rd ├── searcher.Rd ├── sparql_query.Rd ├── unspecial.Rd ├── url_to_id.Rd ├── wd_query.Rd ├── wd_rand_query.Rd ├── write_wikibase.Rd └── write_wikidata.Rd └── tests ├── testthat.R └── testthat ├── test_geo.R ├── test_gets.R └── test_search.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^CONDUCT\.md$ 4 | .travis.yml 5 | ^data-raw$ 6 | ^CRAN-RELEASE$ 7 | ^cran-comments\.md$ 8 | ^\.httr-oauth$ 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .DS_Store 4 | # Example code in package build process 5 | *-Ex.R 6 | # R data files from past sessions 7 | .Rdata 8 | # RStudio files 9 | .Rproj.user/ 10 | .Rproj.user 11 | inst/doc 12 | .httr-oauth 13 | 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Sample .travis.yml for R projects 2 | 3 | language: r 4 | warnings_are_errors: false 5 | sudo: required 6 | 7 | env: 8 | global: 9 | - CRAN: http://cran.rstudio.com 10 | 11 | r_packages: 12 | - testthat 13 | - WikipediR 14 | notifications: 15 | email: 16 | on_failure: change -------------------------------------------------------------------------------- /CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, we pledge to respect all people who 4 | contribute through reporting issues, posting feature requests, updating documentation, 5 | submitting pull requests or patches, and other activities. 6 | 7 | We are committed to making participation in this project a harassment-free experience for 8 | everyone, regardless of level of experience, gender, gender identity and expression, 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. 10 | 11 | Examples of unacceptable behavior by participants include the use of sexual language or 12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment, 13 | insults, or other unprofessional conduct. 14 | 15 | Project maintainers have the right and responsibility to remove, edit, or reject comments, 16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 18 | from the project team. 19 | 20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 21 | opening an issue or contacting one or more of the project maintainers. 
22 | 23 | This Code of Conduct is adapted from the Contributor Covenant 24 | (http://contributor-covenant.org), version 1.0.0, available at 25 | http://contributor-covenant.org/version/1/0/0/ 26 | -------------------------------------------------------------------------------- /CRAN-RELEASE: -------------------------------------------------------------------------------- 1 | This package was submitted to CRAN on 2021-11-16. 2 | Once it is accepted, delete this file and tag the release (commit 3222560). 3 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: WikidataR 2 | Type: Package 3 | Title: Read-Write API Client Library for Wikidata 4 | Version: 2.3.3 5 | Date: 2021-11-16 6 | Authors@R: c( 7 | person("Thomas", "Shafee", role = c("aut", "cre"), email = "T.Shafee@latrobe.edu.au", comment = c(ORCID = "0000-0002-2298-7593")), 8 | person("Os", "Keyes", role = "aut", comment = c(ORCID = "0000-0001-5196-609X")), 9 | person("Serena", "Signorelli", role = "aut"), 10 | person("Alex", "Lum", role = "ctb", comment = c(ORCID = "0000-0002-9295-9053")), 11 | person("Christian", "Graul", role = "ctb"), 12 | person("Mikhail", "Popov", role = "ctb", comment = c(ORCID = "0000-0003-0145-8948")) 13 | ) 14 | Description: Read from, interrogate, and write to Wikidata - 15 | the multilingual, interdisciplinary, semantic knowledgebase. Includes functions to: 16 | read from Wikidata (single items or properties); query Wikidata (retrieving 17 | all items that match a set of criteria via Wikidata SPARQL query service); write to 18 | Wikidata (adding new items or statements via QuickStatements); and handle and manipulate 19 | Wikidata objects (as lists and tibbles). Uses the Wikidata and QuickStatements APIs. 
20 | BugReports: https://github.com/TS404/WikidataR/issues 21 | URL: https://github.com/TS404/WikidataR 22 | License: MIT + file LICENSE 23 | Imports: 24 | httr, 25 | jsonlite, 26 | WikipediR, 27 | WikidataQueryServiceR, 28 | tibble, 29 | dplyr, 30 | stringr, 31 | Hmisc, 32 | progress, 33 | pbapply, 34 | stats, 35 | readr, 36 | crayon, 37 | utils 38 | Suggests: 39 | markdown, 40 | testthat, 41 | tidyverse, 42 | knitr, 43 | pageviews, 44 | spelling 45 | RoxygenNote: 7.2.3 46 | Encoding: UTF-8 47 | Depends: R (>= 3.5.0) 48 | Language: en-US 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License (https://opensource.org/license/MIT) 2 | 3 | Copyright 2014, Oliver Keyes 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
10 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,find_item) 4 | S3method(print,find_property) 5 | S3method(print,wikidata) 6 | export(as_pid) 7 | export(as_qid) 8 | export(as_quot) 9 | export(as_sid) 10 | export(check_input) 11 | export(createrows) 12 | export(createrows.tidy) 13 | export(disambiguate_QIDs) 14 | export(extract_claims) 15 | export(extract_para) 16 | export(filter_qids) 17 | export(find_item) 18 | export(find_property) 19 | export(get_example) 20 | export(get_geo_box) 21 | export(get_geo_entity) 22 | export(get_item) 23 | export(get_names_from_properties) 24 | export(get_property) 25 | export(get_random_item) 26 | export(get_random_property) 27 | export(identifier_from_identifier) 28 | export(initials) 29 | export(list_properties) 30 | export(qid_from_DOI) 31 | export(qid_from_ORCID) 32 | export(qid_from_identifier) 33 | export(qid_from_name) 34 | export(query_wikidata) 35 | export(searcher) 36 | export(sparql_query) 37 | export(unspecial) 38 | export(url_to_id) 39 | export(wd_query) 40 | export(wd_rand_query) 41 | export(write_wikibase) 42 | export(write_wikidata) 43 | import(WikidataQueryServiceR) 44 | import(crayon) 45 | import(dplyr) 46 | import(pbapply) 47 | import(progress) 48 | import(stringr) 49 | import(tibble) 50 | import(utils) 51 | importFrom(WikipediR,page_content) 52 | importFrom(WikipediR,query) 53 | importFrom(WikipediR,random_page) 54 | importFrom(dplyr,bind_cols) 55 | importFrom(httr,user_agent) 56 | importFrom(jsonlite,fromJSON) 57 | importFrom(readr,format_delim) 58 | importFrom(readr,format_tsv) 59 | importFrom(stats,cor) 60 | importFrom(stats,var) 61 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | 2.3.0 2 
| ================================================= 3 | * New disambiguate_QIDs() function for manual item-by-item disambiguation 4 | 5 | 2.2.1 6 | ================================================= 7 | * Outputs as tibbles by default with vector options where relevant 8 | 9 | 2.2.0 10 | ================================================= 11 | * Additional 'website' output format option for write_wikidata() to submit to Quickstatements website 12 | * Patches to make writing wikidata statements with dates more reliable 13 | 14 | 2.1.5 15 | ================================================= 16 | * Updated release on CRAN 17 | * Expected property values and constraints from 2.1.0 now just stored as a data file, since onAttach function caused issues 18 | 19 | 2.1.0 20 | ================================================= 21 | * Upon loading the package, it will check whether property values are expected to be strings, numbers, dates or QIDs 22 | 23 | 2.0.0 24 | ================================================= 25 | * Write_wikidata() allows you to write to wikidata via the 'quickstatements' format 26 | * Outputs as tibbles where relevant 27 | * Switch maintainer to Thomas Shafee 28 | 29 | 1.4.0 30 | ================================================= 31 | * First release on CRAN! 32 | * Extract_claims() allows you to, well, extract claims. 33 | * SPARQL syntax bug with some geo queries now fixed (thanks to Mikhail Popov) 34 | 35 | 1.3.0 36 | ================================================= 37 | * Vectorisation of get_*() functions 38 | 39 | 1.2.0 40 | ================================================= 41 | * Geographic data for entities that exist relative to other Wikidata items can now be retrieved 42 | with get_geo_entity and get_geo_box, courtesy of Serena Signorelli's excellent 43 | QueryWikidataR package. 44 | * A bug in printing returned objects is now fixed. 
45 | 46 | 1.1.0 47 | ================================================= 48 | * You can now retrieve multiple random properties or items with get_random_item and get_random_property 49 | 50 | 1.0.1 51 | ================================================= 52 | * Various documentation and metadata improvements. 53 | 54 | 1.0.0 55 | ================================================= 56 | * Fix a bug in get_* functions due to a parameter name mismatch 57 | * Print methods added by Christian Graul 58 | 59 | 0.5.0 60 | ================================================= 61 | * This is the initial release! See the explanatory vignettes. 62 | -------------------------------------------------------------------------------- /R/WikidataR.R: -------------------------------------------------------------------------------- 1 | #' @title API client library for Wikidata 2 | #' @description This package serves as an API client for reading and writing 3 | #' to and from \href{https://www.wikidata.org/wiki/Wikidata:Main_Page}{Wikidata}, (including 4 | #' via the \href{https://quickstatements.toolforge.org/}{QuickStatements} format), 5 | #' as well as for reading from \href{https://www.wikipedia.org}{Wikipedia}. 6 | #' @name WikidataR 7 | #' @docType package 8 | #' @seealso \code{\link{get_random}} for selecting a random item or property, 9 | #' \code{\link{get_item}} for a /specific/ item or property, or \code{\link{find_item}} 10 | #' for using search functionality to pull out item or property IDs where the descriptions 11 | #' or aliases match a particular search term. 
12 | #' @import WikidataQueryServiceR 13 | #' @import tibble 14 | #' @import dplyr 15 | #' @import stringr 16 | #' @import pbapply 17 | #' @import progress 18 | #' @import crayon 19 | #' @import utils 20 | #' @importFrom readr format_tsv 21 | #' @importFrom readr format_delim 22 | #' @importFrom dplyr bind_cols 23 | #' @importFrom stats var 24 | #' @importFrom stats cor 25 | #' @importFrom WikipediR page_content random_page query 26 | #' @importFrom httr user_agent 27 | #' @importFrom jsonlite fromJSON 28 | #' @aliases WikidataR WikidataR-package 29 | NULL -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' @name WD.globalvar 2 | #' 3 | #' @title Global variables for Wikidata properties 4 | #' 5 | #' @description A dataset of Wikidata global variables. 6 | #' 7 | #' @format A list of tibbles documenting key property constraints from Wikidata 8 | #' \describe{ 9 | #' \item{SID.valid}{valid reference source properties} 10 | #' \item{PID.datatype}{required data type for each property} 11 | #' \item{PID.constraint}{expected regex match for each property} 12 | #' \item{lang.abbrev}{language abbreviations} 13 | #' \item{lang.abbrev.wiki}{language abbreviations for current wikis} 14 | #' \item{abbrev.wiki}{Wikimedia abbreviations for current wikis} 15 | #' ... 16 | #' } 17 | 18 | utils::globalVariables(c("WD.globalvar")) -------------------------------------------------------------------------------- /R/disambiguators.R: -------------------------------------------------------------------------------- 1 | # -------- Disambiguator functions -------- 2 | # 3 | #'@title Disambiguate QIDs 4 | #'@description Interactive function that presents alternative possible QID matches for a list of text 5 | #'strings and provides options for choosing between alternatives, rejecting all presented alternatives, 6 | #'or creating new items. 
Useful in cases where a list of text strings may have either missing Wikidata items 7 | #'or multiple alternative potential matches that need to be manually disambiguated. Can also be used on 8 | #'lists of lists (see examples). For long lists of items, the process can be stopped partway through and 9 | #'the returned vector will indicate where the process was stopped. 10 | #'@param list a list or vector of text strings to find potential QID matches to. 11 | #' Can also be a list of lists (see examples) 12 | #'@param variablename type of items in the list that are being disambiguated (used in messages) 13 | #'@param variableinfo additional information about items that are being disambiguated (used in messages) 14 | #'@param filter_property property to filter on (e.g. "P31" to filter on "instance of") 15 | #'@param filter_variable values of that property to use to filter out (e.g. "Q571" to filter out books) 16 | #'@param filter_firsthit apply filter to the first match presented or only if alternatives requested? 
17 | #' (default = FALSE, note: true is slower if filter not needed on most matches) 18 | #'@param Q_min return only possible hits with QIDs above the provided value 19 | #'@param auto_create if no match found, automatically assign "CREATE" 20 | #'@param limit number of alternative possible Wikidata items to present if multiple potential matches 21 | #'@return a vector of: 22 | #' \describe{ 23 | #' \item{QID}{Selected QID (for when an appropriate Wikidata match exists)} 24 | #' \item{CREATE}{Mark that a new Wikidata item should be created (for when no appropriate Wikidata match yet exists)} 25 | #' \item{NA}{Mark that no Wikidata item is needed} 26 | #' \item{STOP}{Mark that the process was halted at this point (so that output can be used as input to the function later)} 27 | #' } 28 | #'@examples 29 | #'\dontrun{ 30 | #'#Disambiguating possible QID matches for these music genres 31 | #'#Results should be: 32 | #'# "Q22731" as the first match 33 | #'# "Q147538" as the first match 34 | #'# "Q3947" as the second alternative match 35 | #'disambiguate_QIDs(list=c("Rock","Pop","House"), 36 | #' variablename="music genre") 37 | #' 38 | #'#Disambiguating possible QID matches for these three words, but not the music genres 39 | #'#This will take longer as the filtering step is slower 40 | #'#Results should be: 41 | #'# "Q22731" (the material) as the first match 42 | #'# "Q147538" (the soft drink) as the second alternative match 43 | #'# "Q3947" (the building) as the first match 44 | #'disambiguate_QIDs(list=c("Rock","Pop","House"), 45 | #' filter_property="instance of", 46 | #' filter_variable="music genre", 47 | #' filter_firsthit=TRUE, 48 | #' variablename="concept, not the music genre") 49 | #' 50 | #'#Disambiguating possible QID matches for the multiple expertise of 51 | #'#these three people as list of lists 52 | #'disambiguate_QIDs(list=list(alice=list("physics","chemistry","maths"), 53 | #' barry=list("history"), 54 | #' clair=list("law","genetics","ethics")), 55 | 
#'                  variablename="expertise")
#'}
#'@export
disambiguate_QIDs <- function(list,
                              variablename="variables",
                              variableinfo=NULL,
                              filter_property=NULL,
                              filter_variable=NULL,
                              filter_firsthit=FALSE,
                              Q_min=NULL,
                              auto_create=FALSE,
                              limit=10){
  # make sure the input is formatted as a list (e.g. if a vector was supplied)
  if(!all(class(list)=="list")){list <- as.list(list)}
  # nothing to disambiguate: return early instead of looping over 1:0
  if(length(list)==0){return(list)}
  if(!is.null(filter_property)){filter_property <- as_pid(filter_property)[[1]][1]}
  if(!is.null(filter_variable)){filter_variable <- as_qid(filter_variable)[[1]][1]}
  # strip the "Q" prefix once here rather than on every iteration
  if(!is.null(Q_min)){Q_min <- as.numeric(gsub("Q","",Q_min))}

  # is the list the output from a previous half-done run?
  # (a previous run marks the place where it was halted with "STOP")
  is_stop  <- function(x) isTRUE(unname(x[1]=="STOP"))
  has_stop <- vapply(list,
                     function(x) any(vapply(as.list(x),is_stop,logical(1))),
                     logical(1))
  if(any(has_stop)){
    # restart at the first "STOP" marker: first the item, then the sub-item inside it
    item_to_start_from    <- unname(which(has_stop)[1])
    subitem_to_start_from <- unname(which(vapply(as.list(list[[item_to_start_from]]),
                                                 is_stop,logical(1)))[1])
    output <- list
  }else{
    item_to_start_from    <- 1
    subitem_to_start_from <- 1
    output <- blank_output_list(list)
  }

  # progress bar over everything that is still blank or marked "STOP"
  pb_main <- progress_bar$new(total = sum(unlist(output,recursive = TRUE)==""|unlist(output,recursive = TRUE)=="STOP"),
                              format = ":bar :percent eta::eta \n",
                              current = "|",
                              width = 90,
                              show_after = 0)

  stopped <- FALSE
  for(item in item_to_start_from:length(list)){
    for(subitem in seq_along(list[[item]])){
      # when resuming, skip the sub-items already handled in the previous run
      if(subitem < subitem_to_start_from){next}

      # check item to search
      tosearch <- list[[item]][subitem]
      if(is.na(tosearch)){next} #skip NAs (only this entry, not the rest of the item)
      if(tosearch=="STOP"||tosearch==""){
        # entries not done in a previous run are stored as "STOP" or "";
        # the text to search for is kept in their name
        tosearch <- names(tosearch)
        # skip nulls or empty items with no name (usually errors)
        if(is.null(tosearch) || is.na(tosearch) || tosearch==""){next}
      }
      if(grepl("^[Qq][0-9]+$",tosearch)){next} #skip if already a QID
      list[[item]][subitem] <- tosearch #if no skips, place that text back in the list

      # announce choice to be made
      message_header(list,item,subitem,variablename,variableinfo)
      pb_main$tick()

      # execute search and record choice
      Q_min_hit <- NULL
      if(!is.null(Q_min)){
        Q_min_hit <- sapply(find_item(list[[item]][subitem],limit = limit),"[[",1)
        Q_min_hit <- Q_min_hit[as.numeric(gsub("Q","",Q_min_hit))>Q_min]
      }
      if(length(Q_min_hit)==1){
        # exactly one hit above the Q_min threshold: accept it without asking
        choice <- Q_min_hit
      }else{
        if(filter_firsthit){
          first_hit_qid <- firsthit(list[[item]][subitem],filter_property,filter_variable)
        }else{
          first_hit_qid <- firsthit(list[[item]][subitem])
        }
        if(is.na(first_hit_qid) && auto_create){
          choice <- "CREATE"
          # name the marker by the search text, like the interactive "c" answer does
          names(choice) <- names(first_hit_qid)
        }else{
          choice <- makechoice(qid = first_hit_qid,
                               text= names(first_hit_qid),
                               filter_property=filter_property,
                               filter_variable=filter_variable,
                               limit=limit)
        }
      }
      output[[item]][[subitem]] <- choice
      # an unnamed choice would make the names assignment fail, so only rename when a name exists
      if(!is.null(names(choice))){
        names(output[[item]])[[subitem]] <- names(choice)
      }

      # check if stop request made
      if(isTRUE(unname(output[[item]][[subitem]]=="STOP"))){
        done_so_far <- item
        message_stop(done_so_far,total = length(list))
        stopped <- TRUE
        break
      }
    }
    subitem_to_start_from <- 1 # reset the subitem to start from if completed a full item
    if(stopped){break}
  }
  return(output)
}

# When provided with a QID, interactively make a decision on whether the output
# should be that qid or some other value.
#   qid             - QID currently proposed (its name is used as `text` when text is NULL)
#   text            - the text that is being disambiguated (becomes the name of the output)
#   table           - optional table of alternatives (columns qid, label) to pick rows from
#   filter_property, filter_variable, limit - handed on to choices_alt()
# Returns a length-1 value (a QID, "CREATE", "STOP" or NA), normally named.
makechoice <- function(qid=NULL,
                       text=NULL,
                       table=NULL,
                       filter_property=NULL,
                       filter_variable=NULL,
                       limit=10){
  if(is.null(text)){
    text <- names(qid)
  }

  # prefix the name with the original text when the answer came from a different term
  relabel <- function(x){
    nm <- names(x)
    if(!is.null(nm) && !is.null(text) && !isTRUE(unname(nm==text))){
      names(x) <- paste0(text," -> ",nm)
    }
    x
  }

  # read the user's answer
  suppressWarnings(invisible(selection <- readline()))

  if(selection=="s"||selection=="stop"){                    #s = stop
    output <- "STOP"
    names(output) <- text

  }else if(selection=="y"||selection=="yes"){               #y = accept
    output <- qid
    names(output) <- text

  }else if(selection=="n"||selection=="no"||selection==""){ #n = reject
    output <- NA
    names(output) <- text

  }else if(selection=="c"||selection=="create"){            #c = create
    output <- "CREATE"
    names(output) <- text

  }else if(selection=="?"){                                 #? = look up in browser
    browseURL(paste0("https://www.wikidata.org/wiki/",qid))
    output <- makechoice(qid,text,table,filter_property,filter_variable,limit)

  }else if(grepl("^[Qq][0-9]+$",selection)){                #Q123 = id
    output <- selection
    names(output) <- paste0("-> ",selection)

  }else if(grepl("^[Qq][0-9]+\\?$",selection)){             #Q123? = look up that id
    # the "?" has to be escaped: unescaped it only makes the "+" lazy and this
    # branch could never be reached
    browseURL(paste0("https://www.wikidata.org/wiki/",
                     gsub("\\?","",selection)))
    output <- makechoice(qid,text,table,filter_property,filter_variable,limit)

  }else if(grepl("^[0-9]+$",selection) && !is.null(table)){ #number = select row
    row <- as.numeric(selection)
    if(row < 1 || row > length(table$qid)){
      # an invalid row would otherwise return an empty/NA choice; ask again
      message("Please choose a row number between 1 and ",length(table$qid))
      output <- makechoice(qid,text,table,filter_property,filter_variable,limit)
    }else{
      # table$qid is named by label (see choices_alt), so the name comes along
      output <- table$qid[row]
    }

  }else if(grepl("^[0-9]+\\?$",selection) && !is.null(table)){ #number? = look up row in browser
    browseURL(paste0("https://www.wikidata.org/wiki/",
                     table$qid[as.numeric(gsub("\\?","",selection))]))
    output <- makechoice(qid,text,table,filter_property,filter_variable,limit)

  }else if((selection=="a"||selection=="alt") && !is.null(text)){ #a = alternative
    table  <- choices_alt(text,filter_property,filter_variable,limit)
    output <- relabel(makechoice(qid,text,table,filter_property,filter_variable,limit))

  }else{                                                    #freetext = freetext to search
    table  <- choices_alt(selection,filter_property,filter_variable,limit)
    output <- relabel(makechoice(qid,selection,table,filter_property,filter_variable,limit))
  }

  return(output)
}

# -------- Messages --------

# Clear the console and show the standard header for the entry being disambiguated:
# the name of item i and all of its values, with value j highlighted
message_header <- function(list,
                           i,
                           j,
                           variablename=NULL,
                           variableinfo=NULL){
  list <- as.list(list)
  name <- bold$cyan(names(list)[[i]])
  variables    <- list[[i]]
  variables[j] <- bold$white$underline(variables[j])
  variables    <- paste(variables,collapse = " | ")

  if(!is.null(variablename)){
    variablename <- paste0("the ",variablename," of ")
    # one variablename per item may be supplied; pick the one for this item
    if(length(variablename)>1){
      variablename <- variablename[i]
    }
  }

  if(!is.null(variableinfo)){
    variableinfo <- paste0(variableinfo,"\n")
    # one variableinfo per item may be supplied; pick the one for this item
    if(length(variableinfo)>1){
      variableinfo <- variableinfo[i]
    }
  }

  # "\014" is the form-feed character, used here to clear the console
  message("\014",
          "--------------------------------------------------------------------------- \n",
          "Let's disambiguate ",variablename,
          name, ": \n",
          variableinfo,
          variables)
}

# Options shown when a candidate match is presented
message_choices <- function(){
  message(bold(" y "),"-> accept the presented match \n",
          bold(" n "),"-> reject the presented match and move on to the next \n",
          bold(" a "),"-> request alternative possible matches \n",
          bold(" Q123 "),"-> use this as the wikidata QID \n",
          bold(" text "),"-> try this text as alternative search term \n",
          bold(" c "),"-> create a new item for this later \n",
          bold(" s "),"-> stop here, save those done so far and come back later \n",
          bold(" ? "),"-> check the presented match in your browser")
}

# Options shown when no candidate match was found
message_choices_na <- function(){
  message(bold(" y/n "),"-> leave as 'NA' \n",
          bold(" Q123 "),"-> use this as the wikidata QID \n",
          bold(" text "),"-> try this text as alternative search term \n",
          bold(" c "),"-> create a new item for this later \n",
          bold(" s "),"-> stop here, save those done so far and come back later")
}

# Print a table of alternative matches and the options for choosing among them
message_choices_alt <- function(table){
  message("Are any of these appropriate?")
  print(data.frame(table),right=FALSE)
  message(bold(" number "),"-> select one of the matches presented (include ",bold("?")," to check an item in your browser) \n",
          bold(" Q123 "),"-> use this as the wikidata QID \n",
          bold(" text "),"-> try this text as alternative search term \n",
          bold(" c "),"-> create a new item for this later \n",
          bold(" s "),"-> stop here, save those done so far and come back later")
}

# Report how many items were completed / remain when the user stops.
# done_so_far is the index of the item at which the stop was requested.
message_stop <- function(done_so_far,total){
  message("Stopping. You've completed ",
          bold$white(done_so_far - 1),
          " so far (",
          bold$white(total - done_so_far + 1),
          " remaining). \n",
          "To restart from where you left off, use the output from this function as the list for disambiguate_QIDs()")
}


# -------- Misc. support --------

# Pull and format the first hit from wikidata and present the appropriate
# choice text options in prep for makechoice().
# If both filter_property and filter_variable are given, the first `limit` hits are
# passed through filter_qids() and the first remaining one is used.
# Returns the QID (or NA when nothing suitable was found), named by the search text.
firsthit <- function(text,
                     filter_property=NULL,
                     filter_variable=NULL,
                     limit=30){
  if(!is.null(filter_property) && !is.null(filter_variable)){
    filtered_items <- filter_qids(ids=sapply(find_item(text,limit = limit),"[[",1),
                                  property=filter_property,
                                  filter=filter_variable,
                                  message="Checking for item that doesn't match the filter ")
    if(!is.na(filtered_items$qid[1])){
      qid   <- filtered_items$qid[1]
      label <- filtered_items$label[1]
      desc  <- filtered_items$desc[1]
      message(white(qid," ",label," ",desc,sep=""))
      message_choices()
    }else{
      qid <- NA
      message(white("No good match found that matches filters"))
      message_choices_na()
    }
  }else{
    item <- find_item(text,limit = 1)
    if(length(item)>0){
      # fall back to placeholder text when the hit has no description / label
      desc  <- if(is.null(item[[1]]$description)) "no description" else item[[1]]$description
      label <- if(is.null(item[[1]]$label))       "no label"       else item[[1]]$label
      qid   <- item[[1]]$id
      message(white(qid," ",label," ",desc,sep=""))
      message_choices()
    }else{
      qid <- NA
      message(white("No good match found"))
      message_choices_na()
    }
  }
  names(qid) <- text
  return(qid)
}

# Build the empty output structure for a fresh run: same shape as `list`, every
# entry named by its original text, values blanked to "" (NA entries stay NA)
blank_output_list <- function(list){
  make_attr_names <- function(x){
    x1 <- list[[x]]
    attr(x1, 'names') <- x1
    x1
  }
  if(all(is.null(names(list)))){
    output <- list
    names(output) <- list
  }else{
    output <- lapply(names(list), make_attr_names)
    names(output) <- names(list)
  }
  output <- rapply(output,function(x) ifelse(is.na(x),NA,""),how = 'replace')
  return(output)
}


# Counterpart of blank_output_list() for a list that carries the original text
# in the names of each element: take those names and blank them in the same way.
# NOTE(review): the original body applied rapply() to an undefined `output` and never
# used `listnames`; using `listnames` (named like the input) is the presumed
# intent - confirm before relying on this helper (nothing visible here calls it).
restarted_output_list <- function(list){
  make_attr_names_rev <- function(x){
    x1 <- list[[x]]
    x1 <- attr(x1, 'names')
    x1
  }
  listnames <- lapply(names(list), make_attr_names_rev)
  names(listnames) <- names(list)
  output <- rapply(listnames,function(x) ifelse(is.na(x),NA,""),how = 'replace')
  return(output)
}

# Search for `selection` as an alternative term, show the (filtered) matches and
# the options for choosing among them.
# Returns the table of matches (qid named by label), or NULL if there are none.
choices_alt <- function(selection,filter_property,filter_variable,limit){
  altqids <- unlist(lapply(find_item(selection,limit=limit),function(x) x$id))
  message("Searching for ",bold$white(selection)," as an alternative term")
  if(is.null(altqids)){
    results <- tibble(qid=NA,
                      label=NA,
                      desc="No current matching Wikidata item")
  }else{
    results <- filter_qids(ids = altqids,
                           property = filter_property,
                           filter = filter_variable)
  }
  if(all(is.na(results$qid))){
    message(white("No good match found"))
    message_choices_na()
    return(NULL)
  }else{
    message_choices_alt(results)
    names(results$qid) <- results$label
    return(results)
  }
}

--------------------------------------------------------------------------------
/R/geo.R:
--------------------------------------------------------------------------------
#'@title Retrieve geographic information from Wikidata
#'@description \code{get_geo_entity} retrieves the item ID, latitude
#'and longitude of any object with geographic data associated with \emph{another}
#'object with geographic data (example: all the locations around/near/associated with
#'a city).
#'
#'@param entity a Wikidata item (\code{Q...}) or series of items, to check
#'for associated geo-tagged items.
9 | #' 10 | #'@param language the two-letter language code to use for the name 11 | #'of the item. "en" by default, because we're imperialist 12 | #'anglocentric westerners. 13 | #' 14 | #'@param radius optionally, a radius (in kilometers) around \code{entity} 15 | #'to restrict the search to. 16 | #' 17 | #'@param limit the maximum number of results to return. 18 | #' 19 | #'@param \\dots further arguments to pass to de{httr:ink[httr::GET]{GET}}. 20 | #' 21 | #'@return a data.frame of 5 columns: 22 | #'\itemize{ 23 | #' \item{item}{ the Wikidata identifier of each object associated with 24 | #' \code{entity}.} 25 | #' \item{name}{ the name of the item, if available, in the requested language. If it 26 | #' is not available, \code{NA} will be returned instead.} 27 | #' \item{latitude}{ the latitude of \code{item}} 28 | #' \item{longitude}{ the longitude of \code{item}} 29 | #' \item{entity}{ the entity the item is associated with (necessary for multi-entity 30 | #' queries).} 31 | #'} 32 | #' 33 | #'@examples 34 | #'# All entities 35 | #'\donttest{sf_locations <- get_geo_entity("Q62")} 36 | #' 37 | #'# Entities with French, rather than English, names 38 | #'\donttest{sf_locations <- get_geo_entity("Q62", language = "fr")} 39 | #' 40 | #'# Entities within 1km 41 | #'\donttest{sf_close_locations <- get_geo_entity("Q62", radius = 1)} 42 | #' 43 | #'# Multiple entities 44 | #'\donttest{multi_entity <- get_geo_entity(entity = c("Q62", "Q64"))} 45 | #' 46 | #'@seealso \code{\link{get_geo_box}} for using a bounding box 47 | #'rather than an unrestricted search or simple radius. 48 | #' 49 | #'@export 50 | get_geo_entity <- function(entity, language = "en", radius = NULL, limit=100, ...){ 51 | 52 | entity <- check_input(entity, "Q") 53 | 54 | if(is.null(radius)){ 55 | query <- paste0("SELECT DISTINCT ?item ?name ?coord ?propertyLabel WHERE { 56 | ?item wdt:P131* wd:", entity, ". ?item wdt:P625 ?coord . 
57 | SERVICE wikibase:label { 58 | bd:serviceParam wikibase:language \"", language, "\" . 59 | ?item rdfs:label ?name 60 | } 61 | } 62 | ORDER BY ASC (?name) 63 | LIMIT ", limit) 64 | } else { 65 | query <- paste0("SELECT ?item ?name ?coord 66 | WHERE { 67 | wd:", entity, " wdt:P625 ?mainLoc . 68 | SERVICE wikibase:around { 69 | ?item wdt:P625 ?coord . 70 | bd:serviceParam wikibase:center ?mainLoc . 71 | bd:serviceParam wikibase:radius \"", radius, 72 | "\" . 73 | } 74 | SERVICE wikibase:label { 75 | bd:serviceParam wikibase:language \"", language, "\" . 76 | ?item rdfs:label ?name 77 | } 78 | } ORDER BY ASC (?name) 79 | LIMIT ",limit) 80 | } 81 | 82 | if(length(query) > 1){ # several entities were supplied: paste0 produced one query per entity 83 | return(do.call("rbind", mapply(function(query, entity, ...){ 84 | output <- clean_geo(sparql_query(query, ...)$results$bindings) 85 | output$entity <- entity 86 | return(output) 87 | }, query = query, entity = entity, SIMPLIFY = FALSE, MoreArgs = list(...)))) # MoreArgs hands the extra httr arguments to every call unchanged instead of iterating over them 88 | } 89 | output <- clean_geo(sparql_query(query, ...)$results$bindings) # forward the extra httr arguments on the single-entity path as well 90 | if(length(output)==0){warning("Query timeout. Possibly try again with lower 'limit='")} 91 | output$entity <- entity 92 | return(output) 93 | } 94 | 95 | #'@title Get geographic entities based on a bounding box 96 | #'@description \code{get_geo_box} retrieves all geographic entities in 97 | #'Wikidata that fall between a bounding box between two existing items 98 | #'with geographic attributes (usually cities). 99 | #' 100 | #'@param first_city_code a Wikidata item, or series of items, to use for 101 | #'one corner of the bounding box. 102 | #' 103 | #'@param first_corner the direction of \code{first_city_code} relative 104 | #'to \code{city} (eg "NorthWest", "SouthEast"). 105 | #' 106 | #'@param second_city_code a Wikidata item, or series of items, to use for 107 | #'one corner of the bounding box. 108 | #' 109 | #'@param second_corner the direction of \code{second_city_code} relative 110 | #'to \code{city} (eg "NorthWest", "SouthEast"). 
111 | #' 112 | #'@param language the two-letter language code to use for the name 113 | #'of the item. "en" by default. 114 | #' 115 | #'@param \\dots further arguments to pass to \code{\link[httr]{GET}}. 116 | #' 117 | #'@return a data.frame of 5 columns: 118 | #'\itemize{ 119 | #' \item{item}{ the Wikidata identifier of each object associated with 120 | #' \code{entity}.} 121 | #' \item{name}{ the name of the item, if available, in the requested language. If it 122 | #' is not available, \code{NA} will be returned instead.} 123 | #' \item{latitude}{ the latitude of \code{item}} 124 | #' \item{longitude}{ the longitude of \code{item}} 125 | #' \item{entity}{ the entity the item is associated with (necessary for multi-entity 126 | #' queries).} 127 | #'} 128 | #' 129 | #'@examples 130 | #'# Simple bounding box 131 | #'\donttest{bruges_box <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest")} 132 | #' 133 | #'# Custom language 134 | #'\donttest{bruges_box_fr <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest", 135 | #' language = "fr")} 136 | #' 137 | #'@seealso \code{\link{get_geo_entity}} for using an unrestricted search or simple radius, 138 | #'rather than a bounding box. 139 | #' 140 | #'@export 141 | get_geo_box <- function(first_city_code, first_corner, second_city_code, second_corner, 142 | language = "en", ...){ 143 | 144 | # Input checks 145 | first_city_code <- check_input(first_city_code, "Q") 146 | second_city_code <- check_input(second_city_code, "Q") 147 | 148 | # Construct query 149 | query <- paste0("SELECT ?item ?name ?coord WHERE { 150 | wd:", first_city_code, " wdt:P625 ?Firstloc . 151 | wd:", second_city_code, " wdt:P625 ?Secondloc . 152 | SERVICE wikibase:box { 153 | ?item wdt:P625 ?coord . 154 | bd:serviceParam wikibase:corner", first_corner, " ?Firstloc . 155 | bd:serviceParam wikibase:corner", second_corner, " ?Secondloc . 
156 | } 157 | SERVICE wikibase:label { 158 | bd:serviceParam wikibase:language \"", language, "\" . 159 | ?item rdfs:label ?name 160 | } 161 | }ORDER BY ASC (?name)") 162 | 163 | # Vectorise if necessary, or not if not! 164 | if(length(query) > 1){ 165 | return(do.call("rbind", mapply(function(query, ...){ 166 | output <- clean_geo(sparql_query(query, ...)$results$bindings) 167 | return(output) 168 | }, query = query, SIMPLIFY = FALSE, MoreArgs = list(...)))) # MoreArgs hands the extra httr arguments to every call unchanged instead of iterating over them 169 | } 170 | output <- clean_geo(sparql_query(query, ...)$results$bindings) # forward the extra httr arguments on the single-query path as well 171 | return(output) 172 | } 173 | 174 | 175 | # Cleanup function: turns the SPARQL result bindings into one data.frame row per item (id, name, coordinates) 176 | clean_geo <- function(results){ 177 | do.call("rbind", lapply(results, function(item){ 178 | point <- unlist(strsplit(gsub(x = item$coord$value, pattern = "(Point\\(|\\))", replacement = ""), 179 | " ")) # strip the Point( ) wrapper and split the two numbers; WKT order is longitude first, then latitude 180 | wd_id <- gsub(x = item$item$value, pattern = "http://www.wikidata.org/entity/", 181 | replacement = "", fixed = TRUE) 182 | return(data.frame(item = wd_id, 183 | name = ifelse(item$name$value == wd_id, NA, item$name$value), 184 | latitutde = as.numeric(point[2]), # latitude is the SECOND WKT number; the misspelt column name is kept so existing callers keep working 185 | longitude = as.numeric(point[1]), # longitude is the FIRST WKT number 186 | stringsAsFactors = FALSE)) 187 | 188 | })) 189 | } 190 | -------------------------------------------------------------------------------- /R/gets.R: -------------------------------------------------------------------------------- 1 | # -------- Gets -------- 2 | 3 | #'@title Retrieve specific Wikidata items or properties 4 | #'@description \code{get_item} and \code{get_property} allow you to retrieve the data associated 5 | #'with individual Wikidata items and properties, respectively. As with 6 | #'other \code{WikidataR} code, custom print methods are available; use \code{\link{str}} 7 | #'to manipulate and see the underlying structure of the data. 8 | #' 9 | #'@param id the ID number(s) of the item or property you're looking for. 
This can be in 10 | #'various formats; either a numeric value ("200"), the full name ("Q200") or 11 | #'even with an included namespace ("Property:P10") - the function will format 12 | #'it appropriately. This function is vectorized and will happily accept 13 | #'multiple IDs. 14 | #' 15 | #'@param \\dots further arguments to pass to \code{\link[httr]{GET}}. 16 | #' 17 | #'@seealso \code{\link{get_random}} for selecting a random item or property, 18 | #'or \code{\link{find_item}} for using search functionality to pull out 19 | #'item or property IDs where the descriptions or aliases match a particular 20 | #'search term. 21 | #' 22 | #'@examples 23 | #' 24 | #'#Retrieve a specific item 25 | #'adams_metadata <- get_item("42") 26 | #' 27 | #'#Retrieve a specific property 28 | #'object_is_child <- get_property("P40") 29 | #' 30 | #'@aliases get_item get_property 31 | #'@rdname get_item 32 | #'@export 33 | get_item <- function(id, ...){ 34 | id <- check_input(id, "Q") 35 | output <- (lapply(id, wd_query, ...)) 36 | class(output) <- "wikidata" 37 | return(output) 38 | } 39 | 40 | #'@rdname get_item 41 | #'@export 42 | get_property <- function(id, ...){ 43 | has_grep <- grepl("^P(?!r)",id, perl = TRUE) 44 | id[has_grep] <- paste0("Property:", id[has_grep]) 45 | id <- check_input(id, "Property:P") 46 | 47 | output <- (lapply(id, wd_query, ...)) 48 | class(output) <- "wikidata" 49 | return(output) 50 | } 51 | 52 | #'@title Retrieve randomly-selected Wikidata items or properties 53 | #'@description \code{get_random_item} and \code{get_random_property} allow you to retrieve the data 54 | #'associated with randomly-selected Wikidata items and properties, respectively. As with 55 | #'other \code{WikidataR} code, custom print methods are available; use \code{\link{str}} 56 | #'to manipulate and see the underlying structure of the data. 57 | #' 58 | #'@param limit how many random items to return. 1 by default, but can be higher. 
59 | #' 60 | #'@param \\dots arguments to pass to \code{\link[httr]{GET}}. 61 | #' 62 | #'@seealso \code{\link{get_item}} for selecting a specific item or property, 63 | #'or \code{\link{find_item}} for using search functionality to pull out 64 | #'item or property IDs where the descriptions or aliases match a particular 65 | #'search term. 66 | #' 67 | #'@examples 68 | #'\dontrun{ 69 | #'#Random item 70 | #'random_item <- get_random_item() 71 | #' 72 | #'#Random property 73 | #'random_property <- get_random_property() 74 | #'} 75 | #'@aliases get_random get_random_item get_random_property 76 | #'@rdname get_random 77 | #'@export 78 | get_random_item <- function(limit = 1, ...){ 79 | return(wd_rand_query(ns = 0, limit = limit, ...)) 80 | } 81 | 82 | #'@rdname get_random 83 | #'@export 84 | get_random_property <- function(limit = 1, ...){ 85 | return(wd_rand_query(ns = 120, limit = limit, ...)) 86 | } 87 | 88 | 89 | #' @title Get an example SPARQL query from Wikidata 90 | #' @description Gets the specified example(s) from 91 | #' [SPARQL query service examples page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) 92 | #' using [Wikidata's MediaWiki API](https://www.wikidata.org/w/api.php). 93 | #' @details If you are planning on extracting multiple examples, please provide 94 | #' all the names as a single vector for efficiency. 95 | #' @param example_name the names of the examples as they appear on 96 | #' [this page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) 97 | #' @return The SPARQL query as a character vector. 
98 | #' @examples 99 | #' \dontrun{ 100 | #' sparql_query <- get_example(c("Cats", "Horses")) 101 | #' query_wikidata(sparql_query) 102 | #' # returns a named list with two data frames 103 | #' # one called "Cats" and one called "Horses" 104 | #' sparql_query <- get_example("Largest cities with female mayor") 105 | #' cat(sparql_query) 106 | #' query_wikidata(sparql_query) 107 | #' } 108 | #' @seealso [query_wikidata] 109 | #' @export 110 | get_example <- function(example_name){ 111 | content <- WikipediR::page_content( 112 | domain = "www.wikidata.org", 113 | page_name = "Wikidata:SPARQL query service/queries/examples", 114 | as_wikitext = TRUE 115 | ) 116 | wiki <- strsplit(content$parse$wikitext$`*`, "\n")[[1]] 117 | wiki <- wiki[wiki != ""] 118 | return(vapply(example_name, function(example_name){ 119 | heading_line <- which(grepl(paste0("^===\\s?", example_name, "\\s?===$"), wiki, fixed = FALSE)) 120 | start_line <- which(grepl("{{SPARQL", wiki[(heading_line + 1):length(wiki)], fixed = TRUE))[1] 121 | end_line <- which(grepl("}}", wiki[(heading_line + start_line + 1):length(wiki)], fixed = TRUE))[1] 122 | query <- paste0(wiki[(heading_line + start_line):(heading_line + start_line + end_line - 1)], collapse = "\n") 123 | return(sub("^\\s*\\{\\{SPARQL2?\\n?\\|query\\=", "", query)) 124 | }, "")) 125 | } 126 | 127 | 128 | # -------- Finds -------- 129 | 130 | #'@title Search for Wikidata items or properties that match a search term 131 | #'@description \code{find_item} and \code{find_property} allow you to retrieve a set 132 | #'of Wikidata items or properties where the aliases or descriptions match a particular 133 | #'search term. As with other \code{WikidataR} code, custom print methods are available; 134 | #'use \code{\link{str}} to manipulate and see the underlying structure of the data. 135 | #' 136 | #'@param search_term A term to search for. 
137 | #' 138 | #'@param language The language to conduct the search in; this should 139 | #'consist of an ISO language code. Defaults to \code{"en"}. 140 | #'@param response_language The language to return the labels and descriptions in; an ISO language code. Defaults to \code{"en"}. 141 | #'@param limit The number of results to return; set to \code{10} by default. 142 | #' 143 | #'@param \\dots further arguments passed on to \code{searcher} (and from there to \code{WikipediR::query}). 144 | #' 145 | #'@seealso \code{\link{get_random}} for selecting a random item or property, 146 | #'or \code{\link{get_item}} for selecting a specific item or property. 147 | #' 148 | #'@examples 149 | #' 150 | #'#Check for entries relating to Douglas Adams in some way 151 | #'adams_items <- find_item("Douglas Adams") 152 | #' 153 | #'#Check for properties involving the peerage 154 | #'peerage_props <- find_property("peerage") 155 | #' 156 | #'@aliases find_item find_property 157 | #'@return A list containing the result of the query. 158 | #'@rdname find_item 159 | #'@export 160 | find_item <- function(search_term, 161 | language = "en", 162 | limit = 10, 163 | response_language = "en", 164 | ...){ 165 | res <- searcher(search_term, language, limit, response_language, "item", ...) # forward the extra arguments instead of silently dropping them 166 | class(res) <- "find_item" 167 | return(res) 168 | } 169 | 170 | #'@rdname find_item 171 | #'@export 172 | find_property <- function(search_term, 173 | language = "en", 174 | response_language = "en", 175 | limit = 10, ...){ 176 | res <- searcher(search_term, language, limit, response_language, "property", ...) # same pass-through as find_item 177 | class(res) <- "find_property" 178 | return(res) 179 | } 180 | 181 | #Generic, direct access to Wikidata's search functionality. 182 | #'@title Search Wikidata for entities matching a term 183 | #'@description Generic, direct access to Wikidata's \code{wbsearchentities} search. Used by \code{find_item} and \code{find_property}. 184 | #'@param search_term a term to search for. 185 | #'@param language the language to conduct the search in; this should 186 | #'consist of an ISO language code. Set to "en" by default. 
187 | #'@param response_language the language to return the labels and descriptions in; this should 188 | #'consist of an ISO language code. 189 | #'@param limit the number of results to return. 190 | #'@param type type of wikidata object to search for ("item" or "property") 191 | #'@param \\dots Additional parameters to supply to \code{WikipediR::query} 192 | #'@return A list with one element per matching entity, taken from the 193 | #'\code{search} field of the \code{wbsearchentities} API response. 194 | #'Used by \code{find_item} and \code{find_property}. 195 | #'@examples 196 | #'# entities whose label or alias matches the search term 197 | #'\dontrun{ 198 | #'searcher("Douglas Adams", language = "en", limit = 10, 199 | #' response_language = "en", type = "item") 200 | #'searcher("peerage", "en", 10, "en", "property") 201 | #'} 202 | #'@export 203 | searcher <- function(search_term, language, limit, response_language, type, ...){ 204 | result <- WikipediR::query(url = "https://www.wikidata.org/w/api.php", out_class = "list", clean_response = FALSE, 205 | query_param = list( 206 | action = "wbsearchentities", 207 | type = type, 208 | language = language, 209 | uselang = response_language, 210 | limit = limit, 211 | search = search_term 212 | ), 213 | ...) 214 | result <- result$search 215 | return(result) 216 | } 217 | -------------------------------------------------------------------------------- /R/prints.R: -------------------------------------------------------------------------------- 1 | #'@title Print method for find_item 2 | #' 3 | #'@description print found items. 4 | #' 5 | #'@param x find_item object with search results 6 | #'@param \dots Arguments to be passed to methods 7 | #' 8 | #'@method print find_item 9 | #'@export 10 | print.find_item <- function(x, ...) 
{ 11 | cat("\n\tWikidata item search\n\n") 12 | 13 | # number of results 14 | num_results <- length(x) 15 | cat("Number of results:\t", num_results, "\n\n") 16 | 17 | # results 18 | if(num_results > 0) { 19 | cat("Results:\n") 20 | for(i in 1:num_results) { 21 | if(is.null(x[[i]]$description)){ 22 | desc <- "\n" 23 | } 24 | else { 25 | desc <- paste("-", x[[i]]$description, "\n") 26 | } 27 | cat(i, "\t", x[[i]]$label, paste0("(", x[[i]]$id, ")"), desc) 28 | } 29 | } 30 | } 31 | 32 | #'@title Print method for find_property 33 | #' 34 | #'@description print found properties. 35 | #' 36 | #'@param x find_property object with search results 37 | #'@param \dots Arguments to be passed to methods 38 | #' 39 | #'@method print find_property 40 | #'@export 41 | print.find_property <- function(x, ...) { 42 | cat("\n\tWikidata property search\n\n") 43 | 44 | # number of results 45 | num_results <- length(x) 46 | cat("Number of results:\t", num_results, "\n\n") 47 | 48 | # results 49 | if(num_results > 0) { 50 | cat("Results:\n") 51 | for(i in seq_len(num_results)) { 52 | if(is.null(x[[i]]$description)){ 53 | desc <- "\n" 54 | } 55 | else { 56 | desc <- paste("-", x[[i]]$description, "\n") 57 | } 58 | cat(i, "\t", x[[i]]$label, paste0("(", x[[i]]$id, ")"), desc) 59 | } 60 | } 61 | } 62 | 63 | wd_print_base <- function(x, ...){ 64 | 65 | cat("\n\tWikidata", x$type, x$id, "\n\n") 66 | 67 | # labels 68 | num.labels <- length(x$labels) 69 | if(num.labels>0) { 70 | lbl <- x$labels[[1]]$value 71 | if(num.labels==1) cat("Label:\t\t", lbl, "\n") 72 | else { 73 | if(!is.null(x$labels$en)) lbl <- x$labels$en$value 74 | cat("Label:\t\t", lbl, paste0("\t[", num.labels-1, " other languages available]\n")) 75 | } 76 | } 77 | 78 | # aliases 79 | num_aliases <- length(x$aliases) 80 | if(num_aliases > 0) { 81 | al <- unique(unlist(lapply(x$aliases, function(xl){return(xl$value)}))) 82 | cat("Aliases:\t", paste(al, collapse = ", "), "\n") 83 | } 84 | 85 | # descriptions 86 | num_desc <- 
length(x$descriptions) 87 | if(num_desc > 0) { 88 | desc <- x$descriptions[[1]]$value 89 | if(num_desc == 1){ 90 | cat("Description:", desc, "\n") 91 | } 92 | else { 93 | if(!is.null(x$descriptions$en)){ 94 | desc <- x$descriptions$en$value 95 | } 96 | cat("Description:", desc, paste0("\t[", (num_desc - 1), " other languages available]\n")) 97 | } 98 | } 99 | 100 | # num claims 101 | num_claims <- length(x$claims) 102 | if(num_claims > 0){ 103 | cat("Claims:\t\t", num_claims, "\n") 104 | } 105 | 106 | # num sitelinks 107 | num_links <- length(x$sitelinks) 108 | if(num_links > 0){ 109 | cat("Sitelinks:\t", num_links, "\n") 110 | } 111 | } 112 | 113 | #'@title Print method for Wikidata objects 114 | #' 115 | #'@description print found objects generally. 116 | #' 117 | #'@param x Wikidata object from get_item, get_random_item, get_property or get_random_property 118 | #'@param \dots Arguments to be passed to methods 119 | #'@seealso get_item, get_random_item, get_property or get_random_property 120 | #'@method print wikidata 121 | #'@export 122 | print.wikidata <- function(x, ...){ 123 | lapply(x, wd_print_base, ...) 124 | return(invisible()) 125 | } -------------------------------------------------------------------------------- /R/queries.R: -------------------------------------------------------------------------------- 1 | #Generic queryin' function for direct Wikidata calls. Wraps around WikipediR::page_content. - Ironholds 2 | #'@title Download a Wikidata item 3 | #'@description Utility wrapper for Wikidata API to download item. 4 | #'Used by \code{get_item} and \code{get_property}. 5 | #'@param title The Wikidata item or property as a string. 6 | #'@param \\dots Additional parameters to supply to \code{httr:\link[httr::POST]{POST}}. 7 | #'@return A downloaded full wikidata object (item or property) formatted as a 8 | #'nested json list. 
9 | #'@export 10 | wd_query <- function(title, ...){ 11 | result <- WikipediR::page_content(domain = "wikidata.org", 12 | page_name = title, 13 | as_wikitext = TRUE, 14 | httr::user_agent("WikidataR - https://github.com/TS404/WikidataR"), 15 | ...) 16 | output <- jsonlite::fromJSON(result$parse$wikitext[[1]]) 17 | return(output) 18 | } 19 | 20 | # Query for a random item in "namespace" (ns). Essentially a wrapper around WikipediR::random_page. - Ironholds 21 | #'@title Download random Wikidata items 22 | #'@description Utility wrapper for Wikidata API to download random items. 23 | #'Used by \code{get_random_item} and \code{get_random_property}. 24 | #'@param ns namespace identifier passed on to \code{WikipediR::random_page}; \code{get_random_item} 25 | #'uses 0 (items) and \code{get_random_property} uses 120 (properties). 26 | #'@param limit How many random objects to return. 27 | #'@param \\dots Additional parameters to supply to \code{WikipediR::random_page}. 28 | #'@return Downloaded full wikidata objects (items or properties) formatted 29 | #'as nested json lists. 30 | #'@export 31 | wd_rand_query <- function(ns, limit, ...){ 32 | result <- WikipediR::random_page(domain = "wikidata.org", 33 | as_wikitext = TRUE, 34 | namespaces = ns, 35 | httr::user_agent("WikidataR - https://github.com/TS404/WikidataR"), 36 | limit = limit, ...) 37 | output <- lapply(result, function(x){jsonlite::fromJSON(x$wikitext[[1]])}) 38 | class(output) <- "wikidata" 39 | return(output) 40 | } 41 | 42 | #SPARQL query function for direct Wikidata calls. 43 | #'@title Download full Wikidata items matching a SPARQL query 44 | #'@description Utility wrapper for the Wikidata SPARQL endpoint to download items. 45 | #'Used by \code{get_geo_entity} and \code{get_geo_box}. 46 | #'@param query The SPARQL query as a string 47 | #'@param \\dots Additional parameters to supply to \code{\link[httr]{GET}}. 
48 | #'@return a download of the full wikidata objects formatted as a nested json list 49 | #'@export 50 | sparql_query <- function(query, ...){ 51 | result <- httr::GET("https://query.wikidata.org/bigdata/namespace/wdq/sparql", 52 | query = list(query = query), 53 | httr::user_agent("WikidataR - https://github.com/TS404/WikidataR"), 54 | ...) 55 | httr::stop_for_status(result) 56 | return(httr::content(result, as = "parsed", type = "application/json")) 57 | } 58 | 59 | #Wrapper around WikidataQueryServiceR::query_wikidata 60 | #' @title Send one or more SPARQL queries to WDQS 61 | #' @description Makes a POST request to Wikidata Query Service SPARQL endpoint. 62 | #' @param sparql_query SPARQL query (can be a vector of queries) 63 | #' @param format 64 | #' `tibble` (default) returns a pure character data frame, 65 | #' `simple` returns a pure character vector, while 66 | #' `smart` fetches JSON-formatted data and returns a tibble with datetime 67 | #' columns converted to `POSIXct`. 68 | #' @param \\dots Additional parameters to supply to \code{httr:\link[httr::POST]{POST}}. 69 | #' @return A `tibble` or `vector`. Note: QID values will be returned as QIDs, rather than URLs. 70 | #' @section Query limits: 71 | #' There is a hard query deadline configured which is set to 60 seconds. There 72 | #' are also following limits: 73 | #' - One client (user agent + IP) is allowed 60 seconds of processing time each 74 | #' 60 seconds 75 | #' - One client is allowed 30 error queries per minute 76 | #' See \href{https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits}{query limits section} 77 | #' in the Wikidata Query Service User Manual for more information. 78 | #' @examples 79 | #' # R's versions and release dates: 80 | #' sparql_query <- 'SELECT DISTINCT 81 | #' ?softwareVersion ?publicationDate 82 | #' WHERE { 83 | #' BIND(wd:Q206904 AS ?R) 84 | #' ?R p:P348 [ 85 | #' ps:P348 ?softwareVersion; 86 | #' pq:P577 ?publicationDate 87 | #' ] . 
88 | #' }' 89 | #' query_wikidata(sparql_query) 90 | #' 91 | #' \dontrun{ 92 | #' # "smart" format converts all datetime columns to POSIXct 93 | #' query_wikidata(sparql_query, format = "smart") 94 | #' } 95 | #' @export 96 | query_wikidata <- function(sparql_query,format="tibble",...) { 97 | if(format=="simple"){simplify<-TRUE}else{simplify<-FALSE} 98 | if(format=="tibble"){format<-"simple"} 99 | output <- WikidataQueryServiceR::query_wikidata(sparql_query=sparql_query, 100 | format=format, ...) 101 | output <- suppressWarnings(mapply(url_to_id, 102 | data.frame(output), 103 | SIMPLIFY=simplify)) 104 | output <- tibble(data.frame(output)) 105 | if(nrow(output)==0){output<-tibble(value=NA)} 106 | output 107 | } 108 | 109 | #' @title QID from identifier 110 | #' @description Convert unique identifiers to QIDs (for items in Wikidata). 111 | #' @details The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID) is the unique identifier (UID) 112 | #' used in Wikidata. 113 | #' @param property The identifier property to search (for caveats, see \code{as_pid}.) 114 | #' @param value The identifier value to match. 115 | #' @return A vector of QIDs corresponding to identifiers submitted. 
116 | #' @examples 117 | #' qid_from_identifier('ISBN-13','978-0-262-53817-6') 118 | #' @export 119 | qid_from_identifier <- function(property = 'DOI', 120 | value = c('10.15347/WJM/2019.001','10.15347/WJM/2020.002')){ 121 | 122 | property <- as_pid(property) 123 | 124 | qid_from_property1 <- function(value,property){ # builds one SPARQL query per identifier value, named by that value 125 | out <- paste('SELECT ?value WHERE {?value wdt:', 126 | property, 127 | ' "', 128 | value, 129 | '"}', 130 | sep='') 131 | names(out)<-value 132 | return(out) 133 | } 134 | 135 | qid_from_property2 <- function(x){ # runs one query and keeps the first result column as a character vector 136 | out <- as.character(query_wikidata(x)[[1]]) 137 | names(out) <- names(x) 138 | return(out) 139 | } 140 | 141 | sparql_query <- lapply(value,property,FUN=qid_from_property1) 142 | 143 | if(length(value)>1){ 144 | output <- unlist(pbapply::pblapply(sparql_query,qid_from_property2)) # namespace-qualified, matching identifier_from_identifier 145 | } else { 146 | output <- as.character(unlist(lapply(sparql_query,FUN=query_wikidata))) 147 | names(output) <- value 148 | } 149 | 150 | if(length(value)!=length(output)){ 151 | message("Caution! Some supplied values returned more than one QID.") 152 | } 153 | 154 | return(output) 155 | } 156 | 157 | #' @title Identifier from identifier 158 | #' @description Convert unique identifiers to other unique identifiers. 159 | #' @param property The identifier property to search (for caveats, see \code{as_pid}) 160 | #' @param return The identifier property to convert to 161 | #' @param value The identifier value to match. 162 | #' @return A vector of identifiers corresponding to identifiers submitted. 
163 | #' @examples 164 | #' identifier_from_identifier(property ='ORCID iD', 165 | #' return = 'IMDb ID', 166 | #' value = c('0000-0002-7865-7235','0000-0003-1079-5604') 167 | #' ) 168 | #' @export 169 | identifier_from_identifier <- function(property = 'ORCID iD', 170 | return = 'IMDb ID', 171 | value = "0000-0002-7865-7235"){ 172 | 173 | property <- as_pid(property) 174 | return <- as_pid(return) 175 | 176 | qid_from_property1 <- function(value,return,property){paste('SELECT ?return WHERE { ?value wdt:', 177 | property, 178 | ' "', 179 | value, 180 | '". ?value wdt:', 181 | return, 182 | ' ?return.}', 183 | sep='')} 184 | sparql_query <- lapply(value,return,property,FUN=qid_from_property1) 185 | output <- if(length(value)>1){ 186 | unlist(pbapply::pblapply(sparql_query,function(x) as.character(query_wikidata(x)[[1]]))) 187 | }else{ 188 | as.character(unlist(lapply(sparql_query,FUN=query_wikidata))) 189 | } 190 | names(output) <- value 191 | return(output) 192 | } 193 | -------------------------------------------------------------------------------- /R/schol.R: -------------------------------------------------------------------------------- 1 | #' @title QID from DOI 2 | #' @description simple converter from DOIs to QIDs (for items in Wikidata) 3 | #' @param DOI digital object identifiers submitted as strings 4 | #' @return vector of QIDs corresponding to DOIs submitted 5 | #' @export 6 | qid_from_DOI <- function(DOI = '10.15347/WJM/2019.001'){ 7 | article.qid <- qid_from_identifier(property = 'P356', 8 | value = toupper(DOI)) 9 | return(article.qid) 10 | } 11 | 12 | #' @title QID from label name 13 | #' @description simple converter from label names to QIDs (for items in wikidata). 
14 | #' Essentially a simplification of \code{find_item} 15 | #' @param name name labels submitted as strings 16 | #' @param limit if multiple QIDs match each submitted name, how many to return 17 | #' @param format output format ('vector' to return a simple vector, or 'list' to return a nested list) 18 | #' @return vector (or, with \code{format = "list"}, a nested list) of QIDs corresponding to names submitted. Note: some names may return multiple QIDs. 19 | #' @export 20 | qid_from_name <- function(name = "Thomas Shafee", 21 | limit = 100, 22 | format = "vector"){ 23 | qid_from_name_nest1 <- function(x){lapply(x,"[[","id")} 24 | item.qs <- lapply(name,find_item, limit=limit) 25 | item.qid <- lapply(item.qs,qid_from_name_nest1) 26 | names(item.qid) <- name 27 | if(format=="vector"){item.qid <- unlist(item.qid)} 28 | if(format=="list") {item.qid <- item.qid} 29 | return(item.qid) 30 | } 31 | 32 | #' @title QID from ORCID 33 | #' @description simple converter from ORCIDs to QIDs (for items in wikidata) 34 | #' @param ORCID ORCID iDs submitted as strings 35 | #' @return vector of QIDs corresponding to ORCIDs submitted 36 | #' @export 37 | qid_from_ORCID <- function(ORCID = '0000-0002-2298-7593'){ 38 | author.qid <- qid_from_identifier(property = 'P496',value = ORCID) 39 | return(author.qid) 40 | } 41 | -------------------------------------------------------------------------------- /R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TS404/WikidataR/d7873b6b80a951130e9c4ec5c17068bd4898fb6e/R/sysdata.rda -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | # -------- Format checkers -------- 2 | # Simple tests of strings for whether they adhere to common wikidata formats 3 | is.qid <- function(x){grepl("^[Qq][0-9]+$",x)} 4 | is.pid <- function(x){gsub("S","P",x) %in% 
as.matrix(WD.globalvar$PID.datatype$property)} 5 | is.sid <- function(x){gsub("S","P",x) %in% as.matrix(WD.globalvar$SID.valid$Wikidata_property_to_indicate_a_source)} 6 | is.date <- function(x){grepl("[0-9]{1,4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}",x)} 7 | is.quot <- function(x){grepl("^\".+\"$",x)} 8 | is.empty <- function(x){x==""} 9 | is.coord <- function(x){grepl("@-?([1-8]?\\d(\\.\\d+)?|90(\\.0+)?)/-?(180(\\.0+)?|((1[0-7]\\d)|([1-9]?\\d))(\\.\\d+)?)$",x)} 10 | is.wdURL <- function(x){grepl("http://www.wikidata.org/entity/[PpQq][0-9]+$",x)} 11 | is.create <- function(x){grepl("^CREATE",x)} 12 | is.createx <- function(x){grepl("^CREATE.+",x)} 13 | is.create_x<- function(x){grepl("^CREATE_.+",x)} 14 | is.last <- function(x){grepl("^LAST$",x)} 15 | is.special <- function(x){ 16 | if(grepl("^[LAD]",x)){ 17 | substr(x,2,100) %in% as.matrix(WD.globalvar$lang.abbrev) 18 | }else if(grepl("^S",x)){ 19 | substr(x,2,100) %in% as.matrix(WD.globalvar$abbrev.wiki) 20 | }else{ 21 | FALSE 22 | } 23 | } 24 | 25 | check.PID.WikibaseItem <- function(x){ 26 | x %in% WD.globalvar$PID.datatype$property[WD.globalvar$PID.datatype$wbtype=="WikibaseItem"]} 27 | 28 | check.PID.constraint <- function(x){ 29 | check.PID.constraint.nest1 <- function(x){ 30 | out <- as.character(WD.globalvar$PID.constraint$fmt[WD.globalvar$PID.constraint$Wikidata_property==x]) 31 | if(length(out)!=0){out}else{NA} 32 | } 33 | sapply(x,check.PID.constraint.nest1) 34 | } 35 | 36 | #'@title Extract an identifier from a wikidata URL 37 | #'@description Convert a URL ending in an identifier (returned by SPARQL queries) to just 38 | #'the plain identifier (QID or PID). 
39 | #'@param x a vector of strings, each possibly a wikidata entity URL 40 | #'@return for every element that is a wikidata URL ending in a QID or PID, that PID or QID; other elements are returned unchanged 41 | #'@examples 42 | #'url_to_id("http://www.wikidata.org/entity/Q42") 43 | #'@export 44 | url_to_id <- function (x){ 45 | is_url <- is.wdURL(x) # one TRUE/FALSE per element, so whole columns can be passed without a length > 1 condition in if() 46 | if(any(is_url)){x <- as.character(x); x[is_url] <- sub("^.*/","",x[is_url])} # keep only the text after the last "/" 47 | x 48 | } 49 | 50 | #Generic input checker. Needs additional stuff for property-based querying 51 | #because namespaces are weird, yo. - Ironholds 52 | #'@title Generic input checker 53 | #'@description Utility function to handle namespaces. Used by \code{get_item} and \code{get_property} 54 | #'@param input vector of identifiers to check 55 | #'@param substitution prefix to prepend to purely numeric identifiers (e.g. "Q" or "Property:P") 56 | #'@return the input vector, with \code{substitution} prepended to every element that consists only of digits 57 | #'@export 58 | check_input <- function(input, substitution){ 59 | in_fit <- grepl("^\\d+$",input) 60 | if(any(in_fit)){ 61 | input[in_fit] <- paste0(substitution, input[in_fit]) 62 | } 63 | return(input) 64 | } 65 | 66 | 67 | # -------- Format converters -------- 68 | # Simple functions to convert plain text descriptions into their most likely QID/PIDs 69 | #'@title Convert an input to a item QID. 70 | #'@description Convert an input string to the most likely item 71 | #'\href{https://www.wikidata.org/wiki/Q43649390}{QID}. 72 | #'@param x a vector, data frame, or tibble of strings representing wikidata items 73 | #'@return if the inputted string is a valid QID, return the string. 74 | #'If the inputted string matches an item label, return its QID. 75 | #'If the inputted string matches multiple labels of multiple items, return the QID of the first hit. 
#'@examples
#'# if input string is a valid QID
#'as_qid("Q42")
#'# if input string matches multiple item labels
#'as_qid("Douglas Adams")
#'# if input string matches a single unique label
#'as_qid("Douglas Adams and the question of arterial blood pressure in mammals")
#'@export
as_qid <- function(x){
  # Resolve a single cell: pass recognised values through, otherwise search by label.
  resolve_cell <- function(cell){
    keep_as_is <- is.qid(cell)|is.date(cell)|is.quot(cell)|is.na(cell)|is.null(cell)|is.empty(cell)|is.createx(cell)|is.create(cell)|is.last(cell)
    if(keep_as_is){
      return(cell)
    }

    hits <- find_item(cell,limit = 100)
    if(length(hits)==0){
      message (paste0("no sufficiently close match for \"",cell,"\". Returned \"NA\"."))
      return(NA)
    }

    # keep the top hit plus every hit whose label equals the input exactly
    exact <- sapply(hits,function(hit){hit$label==cell})
    exact[1] <- TRUE
    hits <- hits[exact]

    best <- hits[[1]]$id
    names(best) <- hits[[1]]$label
    if(cell!=hits[[1]]$label){
      message(paste0(
        "Inexact match for \"",cell,
        "\", closest match = ",hits[[1]]$label,
        " (",best,") "))
    }
    if(length(hits)>1){
      message(paste0(
        "Multiple exact matches for \"",cell,"\""))
      message(paste0(
        "  match ",seq_along(hits),
        " = (",sapply(hits,function(hit){hit$id}),
        ") ",sapply(hits,function(hit){hit$description}),
        "\n"))
    }
    best
  }
  # Resolve every cell of one column and flatten the results into a vector.
  resolve_column <- function(column){
    unlist(lapply(column,resolve_cell))
  }
  bind_cols(lapply(tibble(x),resolve_column))
}

#'@title Convert an input to a property PID
#'@description Convert an input string to the most likely property PID
#'@param x a vector, data frame, or tibble of strings representing Wikidata properties
#'@return If the inputted string is a valid PID, return the string.
#'If the inputted string matches a property label, return its PID.
#'If the inputted string matches multiple labels of multiple properties, return the PID of the first hit.
#'@examples
#'# if input string is a valid PID
#'as_pid("P50")
#'# if input string matches multiple item labels
#'as_pid("author")
#'# if input string matches a single unique label
#'as_pid("Scopus author ID")
#'@export
as_pid <- function(x){
  # Resolve a single cell: pass recognised values through, otherwise search by label.
  resolve_cell <- function(cell){
    keep_as_is <- is.pid(cell)|is.date(cell)|is.quot(cell)|is.na(cell)|is.null(cell)|is.empty(cell)|is.special(cell)
    if(keep_as_is){
      return(cell)
    }

    hits <- find_property(cell,limit = 2)
    if(length(hits)==0){
      message (paste0("no sufficiently close match for \"",cell,"\". Returned \"NA\"."))
      return(NA)
    }

    # the first search hit is taken as the answer; tell the user when its label differs
    best <- hits[[1]]$id
    names(best) <- hits[[1]]$label
    if(cell!=hits[[1]]$label){
      message(paste0(
        "Inexact match for \"",cell,
        "\", closest match = ",hits[[1]]$label,
        " (",best,")."))
    }
    best
  }
  # Resolve every cell of one column and flatten the results into a vector.
  resolve_column <- function(column){
    unlist(lapply(column,resolve_cell))
  }
  bind_cols(lapply(tibble(x),resolve_column))
}

#'@title Convert an input to a source property SID
#'@description Convert an input string to the most likely source SID
#' (equivalent to PID.)
#'@param x a vector, data frame, or tibble of strings representing Wikidata
#'source properties.
#'@return if the inputted string is a valid SID, return the string.
#'If the inputted string matches a property label, return its SID.
#'If the inputted string matches multiple labels of multiple properties,
#'return the SID of the first hit.
#'@examples
#'# if input string is a valid SID
#'as_sid("S854")
#'# if input string matches multiple item labels
#'as_sid("URL")
#'# if input string matches a single unique label
#'as_sid("Reference URL")
#'@export
as_sid <- function(x){
  as_sid_nest1 <- function(x){
    as_sid_nest2 <- function(x){
      if(is.sid(x)|is.date(x)|is.quot(x)|is.na(x)|is.null(x)|is.empty(x)){
        # already an SID, or a value that must not be converted
        x
      }else if(all(is.pid(x))){
        # a PID maps to its SID by swapping the leading letter
        gsub("P","S",x,ignore.case = TRUE)
      }else{
        # treat the input as a label and take the first search hit
        temp <- find_property(x)
        if(length(temp)==0){
          # no hit: return NA with a message (as as_pid does) rather than
          # failing on temp[[1]] with a subscript error
          message (paste0("no sufficiently close match for \"",x,"\". Returned \"NA\"."))
          NA
        }else{
          gsub("P","S",temp[[1]]$id)
        }
      }
    }
    out <- unlist(lapply(x,as_sid_nest2))
    out
  }
  output <- bind_cols(lapply(tibble(x),as_sid_nest1))
  return(output)
}

#'@title Add quotations marks
#'@description Add escaped quotation marks around strings that need them ready for
#'submission to an API.
#'@param x a vector, data frame, or tibble of strings
#'@param format either "tibble" / "csv" to use plain quotation marks (default),
#'or "api" / "website" to use '\%22'
#'@return A tibble of the items wrapped in quotation marks. Items that are
#'already quoted, are a
#'\href{https://www.wikidata.org/wiki/Q43649390}{QID},
#'a date, a number, empty or NA are returned unquoted.
#'@examples
#'as_quot("text")
#'@export
as_quot <- function(x,format="tibble"){
  if(is.null(x)){
    return(NULL)
  }
  # Pick the quotation mark for the requested output format. An unrecognised
  # format leaves q_mark as NULL; that is only an error if something actually
  # needs quoting (see below), so inputs that never needed quotes still work.
  q_mark <- NULL
  if(isTRUE(format %in% c("api","website"))){
    q_mark <- '%22'
  }else if(isTRUE(format %in% c("tibble","csv"))){
    q_mark <- '"'
  }
  as_quot_nest1 <- function(x){
    as_quot_nest2 <- function(x){
      if(!(is.qid(x)|is.quot(x)|is.date(x)|is.na(x)|is.empty(x)|is.numeric(x))){
        if(is.null(q_mark)){
          # previously surfaced as "object 'q_mark' not found"
          stop("Unknown format \"",paste(format,collapse = ", "),
               "\": expected one of \"tibble\", \"csv\", \"api\" or \"website\"")
        }
        paste0(q_mark,as.character(x),q_mark)
      }else{
        # QIDs, already-quoted strings, dates, NA, empty strings and numbers stay unquoted
        as.character(x)
      }
    }
    out <- unlist(lapply(x,as_quot_nest2))
    out
  }
  output <- bind_cols(lapply(tibble(x),as_quot_nest1))
  return(output)
}

#'@title Extract an identifier from a Wikidata URL
#'@description Convert a URL ending in an identifier (returned by SPARQL queries)
#'to just the plain identifier (QID or PID).
#'@details The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID)
#'is the unique identifier (UID) used in Wikidata.
#'@param x A vector of strings representing Wikidata URLs.
#'@return QID or PID: the text after the last "/" or ":" of each input.
#'@examples
#'url_to_id("http://www.wikidata.org/entity/Q42")
#'@export
url_to_id <- function(x){
  # split each string on "/" or ":" and keep the final piece
  sapply(sapply(x,pattern = "/|:",stringr::str_split),tail,1)
}


# -------- Wikidata object manipulation --------
#'@title Extract claims from returned item data
#'@description extract claim information from data returned using
#'\code{\link{get_item}}.
#'@param items a list of one or more Wikidata items returned with
#'\code{\link{get_item}}.
#'@param claims a vector of claims (in the form "P321", "P12") to look for
#'and extract.
#'@return a list containing one sub-list for each entry in \code{items},
#'and (below that) the found data for each claim. In the event a claim
#'cannot be found for an item, an \code{NA} will be returned
#'instead.
#'@examples
#'# Get item data
#'adams_data <- get_item("42")
#'# Get claim data
#'claims <- extract_claims(adams_data, "P31")
#'@export
extract_claims <- function (items,
                            claims){
  claims <- sapply(claims,as_pid)
  # Pull one claim out of one item; NA when the item has no claim of that name.
  pick_claim <- function(claim, obj){
    hits <- which(names(obj$claims) == claim)
    if (length(hits) == 0){
      return(NA)
    }
    obj$claims[[hits[1]]]
  }
  lapply(items, function(entity){
    lapply(claims, pick_claim, obj = entity)
  })
}

#'@title List properties of a Wikidata item
#'@description for a downloaded wikidata item, list the properties of all statements
#'@param item a list of one or more Wikidata items returned with
#'\code{\link{get_item}}.
#'@param names a boolean for whether to return property names, or just P numbers
#'@return a list containing one character vector for each entry in \code{item},
#'holding the property IDs (P numbers) of that entry's claims. When
#'\code{names = TRUE} and a single item was supplied, the English property
#'labels are looked up with \code{get_property} and attached as names.
#'@examples
#'# Get item data
#'adams_data <- get_item("42")
#'# List the properties used on the item
#'properties <- list_properties(adams_data)
#'@export
list_properties <- function (item,
                             names=FALSE){
  # base::names is spelled out because the `names` argument shadows it here
  pids <- lapply(item, function(entity){ base::names(entity[["claims"]]) })
  if(names){
    if(length(item)==1){
      labels <- lapply(get_property(pids), function(prop){ prop[["labels"]][["en"]][["value"]] })
      names(pids) <- unlist(labels)
    }
  }
  pids
}

#Note: This one isn't very well named.
#not really the property names, more the predicate names, but you get the idea
#'@title Get names of properties
#'@description For a claim or set of claims, return the English labels of the
#'items those claims point to
#'@param properties a claims list from \code{extract_claims}
#'@return a list with one tibble per entry of \code{properties}, giving the
#'label of each value item (column \code{QID} holds the names)
#'@export
get_names_from_properties <- function(properties){
  # claims of one item -> value QIDs of each claim, named by the claim's property
  value_qids <- function(statements){
    ids <- lapply(statements, function(s){ s[["mainsnak"]][["datavalue"]][["value"]][["id"]] })
    names(ids) <- lapply(statements, function(s){ s[["mainsnak"]][["property"]][[1]] })
    ids
  }
  # value QIDs of one item -> downloaded item data for each of them
  fetch_items <- function(qid_sets){
    lapply(qid_sets,get_item)
  }
  # downloaded items of one claim -> English labels, named by item id
  english_labels <- function(entities){
    labs <- lapply(entities, function(e){ e[["labels"]][["en"]][["value"]] })
    names(labs) <- lapply(entities,"[[","id")
    labs
  }

  # the three stages run one after another over all items, as separate passes
  qids_per_item   <- lapply(properties,value_qids)
  items_per_item  <- lapply(qids_per_item,fetch_items)
  labels_per_item <- lapply(items_per_item, function(fetched){ lapply(fetched,english_labels) })
  flattened       <- lapply(labels_per_item,unlist)
  lapply(flattened,enframe,name = "QID")
}


#'@title Filter QIDs
#'@description For a QID or vector of QIDs, remove ones that match a particular statement
#'(e.g. remove all that are instances of academic publications or books).
#'@details The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID)
#'is the unique identifier (UID) used in Wikidata.
#'@param ids QIDs to check
#'@param property property to check (default = P31 to filter on "instance of")
#'@param filter values of that property to use to filter out
#'(default = Q737498, Q5633421, Q7725634, Q13442814, and Q18918145 to remove academic
#'publications or books)
#'@param message message to return (useful for disambiguate_QIDs function)
#'@return a tibble with columns \code{qid}, \code{label} and \code{desc} for the
#'QIDs that do not match the property filter. If nothing remains (or \code{ids}
#'is empty) a single row with \code{NA} qid and label is returned.
#'@examples
#' \dontrun{
#' # Filter three items called "Earth Science" to show only those that aren't
#' # books, journals or journal articles
#' filter_qids(ids = c("Q96695546","Q8008","Q58966429"),
#'             property = "P31",
#'             filter = c("Q737498","Q5633421","Q7725634","Q13442814","Q18918145"))
#' }
#'@export
filter_qids <- function (ids,
                         property = "P31",
                         filter = c("Q737498",
                                    "Q5633421",
                                    "Q7725634",
                                    "Q13442814",
                                    "Q18918145"),
                         message=NULL){
  out <- NULL
  # With no ids there is nothing to look up; skipping this section also avoids
  # the 1:0 loop that would otherwise run twice on an empty input.
  if(length(ids)>0){
    pb <- progress_bar$new(total  = length(ids),
                           format = paste0(message,":bar :percent eta::eta"),
                           width  = 75,
                           show_after = 0)
    if(is.null(property)||is.null(filter)){
      # No filter requested: just collect label and description via search
      for (i in seq_along(ids)){
        pb$tick()
        qid   <- ids[i]
        item  <- find_item(qid,limit=1)
        label <- item[[1]]$label
        if(length(item[[1]]$description)>0){
          desc <- item[[1]]$description
        }else{
          desc <- "no description"
        }
        out <- bind_rows(out,tibble(qid=qid,label=label,desc=desc))
      }
    }else{
      for (i in seq_along(ids)){
        pb$tick()
        qid   <- ids[i]
        item  <- get_item(qid)
        # values of the chosen property on this item ("other" when it has none)
        prop_values <- item[[1]]$claims[[property]]$mainsnak$datavalue$value$id
        if(is.null(prop_values)){prop_values <- "other"}
        if(!any(prop_values %in% filter)){
          # prefer the English description/label, else fall back to the first available
          if(length(item[[1]]$descriptions)>0){
            if(!is.null(item[[1]]$descriptions$en$value)){
              desc <- item[[1]]$descriptions$en$value
            }else{
              desc <- item[[1]]$descriptions[[1]]$value
            }
          }else{
            desc <- "no description"
          }
          # labels are only indexed after checking that at least one exists
          if(length(item[[1]]$labels)>0){
            if(!is.null(item[[1]]$labels$en$value)){
              label <- item[[1]]$labels$en$value
            }else{
              label <- item[[1]]$labels[[1]]$value
            }
          }else{
            label <- "no label"
          }
          out <- bind_rows(out,tibble(qid=qid,label=label,desc=desc))
        }
      }
    }
  }
  if(is.null(out)){
    out <- tibble(qid=NA,
                  label=NA,
                  desc="No current matching Wikidata item")
  }
  return(out)
}


# -------- Misc. string manipulation --------
#'@title Format short form person names
#'@description Converting names into first initial and surname, or just initials
#'@param x a vector of people's names as strings
#'@param format a string, either "FLast" (first initial and surname, the
#'default) or "FL" (initials only), to indicate the output format
#'@return the inputted name strings with first names shortened based on the
#'selected format.
#'@export
initials <- function(x,format="FLast"){
  if (format=="FLast"){
    # first letter, then everything after the last space
    gsub("^([A-Za-z]).* ([A-Za-z]*)", "\\1 \\2", x)
  }else{
    # first character of every whitespace-separated word
    gsub("(.)\\S* *", "\\1", x)
  }
}

#'@title Remove special characters
#'@description Special characters can otherwise mess up wikidata read-writes
#'@param x a vector of strings to check for special characters
#'@return the inputted strings with special characters replaced with
#'closest match plain characters.
#'@export
unspecial <- function(x){
  # NOTE(review): the loop indexes columns, so x is expected to be a data frame
  # or tibble rather than a bare vector.
  out <- x
  # seq_len() so that a zero-column input falls straight through instead of
  # looping over columns 1 and 0
  for(i in seq_len(ncol(x))){
    out[[i]] <- iconv(x[[i]],to = 'ASCII//TRANSLIT')
    # columns that are entirely numeric come back as numbers, everything else as factors
    if(Hmisc::all.is.numeric(x[[i]])){
      out[[i]] <- as.numeric(out[[i]])
    }else{
      out[[i]] <- as.factor(out[[i]])
    }
  }
  return(as_tibble(out))
}

#'@title Extract a paragraph of text
#'@description Return the value of a named or numbered parameter
#'(\code{| para = value |}) from wikimarkup templates.
#'Useful for extracting information from Wikipedia or other wikimarkup text
#'@param text the input text as a string
#'@param para number (or name) indicating which parameter to return (default=1)
#'@param templ an optional string specifying a mediawiki template within
#'which to restrict the search
#'@return a list with one entry per matched template, each holding the
#'extracted value(s) named by \code{para}. An empty list if no template matched.
#'@export
extract_para <- function(text,
                         para=1,
                         templ=NULL){
  # strip the "|name=" prefix and the trailing "|" from each raw match
  extract_para_nest1 <- function(x,y){
    out <- lapply(x,gsub,pattern=".*= *| *\\|",replacement="")
    names(out) <- y
    return(out)
  }
  templ    <- gsub(" ","_",templ)
  # collapse runs of spaces and literal "\n" / "\t" sequences into single spaces
  tosearch <- gsub("( |\\\\n|\\\\t)+"," ",text)
  # NOTE(review): gsub() turns a NULL templ into character(0), so this branch
  # also runs when templ is not supplied (then matching any template) - confirm intended.
  if(!is.null(templ)){
    # balanced {...} blocks, found with a recursive pattern
    templates <- regmatches(tosearch, gregexpr("\\{(?:[^{}]+|(?R))*+\\}",
                                               tosearch, perl=TRUE, ignore.case=TRUE))[[1]]
    # replace spaces with underscores in the template name part only
    templates <- paste0(gsub(" ","_",substr(templates,1,regexpr(" *\\|| *\\}",templates)-1)),
                        substr(templates,regexpr("*\\||*\\}",templates),nchar(templates)))

    tosearch <- unlist(str_extract_all(templates,
                                       paste0("(?i)\\{\\{ *?",templ,".*?\\}\\}")))
    # only label the matches when there are any: naming an empty result used to
    # fail because 1:length(tosearch) counts down from 1 to 0
    if(length(tosearch)>0){
      names(tosearch) <- paste0(templ,"_",seq_along(tosearch))
    }
  }

  match_paras <- lapply(tosearch,
                        str_extract_all,
                        paste0("\\| *?",para," *?=.*?\\|"))

  match_exact <- lapply(match_paras,extract_para_nest1,para)

  return(match_exact)
}

#'@title "CREATE" rows
#'@description
#'Add in empty lines for QuickStatements CREATE rows that mint new QIDs.
#'This is a slightly messy quirk of the QuickStatements format that mints new QIDs via a line
#'containing only "CREATE", so this function is a way to approximate that behavior in a tibble
#'@param items a vector, data frame or tibble of items (which may or may not contain the keyword "CREATE")
#'@param vector a vector of properties or values which may be expanded based on the items vector
#'@return if the vector is NULL, return NULL. Otherwise a tibble of the vector in which,
#'wherever the "CREATE" keyword appears in the items, a blank string has been inserted
#'at that position.
#'@export
createrows <- function(items,vector){
  if(is.null(vector)){
    return(NULL)
  }
  # without any bare CREATE item there is nothing to pad
  if(!any(items=="CREATE",na.rm = TRUE)){
    return(tibble(vector))
  }

  #a single value is repeated once for every non-CREATE item
  if(length(unlist(vector))==1){
    vector <- rep(vector,sum(items!="CREATE"))
  }
  vector <- tibble(vector)

  create_at <- which(items=="CREATE")
  # one blank row per CREATE, with the same columns as the data
  blank_rows <- tibble(data.frame(array("",dim=c(length(create_at),ncol(vector)),dimnames = list(NULL,colnames(vector)))))
  stacked <- bind_rows(vector,blank_rows)
  # fractional sort keys slot each blank row in at the position of its CREATE item
  sort_key <- c(1:nrow(vector), create_at-seq_along(create_at)+0.5)
  tibble(stacked[order(sort_key),])
}

#'@title "CREATE" rows from tidy format
#'@description Add in QuickStatements CREATE rows that mint new QIDs from tidy input data.
#'New items are created by any item starting that starts with the text "CREATE" followed
#'by any unique ID.
#'@param QS.tib a tibble of items, values and properties (optionally qualifiers and sources).
#'@return a tibble, with items that start with "CREATE" followed by any unique text causing the
#'addition of a "Create" line above, being replaced with "LAST" in the QuickStatements
#'format to create new QIDs.
#'@export
createrows.tidy <- function(QS.tib){
  #insert 'CREATE' blankrows above first instance of 'CREATExyz'
  # newQID: row numbers whose first-column value matches is.createx and has not
  # appeared in an earlier row
  newQID <- which(!duplicated(QS.tib[,1])&sapply(QS.tib[,1],is.createx))
  # append one all-blank row per new item, with the same columns as QS.tib
  val <- rbind(QS.tib, array("",dim=c(length(newQID),ncol(QS.tib)),dimnames = list(newQID,names(QS.tib))) )
  # sort keys: original rows keep 1..n, each blank row gets (row - 0.5) so that
  # it sorts immediately above the row it belongs to
  id  <- c(seq_along(t(QS.tib)[1,]), newQID-0.5)
  out <- val[order(id),]

  #replace 'CREATEXYZ' with 'LAST'
  out[sapply(out[,1],is.createx),1] <- "LAST"

  #replace new empty rows with 'CREATE' row
  # a row counts as empty when every cell equals ""
  out[apply(is.empty(out),all,MARGIN=1),1] <- "CREATE"
  return(out)
}
--------------------------------------------------------------------------------
/R/writes.R:
--------------------------------------------------------------------------------
# -------- Writes --------

#'@title Write statements to Wikidata
#'@description Upload data to Wikidata, including creating items,
#'adding statements to existing items (via the quickstatements format and API).
#'
#'@param items a vector of strings indicating the items to which to add statements (as QIDs or labels).
#'Note: if labels are provided, and multiple items match, the first matching item will be used
#'(see \code{as_qid} function), so use with caution.
#'New QIDs can be created by using the "CREATE_xyz", where "_xyz" is any unique string.
#'Using the same id will add additional statements to those new items
#'@param properties a vector of strings indicating the properties to add as statements (as PIDs or labels).
#'Note: if labels are provided, and multiple items match, the first matching item will be used
#'(see \code{as_pid} function), so use with caution.
#'Four special properties can also be used: labels, aliases, descriptions and sitelinks.
#'See [this link](https://www.wikidata.org/wiki/Help:QuickStatements#Adding_labels,_aliases,_descriptions_and_sitelinks) for the syntax.
#'@param values a vector of strings indicating the values to add as statements (as QIDs or strings).
#'Note: if strings are provided, they will be treated as plain text.
#'@param qual.properties a vector, data frame, or tibble of strings indicating the properties to add as qualifiers to statements (as PIDs or labels).
#'Note: if labels are provided, and multiple items match, the first matching item will be used
#'(see \code{as_pid} function), so use with caution.
#'@param qual.values a vector, data frame, or tibble of strings indicating the values to add as qualifiers to statements (as QIDs or strings).
#'Note: if strings are provided, they will be treated as plain text.
#'@param src.properties a vector, data frame, or tibble of strings indicating the properties to add as reference sources to statements (as SIDs or labels).
#'Note: if labels are provided, and multiple items match, the first matching item will be used
#'(see \code{as_sid} function), so use with caution.
#'@param src.values a vector, data frame, or tibble of strings indicating the values to add as reference sources to statements (as QIDs or strings).
#'Note: if strings are provided, they will be treated as plain text.
#'@param remove a vector of booleans for each statement indicating whether it should
#'be removed from the item rather than added (default = FALSE)
#'@param format output format as a string.
#'Options include:
#'  \describe{
#'  \item{tibble}{easiest format for further manipulation in R}
#'  \item{csv}{can be copy-pasted to [the QuickStatements website](https://quickstatements.toolforge.org/) (or manipulated in a spreadsheet program)}
#'  \item{api}{a url that can be copy-pasted into a web browser, or automatically submitted (see \code{api.submit} parameter)}
#'  \item{website}{open a [QuickStatements](https://quickstatements.toolforge.org/) web browser window summarizing the edits to be made to Wikidata}
#'  }
#'@param api.username a string indicating your Wikimedia username
#'@param api.token a string indicating your api token (the unique identifier that you can find listed at [your user page](https://quickstatements.toolforge.org/#/user))
#'@param api.format a string indicating which version of the quickstatement format used to submit the api (default = "v1")
#'@param api.batchname a string used to create a named batch (listed at [your batch history page](https://quickstatements.toolforge.org/#/batches)) and to tag the edit summaries
#'@param api.submit boolean indicating whether to submit instruction directly to wikidata (else returns the URL that can be copy-pasted into a web browser)
#'
#'@return data formatted to upload to wikidata (via quickstatements),
#'optionally also directly uploaded to wikidata (see \code{format} parameter).
#'
#'@examples
#'# Add a statement to the "Wikidata sandbox" item (Q4115189)
#'# to say that it is an "instance of" (P31) of Q1 (the universe).
#'# The instruction will submit directly to wikidata via the API
#'# (if you include your Wikimedia username and token)
#'
#' \dontrun{write_wikidata(items = "Wikidata Sandbox",
#'                properties = "instance of",
#'                values = "Q1",
#'                format = "api",
#'                api.username = "myusername",
#'                api.token = , #REDACTED#
#'                )}
#'#note:
#'
#'@export

write_wikidata <- function(items,
                           properties = NULL,
                           values = NULL,
                           qual.properties = NULL,
                           qual.values = NULL,
                           src.properties = NULL,
                           src.values = NULL,
                           remove = FALSE,
                           format = "tibble",
                           api.username = NULL,
                           api.token = NULL, # Find yours from [your user page](https://tools.wmflabs.org/quickstatements/#/user)
                           api.format = "v1",
                           api.batchname = NULL,
                           api.submit = TRUE
){

  # Check if username and token provided
  if(format=="api"){
    if(is.null(api.username)){stop("Enter your Wikimedia username")}
    if(is.null(api.token)) {stop("Enter your api.token (Find yours at https://tools.wmflabs.org/quickstatements/#/user)")}
  }

  # Place all the quickstatements variables into a list
  QS <- list(items = items,
             properties = properties,
             values = values,
             qual.properties = qual.properties,
             qual.values = qual.values,
             src.properties = src.properties,
             src.values = src.values)
  # every supplied argument becomes a tibble; arguments left as NULL stay NULL
  QS <- lapply(QS,function(x){if(!is.null(x)){tibble(x)}})

  # If new QIDs are being created via the "CREATE" keyword, need to insert blank lines across the other parameters to align correctly into rows
  # This is the most similar to the standard quickstatements method, though the "CREATExyz" method is preferred (see createrows.tidy function later)
  QS$properties <- createrows(QS$items,QS$properties)
  QS$values <- createrows(QS$items,QS$values)
  QS$qual.properties <- createrows(QS$items,QS$qual.properties)
  QS$qual.values <- createrows(QS$items,QS$qual.values)
  QS$src.properties <- createrows(QS$items,QS$src.properties)
  QS$src.values <- createrows(QS$items,QS$src.values)

  # If same number of rows as the rowmax, do nothing
  # If only one row, repeat it rowmax times
  # If wrong number of rows, stop with an error message
  rowcount <- unlist(lapply(QS,nrow))
  rowmax <- max(rowcount)
  stoprun <- FALSE

  if(var(unlist(rowcount))!=0){
    for (x in 1:length(QS)){
      if(is.null(nrow(QS[[x]]))){
        QS[[x]] <- slice(tibble(QS[[x]]),rep(1:n(), each=rowmax))
      }else if (nrow(QS[[x]])==1){
        QS[[x]] <- slice(tibble(QS[[x]]),rep(1:n(), each=rowmax))
      }else if(nrow(QS[[x]])==rowmax){
        QS[[x]] <- QS[[x]]
      }else{
        # warn for every offending column first, then stop once after the loop
        stoprun<-TRUE
        warning(paste0("Not all quickstatement columns have equal rows: ",
                       nrow(QS$items)," items (including ",
                       sum(is.create(unlist(QS$items)))," new QIDs to CREATE) were provided, but ",
                       names(QS)[x],
                       " has ",
                       nrow(QS[[x]]),
                       " rows (expecting ",
                       nrow(QS$items),
                       ")."))
      }
    }
  }
  if(stoprun){stop("Therefore stopping")}

  # Convert values to QIDs where possible and identify which (if any) to remove
  QS$items <- as_qid(QS$items)
  # a leading "-" marks the statement for removal
  QS$items[remove,] <- paste0("-",unlist(QS$items[remove,]))

  # Convert properties to PIDs where possible, unless special functions (such as lables and aliases)
  QS$properties <- as_pid(QS$properties)

  # Convert values to QIDs where possible, unless property is expecting a string
  QS$values <- tibble(QS$values)
  if(any(sapply(QS$properties,check.PID.WikibaseItem))){
    QS$values[sapply(QS$properties,check.PID.WikibaseItem),] <- as_qid(QS$values[sapply(QS$properties,check.PID.WikibaseItem),])
  }
  QS$values <- as_quot(QS$values,format)

  # Check if multiple values and properties supplied for each item
  if(!is.null(dim(QS$properties))){
    # any() rather than all(): the row counts were already equalised above, so a
    # mismatch normally shows up in the column count only, which all() let through
    if(any (dim(QS$properties) != dim(QS$values))){
      stop("multiple properties and values supplied for each item, but number of properties and values don't match")
    }
    # flatten row by row: each item is repeated once per property column
    QS$items <- tibble(rep(unlist(QS$items),each=ncol(QS$properties)))
    QS$properties <- tibble(as.vector(t(QS$properties)))
    QS$values <- tibble(as.vector(t(QS$values)))
  }

  # Convert first three columns into tibble (tibbulate?)
  colnames(QS$items) <- "Item"
  colnames(QS$properties) <- "Prop"
  colnames(QS$values) <- "Value"

  QS.tib <- bind_cols(QS$items,
                      QS$properties,
                      QS$values)

  # optionally, append columns for qualifier properties and qualifier values for those statements
  if(!is.null(QS$qual.properties)|!is.null(QS$qual.values)){
    QS$qual.properties <- as_pid(QS$qual.properties)
    QS$qual.values <- as_quot(QS$qual.values,format)

    # if no value, clear property
    QS$qual.properties[QS$qual.values==""|is.na(QS$qual.values)] <- NA

    colnames(QS$qual.properties) <- paste0("qual.prop.",1:ncol(QS$qual.properties))
    colnames(QS$qual.values) <- paste0("qual.value.",1:ncol(QS$qual.values))

    QSq <- list(QS$qual.properties,
                QS$qual.values)

    # every component must have the same number of rows (zero variance)
    QSq.check <- var(sapply(c(QS,QSq),function(x){if(is.null(dim(x))){length(x)}else{nrow(x)}}))==0
    if(!QSq.check){stop("Incorrect number of qualifiers provided. If no qualifers needed for a statement, use NA or \"\".")}

    # interleave the columns as prop.1, value.1, prop.2, value.2, ...
    QS.qual.tib <- as_tibble(cbind(QSq[[1]],QSq[[2]])[,c(rbind(1:ncol(QSq[[1]]),ncol(QSq[[1]])+1:ncol(QSq[[2]])))])

    QS.tib <- tibble(QS.tib,
                     QS.qual.tib)
  }

  # optionally, append columns for source properties and source values for those statements
  if(!is.null(src.properties)|!is.null(src.values)){
    QS$src.properties <- as_sid(QS$src.properties)
    QS$src.values <- as_quot(QS$src.values,format)

    # if no value, clear property
    QS$src.properties[QS$src.values==""|is.na(QS$src.values)] <- NA

    colnames(QS$src.properties) <- paste0("src.prop.",1:ncol(QS$src.properties))
    colnames(QS$src.values) <- paste0("src.values.",1:ncol(QS$src.values))

    QSs <- list(QS$src.properties,
                QS$src.values)
    # every component must have the same number of rows (zero variance)
    QSs.check <- var(sapply(c(QS,QSs),function(x){if(is.null(dim(x))){length(x)}else{nrow(x)}}))==0
    if(!QSs.check){stop("incorrect number of sources provided")}

    # interleave the columns as prop.1, value.1, prop.2, value.2, ...
    QS.src.tib <- as_tibble(cbind(QSs[[1]],QSs[[2]])[,c(rbind(1:ncol(QSs[[1]]),ncol(QSs[[1]])+1:ncol(QSs[[2]])))])

    QS.tib <- tibble(QS.tib,
                     QS.src.tib)
  }

  # if new QIDs are being created via tidy "CREATExyz" keywords, need to insert CREATE lines above and replace subsequent "CREATExyz" with "LAST"
  QS.tib <- createrows.tidy(QS.tib)

  # remove any impossible rows (value is NA)
  if(nrow(QS.tib)!=1){
    QS.tib <- QS.tib[!is.na(QS.tib$Value),]
    QS.tib <- as_tibble(apply(QS.tib,2,replace_na,""))
  }

  # format up the output
  if (format=="csv"){
    # printed to the console rather than returned
    write.table(QS.tib,quote = FALSE,row.names = FALSE,sep = ",")
  }

  if (format=="tibble"){
    return(QS.tib)
  }

  if (format=="website"){
    api.temp1 <- format_tsv(QS.tib, col_names = FALSE, quote_escape = "none")
    api.temp2 <- gsub("\t", "%7C",api.temp1) # Replace TAB with "%7C"
    api.temp3 <- gsub("%7C(%7C)+","%7C",api.temp2) # Replace multiple tabs (from missing values) with a single tab (to distinguish from newlines)
    api.temp4 <- gsub("\n", "%7C%7C",api.temp3) # Replace end-of-line with "%7C%7C"
    api.temp5 <- gsub(" ", "%20",api.temp4) # Replace space with "%20"
    api.temp6 <- gsub("\\+","%2B",api.temp5) # Replace plus with "%2B"
    api.data <- gsub("/", "%2F",api.temp6) # Replace slash with "%2F"

    url <- paste0("https://quickstatements.toolforge.org/#/v1=","&data=%7C%7C",api.data)

    if(api.submit){
      browseURL(url)
    }else{
      return(url)
    }
  }

  if (format=="api"){
    api.temp1 <- format_tsv(QS.tib, col_names = FALSE, quote_escape = "none")
    api.temp2 <- gsub("%22","\"",api.temp1) #cludge to fix as_quote issues
    api.data <- gsub("%2F","/",api.temp2) #cludge to fix as_date issues

    if (api.submit){
      POST(url="https://tools.wmflabs.org/quickstatements/api.php",
           body = list(action = "import",
                       submit = "1",
                       format = api.format,
                       batchname = api.batchname,
                       username = api.username,
                       token = api.token,
                       data = api.data)
      )
      # open the batch list so the submitted batch can be followed
      browseURL("https://quickstatements.toolforge.org/#/batches")
    }else{
      # same percent-encoding as the "website" format, wrapped in an api.php URL
      api.temp1 <- format_tsv(QS.tib, col_names = FALSE, quote_escape = "none")
      api.temp2 <- gsub("\t", "%7C",api.temp1) # Replace TAB with "%7C"
      api.temp3 <- gsub("%7C(%7C)+","%7C",api.temp2) # Replace multiple tabs (from missing values) with a single tab (to distinguish from newlines)
      api.temp4 <- gsub("\n", "%7C%7C",api.temp3) # Replace end-of-line with "%7C%7C"
      api.temp5 <- gsub(" ", "%20",api.temp4) # Replace space with "%20"
      api.temp6 <- gsub("\\+","%2B",api.temp5) # Replace plus with "%2B"
      api.data <- gsub("/", "%2F",api.temp6) # Replace slash with "%2F"
      url <- paste0("https://tools.wmflabs.org/quickstatements/api.php",
                    "?action=", "import",
                    "&submit=", "1",
                    "&format=", api.format,
                    "&batchname=", api.batchname,
                    "&username=", api.username,
                    "&token=", api.token,
                    "&data=%7C%7C",api.data)
      return(url)
    }
  }
}
--------------------------------------------------------------------------------
/R/writes_wikibase.R:
--------------------------------------------------------------------------------
#' @title Write statements to any Wikibase instance
#' @description Upload data to a Wikibase instance, including creating items,
#' adding statements to existing items (via the quickstatements format and API).
#'
#' @param items a vector of strings indicating the items to which to add statements (as QIDs or labels).
#' Note: In contrast to \code{write_wikidata}, this function takes no labels as input, just QIDs.
#' New QIDs can be created by using the "CREATE_xyz", where "_xyz" is any unique string.
#' Using the same id will add additional statements to those new items
#' @param properties a vector of strings indicating the properties to add as statements (as PIDs or labels).
#' Note: In contrast to \code{write_wikidata}, this function takes no labels as input, just PIDs.
#' Four special properties can also be used: labels, aliases, descriptions and sitelinks.
#' See [this link](https://www.wikidata.org/wiki/Help:QuickStatements#Adding_labels,_aliases,_descriptions_and_sitelinks) for the syntax.
#' @param values a vector of strings indicating the values to add as statements (as QIDs).
#' Note: if strings are provided, they will be treated as plain text.
#' @param qual.properties a vector, data frame, or tibble of strings indicating the properties to add as qualifiers to statements (as PIDs).
#' @param qual.values a vector, data frame, or tibble of strings indicating the values to add as statements (as QIDs or strings).
#' Note: if strings are provided, they will be treated as plain text.
18 | #' @param src.properties a vector, data frame, or tibble of strings indicating the properties to add as reference sources to statements (as SIDs or labels). 19 | #' Note: if labels are provided, and multiple items match, the first matching item will be used 20 | #' (see \code{as_sid} function), so use with caution. 21 | #' @param src.values a vector, data frame, or tibble of strings indicating the values to add as reference sources to statements (as QIDs or strings). 22 | #' Note: if strings are provided, they will be treated as plain text. 23 | #' @param remove a vector of booleans for each statement indicating whether it should 24 | #' be removed from the item rather than added (default = FALSE) 25 | #' @param format output format as a string. Options include: 26 | #' \describe{ 27 | #' \item{tibble}{easiest format for further manipulation in R} 28 | #' \item{csv}{can be copy-pasted to the Wikibase QuickStatements website (or manipulated in a spreadsheet program). In contrast to the write_wikidata function, the delimiter is `tab`, because QuickStatements expects tab-separated data} 29 | #' \item{api}{a url that can be copy-pasted into a web browser, or automatically submitted (see \code{api.submit} parameter)} 30 | #' \item{website}{open a [QuickStatements](https://quickstatements.toolforge.org/) web browser window summarizing the edits to be made to Wikidata)} 31 | #' } 32 | #' @param format.csv.file path to save the csv file. If none is provided, then printed to console.
33 | #' @param api.username a string indicating your Wikimedia username 34 | #' @param api.token a string indicating your api token (the unique identifier that you can find listed at [your user page](https://quickstatements.toolforge.org/#/user)) 35 | #' @param api.format a string indicating which version of the QuickStatements format is used for the API submission (default = "v1") 36 | #' @param api.batchname a string used to create a named batch (listed at [your batch history page](https://quickstatements.toolforge.org/#/batches)) and tag in the edit summaries 37 | #' @param api.submit boolean indicating whether to open the generated URL in a web browser via \code{browseURL} (else returns the URL that can be copy-pasted into a web browser). Only used when \code{format} is "api" or "website". 38 | #' @param quickstatements.url url to access quickstatements of the corresponding Wikibase instance. The string is concatenated directly with "api.php" (api format) or "#/v1=" (website format), so include the trailing slash. 39 | #' @param coordinate_pid PID of a geocoordinate property; double quotes are stripped from the values of statements using this property 40 | #' 41 | #' @return data formatted to upload to Wikidata (via QuickStatements), 42 | #' optionally also directly uploaded to Wikidata (see \code{format} parameter). 43 | #' 44 | #' @examples 45 | #' # Add a statement to the "Wikidata sandbox" item (Q4115189) 46 | #' # to say that it is an "instance of" (P31) of Q1 (the universe).
47 | #' # The instruction will submit directly to Wikidata via the API 48 | #' # (if you include your Wikibase/Wikimedia username and token) 49 | #' 50 | #' \dontrun{ 51 | #' write_wikibase( 52 | #' items = "Q24", 53 | #' properties = "P2", 54 | #' values = "Q8", 55 | #' format = "api", 56 | #' api.username = "myusername", 57 | #' api.token = "mytoken", 58 | #' api.submit = TRUE, 59 | #' quickstatements.url = NULL # replace NULL with the QuickStatements URL of your Wikibase instance (ending in "/") 60 | #' ) 61 | #' } 62 | #' # note: 63 | #' 64 | #' @export 65 | 66 | write_wikibase <- function(items, 67 | properties = NULL, 68 | values = NULL, 69 | qual.properties = NULL, 70 | qual.values = NULL, 71 | src.properties = NULL, 72 | src.values = NULL, 73 | remove = FALSE, 74 | format = "tibble", 75 | format.csv.file = NULL, 76 | api.username = NULL, 77 | api.token = NULL, # Find yours from [your user page](https://tools.wmflabs.org/quickstatements/#/user) 78 | api.format = "v1", 79 | api.batchname = NULL, 80 | api.submit = TRUE, 81 | quickstatements.url = NULL, 82 | coordinate_pid = NULL) { 83 | 84 | # The "api" format needs credentials: stop early if username or token is missing 85 | if (format == "api") { 86 | if (is.null(api.username)) { 87 | stop("Enter your Wikimedia username") 88 | } 89 | if (is.null(api.token)) { 90 | stop("Enter your api.token (Find yours at https://tools.wmflabs.org/quickstatements/#/user)") 91 | } 92 | } 93 | 94 | # Place all the quickstatements variables into a list 95 | QS <- list( 96 | items = items, 97 | properties = properties, 98 | values = values, 99 | qual.properties = qual.properties, 100 | qual.values = qual.values, 101 | src.properties = src.properties, 102 | src.values = src.values 103 | ) 104 | 105 | QS <- lapply(QS, function(x) { # wrap each supplied argument in a tibble; NULL arguments stay NULL 106 | if (!is.null(x)) { 107 | tibble(x) 108 | } 109 | }) 110 | 111 | # If new QIDs are being created via the "CREATE" keyword, need to insert blank lines across the other parameters to align correctly into rows 112 | # This is the most similar to the standard quickstatements method, though the "CREATExyz" method is preferred
(see createrows.tidy function later) 113 | QS$properties <- createrows(QS$items, QS$properties) 114 | QS$values <- createrows(QS$items, QS$values) 115 | QS$qual.properties <- createrows(QS$items, QS$qual.properties) 116 | QS$qual.values <- createrows(QS$items, QS$qual.values) 117 | QS$src.properties <- createrows(QS$items, QS$src.properties) 118 | QS$src.values <- createrows(QS$items, QS$src.values) 119 | 120 | # If same number of rows as the rowmax, do nothing 121 | # If only one row, repeat it rowmax times 122 | # If wrong number of rows, warn for each offending element, then stop with an error message 123 | rowcount <- unlist(lapply(QS, nrow)) 124 | rowmax <- max(rowcount) 125 | stoprun <- FALSE 126 | 127 | if (var(unlist(rowcount)) != 0) { # NOTE(review): var() of a single count is NA, which would make this `if` error when only one element of QS has rows - confirm callers always supply properties/values 128 | for (x in 1:length(QS)) { 129 | if (nrow(QS[[x]]) == rowmax) { 130 | QS[[x]] <- QS[[x]] 131 | } else if (nrow(QS[[x]]) == 1) { 132 | QS[[x]] <- slice(QS[[x]], rep(1:n(), each = rowmax)) 133 | } else { 134 | stoprun <- TRUE 135 | warning(paste0( 136 | "Not all quickstatement columns have equal rows: ", 137 | nrow(QS$items), " items (including ", 138 | sum(is.create(unlist(QS$items))), " new QIDs to CREATE) were provided, but ", 139 | names(QS)[x], 140 | " has ", 141 | nrow(QS[[x]]), 142 | " rows (expecting ", 143 | nrow(QS$items), 144 | ")." 145 | )) 146 | } 147 | } 148 | } 149 | if (stoprun) { 150 | stop("Therefore stopping") 151 | } 152 | 153 | # Prefix the items flagged in `remove` with "-" to mark those statements for removal 154 | 155 | QS$items[remove, ] <- paste0("-", unlist(QS$items[remove, ])) 156 | 157 | # Pass the values through as_quot (adds quotation marks where needed, in the style required by `format`) 158 | QS$values <- tibble(QS$values) 159 | QS$values <- as_quot(QS$values, format) 160 | 161 | # Convert first three columns into tibble (tibbulate?)
162 | colnames(QS$items) <- "Item" 163 | colnames(QS$properties) <- "Prop" 164 | colnames(QS$values) <- "Value" 165 | 166 | QS.tib <- bind_cols( 167 | QS$items, 168 | QS$properties, 169 | QS$values 170 | ) 171 | 172 | # optionally, append columns for qualifier properties and qualifier values for those statements 173 | if (!is.null(QS$qual.properties) | !is.null(QS$qual.values)) { 174 | QS$qual.properties <- as_pid(QS$qual.properties) 175 | QS$qual.values <- as_quot(QS$qual.values, format) 176 | 177 | colnames(QS$qual.properties) <- paste0("Qual.prop.", 1:ncol(QS$qual.properties)) 178 | colnames(QS$qual.values) <- paste0("Qual.value.", 1:ncol(QS$qual.values)) 179 | 180 | QSq <- list( 181 | QS$qual.properties, 182 | QS$qual.values 183 | ) 184 | QSq.check <- var(sapply(c(QS, QSq), function(x) { 185 | if (is.null(dim(x))) { 186 | length(x) 187 | } else { 188 | nrow(x) 189 | } 190 | })) == 0 191 | if (!QSq.check) { 192 | stop("Incorrect number of qualifiers provided. If no qualifers needed for a statement, use NA or \"\".") 193 | } 194 | 195 | QS.qual.tib <- as_tibble(cbind(QSq[[1]], QSq[[2]])[, c(rbind(1:ncol(QSq[[1]]), ncol(QSq[[1]]) + 1:ncol(QSq[[2]])))]) # the c(rbind(...)) index interleaves the columns as prop 1, value 1, prop 2, value 2, ... (the source columns below are interleaved the same way) 196 | 197 | QS.tib <- tibble( 198 | QS.tib, 199 | QS.qual.tib 200 | ) 201 | } 202 | 203 | # optionally, append columns for source properties and source values for those statements 204 | if (!is.null(src.properties) | !is.null(src.values)) { 205 | QS$src.properties <- as_sid(QS$src.properties) 206 | QS$src.values <- as_quot(QS$src.values, format) 207 | 208 | colnames(QS$src.properties) <- paste0("Src.prop.", 1:ncol(QS$src.properties)) 209 | colnames(QS$src.values) <- paste0("Src.values.", 1:ncol(QS$src.values)) 210 | 211 | QSs <- list( 212 | QS$src.properties, 213 | QS$src.values 214 | ) 215 | QSs.check <- var(sapply(c(QS, QSs), function(x) { 216 | if (is.null(dim(x))) { 217 | length(x) 218 | } else { 219 | nrow(x) 220 | } 221 | })) == 0 222 | if (!QSs.check) { 223 | stop("incorrect number of sources provided") 224 |
} 225 | 226 | QS.src.tib <- as_tibble(cbind(QSs[[1]], QSs[[2]])[, c(rbind(1:ncol(QSs[[1]]), ncol(QSs[[1]]) + 1:ncol(QSs[[2]])))]) 227 | 228 | QS.tib <- tibble( 229 | QS.tib, 230 | QS.src.tib 231 | ) 232 | } 233 | 234 | # if new QIDs are being created via tidy "CREATExyz" keywords, need to insert CREATE lines above and replace subsequent "CREATExyz" with "LAST" 235 | QS.tib <- createrows.tidy(QS.tib) 236 | 237 | # strip double quotes from the values of statements whose property is the coordinate PID 238 | if (!is.null(coordinate_pid)) { 239 | QS.tib$Value <- ifelse(QS.tib$Prop == coordinate_pid, gsub('\"', '', QS.tib$Value), QS.tib$Value) 240 | } 241 | 242 | # csv output: tab-separated; written to format.csv.file without a header row, or printed to the console with one (NOTE(review): confirm the header difference is intended) 243 | if (format == "csv") { 244 | if(!is.null(format.csv.file)) { 245 | write.table(QS.tib, file = format.csv.file, quote = FALSE, row.names = FALSE, sep = "\t", col.names = FALSE) 246 | } else { 247 | write.table(QS.tib, quote = FALSE, row.names = FALSE, sep = "\t") 248 | } 249 | } 250 | # tibble output: return the assembled statements unchanged 251 | if (format == "tibble") { 252 | return(QS.tib) 253 | } 254 | if (format == "api" | format == "website") { 255 | api.temp1 <- format_tsv(QS.tib, col_names = FALSE) 256 | api.temp2 <- gsub("\t", "%7C", api.temp1) # Replace TAB with "%7C" 257 | api.temp3 <- gsub("\n", "%7C%7C", api.temp2) # Replace end-of-line with "%7C%7C" 258 | api.temp4 <- gsub(" ", "%20", api.temp3) # Replace space with "%20" 259 | api.temp5 <- gsub("\\+", "%2B", api.temp4) # Replace plus with "%2B" 260 | api.data <- gsub("/", "%2F", api.temp5) # Replace slash with "%2F" 261 | 262 | if (format == "api") { 263 | if (is.null(api.token)) { # NOTE(review): appears unreachable - a NULL api.token already stops at the start of the function when format == "api" 264 | stop(paste0("API token needed.
Find yours at", quickstatements.url, "#/user")) 265 | } 266 | url <- paste0( 267 | quickstatements.url, "api.php", # NOTE(review): the url is concatenated as-is; the NULL default or a missing trailing slash yields a malformed address 268 | "?action=", "import", 269 | "&submit=", "1", 270 | "&format=", api.format, 271 | "&batchname=", api.batchname, 272 | "&username=", api.username, 273 | "&token=", api.token, 274 | "&data=", api.data 275 | ) 276 | } 277 | if (format == "website") { 278 | # not working with v2 279 | url <- paste0( 280 | quickstatements.url, "#/v1=", 281 | "&data=", api.data 282 | ) 283 | } 284 | if (api.submit) { 285 | browseURL(url) # hand the assembled URL to the system web browser 286 | } else { 287 | return(url) 288 | } 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | # # ----------- Validation checks ----------- 2 | # 3 | # # Below are the queries used to generate sysdata.rda within the R package 4 | # # Ideally run these on startup or something? The parameters change infrequently. 5 | # # Useful for checking whether quickstatements inputs will be valid to warn early. 6 | # 7 | # .onAttach <- function(){ 8 | # 9 | # message('Updating key variables from wikidata (estimated time <1 min)') 10 | # 11 | # # Valid reference source properties 12 | # message(' ... Checking valid reference source properties') 13 | # sparql_query <- 'SELECT ?Wikidata_property_to_indicate_a_source ?Wikidata_property_to_indicate_a_sourceLabel WHERE { 14 | # SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } 15 | # ?Wikidata_property_to_indicate_a_source wdt:P31 wd:Q18608359. 16 | # }' 17 | # SID.valid <- query_wikidata(sparql_query) 18 | # 19 | # # The required data type for each property 20 | # message(' ... Checking required data type for each property') 21 | # sparql_query <- 'SELECT ?property ?propertyLabel ?wbtype WHERE { 22 | # SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
} 23 | # ?property rdf:type wikibase:Property. 24 | # ?property wikibase:propertyType ?wbtype. 25 | # }' 26 | # PID.datatype <- query_wikidata(sparql_query) 27 | # PID.datatype$wbtype <- gsub("ontology#","",PID.datatype$wbtype) 28 | # 29 | # # The expected regex match for each property 30 | # message(' ... Checking expected regex match for each property') 31 | # # Those with a 'format as a regular expression' (P1793) listed as a qualifier of their 'property constraint' (P2302) 32 | # sparql_query1 <- 'SELECT DISTINCT ?Wikidata_property ?Wikidata_propertyLabel ?fmt WHERE { 33 | # SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } 34 | # ?Wikidata_property wdt:P31/wdt:P279* wd:Q18616576. 35 | # ?Wikidata_property p:P2302 [pq:P1793 ?fmt]. 36 | # }' 37 | # # Those with a 'format as a regular expression' (P1793) only listed as a property statement 38 | # sparql_query2 <- 'SELECT DISTINCT ?Wikidata_property ?Wikidata_propertyLabel ?fmt WHERE { 39 | # SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } 40 | # ?Wikidata_property wdt:P31/wdt:P279* wd:Q18616576. 41 | # ?Wikidata_property wdt:P1793 ?fmt 42 | # MINUS{?Wikidata_property p:P2302 [pq:P1793 ?fmtmain]}. 43 | # }' 44 | # PID.constraint <- add_row(query_wikidata(sparql_query1), 45 | # query_wikidata(sparql_query2)) 46 | # 47 | # 48 | # # Language abbreviations 49 | # message(' ... Checking language abbreviations') 50 | # sparql_query <- 'SELECT ?abbrev WHERE { 51 | # ?language wdt:P305 ?abbrev. 52 | # }' 53 | # lang.abbrev <- query_wikidata(sparql_query) 54 | # 55 | # # Language abbreviations for current wikis 56 | # message(' ... Checking language abbreviations for current wikis') 57 | # sparql_query <- 'SELECT ?abbrev WHERE { 58 | # ?Wikipedia_language_edition wdt:P31 wd:Q10876391. 59 | # ?Wikipedia_language_edition wdt:P424 ?abbrev. 
60 | # }' 61 | # lang.abbrev.wiki <- query_wikidata(sparql_query) 62 | # 63 | # # Wikimedia abbreviations for current wikis 64 | # message(' ... Checking Wikimedia abbreviations for current wikis') 65 | # sparql_query <- 'SELECT ?abbrev WHERE { 66 | # ?Wiki_edition wdt:P1800 ?abbrev. 67 | # }' 68 | # abbrev.wiki <- query_wikidata(sparql_query) 69 | # 70 | # # #example 71 | # # grep(as.matrix(PID.constraint[PID.constraint$Wikidata_property=="P968","fmt"]), 72 | # # "mailto:t.shafee@gmail.com", 73 | # # perl=TRUE) 74 | # assign(x = "WD.globalvar", 75 | # envir = .GlobalEnv, 76 | # value = list(SID.valid = SID.valid, 77 | # PID.datatype = PID.datatype, 78 | # PID.constraint = PID.constraint, 79 | # lang.abbrev = lang.abbrev, 80 | # lang.abbrev.wiki = lang.abbrev.wiki, 81 | # abbrev.wiki = abbrev.wiki) 82 | # ) 83 | # 84 | # message('Update complete (data saved as WD.globalvar)') 85 | # } 86 | 87 | 88 | # # Below used to save as system data within an R package 89 | # save(list="WD.globalvar",file="R//sysdata.rda", compress = "xz") -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | WikidataR 2 | ========= 3 | 4 | A combined R package for reading, writing and handling Wikidata semantic data (via APIs). 5 | 6 | __Authors:__ [Thomas Shafee](https://github.com/TS404) (aut., maint.), [Os Keys](https://github.com/Ironholds) (aut., cre.) 
7 | __License:__ [MIT](https://opensource.org/licenses/MIT) 8 | __Status:__ Stable 9 | 10 | Description 11 | ====== 12 | WikidataR includes functions to: 13 | - read from wikidata (single items or properties) 14 | - query wikidata (retrieving all items that match a set of criteria via the [Wikidata SPARQL query service](https://query.wikidata.org)) 15 | - write to Wikidata (adding new items or statements via [QuickStatements](https://tools.wmflabs.org/quickstatements)) 16 | - handle and manipulate Wikidata objects (as lists and tibbles) 17 | For details on how to best use it, see the examples below. 18 | 19 | Installation 20 | ====== 21 | 22 | To download the stable version of WikidataR from CRAN: 23 | 24 | install.packages(c("WikidataR","WikidataQueryServiceR")) 25 | 26 | To get the most current development version from GitHub: 27 | 28 | install.packages("devtools") 29 | devtools::install_github("ts404/WikidataR") 30 | 31 | Examples 32 | ====== 33 | ### Search Wikidata to see if an item exists (example: pharmaceuticals) 34 | For cases where you don't already know the QID of an item or the PID of a property, you can search wikidata by name. Note that some search terms will return multiple possible items. You can also specify a language (defaults to English). 35 | 36 | ``` r 37 | find_item("Paracetamol") 38 | find_property("medical condition treated") 39 | ``` 40 | Which returns the lists: 41 | 42 | ``` 43 | acetaminophen (Q57055) - common drug for pain and fever 44 | Paracetamol (Q36716177) - scientific article published on July 1980 45 | Paracetamol (Q54982056) - musical group 46 | ... 47 | ``` 48 | 49 | and 50 | ``` 51 | medical condition treated (P2175) - disease that this pharmaceutical drug, procedure, or therapy is used to treat 52 | ``` 53 | Elements within those lists include basic information from wikidata (ID, description, labels). The QID or PID can then be used to get the full data for the item (see below).
54 | 55 | ### Convert between identifiers 56 | Wikidata is an excellent thesaurus for different identifiers. For example it is possible to convert from any identifier to wikidata QIDs or between different identifiers 57 | ``` r 58 | qid_from_identifier('ISBN-13','978-0-262-53817-6') 59 | identifier_from_identifier('ORCID iD','IMDb ID',c('0000-0002-7865-7235','0000-0003-1079-5604')) 60 | ``` 61 | Which returns the lists: 62 | ``` 63 | 978-0-262-53817-6 Q102035721 Wikipedia @ 20: Stories of an Incomplete Revolution 64 | ``` 65 | and 66 | ``` 67 | # A tibble: 2 x 2 68 | value return 69 | 70 | 1 0000-0002-7865-7235 nm2118834 71 | 2 0000-0003-1079-5604 nm1821217 72 | ``` 73 | 74 | ### Get full items from Wikidata (example: journal articles) 75 | In this example, we search for three articles using their DOIs ([P356](https://www.wikidata.org/wiki/Property:P356)), find their QIDs, download their full wikidata entries, and then extract the "main topics" (note PID didn't have to be used). 76 | 77 | ``` r 78 | article.qid <- qid_from_DOI(c('10.15347/WJM/2017.007','10.15347/WJM/2019.001','10.15347/WJM/2019.007')) 79 | article.q <- get_item(article.qid) 80 | article.topics.p <- extract_claims(article.q, "main topic") 81 | get_names_from_properties(article.topics.p) 82 | ``` 83 | Which returns a tibble for each of the journal articles, listing the main topics of each and their QIDs. 
84 | ``` 85 | $`10.15347/WJM/2017.007` 86 | # A tibble: 1 x 2 87 | QID value 88 | 89 | 1 P921.Q164778 rotavirus 90 | 91 | $`10.15347/WJM/2019.001` 92 | # A tibble: 2 x 2 93 | QID value 94 | 95 | 1 P921.Q15989108 Western African Ebola virus epidemic 96 | 2 P921.Q10538943 Ebola virus 97 | 98 | $`10.15347/WJM/2019.007` 99 | # A tibble: 2 x 2 100 | QID value 101 | 102 | 1 P921.Q1820650 readability 103 | 2 P921.Q16235120 health information on Wikipedia 104 | ``` 105 | 106 | ### Query Wikidata with complex searches (example: movie genres) 107 | 108 | In this example, we search Wikidata for any items that are an "instance of" ([P31](https://www.wikidata.org/wiki/Property:P31)) "film" ([Q11424](https://www.wikidata.org/wiki/Q11424)) that has the label "The Cabin in the Woods" ([Q45394](https://www.wikidata.org/wiki/Q45394)), and ask for the item's genres ([P136](https://www.wikidata.org/wiki/Property:P136)). 109 | 110 | ``` r 111 | query_wikidata('SELECT DISTINCT 112 | ?genre ?genreLabel 113 | WHERE { 114 | ?film wdt:P31 wd:Q11424. 115 | ?film rdfs:label "The Cabin in the Woods"@en. 116 | ?film wdt:P136 ?genre. 117 | SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } 118 | }') 119 | ``` 120 | Which returns a tibble: 121 | ``` 122 | # A tibble: 6 x 2 123 | genre genreLabel 124 | 125 | 1 http://www.wikidata.org/entity/Q3072049 zombie film 126 | 2 http://www.wikidata.org/entity/Q471839 science fiction film 127 | 3 http://www.wikidata.org/entity/Q859369 comedy-drama 128 | 4 http://www.wikidata.org/entity/Q1342372 monster film 129 | 5 http://www.wikidata.org/entity/Q853630 slasher film 130 | 6 http://www.wikidata.org/entity/Q224700 comedy horror 131 | ``` 132 | 133 | For more example SPARQL queries, see [this page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) on [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page). 
134 | 135 | `query_wikidata()` can accept multiple queries, returning a (potentially named) list of data frames. If the vector of SPARQL queries is named, the results will inherit those names. 136 | 137 | #### Links for learning SPARQL 138 | 139 | - [A beginner-friendly course for SPARQL](https://www.wikidata.org/wiki/Wikidata:A_beginner-friendly_course_for_SPARQL) 140 | - Building a SPARQL query: [Museums on Instagram](https://www.wikidata.org/wiki/Help:SPARQL/Building_a_query/Museums_on_Instagram) 141 | - [SPARQL Query Examples](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) for WDQS 142 | - [Using SPARQL to access Linked Open Data](https://programminghistorian.org/lessons/graph-databases-and-SPARQL) by Matthew Lincoln 143 | - Interesting or illustrative [SPARQL queries](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries) for Wikidata 144 | - Wikidata [2016 SPARQL Workshop](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/2016_SPARQL_Workshop) 145 | - [Wikidata SPARQL Query video tutorial](https://www.youtube.com/watch?v=1jHoUkj_mKw) by Navino Evans 146 | - *[Learning SPARQL](http://www.learningsparql.com/)* by Bob DuCharme 147 | - [WDQS User Manual](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual) 148 | 149 | ### Write to Wikidata (example: paintings) 150 | In this example we'll write directly to wikidata via the [QuickStatements](https://tools.wmflabs.org/quickstatements) format. 151 | ``` r 152 | write_wikidata(items = c("Q4115189","Q13406268"), 153 | properties = "author", 154 | values = c("Q762","Q41406"), 155 | format = "api", 156 | api.username = "myusername", # Enter your Wikimedia username here 157 | api.token = "" #REDACTED# Find yours from https://tools.wmflabs.org/quickstatements/#/user 158 | ) 159 | ``` 160 | Results in the statements being directly added to wikidata under your username via the API. 
161 | > The Mona Lisa (Q12418) has the Creator (P170) of Leonardo da Vinci (Q762) 162 | > The Scream (Q471379) has the Creator (P170) of Edvard Munch (Q41406) 163 | 164 | Alternatively, you can print via format=tibble and paste into the [QuickStatements](https://tools.wmflabs.org/quickstatements) website. 165 | 166 | ### Combining all of the above (example: journal articles) 167 | The example below finds all articles in a journal, works out the URL for their peer reviews, and writes those URLs into those articles' wikidata items. 168 | ``` r 169 | sparql_query <- 'SELECT ?Article ?ArticleLabel ?JLabel ?T ?peer_review_URL WHERE { 170 | SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } 171 | ?Article wdt:P1433 wd:Q24657325. 172 | OPTIONAL { ?Article wdt:P1433 ?J. } 173 | OPTIONAL { ?Article wdt:P1476 ?T. } 174 | OPTIONAL { ?Article wdt:P7347 ?peer_review_URL. }} 175 | LIMIT 10000' 176 | articles.qr <- as_tibble(query_wikidata(sparql_query)) 177 | articles.qr <- articles.qr[articles.qr$peer_review_URL=="",] #omit those with review URLs listed 178 | review.URLs <- paste0('https://en.wikiversity.org/wiki/Talk:', 179 | articles.qr$JLabel, 180 | "/", 181 | articles.qr$T 182 | ) 183 | review.URLs <- gsub(" ","_",review.URLs) 184 | 185 | write_wikidata(items = sapply(sapply(articles.qr$Article,pattern = "/",stringr::str_split),tail,1), 186 | properties = "Peer review URL", 187 | values = review.URLs, 188 | format = "tibble", 189 | ) 190 | 191 | write_wikidata(items = sapply(sapply(articles.qr$Article,pattern = "/",stringr::str_split),tail,1), 192 | properties = "Peer review URL", 193 | values = review.URLs, 194 | format = "api", 195 | api.username = "myusername", 196 | api.token = , #REDACTED# Find yours from https://tools.wmflabs.org/quickstatements/#/user 197 | ) 198 | ``` 199 | ### Acknowledgements 200 | This package combines and builds on the utilities of Os Keyes' [WikidataR](https://github.com/Ironholds/WikidataR), Christian Graul's 
201 | [rwikidata](https://github.com/chgrl/rwikidata), Mikhail Popov's [WikidataQueryServiceR](https://github.com/wikimedia/WikidataQueryServiceR), and Serena Signorelli's [QueryWikidataR](https://github.com/serenasignorelli/QueryWikidataR) packages. It also uses Magnus Manske's [QuickStatements](https://github.com/magnusmanske/quickstatements) tool. 202 | -------------------------------------------------------------------------------- /WikidataR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: XeLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source --resave-data=best 18 | PackageCheckArgs: --as-cran 19 | PackageRoxygenize: rd,collate,namespace,vignette 20 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | * local R installation, R 3.6.3 3 | * ubuntu 16.04 (on travis-ci), R 3.6.3 4 | * win-builder (devel) 5 | 6 | ## R CMD check results 7 | 8 | 0 errors | 0 warnings | 1 note 9 | 10 | * This is a new release of WikidataR, previously maintained by Os Keyes (github.com/Ironholds) 11 | * They have consented to hand maintainership over to me (github.com/TS404).
12 | -------------------------------------------------------------------------------- /inst/extdata/WD.globalvar.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TS404/WikidataR/d7873b6b80a951130e9c4ec5c17068bd4898fb6e/inst/extdata/WD.globalvar.RDS -------------------------------------------------------------------------------- /man/WD.globalvar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{WD.globalvar} 4 | \alias{WD.globalvar} 5 | \title{Global variables for Wikidata properties} 6 | \format{ 7 | A list of tibbles documenting key property constraints from Wikidata 8 | \describe{ 9 | \item{SID.valid}{valid reference source properties} 10 | \item{PID.datatype}{required data type for each property} 11 | \item{PID.constraint}{expected regex match for each property} 12 | \item{lang.abbrev}{language abbreviations} 13 | \item{lang.abbrev.wiki}{language abbreviations for current wikis} 14 | \item{abbrev.wiki}{Wikimedia abbreviations for current wikis} 15 | ... 16 | } 17 | } 18 | \description{ 19 | A dataset of Wikidata global variables. 
20 | } 21 | -------------------------------------------------------------------------------- /man/WikidataR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/WikidataR.R 3 | \docType{package} 4 | \name{WikidataR} 5 | \alias{WikidataR} 6 | \alias{WikidataR-package} 7 | \title{API client library for Wikidata} 8 | \description{ 9 | This package serves as an API client for reading and writing 10 | to and from \href{https://www.wikidata.org/wiki/Wikidata:Main_Page}{Wikidata}, (including 11 | via the \href{https://quickstatements.toolforge.org/}{QuickStatements} format), 12 | as well as for reading from \href{https://www.wikipedia.org}{Wikipedia}. 13 | } 14 | \seealso{ 15 | \code{\link{get_random}} for selecting a random item or property, 16 | \code{\link{get_item}} for a /specific/ item or property, or \code{\link{find_item}} 17 | for using search functionality to pull out item or property IDs where the descriptions 18 | or aliases match a particular search term. 19 | } 20 | -------------------------------------------------------------------------------- /man/as_pid.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{as_pid} 4 | \alias{as_pid} 5 | \title{Convert an input to a property PID} 6 | \usage{ 7 | as_pid(x) 8 | } 9 | \arguments{ 10 | \item{x}{a vector, data frame, or tibble of strings representing Wikidata properties} 11 | } 12 | \value{ 13 | If the inputted string is a valid PID, return the string. 14 | If the inputted string matches a property label, return its PID. 15 | If the inputted string matches multiple labels of multiple properties, return the PID of the first hit. 
16 | } 17 | \description{ 18 | Convert an input string to the most likely property PID 19 | } 20 | \examples{ 21 | # if input string is a valid PID 22 | as_pid("P50") 23 | # if input string matches multiple item labels 24 | as_pid("author") 25 | # if input string matches a single unique label 26 | as_pid("Scopus author ID") 27 | } 28 | -------------------------------------------------------------------------------- /man/as_qid.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{as_qid} 4 | \alias{as_qid} 5 | \title{Convert an input to a item QID.} 6 | \usage{ 7 | as_qid(x) 8 | } 9 | \arguments{ 10 | \item{x}{a vector, data frame, or tibble of strings representing wikidata items} 11 | } 12 | \value{ 13 | if the inputted string is a valid QID, return the string. 14 | If the inputted string matches an item label, return its QID. 15 | If the inputted string matches multiple labels of multiple items, return the QID of the first hit. 16 | } 17 | \description{ 18 | Convert an input string to the most likely item 19 | \href{https://www.wikidata.org/wiki/Q43649390}{QID}. 
20 | } 21 | \examples{ 22 | # if input string is a valid QID 23 | as_qid("Q42") 24 | # if input string matches multiple item labels 25 | as_qid("Douglas Adams") 26 | # if input string matches a single unique label 27 | as_qid("Douglas Adams and the question of arterial blood pressure in mammals") 28 | } 29 | -------------------------------------------------------------------------------- /man/as_quot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{as_quot} 4 | \alias{as_quot} 5 | \title{Add quotation marks} 6 | \usage{ 7 | as_quot(x, format = "tibble") 8 | } 9 | \arguments{ 10 | \item{x}{a vector, data frame, or tibble of strings} 11 | 12 | \item{format}{either "tibble" / "csv" to use plain quotation marks (default), 13 | or "api" / "website" to use '\%22'} 14 | } 15 | \value{ 16 | A tibble of items inside of escaped quotation marks, 17 | unless an item is already in escaped quotation marks or is a 18 | \href{https://www.wikidata.org/wiki/Q43649390}{QID}, 19 | in which case it is returned unchanged. 20 | } 21 | \description{ 22 | Add escaped quotation marks around strings that need them ready for 23 | submission to an API. 24 | } 25 | \examples{ 26 | as_quot("text") 27 | } 28 | -------------------------------------------------------------------------------- /man/as_sid.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{as_sid} 4 | \alias{as_sid} 5 | \title{Convert an input to a source property SID} 6 | \usage{ 7 | as_sid(x) 8 | } 9 | \arguments{ 10 | \item{x}{a vector, data frame, or tibble of strings representing Wikidata 11 | source properties.} 12 | } 13 | \value{ 14 | If the inputted string is a valid SID, return the string.
15 | If the inputted string matches a property label, return its SID. 16 | If the inputted string matches multiple labels of multiple properties, 17 | return the SID of the first hit. 18 | } 19 | \description{ 20 | Convert an input string to the most likely source SID 21 | (equivalent to PID). 22 | } 23 | \examples{ 24 | # if input string is a valid SID 25 | as_sid("S854") 26 | # if input string matches multiple property labels 27 | as_sid("URL") 28 | # if input string matches a single unique label 29 | as_sid("Reference URL") 30 | } 31 | -------------------------------------------------------------------------------- /man/check_input.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{check_input} 4 | \alias{check_input} 5 | \title{Generic input checker} 6 | \usage{ 7 | check_input(input, substitution) 8 | } 9 | \arguments{ 10 | \item{input}{string to check} 11 | 12 | \item{substitution}{string for what's been looked for} 13 | } 14 | \value{ 15 | boolean indicating whether the checked string contains a match for the substitution string 16 | } 17 | \description{ 18 | Utility function to handle namespaces.
Used by \code{get_item} and \code{get_property} 19 | } 20 | -------------------------------------------------------------------------------- /man/createrows.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{createrows} 4 | \alias{createrows} 5 | \title{"CREATE" rows} 6 | \usage{ 7 | createrows(items, vector) 8 | } 9 | \arguments{ 10 | \item{items}{a vector, data frame or tibble of items (which may or may not contain the keyword "CREATE")} 11 | 12 | \item{vector}{a vector of properties or values which may be expanded based on the items vector} 13 | } 14 | \value{ 15 | if the vector is NULL, return NULL. Otherwise, if the "CREATE" keyword appears in the 16 | items vector, insert blank strings at those positions in the vector. 17 | } 18 | \description{ 19 | Add in empty lines for QuickStatements CREATE rows that mint new QIDs. 20 | This is a slightly messy quirk of the QuickStatements format that mints new QIDs via a line 21 | containing only "CREATE", so this function is a way to approximate that behavior in a tibble 22 | } 23 | -------------------------------------------------------------------------------- /man/createrows.tidy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{createrows.tidy} 4 | \alias{createrows.tidy} 5 | \title{"CREATE" rows from tidy format} 6 | \usage{ 7 | createrows.tidy(QS.tib) 8 | } 9 | \arguments{ 10 | \item{QS.tib}{a tibble of items, values and properties (optionally qualifiers and sources).} 11 | } 12 | \value{ 13 | a tibble, with items that start with "CREATE" followed by any unique text causing the 14 | addition of a "Create" line above, being replaced with "LAST" in the QuickStatements 15 | format to create new QIDs. 
16 | } 17 | \description{ 18 | Add in QuickStatements CREATE rows that mint new QIDs from tidy input data. 19 | New items are created by any item that starts with the text "CREATE" followed 20 | by any unique ID. 21 | } 22 | -------------------------------------------------------------------------------- /man/disambiguate_QIDs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/disambiguators.R 3 | \name{disambiguate_QIDs} 4 | \alias{disambiguate_QIDs} 5 | \title{Disambiguate QIDs} 6 | \usage{ 7 | disambiguate_QIDs( 8 | list, 9 | variablename = "variables", 10 | variableinfo = NULL, 11 | filter_property = NULL, 12 | filter_variable = NULL, 13 | filter_firsthit = FALSE, 14 | Q_min = NULL, 15 | auto_create = FALSE, 16 | limit = 10 17 | ) 18 | } 19 | \arguments{ 20 | \item{list}{a list or vector of text strings to find potential QID matches to. 21 | Can also be a list of lists (see examples)} 22 | 23 | \item{variablename}{type of items in the list that are being disambiguated (used in messages)} 24 | 25 | \item{variableinfo}{additional information about items that are being disambiguated (used in messages)} 26 | 27 | \item{filter_property}{property to filter on (e.g. "P31" to filter on "instance of")} 28 | 29 | \item{filter_variable}{values of that property to use to filter out (e.g. "Q571" to filter out books)} 30 | 31 | \item{filter_firsthit}{apply filter to the first match presented or only if alternatives requested?
32 | (default = FALSE, note: true is slower if filter not needed on most matches)} 33 | 34 | \item{Q_min}{return only possible hits with QIDs above the provided value} 35 | 36 | \item{auto_create}{if no match found, automatically assign "CREATE"} 37 | 38 | \item{limit}{number of alternative possible Wikidata items to present if multiple potential matches} 39 | } 40 | \value{ 41 | a vector of: 42 | \describe{ 43 | \item{QID}{Selected QID (for when an appropriate Wikidata match exists)} 44 | \item{CREATE}{Mark that a new Wikidata item should be created (for when no appropriate Wikidata match yet exists)} 45 | \item{NA}{Mark that no Wikidata item is needed} 46 | \item{STOP}{Mark that the process was halted at this point (so that output can be used as input to the function later)} 47 | } 48 | } 49 | \description{ 50 | Interactive function that presents alternative possible QID matches for a list of text 51 | strings and provides options for choosing between alternatives, rejecting all presented alternatives, 52 | or creating new items. Useful in cases where a list of text strings may have either missing Wikidata items 53 | or multiple alternative potential matches that need to be manually disambiguated. Can also be used on 54 | lists of lists (see examples). For long lists of items, the process can be stopped partway through and 55 | the returned vector will indicate where the process was stopped.
56 | } 57 | \examples{ 58 | \dontrun{ 59 | #Disambiguating possible QID matches for these music genres 60 | #Results should be: 61 | # "Q22731" as the first match 62 | # "Q147538" as the first match 63 | # "Q3947" as the second alternative match 64 | disambiguate_QIDs(list=c("Rock","Pop","House"), 65 | variablename="music genre") 66 | 67 | #Disambiguating possible QID matches for these three words, but not the music genres 68 | #This will take longer as the filtering step is slower 69 | #Results should be: 70 | # "Q22731" (the material) as the first match 71 | # "Q147538" (the soft drink) as the second alternative match 72 | # "Q3947" (the building) as the first match 73 | disambiguate_QIDs(list=c("Rock","Pop","House"), 74 | filter_property="instance of", 75 | filter_variable="music genre", 76 | filter_firsthit=TRUE, 77 | variablename="concept, not the music genre") 78 | 79 | #Disambiguating possible QID matches for the multiple expertise of 80 | #these three people as list of lists 81 | disambiguate_QIDs(list=list(alice=list("physics","chemistry","maths"), 82 | barry=list("history"), 83 | clair=list("law","genetics","ethics")), 84 | variablename="expertise") 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /man/extract_claims.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{extract_claims} 4 | \alias{extract_claims} 5 | \title{Extract claims from returned item data} 6 | \usage{ 7 | extract_claims(items, claims) 8 | } 9 | \arguments{ 10 | \item{items}{a list of one or more Wikidata items returned with 11 | \code{\link{get_item}}.} 12 | 13 | \item{claims}{a vector of claims (in the form "P321", "P12") to look for 14 | and extract.} 15 | } 16 | \value{ 17 | a list containing one sub-list for each entry in \code{items}, 18 | and (below that) the found data for each claim. 
In the event a claim 19 | cannot be found for an item, an \code{NA} will be returned 20 | instead. 21 | } 22 | \description{ 23 | Extract claim information from data returned using 24 | \code{\link{get_item}}. 25 | } 26 | \examples{ 27 | # Get item data 28 | adams_data <- get_item("42") 29 | # Get claim data 30 | claims <- extract_claims(adams_data, "P31") 31 | } 32 | -------------------------------------------------------------------------------- /man/extract_para.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{extract_para} 4 | \alias{extract_para} 5 | \title{Extract a paragraph of text} 6 | \usage{ 7 | extract_para(text, para = 1, templ = NULL) 8 | } 9 | \arguments{ 10 | \item{text}{the input text as a string} 11 | 12 | \item{para}{number indicating which paragraph(s) to return (default=1)} 13 | 14 | \item{templ}{an optional string specifying a MediaWiki template within 15 | which to restrict the search} 16 | } 17 | \value{ 18 | the nth paragraph of the input text.
19 | } 20 | \description{ 21 | Return the nth paragraph of a section of text 22 | Useful for extracting information from Wikipedia or other wikimarkup text 23 | } 24 | -------------------------------------------------------------------------------- /man/filter_qids.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{filter_qids} 4 | \alias{filter_qids} 5 | \title{Filter QIDs} 6 | \usage{ 7 | filter_qids( 8 | ids, 9 | property = "P31", 10 | filter = c("Q737498", "Q5633421", "Q7725634", "Q13442814", "Q18918145"), 11 | message = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{ids}{QIDs to check} 16 | 17 | \item{property}{property to check (default = P31 to filter on "instance of")} 18 | 19 | \item{filter}{values of that property to use to filter out 20 | (default = Q737498, Q5633421, Q7725634, Q13442814, and Q18918145 to remove academic 21 | publications or books)} 22 | 23 | \item{message}{message to return (useful for disambiguate_QIDs function)} 24 | } 25 | \value{ 26 | a vector of QIDs that do not match the property filter 27 | } 28 | \description{ 29 | For a QID or vector of QIDs, remove ones that match a particular statement 30 | (e.g. remove all that are instances of academic publications or books). 31 | } 32 | \details{ 33 | The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID) 34 | is the unique identifier (UID) used in Wikidata. 
35 | } 36 | \examples{ 37 | \dontrun{ 38 | # Filter three items called "Earth Science" to show only those that aren't 39 | # books, journals or journal articles 40 | filter_qids(ids = c("Q96695546","Q8008","Q58966429"), 41 | property = "P31", 42 | filter = c("Q737498","Q5633421","Q7725634","Q13442814","Q18918145")) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /man/find_item.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gets.R 3 | \name{find_item} 4 | \alias{find_item} 5 | \alias{find_property} 6 | \title{Search for Wikidata items or properties that match a search term} 7 | \usage{ 8 | find_item( 9 | search_term, 10 | language = "en", 11 | limit = 10, 12 | response_language = "en", 13 | ... 14 | ) 15 | 16 | find_property( 17 | search_term, 18 | language = "en", 19 | response_language = "en", 20 | limit = 10 21 | ) 22 | } 23 | \arguments{ 24 | \item{search_term}{A term to search for.} 25 | 26 | \item{language}{The language to return the labels and descriptions in; this should 27 | consist of an ISO language code. Defaults to \code{"en"}.} 28 | 29 | \item{limit}{The number of results to return; set to \code{10} by default.} 30 | 31 | \item{\dots}{further arguments to pass to \code{\link[httr:GET]{httr::GET}}.} 32 | } 33 | \value{ 34 | A list containing the result of the query. 35 | } 36 | \description{ 37 | \code{find_item} and \code{find_property} allow you to retrieve a set 38 | of Wikidata items or properties where the aliases or descriptions match a particular 39 | search term. As with other \code{WikidataR} code, custom print methods are available; 40 | use \code{\link{str}} to manipulate and see the underlying structure of the data.
41 | } 42 | \examples{ 43 | 44 | #Check for entries relating to Douglas Adams in some way 45 | adams_items <- find_item("Douglas Adams") 46 | 47 | #Check for properties involving the peerage 48 | peerage_props <- find_property("peerage") 49 | 50 | } 51 | \seealso{ 52 | \code{\link{get_random}} for selecting a random item or property, 53 | or \code{\link{get_item}} for selecting a specific item or property. 54 | } 55 | -------------------------------------------------------------------------------- /man/get_example.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gets.R 3 | \name{get_example} 4 | \alias{get_example} 5 | \title{Get an example SPARQL query from Wikidata} 6 | \usage{ 7 | get_example(example_name) 8 | } 9 | \arguments{ 10 | \item{example_name}{the names of the examples as they appear on 11 | \href{https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples}{this page}} 12 | } 13 | \value{ 14 | The SPARQL query as a character vector. 15 | } 16 | \description{ 17 | Gets the specified example(s) from the 18 | \href{https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples}{SPARQL query service examples page} 19 | using \href{https://www.wikidata.org/w/api.php}{Wikidata's MediaWiki API}. 20 | } 21 | \details{ 22 | If you are planning on extracting multiple examples, please provide 23 | all the names as a single vector for efficiency.
24 | } 25 | \examples{ 26 | \dontrun{ 27 | sparql_query <- get_example(c("Cats", "Horses")) 28 | query_wikidata(sparql_query) 29 | # returns a named list with two data frames 30 | # one called "Cats" and one called "Horses" 31 | sparql_query <- get_example("Largest cities with female mayor") 32 | cat(sparql_query) 33 | query_wikidata(sparql_query) 34 | } 35 | } 36 | \seealso{ 37 | \code{\link{query_wikidata}} 38 | } 39 | -------------------------------------------------------------------------------- /man/get_geo_box.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/geo.R 3 | \name{get_geo_box} 4 | \alias{get_geo_box} 5 | \title{Get geographic entities based on a bounding box} 6 | \usage{ 7 | get_geo_box( 8 | first_city_code, 9 | first_corner, 10 | second_city_code, 11 | second_corner, 12 | language = "en", 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{first_city_code}{a Wikidata item, or series of items, to use for 18 | one corner of the bounding box.} 19 | 20 | \item{first_corner}{the direction of \code{first_city_code} relative 21 | to \code{city} (eg "NorthWest", "SouthEast").} 22 | 23 | \item{second_city_code}{a Wikidata item, or series of items, to use for 24 | the opposite corner of the bounding box.} 25 | 26 | \item{second_corner}{the direction of \code{second_city_code} relative 27 | to \code{city} (eg "NorthWest", "SouthEast").} 28 | 29 | \item{language}{the two-letter language code to use for the name 30 | of the item. "en" by default.} 31 | 32 | \item{\dots}{further arguments to pass to \code{\link[httr:GET]{httr::GET}}.} 33 | } 34 | \value{ 35 | a data.frame of 5 columns: 36 | \itemize{ 37 | \item{item}{ the Wikidata identifier of each object associated with 38 | \code{entity}.} 39 | \item{name}{ the name of the item, if available, in the requested language.
If it 40 | is not available, \code{NA} will be returned instead.} 41 | \item{latitude}{ the latitude of \code{item}} 42 | \item{longitude}{ the longitude of \code{item}} 43 | \item{entity}{ the entity the item is associated with (necessary for multi-entity 44 | queries).} 45 | } 46 | } 47 | \description{ 48 | \code{get_geo_box} retrieves all geographic entities in 49 | Wikidata that fall within a bounding box defined by two existing items 50 | with geographic attributes (usually cities). 51 | } 52 | \examples{ 53 | # Simple bounding box 54 | \donttest{bruges_box <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest")} 55 | 56 | # Custom language 57 | \donttest{bruges_box_fr <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest", 58 | language = "fr")} 59 | 60 | } 61 | \seealso{ 62 | \code{\link{get_geo_entity}} for using an unrestricted search or simple radius, 63 | rather than a bounding box. 64 | } 65 | -------------------------------------------------------------------------------- /man/get_geo_entity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/geo.R 3 | \name{get_geo_entity} 4 | \alias{get_geo_entity} 5 | \title{Retrieve geographic information from Wikidata} 6 | \usage{ 7 | get_geo_entity(entity, language = "en", radius = NULL, limit = 100, ...) 8 | } 9 | \arguments{ 10 | \item{entity}{a Wikidata item (\code{Q...}) or series of items, to check 11 | for associated geo-tagged items.} 12 | 13 | \item{language}{the two-letter language code to use for the name 14 | of the item.
"en" by default, because we're imperialist 15 | anglocentric westerners.} 16 | 17 | \item{radius}{optionally, a radius (in kilometers) around \code{entity} 18 | to restrict the search to.} 19 | 20 | \item{limit}{the maximum number of results to return.} 21 | 22 | \item{\dots}{further arguments to pass to \code{\link[httr:GET]{httr::GET}}.} 23 | } 24 | \value{ 25 | a data.frame of 5 columns: 26 | \itemize{ 27 | \item{item}{ the Wikidata identifier of each object associated with 28 | \code{entity}.} 29 | \item{name}{ the name of the item, if available, in the requested language. If it 30 | is not available, \code{NA} will be returned instead.} 31 | \item{latitude}{ the latitude of \code{item}} 32 | \item{longitude}{ the longitude of \code{item}} 33 | \item{entity}{ the entity the item is associated with (necessary for multi-entity 34 | queries).} 35 | } 36 | } 37 | \description{ 38 | \code{get_geo_entity} retrieves the item ID, latitude 39 | and longitude of any object with geographic data associated with \emph{another} 40 | object with geographic data (example: all the locations around/near/associated with 41 | a city). 42 | } 43 | \examples{ 44 | # All entities 45 | \donttest{sf_locations <- get_geo_entity("Q62")} 46 | 47 | # Entities with French, rather than English, names 48 | \donttest{sf_locations <- get_geo_entity("Q62", language = "fr")} 49 | 50 | # Entities within 1km 51 | \donttest{sf_close_locations <- get_geo_entity("Q62", radius = 1)} 52 | 53 | # Multiple entities 54 | \donttest{multi_entity <- get_geo_entity(entity = c("Q62", "Q64"))} 55 | 56 | } 57 | \seealso{ 58 | \code{\link{get_geo_box}} for using a bounding box 59 | rather than an unrestricted search or simple radius.
60 | } 61 | -------------------------------------------------------------------------------- /man/get_item.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gets.R 3 | \name{get_item} 4 | \alias{get_item} 5 | \alias{get_property} 6 | \title{Retrieve specific Wikidata items or properties} 7 | \usage{ 8 | get_item(id, ...) 9 | 10 | get_property(id, ...) 11 | } 12 | \arguments{ 13 | \item{id}{the ID number(s) of the item or property you're looking for. This can be in 14 | various formats; either a numeric value ("200"), the full name ("Q200") or 15 | even with an included namespace ("Property:P10") - the function will format 16 | it appropriately. This function is vectorized and will happily accept 17 | multiple IDs.} 18 | 19 | \item{\dots}{further arguments to pass to \code{\link[httr:GET]{httr::GET}}.} 20 | } 21 | \description{ 22 | \code{get_item} and \code{get_property} allow you to retrieve the data associated 23 | with individual Wikidata items and properties, respectively. As with 24 | other \code{WikidataR} code, custom print methods are available; use \code{\link{str}} 25 | to manipulate and see the underlying structure of the data. 26 | } 27 | \examples{ 28 | 29 | #Retrieve a specific item 30 | adams_metadata <- get_item("42") 31 | 32 | #Retrieve a specific property 33 | object_is_child <- get_property("P40") 34 | 35 | } 36 | \seealso{ 37 | \code{\link{get_random}} for selecting a random item or property, 38 | or \code{\link{find_item}} for using search functionality to pull out 39 | item or property IDs where the descriptions or aliases match a particular 40 | search term.
41 | } 42 | -------------------------------------------------------------------------------- /man/get_names_from_properties.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{get_names_from_properties} 4 | \alias{get_names_from_properties} 5 | \title{Get names of properties} 6 | \usage{ 7 | get_names_from_properties(properties) 8 | } 9 | \arguments{ 10 | \item{properties}{a claims list from \code{extract_claims}} 11 | } 12 | \value{ 13 | tibble of labels for each property for a set of claims 14 | } 15 | \description{ 16 | For a claim or set of claims, return the names of the properties 17 | } 18 | -------------------------------------------------------------------------------- /man/get_random.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gets.R 3 | \name{get_random_item} 4 | \alias{get_random_item} 5 | \alias{get_random} 6 | \alias{get_random_property} 7 | \title{Retrieve randomly-selected Wikidata items or properties} 8 | \usage{ 9 | get_random_item(limit = 1, ...) 10 | 11 | get_random_property(limit = 1, ...) 12 | } 13 | \arguments{ 14 | \item{limit}{how many random items to return. 1 by default, but can be higher.} 15 | 16 | \item{\dots}{arguments to pass to \code{\link[httr:GET]{httr::GET}}.} 17 | } 18 | \description{ 19 | \code{get_random_item} and \code{get_random_property} allow you to retrieve the data 20 | associated with randomly-selected Wikidata items and properties, respectively. As with 21 | other \code{WikidataR} code, custom print methods are available; use \code{\link{str}} 22 | to manipulate and see the underlying structure of the data.
23 | } 24 | \examples{ 25 | \dontrun{ 26 | #Random item 27 | random_item <- get_random_item() 28 | 29 | #Random property 30 | random_property <- get_random_property() 31 | } 32 | } 33 | \seealso{ 34 | \code{\link{get_item}} for selecting a specific item or property, 35 | or \code{\link{find_item}} for using search functionality to pull out 36 | item or property IDs where the descriptions or aliases match a particular 37 | search term. 38 | } 39 | -------------------------------------------------------------------------------- /man/identifier_from_identifier.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/queries.R 3 | \name{identifier_from_identifier} 4 | \alias{identifier_from_identifier} 5 | \title{Identifier from identifier} 6 | \usage{ 7 | identifier_from_identifier( 8 | property = "ORCID iD", 9 | return = "IMDb ID", 10 | value = "0000-0002-7865-7235" 11 | ) 12 | } 13 | \arguments{ 14 | \item{property}{The identifier property to search (for caveats, see \code{as_pid})} 15 | 16 | \item{return}{The identifier property to convert to} 17 | 18 | \item{value}{The identifier value to match.} 19 | } 20 | \value{ 21 | A vector of identifiers corresponding to identifiers submitted. 22 | } 23 | \description{ 24 | Convert unique identifiers to other unique identifiers. 
25 | } 26 | \examples{ 27 | identifier_from_identifier(property ='ORCID iD', 28 | return = 'IMDb ID', 29 | value = c('0000-0002-7865-7235','0000-0003-1079-5604') 30 | ) 31 | } 32 | -------------------------------------------------------------------------------- /man/initials.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{initials} 4 | \alias{initials} 5 | \title{Format short form person names} 6 | \usage{ 7 | initials(x, format = "FLast") 8 | } 9 | \arguments{ 10 | \item{x}{a vector of people's names as strings} 11 | 12 | \item{format}{a vector of strings of either "FLast" or "FL" to indicate the output format} 13 | } 14 | \value{ 15 | the inputted name strings with first names shortened based on the 16 | selected format. 17 | } 18 | \description{ 19 | Converting names into first initial and surname, or just initials 20 | } 21 | -------------------------------------------------------------------------------- /man/list_properties.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{list_properties} 4 | \alias{list_properties} 5 | \title{List properties of a Wikidata item} 6 | \usage{ 7 | list_properties(item, names = FALSE) 8 | } 9 | \arguments{ 10 | \item{item}{a list of one or more Wikidata items returned with 11 | \code{\link{get_item}}.} 12 | 13 | \item{names}{a boolean for whether to return property names, or just P numbers 14 | (default = FALSE).} 15 | } 16 | \value{ 17 | a list containing one sub-list for each entry in \code{items}, 18 | and (below that) the found data for each claim. In the event a claim 19 | cannot be found for an item, an \code{NA} will be returned 20 | instead.
21 | } 22 | \description{ 23 | for a downloaded wikidata item, list the properties of all statements 24 | } 25 | \examples{ 26 | # Get item data 27 | adams_data <- get_item("42") 28 | # Get claim data 29 | claims <- extract_claims(adams_data, "P31") 30 | } 31 | -------------------------------------------------------------------------------- /man/print.find_item.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prints.R 3 | \name{print.find_item} 4 | \alias{print.find_item} 5 | \title{Print method for find_item} 6 | \usage{ 7 | \method{print}{find_item}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{find_item object with search results} 11 | 12 | \item{\dots}{Arguments to be passed to methods} 13 | } 14 | \description{ 15 | print found items. 16 | } 17 | -------------------------------------------------------------------------------- /man/print.find_property.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prints.R 3 | \name{print.find_property} 4 | \alias{print.find_property} 5 | \title{Print method for find_property} 6 | \usage{ 7 | \method{print}{find_property}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{find_property object with search results} 11 | 12 | \item{\dots}{Arguments to be passed to methods} 13 | } 14 | \description{ 15 | print found properties. 16 | } 17 | -------------------------------------------------------------------------------- /man/print.wikidata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prints.R 3 | \name{print.wikidata} 4 | \alias{print.wikidata} 5 | \title{Print method for Wikidata objects} 6 | \usage{ 7 | \method{print}{wikidata}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{Wikidata object from get_item, get_random_item, get_property or get_random_property} 11 | 12 | \item{\dots}{Arguments to be passed to methods} 13 | } 14 | \description{ 15 | print found objects generally. 16 | } 17 | \seealso{ 18 | get_item, get_random_item, get_property or get_random_property 19 | } 20 | -------------------------------------------------------------------------------- /man/qid_from_DOI.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/schol.R 3 | \name{qid_from_DOI} 4 | \alias{qid_from_DOI} 5 | \title{QID from DOI} 6 | \usage{ 7 | qid_from_DOI(DOI = "10.15347/WJM/2019.001") 8 | } 9 | \arguments{ 10 | \item{DOI}{digital object identifiers submitted as strings} 11 | } 12 | \value{ 13 | vector of QIDs corresponding to DOIs submitted 14 | } 15 | \description{ 16 | simple converter from DOIs to QIDs (for items in Wikidata) 17 | } 18 | -------------------------------------------------------------------------------- /man/qid_from_ORCID.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/schol.R 3 | \name{qid_from_ORCID} 4 | \alias{qid_from_ORCID} 5 | \title{QID from ORCID} 6 | \usage{ 7 | qid_from_ORCID(ORCID = "0000-0002-2298-7593") 8 | } 9 | \arguments{ 10 | \item{ORCID}{ORCID iDs submitted as strings} 11 | } 12 | \value{ 13 | vector of QIDs corresponding to ORCIDs submitted 14 | } 15 | \description{ 16 | simple converter from ORCIDs to QIDs (for items in Wikidata) 17 | } 18 | -------------------------------------------------------------------------------- /man/qid_from_identifier.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/queries.R 3 |
\name{qid_from_identifier} 4 | \alias{qid_from_identifier} 5 | \title{QID from identifier} 6 | \usage{ 7 | qid_from_identifier( 8 | property = "DOI", 9 | value = c("10.15347/WJM/2019.001", "10.15347/WJM/2020.002") 10 | ) 11 | } 12 | \arguments{ 13 | \item{property}{The identifier property to search (for caveats, see \code{as_pid}.)} 14 | 15 | \item{value}{The identifier value to match.} 16 | } 17 | \value{ 18 | A vector of QIDs corresponding to identifiers submitted. 19 | } 20 | \description{ 21 | Convert unique identifiers to QIDs (for items in Wikidata). 22 | } 23 | \details{ 24 | The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID) is the unique identifier (UID) 25 | used in Wikidata. 26 | } 27 | \examples{ 28 | qid_from_identifier('ISBN-13','978-0-262-53817-6') 29 | } 30 | -------------------------------------------------------------------------------- /man/qid_from_name.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/schol.R 3 | \name{qid_from_name} 4 | \alias{qid_from_name} 5 | \title{QID from label name} 6 | \usage{ 7 | qid_from_name(name = "Thomas Shafee", limit = 100, format = "vector") 8 | } 9 | \arguments{ 10 | \item{name}{name labels submitted as strings} 11 | 12 | \item{limit}{if multiple QIDs match each submitted name, how many to return} 13 | 14 | \item{format}{output format ('vector' to return a simple vector, or 'list' to return a nested list)} 15 | } 16 | \value{ 17 | vector of QIDs corresponding to names submitted. Note: some names may return multiple QIDs. 18 | } 19 | \description{ 20 | simple converter from label names to QIDs (for items in wikidata). 
21 | Essentially a simplification of \code{find_item} 22 | } 23 | -------------------------------------------------------------------------------- /man/query_wikidata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/queries.R 3 | \name{query_wikidata} 4 | \alias{query_wikidata} 5 | \title{Send one or more SPARQL queries to WDQS} 6 | \usage{ 7 | query_wikidata(sparql_query, format = "tibble", ...) 8 | } 9 | \arguments{ 10 | \item{sparql_query}{SPARQL query (can be a vector of queries)} 11 | 12 | \item{format}{`tibble` (default) returns a pure character data frame, 13 | `simple` returns a pure character vector, while 14 | `smart` fetches JSON-formatted data and returns a tibble with datetime 15 | columns converted to `POSIXct`.} 16 | 17 | \item{\dots}{Additional parameters to supply to \code{\link[httr:POST]{httr::POST}}.} 18 | } 19 | \value{ 20 | A `tibble` or `vector`. Note: QID values will be returned as QIDs, rather than URLs. 21 | } 22 | \description{ 23 | Makes a POST request to the Wikidata Query Service SPARQL endpoint. 24 | } 25 | \section{Query limits}{ 26 | 27 | There is a hard query deadline configured which is set to 60 seconds. There 28 | are also the following limits: 29 | - One client (user agent + IP) is allowed 60 seconds of processing time each 30 | 60 seconds 31 | - One client is allowed 30 error queries per minute 32 | See \href{https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits}{query limits section} 33 | in the Wikidata Query Service User Manual for more information. 34 | } 35 | 36 | \examples{ 37 | # R's versions and release dates: 38 | sparql_query <- 'SELECT DISTINCT 39 | ?softwareVersion ?publicationDate 40 | WHERE { 41 | BIND(wd:Q206904 AS ?R) 42 | ?R p:P348 [ 43 | ps:P348 ?softwareVersion; 44 | pq:P577 ?publicationDate 45 | ] . 
46 | }' 47 | query_wikidata(sparql_query) 48 | 49 | \dontrun{ 50 | # "smart" format converts all datetime columns to POSIXct 51 | query_wikidata(sparql_query, format = "smart") 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /man/searcher.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gets.R 3 | \name{searcher} 4 | \alias{searcher} 5 | \title{Convert an input to an item QID} 6 | \usage{ 7 | searcher(search_term, language, limit, response_language, type, ...) 8 | } 9 | \arguments{ 10 | \item{search_term}{a term to search for.} 11 | 12 | \item{language}{the language to conduct the search in; this should 13 | consist of an ISO language code. Set to "en" by default.} 14 | 15 | \item{limit}{the number of results to return; set to 10 by default.} 16 | 17 | \item{response_language}{the language to return the labels and descriptions in; this should 18 | consist of an ISO language code. Set to "en" by default.} 19 | 20 | \item{type}{type of wikidata object to return (default = "item")} 21 | 22 | \item{\dots}{Additional parameters to supply to \code{\link[httr:POST]{httr::POST}}} 23 | } 24 | \value{ 25 | If the inputted string matches an item label, return its QID. 26 | If the inputted string matches multiple labels of multiple items, return the QID of the first hit. 27 | If the inputted string is already a QID, return the string. 
28 | } 29 | \description{ 30 | Convert an input string to the most likely item QID 31 | } 32 | \examples{ 33 | # if input string is a valid QID 34 | as_qid("Q42") 35 | # if input string matches multiple item labels 36 | as_qid("Douglas Adams") 37 | # if input string matches a single unique label 38 | as_qid("Douglas Adams and the question of arterial blood pressure in mammals") 39 | } 40 | -------------------------------------------------------------------------------- /man/sparql_query.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/queries.R 3 | \name{sparql_query} 4 | \alias{sparql_query} 5 | \title{Download full Wikidata items matching a SPARQL query} 6 | \usage{ 7 | sparql_query(query, ...) 8 | } 9 | \arguments{ 10 | \item{query}{The SPARQL query as a string} 11 | 12 | \item{\dots}{Additional parameters to supply to \code{\link[httr:POST]{httr::POST}}.} 13 | } 14 | \value{ 15 | a download of the full wikidata objects formatted as a nested json list 16 | } 17 | \description{ 18 | Utility wrapper for the Wikidata SPARQL endpoint to download items. 19 | Used by \code{get_geo_entity} and \code{get_geo_box}. 20 | } 21 | -------------------------------------------------------------------------------- /man/unspecial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{unspecial} 4 | \alias{unspecial} 5 | \title{Remove special characters} 6 | \usage{ 7 | unspecial(x) 8 | } 9 | \arguments{ 10 | \item{x}{a vector of strings to check for special characters} 11 | } 12 | \value{ 13 | the inputted strings with special characters replaced with 14 | closest match plain characters. 
15 | } 16 | \description{ 17 | Special characters can otherwise mess up wikidata read-writes 18 | } 19 | -------------------------------------------------------------------------------- /man/url_to_id.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{url_to_id} 4 | \alias{url_to_id} 5 | \title{Extract an identifier from a wikidata URL} 6 | \usage{ 7 | url_to_id(x) 8 | 9 | url_to_id(x) 10 | } 11 | \arguments{ 12 | \item{x}{A vector of strings representing Wikidata URLs.} 13 | } 14 | \value{ 15 | if the URL ends in a QID or PID, return that PID or QID, else return the original string 16 | 17 | QID or PID. 18 | } 19 | \description{ 20 | Convert a URL ending in an identifier (returned by SPARQL queries) to just 21 | the plain identifier (QID or PID). 22 | 23 | Convert a URL ending in an identifier (returned by SPARQL queries) 24 | to just the plain identifier (QID or PID). 25 | } 26 | \details{ 27 | The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID) 28 | is the unique identifier (UID) used in Wikidata. 29 | } 30 | \examples{ 31 | url_to_id("http://www.wikidata.org/entity/42") 32 | url_to_id("http://www.wikidata.org/Q42") 33 | } 34 | -------------------------------------------------------------------------------- /man/wd_query.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/queries.R 3 | \name{wd_query} 4 | \alias{wd_query} 5 | \title{Download a Wikidata item} 6 | \usage{ 7 | wd_query(title, ...) 
8 | } 9 | \arguments{ 10 | \item{title}{The Wikidata item or property as a string.} 11 | 12 | \item{\dots}{Additional parameters to supply to \code{\link[httr:POST]{httr::POST}}.} 13 | } 14 | \value{ 15 | A downloaded full wikidata object (item or property) formatted as a 16 | nested json list. 17 | } 18 | \description{ 19 | Utility wrapper for the Wikidata API to download an item. 20 | Used by \code{get_item} and \code{get_property}. 21 | } 22 | -------------------------------------------------------------------------------- /man/wd_rand_query.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/queries.R 3 | \name{wd_rand_query} 4 | \alias{wd_rand_query} 5 | \title{Download random Wikidata items} 6 | \usage{ 7 | wd_rand_query(ns, limit, ...) 8 | } 9 | \arguments{ 10 | \item{ns}{string indicating namespace, most commonly "Main" for QID items, "Property" 11 | for PID properties.} 12 | 13 | \item{limit}{How many random objects to return.} 14 | 15 | \item{\dots}{Additional parameters to supply to \code{\link[httr:POST]{httr::POST}}.} 16 | } 17 | \value{ 18 | Downloaded full wikidata objects (items or properties) formatted 19 | as nested json lists. 20 | } 21 | \description{ 22 | Utility wrapper for the Wikidata API to download random items. 23 | Used by \code{random_item}. 
24 | } 25 | -------------------------------------------------------------------------------- /man/write_wikibase.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/writes_wikibase.R 3 | \name{write_wikibase} 4 | \alias{write_wikibase} 5 | \title{Write statements to any Wikibase instance} 6 | \usage{ 7 | write_wikibase( 8 | items, 9 | properties = NULL, 10 | values = NULL, 11 | qual.properties = NULL, 12 | qual.values = NULL, 13 | src.properties = NULL, 14 | src.values = NULL, 15 | remove = FALSE, 16 | format = "tibble", 17 | format.csv.file = NULL, 18 | api.username = NULL, 19 | api.token = NULL, 20 | api.format = "v1", 21 | api.batchname = NULL, 22 | api.submit = TRUE, 23 | quickstatements.url = NULL, 24 | coordinate_pid = NULL 25 | ) 26 | } 27 | \arguments{ 28 | \item{items}{a vector of strings indicating the items to which to add statements (as QIDs). 29 | Note: In contrast to \code{write_wikidata}, this function takes no labels as input, just QIDs. 30 | New QIDs can be created by using "CREATE_xyz", where "_xyz" is any unique string. 31 | Using the same id will add additional statements to those new items} 32 | 33 | \item{properties}{a vector of strings indicating the properties to add as statements (as PIDs). 34 | Note: In contrast to \code{write_wikidata}, this function takes no labels as input, just PIDs. 35 | Four special properties can also be used: labels, aliases, descriptions and sitelinks. 36 | See \href{https://www.wikidata.org/wiki/Help:QuickStatements#Adding_labels,_aliases,_descriptions_and_sitelinks}{this link} for the syntax.} 37 | 38 | \item{values}{a vector of strings indicating the values to add as statements (as QIDs). 
39 | Note: if strings are provided, they will be treated as plain text.} 40 | 41 | \item{qual.properties}{a vector, data frame, or tibble of strings indicating the properties to add as qualifiers to statements (as PIDs).} 42 | 43 | \item{qual.values}{a vector, data frame, or tibble of strings indicating the values to add as qualifiers to statements (as QIDs or strings). 44 | Note: if strings are provided, they will be treated as plain text.} 45 | 46 | \item{src.properties}{a vector, data frame, or tibble of strings indicating the properties to add as reference sources to statements (as SIDs or labels). 47 | Note: if labels are provided, and multiple items match, the first matching item will be used 48 | (see \code{as_sid} function), so use with caution.} 49 | 50 | \item{src.values}{a vector, data frame, or tibble of strings indicating the values to add as reference sources to statements (as QIDs or strings). 51 | Note: if strings are provided, they will be treated as plain text.} 52 | 53 | \item{remove}{a vector of booleans for each statement indicating whether it should 54 | be removed from the item rather than added (default = FALSE)} 55 | 56 | \item{format}{output format as a string. Options include: 57 | \describe{ 58 | \item{tibble}{easiest format for further manipulation in R} 59 | \item{csv}{can be copy-pasted to the Wikibase QuickStatements website (or manipulated in a spreadsheet program). In contrast to the \code{write_wikidata} function, the delimiter is `tab`, because QuickStatements expects tab-separated data} 60 | \item{api}{a url that can be copy-pasted into a web browser, or automatically submitted (see \code{api.submit} parameter)} 61 | \item{website}{open a \href{https://quickstatements.toolforge.org/}{QuickStatements} web browser window summarizing the edits to be made to Wikidata} 62 | }} 63 | 64 | \item{format.csv.file}{path to save the csv file. 
If none is provided, the output is printed to the console.} 65 | 66 | \item{api.username}{a string indicating your Wikimedia username} 67 | 68 | \item{api.token}{a string indicating your api token (the unique identifier that you can find listed at \href{https://quickstatements.toolforge.org/#/user}{your user page})} 69 | 70 | \item{api.format}{a string indicating which version of the QuickStatements format is used to submit to the api (default = "v1")} 71 | 72 | \item{api.batchname}{a string used to create a named batch (listed at \href{https://quickstatements.toolforge.org/#/batches}{your batch history page}) and to tag the edit summaries} 73 | 74 | \item{api.submit}{boolean indicating whether to submit instruction directly to wikidata (else returns the URL that can be copy-pasted into a web browser)} 75 | 76 | \item{quickstatements.url}{url to access quickstatements of the corresponding Wikibase instance.} 77 | 78 | \item{coordinate_pid}{PID of a geocoordinate property; its values need different formatting} 79 | } 80 | \value{ 81 | data formatted to upload to Wikidata (via quickstatements), 82 | optionally also directly uploaded to Wikidata (see \code{format} parameter). 83 | } 84 | \description{ 85 | Upload data to a Wikibase instance, including creating items, 86 | adding statements to existing items (via the quickstatements format and API). 87 | } 88 | \examples{ 89 | # Add a statement to the "Wikidata sandbox" item (Q4115189) 90 | # to say that it is an "instance of" (P31) of Q1 (the universe). 
91 | # The instruction will submit directly to Wikidata via the API 92 | # (if you include your Wikibase/Wikimedia username and token) 93 | 94 | \dontrun{ 95 | write_wikibase( 96 | items = "Q24", 97 | properties = "P2", 98 | values = "Q8", 99 | format = "api", 100 | api.username = "myusername", 101 | api.token = "mytoken", 102 | api.submit = TRUE, 103 | quickstatements.url = NULL 104 | ) 105 | } 106 | # note: 107 | 108 | } 109 | -------------------------------------------------------------------------------- /man/write_wikidata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/writes.R 3 | \name{write_wikidata} 4 | \alias{write_wikidata} 5 | \title{Write statements to Wikidata} 6 | \usage{ 7 | write_wikidata( 8 | items, 9 | properties = NULL, 10 | values = NULL, 11 | qual.properties = NULL, 12 | qual.values = NULL, 13 | src.properties = NULL, 14 | src.values = NULL, 15 | remove = FALSE, 16 | format = "tibble", 17 | api.username = NULL, 18 | api.token = NULL, 19 | api.format = "v1", 20 | api.batchname = NULL, 21 | api.submit = TRUE 22 | ) 23 | } 24 | \arguments{ 25 | \item{items}{a vector of strings indicating the items to which to add statements (as QIDs or labels). 26 | Note: if labels are provided, and multiple items match, the first matching item will be used 27 | (see \code{as_qid} function), so use with caution. 28 | New QIDs can be created by using the "CREATE_xyz", where "_xyz" is any unique string. 29 | Using the same id will add additional statements to those new items} 30 | 31 | \item{properties}{a vector of strings indicating the properties to add as statements (as PIDs or labels). 32 | Note: if labels are provided, and multiple items match, the first matching item will be used 33 | (see \code{as_pid} function), so use with caution. 34 | Four special properties can also be used: labels, aliases, descriptions and sitelinks. 
35 | See \href{https://www.wikidata.org/wiki/Help:QuickStatements#Adding_labels,_aliases,_descriptions_and_sitelinks}{this link} for the syntax.} 36 | 37 | \item{values}{a vector of strings indicating the values to add as statements (as QIDs or strings). 38 | Note: if strings are provided, they will be treated as plain text.} 39 | 40 | \item{qual.properties}{a vector, data frame, or tibble of strings indicating the properties to add as qualifiers to statements (as PIDs or labels). 41 | Note: if labels are provided, and multiple items match, the first matching item will be used 42 | (see \code{as_pid} function), so use with caution.} 43 | 44 | \item{qual.values}{a vector, data frame, or tibble of strings indicating the values to add as qualifiers to statements (as QIDs or strings). 45 | Note: if strings are provided, they will be treated as plain text.} 46 | 47 | \item{src.properties}{a vector, data frame, or tibble of strings indicating the properties to add as reference sources to statements (as SIDs or labels). 48 | Note: if labels are provided, and multiple items match, the first matching item will be used 49 | (see \code{as_sid} function), so use with caution.} 50 | 51 | \item{src.values}{a vector, data frame, or tibble of strings indicating the values to add as reference sources to statements (as QIDs or strings). 52 | Note: if strings are provided, they will be treated as plain text.} 53 | 54 | \item{remove}{a vector of booleans for each statement indicating whether it should 55 | be removed from the item rather than added (default = FALSE)} 56 | 57 | \item{format}{output format as a string. 
Options include: 58 | \describe{ 59 | \item{tibble}{easiest format for further manipulation in R} 60 | \item{csv}{can be copy-pasted to \href{https://quickstatements.toolforge.org/}{the QuickStatements website} (or manipulated in a spreadsheet program)} 61 | \item{api}{a url that can be copy-pasted into a web browser, or automatically submitted (see \code{api.submit} parameter)} 62 | \item{website}{open a \href{https://quickstatements.toolforge.org/}{QuickStatements} web browser window summarizing the edits to be made to Wikidata} 63 | }} 64 | 65 | \item{api.username}{a string indicating your Wikimedia username} 66 | 67 | \item{api.token}{a string indicating your api token (the unique identifier that you can find listed at \href{https://quickstatements.toolforge.org/#/user}{your user page})} 68 | 69 | \item{api.format}{a string indicating which version of the QuickStatements format is used to submit to the api (default = "v1")} 70 | 71 | \item{api.batchname}{a string used to create a named batch (listed at \href{https://quickstatements.toolforge.org/#/batches}{your batch history page}) and to tag the edit summaries} 72 | 73 | \item{api.submit}{boolean indicating whether to submit instruction directly to wikidata (else returns the URL that can be copy-pasted into a web browser)} 74 | } 75 | \value{ 76 | data formatted to upload to wikidata (via quickstatements), 77 | optionally also directly uploaded to wikidata (see \code{format} parameter). 78 | } 79 | \description{ 80 | Upload data to Wikidata, including creating items, 81 | adding statements to existing items (via the quickstatements format and API). 82 | } 83 | \examples{ 84 | # Add a statement to the "Wikidata sandbox" item (Q4115189) 85 | # to say that it is an "instance of" (P31) of Q1 (the universe). 
86 | # The instruction will submit directly to wikidata via the API 87 | # (if you include your Wikimedia username and token) 88 | 89 | \dontrun{write_wikidata(items = "Wikidata Sandbox", 90 | properties = "instance of", 91 | values = "Q1", 92 | format = "api", 93 | api.username = "myusername", 94 | api.token = , #REDACTED# 95 | )} 96 | #note: 97 | 98 | } 99 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(WikidataR) 3 | 4 | test_check("WikidataR") 5 | -------------------------------------------------------------------------------- /tests/testthat/test_geo.R: -------------------------------------------------------------------------------- 1 | testthat::context("Geographic queries") 2 | # Have had to comment out tests. Wikidata query service having timeout issues that cause tests to fail 3 | 4 | 5 | # testthat::test_that("Simple entity-based geo lookups work", { 6 | # field_names <- c("item", "name", "latitutde", "longitude", "entity") 7 | # sf_locations <- get_geo_entity("Q62") 8 | # testthat::expect_true(is.data.frame(sf_locations)) 9 | # testthat::expect_true(all(field_names == names(sf_locations))) 10 | # testthat::expect_true(unique(sf_locations$entity) == "Q62") 11 | # }) 12 | 13 | # testthat::test_that("Language-variant entity-based geo lookups work", { 14 | # field_names <- c("item", "name", "latitutde", "longitude", "entity") 15 | # sf_locations <- get_geo_entity("Q62", language = "fr") 16 | # testthat::expect_true(is.data.frame(sf_locations)) 17 | # testthat::expect_true(all(field_names == names(sf_locations))) 18 | # testthat::expect_true(unique(sf_locations$entity) == "Q62") 19 | # }) 20 | 21 | # testthat::test_that("Radius restricted entity-based geo lookups work", { 22 | # field_names <- c("item", "name", "latitutde", "longitude", "entity") 23 | # sf_locations <- get_geo_entity("Q62", 
radius = 1) 24 | # testthat::expect_true(is.data.frame(sf_locations)) 25 | # testthat::expect_true(all(field_names == names(sf_locations))) 26 | # testthat::expect_true(unique(sf_locations$entity) == "Q62") 27 | # }) 28 | 29 | # testthat::test_that("multi-entity geo lookups work", { 30 | # field_names <- c("item", "name", "latitutde", "longitude", "entity") 31 | # sf_locations <- get_geo_entity(c("Q62", "Q64"), radius = 1) 32 | # testthat::expect_true(is.data.frame(sf_locations)) 33 | # testthat::expect_true(all(field_names == names(sf_locations))) 34 | # testthat::expect_equal(length(unique(sf_locations$entity)), 2) 35 | # }) 36 | 37 | # testthat::test_that("Simple bounding lookups work", { 38 | # field_names <- c("item", "name", "latitutde", "longitude") 39 | # bruges_box <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest") 40 | # testthat::expect_true(is.data.frame(bruges_box)) 41 | # testthat::expect_true(all(field_names == names(bruges_box))) 42 | # }) 43 | 44 | # testthat::test_that("Language-variant bounding lookups work", { 45 | # field_names <- c("item", "name", "latitutde", "longitude") 46 | # bruges_box <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest", 47 | # language = "fr") 48 | # testthat::expect_true(is.data.frame(bruges_box)) 49 | # testthat::expect_true(all(field_names == names(bruges_box))) 50 | # }) -------------------------------------------------------------------------------- /tests/testthat/test_gets.R: -------------------------------------------------------------------------------- 1 | context("Direct Wikidata get functions") 2 | 3 | test_that("A specific item can be retrieved with an entire item code", { 4 | expect_true({get_item("Q100");TRUE}) 5 | }) 6 | 7 | test_that("A specific item can be retrieved with a partial entire item code", { 8 | expect_true({get_item("100");TRUE}) 9 | }) 10 | 11 | test_that("A specific property can be retrieved with an entire prop code + namespace", { 12 | 
expect_true({get_property("Property:P10");TRUE}) 13 | }) 14 | 15 | test_that("A specific property can be retrieved with an entire prop code + namespace", { 16 | expect_true({get_property("P10");TRUE}) 17 | }) 18 | 19 | 20 | test_that("A specific property can be retrieved with a partial prop code", { 21 | expect_true({get_property("10");TRUE}) 22 | }) 23 | 24 | test_that("A randomly-selected item can be retrieved",{ 25 | expect_true({get_random_item();TRUE}) 26 | }) 27 | 28 | test_that("A randomly-selected property can be retriveed",{ 29 | expect_true({get_random_property();TRUE}) 30 | }) -------------------------------------------------------------------------------- /tests/testthat/test_search.R: -------------------------------------------------------------------------------- 1 | context("Search functions") 2 | 3 | test_that("English-language search works",{ 4 | expect_true({find_item("Wonder Girls", "en");TRUE}) 5 | }) 6 | 7 | test_that("Non-English-language search works",{ 8 | expect_true({find_item("Wonder Girls", "es");TRUE}) 9 | }) 10 | 11 | test_that("Property search works",{ 12 | expect_true({find_property("Music", "en");TRUE}) 13 | }) --------------------------------------------------------------------------------