├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── CONDUCT.md
├── CRAN-RELEASE
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS
├── R
    ├── WikidataR.R
    ├── data.R
    ├── disambiguators.R
    ├── geo.R
    ├── gets.R
    ├── prints.R
    ├── queries.R
    ├── schol.R
    ├── sysdata.rda
    ├── utils.R
    ├── writes.R
    ├── writes_wikibase.R
    └── zzz.R
├── README.md
├── WikidataR.Rproj
├── cran-comments.md
├── inst
    └── extdata
    │   └── WD.globalvar.RDS
├── man
    ├── WD.globalvar.Rd
    ├── WikidataR.Rd
    ├── as_pid.Rd
    ├── as_qid.Rd
    ├── as_quot.Rd
    ├── as_sid.Rd
    ├── check_input.Rd
    ├── createrows.Rd
    ├── createrows.tidy.Rd
    ├── disambiguate_QIDs.Rd
    ├── extract_claims.Rd
    ├── extract_para.Rd
    ├── filter_qids.Rd
    ├── find_item.Rd
    ├── get_example.Rd
    ├── get_geo_box.Rd
    ├── get_geo_entity.Rd
    ├── get_item.Rd
    ├── get_names_from_properties.Rd
    ├── get_random.Rd
    ├── identifier_from_identifier.Rd
    ├── initials.Rd
    ├── list_properties.Rd
    ├── print.find_item.Rd
    ├── print.find_property.Rd
    ├── print.wikidata.Rd
    ├── qid_from_DOI.Rd
    ├── qid_from_ORCID.Rd
    ├── qid_from_identifier.Rd
    ├── qid_from_name.Rd
    ├── query_wikidata.Rd
    ├── searcher.Rd
    ├── sparql_query.Rd
    ├── unspecial.Rd
    ├── url_to_id.Rd
    ├── wd_query.Rd
    ├── wd_rand_query.Rd
    ├── write_wikibase.Rd
    └── write_wikidata.Rd
└── tests
    ├── testthat.R
    └── testthat
        ├── test_geo.R
        ├── test_gets.R
        └── test_search.R


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^CONDUCT\.md$
4 | .travis.yml
5 | ^data-raw$
6 | ^CRAN-RELEASE$
7 | ^cran-comments\.md$
8 | ^\.httr-oauth$
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .DS_Store
 4 | # Example code in package build process
 5 | *-Ex.R
 6 | # R data files from past sessions
 7 | .Rdata
 8 | # RStudio files
 9 | .Rproj.user/
10 | .Rproj.user
11 | inst/doc
12 | .httr-oauth
13 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # Sample .travis.yml for R projects
 2 | 
 3 | language: r
 4 | warnings_are_errors: false
 5 | sudo: required
 6 | 
 7 | env:
 8 |  global:
 9 |    - CRAN: http://cran.rstudio.com
10 | 
11 | r_packages:
12 |    - testthat
13 |    - WikipediR
14 | notifications:
15 |   email:
16 |     on_failure: change


--------------------------------------------------------------------------------
/CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Code of Conduct
 2 | 
 3 | As contributors and maintainers of this project, we pledge to respect all people who 
 4 | contribute through reporting issues, posting feature requests, updating documentation,
 5 | submitting pull requests or patches, and other activities.
 6 | 
 7 | We are committed to making participation in this project a harassment-free experience for
 8 | everyone, regardless of level of experience, gender, gender identity and expression,
 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
10 | 
11 | Examples of unacceptable behavior by participants include the use of sexual language or
12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment,
13 | insults, or other unprofessional conduct.
14 | 
15 | Project maintainers have the right and responsibility to remove, edit, or reject comments,
16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 
17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 
18 | from the project team.
19 | 
20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 
21 | opening an issue or contacting one or more of the project maintainers.
22 | 
23 | This Code of Conduct is adapted from the Contributor Covenant 
 24 | (http://contributor-covenant.org), version 1.0.0, available at 
25 | http://contributor-covenant.org/version/1/0/0/
26 | 


--------------------------------------------------------------------------------
/CRAN-RELEASE:
--------------------------------------------------------------------------------
1 | This package was submitted to CRAN on 2021-11-16.
2 | Once it is accepted, delete this file and tag the release (commit 3222560).
3 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: WikidataR
 2 | Type: Package
 3 | Title: Read-Write API Client Library for Wikidata
 4 | Version: 2.3.3
 5 | Date: 2021-11-16
 6 | Authors@R: c(
 7 |             person("Thomas", "Shafee", role = c("aut", "cre"), email = "T.Shafee@latrobe.edu.au", comment = c(ORCID = "0000-0002-2298-7593")),
 8 |             person("Os", "Keyes", role = "aut", comment = c(ORCID = "0000-0001-5196-609X")),
 9 |             person("Serena", "Signorelli", role = "aut"),
10 |             person("Alex", "Lum", role = "ctb", comment = c(ORCID = "0000-0002-9295-9053")),
11 |             person("Christian", "Graul", role = "ctb"),
12 |             person("Mikhail", "Popov", role = "ctb", comment = c(ORCID = "0000-0003-0145-8948"))
13 |            )
14 | Description: Read from, interrogate, and write to Wikidata <https://www.wikidata.org> -
15 |     the multilingual, interdisciplinary, semantic knowledgebase. Includes functions to:
 16 |     read from Wikidata (single items or properties); query Wikidata (retrieving
17 |     all items that match a set of criteria via Wikidata SPARQL query service); write to
18 |     Wikidata (adding new items or statements via QuickStatements); and handle and manipulate
19 |     Wikidata objects (as lists and tibbles). Uses the Wikidata and QuickStatements APIs. 
20 | BugReports: https://github.com/TS404/WikidataR/issues
21 | URL: https://github.com/TS404/WikidataR
22 | License: MIT + file LICENSE
23 | Imports:
24 |     httr,
25 |     jsonlite,
26 |     WikipediR,
27 |     WikidataQueryServiceR,
28 |     tibble,
29 |     dplyr,
30 |     stringr,
31 |     Hmisc,
32 |     progress,
33 |     pbapply,
34 |     stats,
35 |     readr,
36 |     crayon,
37 |     utils
38 | Suggests:
39 |     markdown,
40 |     testthat,
41 |     tidyverse,
42 |     knitr,
43 |     pageviews,
44 |     spelling
45 | RoxygenNote: 7.2.3
46 | Encoding: UTF-8
47 | Depends: R (>= 3.5.0)
48 | Language: en-US
49 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License (https://opensource.org/license/MIT)
 2 | 
 3 | Copyright 2014, Oliver Keyes
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 6 | 
 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 8 | 
 9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(print,find_item)
 4 | S3method(print,find_property)
 5 | S3method(print,wikidata)
 6 | export(as_pid)
 7 | export(as_qid)
 8 | export(as_quot)
 9 | export(as_sid)
10 | export(check_input)
11 | export(createrows)
12 | export(createrows.tidy)
13 | export(disambiguate_QIDs)
14 | export(extract_claims)
15 | export(extract_para)
16 | export(filter_qids)
17 | export(find_item)
18 | export(find_property)
19 | export(get_example)
20 | export(get_geo_box)
21 | export(get_geo_entity)
22 | export(get_item)
23 | export(get_names_from_properties)
24 | export(get_property)
25 | export(get_random_item)
26 | export(get_random_property)
27 | export(identifier_from_identifier)
28 | export(initials)
29 | export(list_properties)
30 | export(qid_from_DOI)
31 | export(qid_from_ORCID)
32 | export(qid_from_identifier)
33 | export(qid_from_name)
34 | export(query_wikidata)
35 | export(searcher)
36 | export(sparql_query)
37 | export(unspecial)
38 | export(url_to_id)
39 | export(wd_query)
40 | export(wd_rand_query)
41 | export(write_wikibase)
42 | export(write_wikidata)
43 | import(WikidataQueryServiceR)
44 | import(crayon)
45 | import(dplyr)
46 | import(pbapply)
47 | import(progress)
48 | import(stringr)
49 | import(tibble)
50 | import(utils)
51 | importFrom(WikipediR,page_content)
52 | importFrom(WikipediR,query)
53 | importFrom(WikipediR,random_page)
54 | importFrom(dplyr,bind_cols)
55 | importFrom(httr,user_agent)
56 | importFrom(jsonlite,fromJSON)
57 | importFrom(readr,format_delim)
58 | importFrom(readr,format_tsv)
59 | importFrom(stats,cor)
60 | importFrom(stats,var)
61 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
 1 | 2.3.0
 2 | =================================================
 3 | * New disambiguate_QIDs() function for manual item-by-item disambiguation
 4 | 
 5 | 2.2.1
 6 | =================================================
 7 | * Outputs as tibbles by default with vector options where relevant
 8 | 
 9 | 2.2.0
10 | =================================================
11 | * Additional 'website' output format option for write_wikidata() to submit to Quickstatements website
12 | * Patches to make writing wikidata statements with dates more reliable
13 | 
14 | 2.1.5
15 | =================================================
16 | * Updated release on CRAN
17 | * Expected property values and constraints from 2.1.0 now just stored as a data file, since onAttach function caused issues
18 | 
19 | 2.1.0
20 | =================================================
21 | * Upon loading the package, it will check whether property values are expected to be strings, numbers, dates or QIDs
22 | 
23 | 2.0.0
 24 | =================================================
25 | * Write_wikidata() allows you to write to wikidata via the 'quickstatements' format
26 | * Outputs as tibbles where relevant
 27 | * Switch maintainer to Thomas Shafee
28 | 
29 | 1.4.0
30 | =================================================
31 | * First release on CRAN!
32 | * Extract_claims() allows you to, well, extract claims.
33 | * SPARQL syntax bug with some geo queries now fixed (thanks to Mikhail Popov)
34 | 
35 | 1.3.0
36 | =================================================
37 | * Vectorisation of get_*() functions
38 | 
39 | 1.2.0
40 | =================================================
41 | * Geographic data for entities that exist relative to other Wikidata items can now be retrieved
 42 |   with get_geo_entity and get_geo_box, courtesy of Serena Signorelli's excellent
43 |   QueryWikidataR package.
44 | * A bug in printing returned objects is now fixed.
45 | 
46 | 1.1.0
47 | =================================================
48 | * You can now retrieve multiple random properties or items with get_random_item and get_random_property
49 | 
50 | 1.0.1
51 | =================================================
52 | * Various documentation and metadata improvements.
53 | 
54 | 1.0.0
55 | =================================================
56 | * Fix a bug in get_* functions due to a parameter name mismatch
57 | * Print methods added by Christian Graul
58 | 
59 | 0.5.0
60 | =================================================
61 | * This is the initial release! See the explanatory vignettes.
62 | 


--------------------------------------------------------------------------------
/R/WikidataR.R:
--------------------------------------------------------------------------------
 1 | #' @title API client library for Wikidata
 2 | #' @description This package serves as an API client for reading and writing
 3 | #' to and from \href{https://www.wikidata.org/wiki/Wikidata:Main_Page}{Wikidata}, (including 
 4 | #' via the \href{https://quickstatements.toolforge.org/}{QuickStatements} format),
 5 | #' as well as for reading from \href{https://www.wikipedia.org}{Wikipedia}.
 6 | #' @name WikidataR
 7 | #' @docType package
 8 | #' @seealso \code{\link{get_random}} for selecting a random item or property,
 9 | #' \code{\link{get_item}} for a /specific/ item or property, or \code{\link{find_item}}
10 | #' for using search functionality to pull out item or property IDs where the descriptions
11 | #' or aliases match a particular search term.
12 | #' @import WikidataQueryServiceR
13 | #' @import tibble
14 | #' @import dplyr
15 | #' @import stringr
16 | #' @import pbapply
17 | #' @import progress
18 | #' @import crayon
19 | #' @import utils
20 | #' @importFrom readr format_tsv
21 | #' @importFrom readr format_delim
22 | #' @importFrom dplyr bind_cols
23 | #' @importFrom stats var
24 | #' @importFrom stats cor
25 | #' @importFrom WikipediR page_content random_page query
26 | #' @importFrom httr user_agent
27 | #' @importFrom jsonlite fromJSON
28 | #' @aliases WikidataR WikidataR-package
29 | NULL


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | #' @name WD.globalvar
 2 | #' 
 3 | #' @title Global variables for Wikidata properties
 4 | #'
 5 | #' @description A dataset of Wikidata global variables.
 6 | #'
 7 | #' @format A list of tibbles documenting key property constraints from Wikidata  
 8 | #' \describe{
 9 | #'   \item{SID.valid}{valid reference source properties}
10 | #'   \item{PID.datatype}{required data type for each property}
11 | #'   \item{PID.constraint}{expected regex match for each property}
12 | #'   \item{lang.abbrev}{language abbreviations}
13 | #'   \item{lang.abbrev.wiki}{language abbreviations for current wikis}
14 | #'   \item{abbrev.wiki}{Wikimedia abbreviations for current wikis}
15 | #'   ...
16 | #' }
17 | 
18 | utils::globalVariables(c("WD.globalvar"))


--------------------------------------------------------------------------------
/R/disambiguators.R:
--------------------------------------------------------------------------------
  1 | # -------- Disambiguator functions --------
  2 | #
  3 | #'@title Disambiguate QIDs
  4 | #'@description Interactive function that presents alternative possible QID matches for a list of text
  5 | #'strings and provides options for choosing between alternatives, rejecting all presented alternatives,
  6 | #'or creating new items. Useful in cases where a list of text strings may have either missing Wikidata items
  7 | #'or multiple alternative potential matches that need to be manually disambiguated. Can also used on
  8 | #'lists of lists (see examples). For long lists of items, the process can be stopped partway through and
  9 | #'the returned vector will indicate where the process was stopped. 
 10 | #'@param list a list or vector of text strings to find potential QID matches to.
 11 | #'            Can also be a list of lists (see examples)
 12 | #'@param variablename type of items in the list that are being disambiguated (used in messages)
 13 | #'@param variableinfo additional information about items that are being disambiguated (used in messages)
 14 | #'@param filter_property property to filter on (e.g. "P31" to filter on "instance of")
 15 | #'@param filter_variable values of that property to use to filter out (e.g. "Q571" to filter out books)
 16 | #'@param filter_firsthit apply filter to the first match presented or only if alternatives requested?
 17 | #'                       (default = FALSE, note: true is slower if filter not needed on most matches)
 18 | #'@param Q_min return only possible hits with QIDs above the provided value
 19 | #'@param auto_create if no match found, automatically assign "CREATE"
 20 | #'@param limit number of alternative possible Wikidata items to present if multiple potential matches
 21 | #'@return a vector of:
 22 | #' \describe{
 23 | #'   \item{QID}{Selected QID (for when an appropriate Wikidata match exists)}
 24 | #'   \item{CREATE}{Mark that a new Wikidata item should be created (for when no appropriate Wikidata match yet exists)}
 25 | #'   \item{NA}{Mark that no Wikidata item is needed}
 26 | #'   \item{STOP}{Mark that the process was halted at this point (so that output can be used as input to the function later)}
 27 | #' }
 28 | #'@examples
 29 | #'\dontrun{
 30 | #'#Disambiguating possible QID matches for these music genres
 31 | #'#Results should be:
 32 | #'# "Q22731" as the first match
 33 | #'# "Q147538" as the first match
 34 | #'# "Q3947" as the second alternative match
 35 | #'disambiguate_QIDs(list=c("Rock","Pop","House"),
 36 | #'                  variablename="music genre")
 37 | #'
 38 | #'#Disambiguating possible QID matches for these three words, but not the music genres
 39 | #'#This will take longer as the filtering step is slower
 40 | #'#Results should be:
 41 | #'# "Q22731" (the material) as the first match
 42 | #'# "Q147538" (the soft drink) as the second alternative match
 43 | #'# "Q3947" (the building) as the first match
 44 | #'disambiguate_QIDs(list=c("Rock","Pop","House"),
 45 | #'                  filter_property="instance of",
 46 | #'                  filter_variable="music genre",
 47 | #'                  filter_firsthit=TRUE,
 48 | #'                  variablename="concept, not the music genre")
 49 | #'
 50 | #'#Disambiguating possible QID matches for the multiple expertise of
 51 | #'#these three people as list of lists
 52 | #'disambiguate_QIDs(list=list(alice=list("physics","chemistry","maths"),
 53 | #'                            barry=list("history"),
 54 | #'                            clair=list("law","genetics","ethics")),
 55 | #'                  variablename="expertise")
 56 | #'}
 57 | #'@export
 58 | disambiguate_QIDs <- function(list,
 59 |                               variablename="variables",
 60 |                               variableinfo=NULL,
 61 |                               filter_property=NULL,
 62 |                               filter_variable=NULL,
 63 |                               filter_firsthit=FALSE,
 64 |                               Q_min=NULL,
 65 |                               auto_create=FALSE,
 66 |                               limit=10){
 67 |   #make list is formatted as a list (e.g. if vector)
 68 |   if(!all(class(list)=="list")){list <- as.list(list)}
 69 |   if(!is.null(filter_property)){filter_property <- as_pid(filter_property)[[1]][1]}
 70 |   if(!is.null(filter_variable)){filter_variable <- as_qid(filter_variable)[[1]][1]}
 71 |   
 72 |   #is the list the outut from a previous half-done run?
 73 |   if(any(unlist(lapply(list,function(x) x=="STOP")),na.rm = TRUE)){
 74 |     item_to_start_from    <- which(unlist(lapply(list,function(x) any(x=="STOP"))))
 75 |     subitem_to_start_from <- first(which(list == "STOP"))
 76 |     output <- list
 77 |   }else{
 78 |     item_to_start_from    <- 1
 79 |     subitem_to_start_from <- 1
 80 |     output <- blank_output_list(list)
 81 |   }
 82 | 
 83 |   #create output
 84 |   pb_main <- progress_bar$new(total = sum(unlist(output,recursive = TRUE)==""|unlist(output,recursive = TRUE)=="STOP"),
 85 |                               format     = ":bar :percent eta::eta \n",
 86 |                               current    = "|",
 87 |                               width      = 90,
 88 |                               show_after = 0)
 89 |   for(item in item_to_start_from:length(list)){
 90 |     for(subitem in subitem_to_start_from:length(list[[item]])){
 91 |       #check item to search
 92 |       tosearch <- list[[item]][subitem]
 93 |       if(is.na(tosearch)){break}                                   #skip NAs
 94 |       if(tosearch=="STOP"|tosearch==""){tosearch<-names(tosearch)} #use name for items not done in previous run (stored as "STOP" and "")
 95 |       if(grepl("^[Qq][0-9]+$",tosearch)){break}                    #skip if already a QID
 96 |       if(is.null(tosearch)){break}                                 #skip nulls or empty items with no name (usually errors)
 97 |       list[[item]][subitem] <- tosearch                            #if no skips, place that text back in the list
 98 | 
 99 |       #announce choice to be made
100 |       message_header(list,item,subitem,variablename,variableinfo)
101 |       pb_main$tick()
102 |       #execute search and record choice
103 |       Q_min_hit=NULL
104 |       if(!is.null(Q_min)){
105 |         Q_min <- as.numeric(gsub("Q","",Q_min))
106 |         Q_min_hit <- sapply(find_item(list[[item]][subitem],limit = limit),"[[",1)
107 |         Q_min_hit <- Q_min_hit[as.numeric(gsub("Q","",Q_min_hit))>Q_min]
108 |       }
109 |       if(length(Q_min_hit)==1){
110 |           choice <- Q_min_hit
111 |       }else{
112 |         if(filter_firsthit){
113 |           first_hit_qid <- firsthit(list[[item]][subitem],filter_property,filter_variable)
114 |         }else{
115 |           first_hit_qid <- firsthit(list[[item]][subitem])
116 |         }
117 |         if(is.na(first_hit_qid) & auto_create){
118 |           choice <- "CREATE"
119 |         }else{
120 |           choice <- makechoice(qid = first_hit_qid,
121 |                                text= names(first_hit_qid),
122 |                                filter_property=filter_property,
123 |                                filter_variable=filter_variable,
124 |                                limit=limit)
125 |         }
126 | 
127 |       }
128 |       output[[item]][[subitem]] <- choice
129 |       names(output[[item]])[[subitem]] <- names(choice)
130 | 
131 |       #check if stop request made
132 |       if(!is.na(output[[item]][[subitem]])){if(output[[item]][[subitem]]=="STOP"){
133 |         done_so_far <- item
134 |         message_stop(done_so_far,total = length(list))
135 |         break
136 |       }}
137 |     }
138 |     subitem_to_start_from <- 1 # reset the subitem to start from if completed a full item
139 |     if(!is.na(output[[item]][[subitem]])){if(output[[item]][[subitem]]=="STOP"){break}}
140 |   }
141 |   return(output)
142 | }
143 | 
# When provided with a QID, interactively make a decision on whether the output should be that qid or some other value.
#   qid             candidate QID currently on offer (its name is used as `text` when `text` is NULL)
#   text            search text the decision is about; used to name the returned value
#   table           table of alternative matches (columns `qid` and `label` are read), or NULL if none shown yet
#   filter_property, filter_variable, limit   passed through to choices_alt() when alternatives are requested
# Reads one line from the console and returns a length-1 value (a QID, "CREATE", "STOP" or NA), usually named.
# Responses that only look something up in the browser, or that trigger a new search, re-prompt recursively.
makechoice <- function(qid=NULL,
                       text=NULL,
                       table=NULL,
                       filter_property=NULL,
                       filter_variable=NULL,
                       limit=10){
  if(is.null(text)){
    text <- names(qid)
  }

  # prefix the returned name with the original text when the user ended up choosing under a different label
  relabel <- function(output){
    if(!is.null(names(output)) && !is.null(text) && isTRUE(names(output)!=text)){
      names(output) <- paste0(text," -> ",names(output))
    }
    output
  }

  # read the response (the prompt and the menu of options are printed by the caller)
  suppressWarnings(invisible(selection <- readline()))
  if      (selection=="s"||selection=="stop"){                  #s = stop
    output <- "STOP"
    names(output) <- text

  }else if(selection=="y"||selection=="yes"){                   #y = accept
    output <- qid
    names(output) <- text

  }else if(selection=="n"||selection=="no"||selection==""){     #n = reject
    output <- NA
    names(output) <- text

  }else if(selection=="c"||selection=="create"){                #c = create
    output <- "CREATE"
    names(output) <- text

  }else if(selection=="?"){                                     #? = look up in browser
    browseURL(paste0("https://www.wikidata.org/wiki/",qid))
    output <- makechoice(qid,text,table,filter_property,filter_variable,limit)

  }else if(grepl("^[Qq][0-9]+$",selection)){                    #Q123 = id
    output <- selection
    names(output) <- paste0("-> ",selection)

  }else if(grepl("^[Qq][0-9]+\\?$",selection)){                 #Q123? = look up that id in browser
    # the "?" must be escaped: an unescaped "+?" is a lazy quantifier and never matches a literal "?"
    browseURL(paste0("https://www.wikidata.org/wiki/",
                     gsub("\\?","",selection)))
    output <- makechoice(qid,text,table,filter_property,filter_variable,limit)

  }else if(grepl("^[0-9]+$",selection) && !is.null(table)){     #number = select row
    output <- table$qid[as.numeric(selection)]

  }else if(grepl("^[0-9]+\\?$",selection) && !is.null(table)){  #number? = look up row in browser
    browseURL(paste0("https://www.wikidata.org/wiki/",
                     table$qid[as.numeric(gsub("\\?","",selection))]))
    output <- makechoice(qid,text,table,filter_property,filter_variable,limit)

  }else if((selection=="a"||selection=="alt") && !is.null(text)){ #a = alternative
    table  <- choices_alt(text,filter_property,filter_variable,limit)
    output <- relabel(makechoice(qid,text,table,filter_property,filter_variable,limit))

  }else{                                                        #freetext = freetext to search
    table  <- choices_alt(selection,filter_property,filter_variable,limit)
    output <- relabel(makechoice(qid,selection,table,filter_property,filter_variable,limit))
  }

  return(output)
}
213 | 
214 | # -------- Messages --------
215 | 
# Emit a form-feed ("\014") followed by the standard header announcing which
# entry is about to be disambiguated.
#   list          the full (possibly nested) list being worked through
#   i, j          index of the item and of the entry within that item to highlight
#   variablename  optional description of what is being disambiguated; if more than
#                 one value is supplied, the i-th one is used
#   variableinfo  optional extra line of information, selected the same way
message_header <- function(list,
                           i,
                           j,
                           variablename=NULL,
                           variableinfo=NULL){
  entries     <- as.list(list)
  heading     <- bold$cyan(names(entries)[[i]])

  # all entries of the item on one line, with the current one highlighted
  terms       <- entries[[i]]
  terms[j]    <- bold$white$underline(terms[j])
  term_line   <- paste(terms,collapse = " | ")

  intro <- NULL
  if(!is.null(variablename)){
    intro <- paste0("the ",variablename," of ")
    if(length(intro)>1){intro <- intro[i]}
  }

  extra <- NULL
  if(!is.null(variableinfo)){
    extra <- paste0(variableinfo,"\n")
    if(length(extra)>1){extra <- extra[i]}
  }

  message("\014",
          "--------------------------------------------------------------------------- \n",
          "Let's disambiguate ",intro,
          heading, ": \n",
          extra,
          term_line)
}
251 | 
# Print the menu of responses available while a candidate match is on offer.
message_choices <- function(){
  menu <- paste0(
    bold(" y    "),"-> accept the presented match \n",
    bold(" n    "),"-> reject the presented match and move on to the next \n",
    bold(" a    "),"-> request alternative possible matches \n",
    bold(" Q123 "),"-> use this as the wikidata QID \n",
    bold(" text "),"-> try this text as alternative search term \n",
    bold(" c    "),"-> create a new item for this later \n",
    bold(" s    "),"-> stop here, save those done so far and come back later \n",
    bold(" ?    "),"-> check the presented match in your browser"
  )
  message(menu)
}
262 | 
# Print the menu of responses available when no candidate match was found.
message_choices_na <- function(){
  menu <- paste0(
    bold(" y/n  "),"-> leave as 'NA' \n",
    bold(" Q123 "),"-> use this as the wikidata QID \n",
    bold(" text "),"-> try this text as alternative search term \n",
    bold(" c    "),"-> create a new item for this later \n",
    bold(" s    "),"-> stop here, save those done so far and come back later"
  )
  message(menu)
}
270 | 
# Show a table of alternative matches, followed by the menu of responses
# available for choosing among them.
#   table  the alternatives to display (printed as a left-aligned data.frame)
message_choices_alt <- function(table){
  message("Are any of these appropriate?")
  alternatives <- data.frame(table)
  print(alternatives,right=FALSE)
  menu <- paste0(
    bold(" number "),"-> select one of the matches presented (include ",bold("?")," to check an item in your browser) \n",
    bold(" Q123   "),"-> use this as the wikidata QID \n",
    bold(" text   "),"-> try this text as alternative search term \n",
    bold(" c      "),"-> create a new item for this later \n",
    bold(" s      "),"-> stop here, save those done so far and come back later"
  )
  message(menu)
}
280 | 
# Report that the run is stopping and how to resume it.
#   done_so_far  index of the item at which the stop was requested
#   total        total number of items in the list
message_stop <- function(done_so_far,total){
  n_completed <- done_so_far - 1          # items before the one where the stop happened
  n_remaining <- total - done_so_far + 1  # that item and everything after it
  message("Stopping. You've completed ",
          bold$white(n_completed),
          " so far (",
          bold$white(n_remaining),
          " remaining). \n",
          "To restart from where you left off, use the output from this function as the list for disambiguate_QIDs()")
}
289 | 
290 | 
291 | # -------- Misc. support --------
292 | 
# Fetch the top Wikidata hit for `text`, print it together with the matching
# menu of responses (in preparation for makechoice()), and return its QID.
#   text                              search text
#   filter_property, filter_variable  when both are supplied, up to `limit` hits are
#                                     fetched and passed through filter_qids(), and the
#                                     first remaining one is used
#   limit                             number of hits fetched in the filtered case only;
#                                     the unfiltered case always asks for a single hit
# Returns the QID (or NA when nothing suitable was found), named with `text`.
firsthit <- function(text,
                     filter_property=NULL,
                     filter_variable=NULL,
                     limit=30){
  use_filter <- !is.null(filter_property) & !is.null(filter_variable)
  hit        <- NULL  # becomes list(qid, label, desc) once a usable hit is found

  if(use_filter){
    candidates <- sapply(find_item(text,limit = limit),"[[",1)
    kept       <- filter_qids(ids=candidates,
                              property=filter_property,
                              filter=filter_variable,
                              message="Checking for item that doesn't match the filter ")
    if(!is.na(kept$qid[1])){
      hit <- list(qid=kept$qid[1],label=kept$label[1],desc=kept$desc[1])
    }
    none_found <- "No good match found that matches filters"
  }else{
    found <- find_item(text,limit = 1)
    if(length(found)>0){
      top <- found[[1]]
      hit <- list(qid   = top$id,
                  label = if(is.null(top$label)) "no label" else top$label,
                  desc  = if(is.null(top$description)) "no description" else top$description)
    }
    none_found <- "No good match found"
  }

  if(is.null(hit)){
    qid <- NA
    message(white(none_found))
    message_choices_na()
  }else{
    qid <- hit$qid
    message(white(qid,"   ",hit$label,"   ",hit$desc,sep=""))
    message_choices()
  }
  names(qid) <- text
  return(qid)
}
340 | 
# Build an empty "answers" skeleton for a list of search terms.
#
# For a named list, every element becomes a vector whose names are the
# original terms; for an unnamed input the input's own values are used as its
# names. All non-NA values are then replaced by "" (NA stays NA).
blank_output_list <- function(list){
  keys <- names(list)
  if(is.null(keys)){
    output <- list
    names(output) <- list
  }else{
    output <- lapply(keys, function(key){
      entry <- list[[key]]
      attr(entry, 'names') <- entry   # remember each term as the name of its slot
      entry
    })
    names(output) <- keys
  }
  blank <- function(x) ifelse(is.na(x),NA,"")
  rapply(output,blank,how = 'replace')
}
357 | 
358 | 
# Rebuild a blank output skeleton from a previously produced output list.
#
# Each element of `list` is expected to be a vector whose names are the
# original search terms. Those terms are recovered from the names, re-attached
# as names, and every non-NA value is reset to "" (NA stays NA).
# Elements that carry no names are treated as plain vectors of terms.
#
# NOTE(review): this function previously read an unassigned variable and
# could not run; the behaviour here is the closest reading of that code
# (names -> terms, then blank the values) - confirm it matches the intended
# restart semantics.
restarted_output_list <- function(list){
  recover_terms <- function(x){
    terms <- attr(x, 'names')
    if(is.null(terms)){
      terms <- as.character(x)
    }
    attr(terms, 'names') <- terms
    terms
  }
  # lapply over the list itself keeps the outer names intact
  output <- lapply(list, recover_terms)
  output <- rapply(output,function(x) ifelse(is.na(x),NA,""),how = 'replace')
  return(output)
}
369 | 
# Search Wikidata for an alternative term and present the filtered candidates.
#
# selection        the alternative search string
# filter_property  property passed on to filter_qids()
# filter_variable  filter value passed on to filter_qids()
# limit            number of search hits to request from find_item()
#
# Returns the candidate table (with `qid` named by label), or NULL when no
# candidate has a non-NA QID.
choices_alt <- function(selection,filter_property,filter_variable,limit){
  hits    <- find_item(selection,limit=limit)
  altqids <- unlist(lapply(hits,function(hit) hit$id))

  message("Searching for ",bold$white(selection)," as an alternative term")
  results <- if(is.null(altqids)){
    # nothing found at all: placeholder row so the check below still works
    tibble(qid=NA,
           label=NA,
           desc="No current matching Wikidata item")
  }else{
    filter_qids(ids = altqids,
                property = filter_property,
                filter = filter_variable)
  }

  if(all(is.na(results$qid))){
    message(white("No good match found"))
    message_choices_na()
    return(NULL)
  }

  message_choices_alt(results)
  names(results$qid) <- results$label
  results
}
393 | 


--------------------------------------------------------------------------------
/R/geo.R:
--------------------------------------------------------------------------------
  1 | #'@title Retrieve geographic information from Wikidata
  2 | #'@description \code{get_geo_entity} retrieves the item ID, latitude
  3 | #'and longitude of any object with geographic data associated with \emph{another}
  4 | #'object with geographic data (example: all the locations around/near/associated with
  5 | #'a city).
  6 | #'
  7 | #'@param entity a Wikidata item (\code{Q...}) or series of items, to check
  8 | #'for associated geo-tagged items.
  9 | #'
 10 | #'@param language the two-letter language code to use for the name
 11 | #'of the item. "en" by default, because we're imperialist
 12 | #'anglocentric westerners.
 13 | #'
 14 | #'@param radius optionally, a radius (in kilometers) around \code{entity}
 15 | #'to restrict the search to.
 16 | #'
 17 | #'@param limit the maximum number of results to return.
 18 | #'
 19 | #'@param \\dots further arguments to pass to de{httr:ink[httr::GET]{GET}}.
 20 | #'
 21 | #'@return a data.frame of 5 columns:
 22 | #'\itemize{
 23 | #'  \item{item}{ the Wikidata identifier of each object associated with
 24 | #'  \code{entity}.}
 25 | #'  \item{name}{ the name of the item, if available, in the requested language. If it
 26 | #'  is not available, \code{NA} will be returned instead.}
 27 | #'  \item{latitude}{ the latitude of \code{item}}
 28 | #'  \item{longitude}{ the longitude of \code{item}}
 29 | #'  \item{entity}{ the entity the item is associated with (necessary for multi-entity
 30 | #'  queries).}
 31 | #'}
 32 | #'
 33 | #'@examples
 34 | #'# All entities
 35 | #'\donttest{sf_locations <- get_geo_entity("Q62")}
 36 | #'
 37 | #'# Entities with French, rather than English, names
 38 | #'\donttest{sf_locations <- get_geo_entity("Q62", language = "fr")}
 39 | #'
 40 | #'# Entities within 1km
 41 | #'\donttest{sf_close_locations <- get_geo_entity("Q62", radius = 1)}
 42 | #'
 43 | #'# Multiple entities
 44 | #'\donttest{multi_entity <- get_geo_entity(entity = c("Q62", "Q64"))}
 45 | #'
 46 | #'@seealso \code{\link{get_geo_box}} for using a bounding box
 47 | #'rather than an unrestricted search or simple radius.
 48 | #'
 49 | #'@export
 50 | get_geo_entity <- function(entity, language = "en", radius = NULL, limit=100, ...){
 51 |   
 52 |   entity <- check_input(entity, "Q")
 53 |   
 54 |   if(is.null(radius)){
 55 |     query <- paste0("SELECT DISTINCT ?item ?name ?coord ?propertyLabel WHERE {
 56 |                       ?item wdt:P131* wd:", entity, ". ?item wdt:P625 ?coord .
 57 |                       SERVICE wikibase:label {
 58 |                         bd:serviceParam wikibase:language \"", language, "\" .
 59 |                         ?item rdfs:label ?name
 60 |                       }
 61 |                     }
 62 |                     ORDER BY ASC (?name)
 63 |                     LIMIT ", limit)
 64 |   } else {
 65 |     query <- paste0("SELECT ?item ?name ?coord
 66 |                     WHERE {
 67 |                       wd:", entity, " wdt:P625 ?mainLoc .
 68 |                       SERVICE wikibase:around { 
 69 |                         ?item wdt:P625 ?coord .
 70 |                         bd:serviceParam wikibase:center ?mainLoc .
 71 |                         bd:serviceParam wikibase:radius \"", radius,
 72 |                         "\" .
 73 |                       }
 74 |                       SERVICE wikibase:label {
 75 |                         bd:serviceParam wikibase:language \"", language, "\" .
 76 |                         ?item rdfs:label ?name
 77 |                       }
 78 |                     } ORDER BY ASC (?name)
 79 |                     LIMIT ",limit)
 80 |   }
 81 |   
 82 |   if(length(query) > 1){
 83 |      return(do.call("rbind", mapply(function(query, entity, ...){
 84 |       output <- clean_geo(sparql_query(query, ...)$results$bindings)
 85 |       output$entity <- entity
 86 |       return(output)
 87 |     }, query = query, entity = entity, SIMPLIFY = FALSE, ...)))
 88 |   }
 89 |   output <- clean_geo(sparql_query(query)$results$bindings)
 90 |   if(length(output)==0){warning("Query timeout. Possibly try again with lower 'limit='")}
 91 |   output$entity <- entity
 92 |   return(output)
 93 | }
 94 | 
 95 | #'@title Get geographic entities based on a bounding box
 96 | #'@description \code{get_geo_box} retrieves all geographic entities in
 97 | #'Wikidata that fall between a bounding box between two existing items
 98 | #'with geographic attributes (usually cities).
 99 | #'
100 | #'@param first_city_code a Wikidata item, or series of items, to use for
101 | #'one corner of the bounding box.
102 | #'
103 | #'@param first_corner the direction of \code{first_city_code} relative
104 | #'to \code{city} (eg "NorthWest", "SouthEast").
105 | #'
106 | #'@param second_city_code a Wikidata item, or series of items, to use for
107 | #'one corner of the bounding box.
108 | #'
109 | #'@param second_corner the direction of \code{second_city_code} relative
110 | #'to \code{city} (eg "NorthWest", "SouthEast").
111 | #'
112 | #'@param language the two-letter language code to use for the name
113 | #'of the item. "en" by default.
114 | #'
115 | #'@param \\dots further arguments to pass to de{httr:ink[httr::GET]{GET}}.
116 | #'
117 | #'@return a data.frame of 5 columns:
118 | #'\itemize{
119 | #'  \item{item}{ the Wikidata identifier of each object associated with
120 | #'  \code{entity}.}
121 | #'  \item{name}{ the name of the item, if available, in the requested language. If it
122 | #'  is not available, \code{NA} will be returned instead.}
123 | #'  \item{latitude}{ the latitude of \code{item}}
124 | #'  \item{longitude}{ the longitude of \code{item}}
125 | #'  \item{entity}{ the entity the item is associated with (necessary for multi-entity
126 | #'  queries).}
127 | #'}
128 | #'
129 | #'@examples
130 | #'# Simple bounding box
131 | #'\donttest{bruges_box <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest")}
132 | #'
133 | #'# Custom language
134 | #'\donttest{bruges_box_fr <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest",
135 | #'                                         language = "fr")}
136 | #'
137 | #'@seealso \code{\link{get_geo_entity}} for using an unrestricted search or simple radius,
138 | #'rather than a bounding box.
139 | #'
140 | #'@export
141 | get_geo_box <- function(first_city_code, first_corner, second_city_code, second_corner,
142 |                         language = "en", ...){
143 |   
144 |   # Input checks
145 |   first_city_code <- check_input(first_city_code, "Q")
146 |   second_city_code <- check_input(second_city_code, "Q")
147 |   
148 |   # Construct query
149 |   query <- paste0("SELECT ?item ?name ?coord WHERE {
150 |                     wd:", first_city_code, " wdt:P625 ?Firstloc .
151 |                     wd:", second_city_code, " wdt:P625 ?Secondloc .
152 |                     SERVICE wikibase:box {
153 |                       ?item wdt:P625 ?coord .
154 |                       bd:serviceParam wikibase:corner", first_corner, " ?Firstloc .
155 |                       bd:serviceParam wikibase:corner", second_corner, " ?Secondloc .
156 |                     }
157 |                     SERVICE wikibase:label {
158 |                       bd:serviceParam wikibase:language \"", language, "\" .
159 |                       ?item rdfs:label ?name
160 |                     }
161 |                   }ORDER BY ASC (?name)")
162 |   
163 |   # Vectorise if necessary, or not if not!
164 |   if(length(query) > 1){
165 |     return(do.call("rbind", mapply(function(query, ...){
166 |       output <- clean_geo(sparql_query(query, ...)$results$bindings)
167 |       return(output)
168 |     }, query = query, ..., SIMPLIFY = FALSE)))
169 |   }
170 |   output <- clean_geo(sparql_query(query)$results$bindings)
171 |   return(output)
172 | }
173 | 
174 | 
# Cleanup function: turn the `bindings` list of a SPARQL JSON response into a
# data.frame with one row per binding and the columns item, name, latitude
# and longitude. Returns NULL when `results` is empty (rbind of nothing).
clean_geo <- function(results){
  do.call("rbind", lapply(results, function(item){
    # The coordinate arrives as a WKT literal "Point(<longitude> <latitude>)",
    # i.e. longitude first.
    point <- unlist(strsplit(gsub(x = item$coord$value, pattern = "(Point\\(|\\))", replacement = ""),
                             " "))
    wd_id <- gsub(x = item$item$value, pattern = "http://www.wikidata.org/entity/",
                  replacement = "", fixed = TRUE)
    # A name equal to the bare ID means no label was available; a missing
    # name is treated the same way instead of breaking data.frame().
    label <- item$name$value
    if(is.null(label) || identical(label, wd_id)){
      label <- NA_character_
    }
    return(data.frame(item = wd_id,
                      name = label,
                      latitude = as.numeric(point[2]),
                      longitude = as.numeric(point[1]),
                      stringsAsFactors = FALSE))
    
  }))
}
190 | 


--------------------------------------------------------------------------------
/R/gets.R:
--------------------------------------------------------------------------------
  1 | # -------- Gets --------
  2 | 
  3 | #'@title Retrieve specific Wikidata items or properties
  4 | #'@description \code{get_item} and \code{get_property} allow you to retrieve the data associated
  5 | #'with individual Wikidata items and properties, respectively. As with
  6 | #'other \code{WikidataR} code, custom print methods are available; use \code{\link{str}}
  7 | #'to manipulate and see the underlying structure of the data.
  8 | #'
  9 | #'@param id the ID number(s) of the item or property you're looking for. This can be in
 10 | #'various formats; either a numeric value ("200"), the full name ("Q200") or
 11 | #'even with an included namespace ("Property:P10") - the function will format
 12 | #'it appropriately. This function is vectorized and will happily accept
 13 | #'multiple IDs.
 14 | #'
 15 | #'@param \\dots further arguments to pass to de{httr:ink[httr::GET]{GET}}.
 16 | #'
 17 | #'@seealso \code{\link{get_random}} for selecting a random item or property,
 18 | #'or \code{\link{find_item}} for using search functionality to pull out
 19 | #'item or property IDs where the descriptions or aliases match a particular
 20 | #'search term.
 21 | #'
 22 | #'@examples
 23 | #'
 24 | #'#Retrieve a specific item
 25 | #'adams_metadata <- get_item("42")
 26 | #'
 27 | #'#Retrieve a specific property
 28 | #'object_is_child <- get_property("P40")
 29 | #'
 30 | #'@aliases get_item get_property
 31 | #'@rdname get_item
 32 | #'@export
 33 | get_item <- function(id, ...){
 34 |   id <- check_input(id, "Q")
 35 |   output <- (lapply(id, wd_query, ...))
 36 |   class(output) <- "wikidata"
 37 |   return(output)
 38 | }
 39 | 
 40 | #'@rdname get_item
 41 | #'@export
 42 | get_property <- function(id, ...){
 43 |   has_grep <- grepl("^P(?!r)",id, perl = TRUE)
 44 |   id[has_grep] <- paste0("Property:", id[has_grep])
 45 |   id <- check_input(id, "Property:P")
 46 |   
 47 |   output <- (lapply(id, wd_query, ...))
 48 |   class(output) <- "wikidata"
 49 |   return(output)
 50 | }
 51 | 
 52 | #'@title Retrieve randomly-selected Wikidata items or properties
 53 | #'@description \code{get_random_item} and \code{get_random_property} allow you to retrieve the data
 54 | #'associated with randomly-selected Wikidata items and properties, respectively. As with
 55 | #'other \code{WikidataR} code, custom print methods are available; use \code{\link{str}}
 56 | #'to manipulate and see the underlying structure of the data.
 57 | #'
 58 | #'@param limit how many random items to return. 1 by default, but can be higher.
 59 | #'
 60 | #'@param \\dots arguments to pass to de{httr:ink[httr::GET]{GET}}.
 61 | #'
 62 | #'@seealso \code{\link{get_item}} for selecting a specific item or property,
 63 | #'or \code{\link{find_item}} for using search functionality to pull out
 64 | #'item or property IDs where the descriptions or aliases match a particular
 65 | #'search term.
 66 | #'
 67 | #'@examples
 68 | #'\dontrun{
 69 | #'#Random item
 70 | #'random_item <- get_random_item()
 71 | #'
 72 | #'#Random property
 73 | #'random_property <- get_random_property()
 74 | #'}
 75 | #'@aliases get_random get_random_item get_random_property
 76 | #'@rdname get_random
 77 | #'@export
 78 | get_random_item <- function(limit = 1, ...){
 79 |   return(wd_rand_query(ns = 0, limit = limit, ...))
 80 | }
 81 | 
 82 | #'@rdname get_random
 83 | #'@export
 84 | get_random_property <- function(limit = 1, ...){
 85 |   return(wd_rand_query(ns = 120, limit = limit, ...))
 86 | }
 87 | 
 88 | 
 89 | #' @title Get an example SPARQL query from Wikidata
 90 | #' @description Gets the specified example(s) from
 91 | #'   [SPARQL query service examples page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
 92 | #'   using [Wikidata's MediaWiki API](https://www.wikidata.org/w/api.php).
 93 | #' @details If you are planning on extracting multiple examples, please provide
 94 | #'   all the names as a single vector for efficiency.
 95 | #' @param example_name the names of the examples as they appear on
 96 | #'   [this page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
 97 | #' @return The SPARQL query as a character vector.
 98 | #' @examples
 99 | #' \dontrun{
100 | #' sparql_query <- extract_example(c("Cats", "Horses"))
101 | #' query_wikidata(sparql_query)
102 | #' # returns a named list with two data frames
103 | #' # one called "Cats" and one called "Horses"
104 | #' sparql_query <- extract_example("Largest cities with female mayor")
105 | #' cat(sparql_query)
106 | #' query_wikidata(sparql_query)
107 | #' }
108 | #' @seealso [query_wikidata]
109 | #' @export
110 | get_example <- function(example_name){
111 |   content <- WikipediR::page_content(
112 |     domain = "www.wikidata.org",
113 |     page_name = "Wikidata:SPARQL query service/queries/examples",
114 |     as_wikitext = TRUE
115 |   )
116 |   wiki <- strsplit(content$parse$wikitext$`*`, "\n")[[1]]
117 |   wiki <- wiki[wiki != ""]
118 |   return(vapply(example_name, function(example_name){
119 |     heading_line <- which(grepl(paste0("^===\\s?", example_name, "\\s?===$"), wiki, fixed = FALSE))
120 |     start_line <- which(grepl("{{SPARQL", wiki[(heading_line + 1):length(wiki)], fixed = TRUE))[1]
121 |     end_line <- which(grepl("}}", wiki[(heading_line + start_line + 1):length(wiki)], fixed = TRUE))[1]
122 |     query <- paste0(wiki[(heading_line + start_line):(heading_line + start_line + end_line - 1)], collapse = "\n")
123 |     return(sub("^\\s*\\{\\{SPARQL2?\\n?\\|query\\=", "", query))
124 |   }, ""))
125 | }
126 | 
127 | 
128 | # -------- Finds --------
129 | 
#'@title Search for Wikidata items or properties that match a search term
#'@description \code{find_item} and \code{find_property} allow you to retrieve a set
#'of Wikidata items or properties where the aliases or descriptions match a particular
#'search term.  As with other \code{WikidataR} code, custom print methods are available;
#'use \code{\link{str}} to manipulate and see the underlying structure of the data.
#'
#'@param search_term A term to search for.
#'
#'@param language The language to conduct the search in; this should
#'consist of an ISO language code. Defaults to \code{"en"}.
#'
#'@param limit The number of results to return; set to \code{10} by default.
#'
#'@param response_language The language to return the labels and descriptions in;
#'this should consist of an ISO language code. Defaults to \code{"en"}.
#'
#'@param \dots further arguments passed on to \code{\link{searcher}}.
#'
#'@seealso \code{\link{get_random}} for selecting a random item or property,
#'or \code{\link{get_item}} for selecting a specific item or property.
#'
#'@examples
#'
#'#Check for entries relating to Douglas Adams in some way
#'adams_items <- find_item("Douglas Adams")
#'
#'#Check for properties involving the peerage
#'peerage_props <- find_property("peerage")
#'
#'@aliases find_item find_property
#'@return A list containing the result of the query.
#'@rdname find_item
#'@export
find_item <- function(search_term, 
                      language = "en", 
                      limit = 10, 
                      response_language = "en", 
                      ...){
  # forward the extra arguments so the request can actually be customised
  res <- searcher(search_term, language, limit, response_language, "item", ...)
  class(res) <- "find_item"
  return(res)
}
169 | 
#'@rdname find_item
#'@export
find_property <- function(search_term, 
                          language = "en", 
                          response_language = "en", 
                          limit = 10,
                          ...){
  # same search as find_item(), restricted to properties; extra arguments
  # are handed to searcher() just as find_item() accepts them
  res <- searcher(search_term, language, limit, response_language, "property", ...)
  class(res) <- "find_property"
  return(res)
}
180 | 
#Generic, direct access to Wikidata's search functionality.
#'@title Search Wikidata for entities matching a term
#'@description Sends a \code{wbsearchentities} request to the Wikidata API and
#'returns the matching entries. Used by \code{find_item} and \code{find_property}.
#'@param search_term a term to search for.
#'@param language the language to conduct the search in; this should
#'consist of an ISO language code.
#'@param response_language the language to return the labels and descriptions in; this should
#'consist of an ISO language code.
#'@param limit the number of results to return.
#'@param type type of wikidata object to return (for example "item" or "property").
#'@param \dots further arguments passed on to \code{WikipediR::query}.
#'@return The \code{search} element of the API response.
#'@examples
#'\dontrun{
#'searcher("Douglas Adams", language = "en", limit = 5,
#'         response_language = "en", type = "item")
#'}
#'@export
searcher <- function(search_term, language, limit, response_language, type, ...){
  api_params <- list(
    action   = "wbsearchentities", 
    type     = type,
    language = language,
    uselang = response_language,
    limit    = limit,
    search   = search_term
  )
  response <- WikipediR::query(url = "https://www.wikidata.org/w/api.php",
                               out_class = "list",
                               clean_response = FALSE,
                               query_param = api_params,
                               ...)
  response$search
}
217 | 


--------------------------------------------------------------------------------
/R/prints.R:
--------------------------------------------------------------------------------
  1 | #'@title Print method for find_item
  2 | #'
  3 | #'@description print found items.
  4 | #'
  5 | #'@param x find_item object with search results
  6 | #'@param \dots Arguments to be passed to methods
  7 | #'
  8 | #'@method print find_item
  9 | #'@export
 10 | print.find_item <- function(x, ...) {
 11 |   cat("\n\tWikidata item search\n\n")
 12 | 	
 13 |   # number of results
 14 |   num_results <- length(x)
 15 |   cat("Number of results:\t", num_results, "\n\n")
 16 | 		
 17 |   # results
 18 |   if(num_results > 0) {
 19 |     cat("Results:\n")
 20 |     for(i in 1:num_results) {
 21 |       if(is.null(x[[i]]$description)){
 22 |         desc <- "\n"
 23 |       }
 24 |       else {
 25 |         desc <- paste("-", x[[i]]$description, "\n")
 26 |       }
 27 |       cat(i, "\t", x[[i]]$label, paste0("(", x[[i]]$id, ")"), desc)
 28 |     }
 29 |   }
 30 | }
 31 | 
 32 | #'@title Print method for find_property
 33 | #'
 34 | #'@description print found properties.
 35 | #'
 36 | #'@param x find_property object with search results
 37 | #'@param \dots Arguments to be passed to methods
 38 | #'
 39 | #'@method print find_property
 40 | #'@export
 41 | print.find_property <- function(x, ...) {
 42 |   cat("\n\tWikidata property search\n\n")
 43 | 	
 44 |   # number of results
 45 |   num_results <- length(x)
 46 |   cat("Number of results:\t", num_results, "\n\n")
 47 | 		
 48 |   # results
 49 |   if(num_results > 0) {
 50 |     cat("Results:\n")
 51 |     for(i in seq_len(num_results)) {
 52 |       if(is.null(x[[i]]$description)){
 53 |         desc <- "\n"
 54 |       }
 55 |       else {
 56 |         desc <- paste("-", x[[i]]$description, "\n")
 57 |       }
 58 |       cat(i, "\t", x[[i]]$label, paste0("(", x[[i]]$id, ")"), desc)
 59 |     }
 60 |   }
 61 | }
 62 | 
 63 | wd_print_base <- function(x, ...){
 64 |   
 65 |   cat("\n\tWikidata", x$type, x$id, "\n\n")
 66 |   
 67 |   # labels
 68 |   num.labels <- length(x$labels)
 69 |   if(num.labels>0) {
 70 |     lbl <- x$labels[[1]]$value
 71 |     if(num.labels==1) cat("Label:\t\t", lbl, "\n")
 72 |     else {
 73 |       if(!is.null(x$labels$en)) lbl <- x$labels$en$value
 74 |       cat("Label:\t\t", lbl, paste0("\t[", num.labels-1, " other languages available]\n"))
 75 |     }
 76 |   }
 77 |   
 78 |   # aliases
 79 |   num_aliases <- length(x$aliases)
 80 |   if(num_aliases > 0) {
 81 |     al <- unique(unlist(lapply(x$aliases, function(xl){return(xl$value)})))
 82 |     cat("Aliases:\t", paste(al, collapse = ", "), "\n")
 83 |   }
 84 |   
 85 |   # descriptions
 86 |   num_desc <- length(x$descriptions)
 87 |   if(num_desc > 0) {
 88 |     desc <- x$descriptions[[1]]$value
 89 |     if(num_desc == 1){
 90 |       cat("Description:", desc, "\n")
 91 |     }
 92 |     else {
 93 |       if(!is.null(x$descriptions$en)){
 94 |         desc <- x$descriptions$en$value
 95 |       }
 96 |       cat("Description:", desc, paste0("\t[", (num_desc - 1), " other languages available]\n"))
 97 |     }
 98 |   }
 99 |   
100 |   # num claims
101 |   num_claims <- length(x$claims)
102 |   if(num_claims > 0){
103 |     cat("Claims:\t\t", num_claims, "\n")
104 |   }
105 |   
106 |   # num sitelinks
107 |   num_links <- length(x$sitelinks)
108 |   if(num_links > 0){
109 |     cat("Sitelinks:\t", num_links, "\n")
110 |   }
111 | }
112 | 
#'@title Print method for Wikidata objects
#'
#'@description print found objects generally: every entity held in \code{x}
#'is summarised in turn.
#'
#'@param x Wikidata object from get_item, get_random_item, get_property or get_random_property
#'@param \dots Arguments to be passed to methods
#'@return \code{x}, invisibly.
#'@seealso get_item, get_random_item, get_property or get_random_property
#'@method print wikidata
#'@export
print.wikidata <- function(x, ...){
  # printing is done purely for its side effect, so a plain loop avoids
  # building a throw-away list of results
  for(entity in x){
    wd_print_base(entity, ...)
  }
  # print methods hand their argument back invisibly so calls can be chained
  invisible(x)
}


--------------------------------------------------------------------------------
/R/queries.R:
--------------------------------------------------------------------------------
  1 | #Generic queryin' function for direct Wikidata calls. Wraps around WikipediR::page_content. - Ironholds
  2 | #'@title Download a Wikidata item
  3 | #'@description Utility wrapper for Wikidata API to download item.
  4 | #'Used by \code{get_item} and \code{get_property}.
  5 | #'@param title The Wikidata item or property as a string.
  6 | #'@param \\dots Additional parameters to supply to  \code{httr:\link[httr::POST]{POST}}.
  7 | #'@return A downloaded full wikidata object (item or property) formatted as a 
  8 | #'nested json list.
  9 | #'@export
 10 | wd_query <- function(title, ...){
 11 |   result <- WikipediR::page_content(domain = "wikidata.org", 
 12 |                                     page_name = title, 
 13 |                                     as_wikitext = TRUE,
 14 |                                     httr::user_agent("WikidataR - https://github.com/TS404/WikidataR"),
 15 |                                     ...)
 16 |   output <- jsonlite::fromJSON(result$parse$wikitext[[1]])
 17 |   return(output)
 18 | }
 19 | 
 20 | # Query for a random item in "namespace" (ns). Essentially a wrapper around WikipediR::random_page. - Ironholds
 21 | #'@title Download random Wikidata items
 22 | #'@description Utility wrapper for Wikidata API to download random items. 
 23 | #'Used by \code{random_item}.
 24 | #'@param ns string indicating namespace, most commonly "Main" for QID items, "Property" 
 25 | #'for PID properties.
 26 | #'@param limit How many random object to return.
 27 | #'@param \\dots Additional parameters to supply to  \code{httr:\link[httr::POST]{POST}}.
 28 | #'@return Downloaded full wikidata objects (items or properties) formatted 
 29 | #'as nested json lists.
 30 | #'@export
 31 | wd_rand_query <- function(ns, limit, ...){
 32 |   result <- WikipediR::random_page(domain = "wikidata.org", 
 33 |                                    as_wikitext = TRUE, 
 34 |                                    namespaces = ns,
 35 |                                    httr::user_agent("WikidataR - https://github.com/TS404/WikidataR"),
 36 |                                    limit = limit, ...)
 37 |   output <- lapply(result, function(x){jsonlite::fromJSON(x$wikitext[[1]])})
 38 |   class(output) <- "wikidata"
 39 |   return(output)
 40 | }
 41 | 
 42 | #SPARQL query function for direct Wikidata calls.
 43 | #'@title Download full Wikidata items matching a SPARQL query 
 44 | #'@description Utility wrapper for wikidata spargl endpoint to download items.
 45 | #'Used by \code{get_geo_entity} and \code{get_geo_box}.
 46 | #'@param query The SPARQL query as a string
 47 | #'@param \\dots Additional parameters to supply to \code{httr:\link[httr::POST]{POST}}.
 48 | #'@return a download of the full wikidata objects formatted as a nested json list
 49 | #'@export
 50 | sparql_query <- function(query, ...){
 51 |   result <- httr::GET("https://query.wikidata.org/bigdata/namespace/wdq/sparql",
 52 |                       query = list(query = query),
 53 |                       httr::user_agent("WikidataR - https://github.com/TS404/WikidataR"),
 54 |                       ...)
 55 |   httr::stop_for_status(result)
 56 |   return(httr::content(result, as = "parsed", type = "application/json"))
 57 | }
 58 | 
 59 | #Wrapper around WikidataQueryServiceR::query_wikidata
 60 | #' @title Send one or more SPARQL queries to WDQS
 61 | #' @description Makes a POST request to Wikidata Query Service SPARQL endpoint.
 62 | #' @param sparql_query SPARQL query (can be a vector of queries)
 63 | #' @param format
 64 | #'   `tibble` (default) returns a pure character data frame,
 65 | #'   `simple` returns a pure character vector, while
 66 | #'   `smart` fetches JSON-formatted data and returns a tibble with datetime
 67 | #'   columns converted to `POSIXct`.
 68 | #' @param \\dots Additional parameters to supply to \code{httr:\link[httr::POST]{POST}}.
 69 | #' @return A `tibble` or `vector`. Note: QID values will be returned as QIDs, rather than URLs.
 70 | #' @section Query limits:
 71 | #' There is a hard query deadline configured which is set to 60 seconds. There
 72 | #' are also following limits:
 73 | #' - One client (user agent + IP) is allowed 60 seconds of processing time each
 74 | #'   60 seconds
 75 | #' - One client is allowed 30 error queries per minute
 76 | #' See \href{https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits}{query limits section}
 77 | #' in the Wikidata Query Service User Manual for more information.
 78 | #' @examples
 79 | #' # R's versions and release dates:
 80 | #' sparql_query <- 'SELECT DISTINCT
 81 | #'   ?softwareVersion ?publicationDate
 82 | #'   WHERE {
 83 | #'     BIND(wd:Q206904 AS ?R)
 84 | #'     ?R p:P348 [
 85 | #'       ps:P348 ?softwareVersion;
 86 | #'       pq:P577 ?publicationDate
 87 | #'     ] .
 88 | #' }'
 89 | #' query_wikidata(sparql_query)
 90 | #'
 91 | #' \dontrun{
 92 | #' # "smart" format converts all datetime columns to POSIXct
 93 | #' query_wikidata(sparql_query, format = "smart")
 94 | #' }
 95 | #' @export
 96 | query_wikidata <- function(sparql_query,format="tibble",...) {
 97 |   if(format=="simple"){simplify<-TRUE}else{simplify<-FALSE}
 98 |   if(format=="tibble"){format<-"simple"}
 99 |   output <- WikidataQueryServiceR::query_wikidata(sparql_query=sparql_query,
100 |                                                   format=format, ...)
101 |   output <- suppressWarnings(mapply(url_to_id,
102 |                                     data.frame(output),
103 |                                     SIMPLIFY=simplify))
104 |   output <- tibble(data.frame(output))
105 |   if(nrow(output)==0){output<-tibble(value=NA)}
106 |   output
107 | }
108 | 
#' @title QID from identifier
#' @description Convert unique identifiers to QIDs (for items in Wikidata). 
#' @details The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID) is the unique identifier (UID) 
#' used in Wikidata. One SPARQL query is built and run per supplied value; a
#' message is shown when the number of QIDs returned differs from the number
#' of values supplied.
#' @param property The identifier property to search (for caveats, see \code{as_pid}.)
#' @param value The identifier value to match.
#' @return A vector of QIDs corresponding to identifiers submitted.
#' @examples
#' qid_from_identifier('ISBN-13','978-0-262-53817-6')
#' @export
qid_from_identifier <- function(property = 'DOI',
                                value    = c('10.15347/WJM/2019.001','10.15347/WJM/2020.002')){
  
  property <- as_pid(property)
  
  # SPARQL text for a single identifier value, named by that value
  build_query <- function(value,property){
    sparql <- paste0('SELECT ?value WHERE {?value wdt:',
                     property,
                     ' "',
                     value,
                     '"}')
    names(sparql) <- value
    sparql
  }
  
  # run one query and carry its name over to the result
  run_query <- function(x){
    qids <- as.character(query_wikidata(x)[[1]])
    names(qids) <- names(x)
    qids
  }
  
  sparql_query <- lapply(value, build_query, property = property)
  
  if(length(value)>1){
    # several values: run them one by one with a progress bar
    output <- unlist(pblapply(sparql_query,run_query))
  } else {
    output <- as.character(unlist(lapply(sparql_query,FUN=query_wikidata)))
    names(output) <- value
  }
  
  if(length(value)!=length(output)){
    message("Caution! Some supplied values returned more than one QID.")
  }
  
  return(output)
}
156 | 
157 | #' @title Identifier from identifier
158 | #' @description Convert unique identifiers to other unique identifiers.
159 | #' @param property The identifier property to search (for caveats, see \code{as_pid})
160 | #' @param return The identifier property to convert to
161 | #' @param value The identifier value to match.
162 | #' @return A vector of identifiers corresponding to identifiers submitted.
163 | #' @examples
164 | #' identifier_from_identifier(property ='ORCID iD',
165 | #'                            return = 'IMDb ID',
166 | #'                            value = c('0000-0002-7865-7235','0000-0003-1079-5604')
167 | #'                            )
168 | #' @export
169 | identifier_from_identifier <- function(property = 'ORCID iD',
170 |                                        return   = 'IMDb ID',
171 |                                        value    = "0000-0002-7865-7235"){
172 |   # Resolve both properties (labels or PIDs) through as_pid()
173 |   property <- as_pid(property)
174 |   return   <- as_pid(return)
175 |   # Query for one value: match the item whose `property` equals `value`,
176 |   # then select that item's `return` property
177 |   build_query <- function(value,return,property){
178 |     paste0('SELECT ?return WHERE { ?value wdt:',property,
179 |            ' "',value,
180 |            '". ?value wdt:',return,
181 |            ' ?return.}')
182 |   }
183 |   first_column <- function(x){as.character(query_wikidata(x)[[1]])}
184 |   sparql_query <- lapply(value,build_query,return,property)
185 |   if(length(value)>1){
186 |     output <- unlist(pbapply::pblapply(sparql_query,first_column))
187 |   }else{
188 |     output <- as.character(unlist(lapply(sparql_query,FUN=query_wikidata)))
189 |   }
190 |   names(output) <- value
191 |   output
192 | }
193 | 


--------------------------------------------------------------------------------
/R/schol.R:
--------------------------------------------------------------------------------
 1 | #' @title QID from DOI
 2 | #' @description simple converter from DOIs to QIDs (for items in Wikidata)
 3 | #' @param DOI digital object identifiers submitted as strings
 4 | #' @return vector of QIDs corresponding to DOIs submitted
 5 | #' @export
 6 | qid_from_DOI <- function(DOI = '10.15347/WJM/2019.001'){
 7 |   # the DOI is upper-cased before being matched against property P356
 8 |   doi_upper <- toupper(DOI)
 9 |   qid_from_identifier(property = 'P356', value = doi_upper)
10 | }
11 | 
12 | #' @title QID from label name
13 | #' @description simple converter from label names to QIDs (for items in wikidata).
14 | #' Essentially a simplification of \code{find_item}
15 | #' @param name name labels submitted as strings
16 | #' @param limit if multiple QIDs match each submitted name, how many to return
17 | #' @param format output format ('vector' to return a simple vector, or 'list' to return a nested list)
18 | #' @return vector of QIDs corresponding to names submitted. Note: some names may return multiple QIDs.
19 | #' @export
20 | qid_from_name <- function(name   = "Thomas Shafee",
21 |                           limit  = 100,
22 |                           format = "vector"){
23 |   # for each name, call find_item() and keep the "id" of every returned entry
24 |   ids_per_name <- lapply(name, function(one_name){
25 |     lapply(find_item(one_name, limit=limit), "[[", "id")
26 |   })
27 |   names(ids_per_name) <- name
28 |   # "vector" flattens the nested list; any other format returns it as it is
29 |   if(format=="vector"){unlist(ids_per_name)}else{ids_per_name}
30 | }
31 | 
32 | #' @title QID from ORCID
33 | #' @description simple converter from ORCIDs to QIDs (for items in wikidata)
34 | #' @param ORCID ORCID iDs submitted as strings
35 | #' @return vector of QIDs corresponding to ORCIDs submitted
36 | #' @export
37 | qid_from_ORCID <- function(ORCID = '0000-0002-2298-7593'){
38 |   # the supplied ORCIDs are matched against property P496
39 |   qid_from_identifier(property = 'P496', value = ORCID)
40 | }
41 | 


--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TS404/WikidataR/d7873b6b80a951130e9c4ec5c17068bd4898fb6e/R/sysdata.rda


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | # -------- Format checkers --------
  2 | # Vectorised tests of strings for whether they adhere to common wikidata formats
  3 | is.qid     <- function(x) grepl("^[Qq][0-9]+$",x)  # "Q" or "q" followed by digits only
  4 | is.pid     <- function(x) gsub("S","P",x) %in% as.matrix(WD.globalvar$PID.datatype$property)  # "S" is read as "P" first
  5 | is.sid     <- function(x) gsub("S","P",x) %in% as.matrix(WD.globalvar$SID.valid$Wikidata_property_to_indicate_a_source)
  6 | is.date    <- function(x) grepl("[0-9]{1,4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}",x)  # contains y-mm-ddThh:mm:ss
  7 | is.quot    <- function(x) grepl("^\".+\"$",x)  # wrapped in double quotes, with something in between
  8 | is.empty   <- function(x) x==""
  9 | is.coord   <- function(x) grepl("@-?([1-8]?\\d(\\.\\d+)?|90(\\.0+)?)/-?(180(\\.0+)?|((1[0-7]\\d)|([1-9]?\\d))(\\.\\d+)?)$",x)
 10 | is.wdURL   <- function(x) grepl("http://www.wikidata.org/entity/[PpQq][0-9]+$",x)
 11 | is.create  <- function(x) grepl("^CREATE",x)  # starts with "CREATE"
 12 | is.createx <- function(x) grepl("^CREATE.+",x)  # "CREATE" plus at least one more character
 13 | is.create_x<- function(x) grepl("^CREATE_.+",x)  # "CREATE_" plus at least one more character
 14 | is.last    <- function(x) grepl("^LAST$",x)  # exactly "LAST"
 15 | is.special <- function(x){
 16 |   # everything after the one-letter prefix is the code that gets looked up
 17 |   code <- substr(x,2,100)
 18 |   if(grepl("^[LAD]",x)){
 19 |     return(code %in% as.matrix(WD.globalvar$lang.abbrev))
 20 |   }
 21 |   if(grepl("^S",x)){return(code %in% as.matrix(WD.globalvar$abbrev.wiki))}
 22 |   FALSE
 23 | }
 24 | 
 25 | check.PID.WikibaseItem <- function(x){ # TRUE where x is a property whose wbtype is "WikibaseItem"
 26 |   datatypes <- WD.globalvar$PID.datatype; x %in% datatypes$property[datatypes$wbtype=="WikibaseItem"]}
 27 | 
 28 | check.PID.constraint <- function(x){
 29 |   constraint_for <- function(pid){ # stored "fmt" value(s) for one property, NA when none
 30 |     fmt <- as.character(WD.globalvar$PID.constraint$fmt[WD.globalvar$PID.constraint$Wikidata_property==pid])
 31 |     if(length(fmt)==0){NA}else{fmt}
 32 |   }
 33 |   sapply(x,constraint_for)
 34 | }
 35 | 
 36 | #'@title Extract an identifier from a wikidata URL
 37 | #'@description Convert a URL ending in an identifier (returned by SPARQL queries) to just
 38 | #'the plain identifier (QID or PID). Works element-wise on vectors.
 39 | #'@param x a vector of strings representing wikidata URLs
 40 | #'@return for each element: if the URL ends in a QID or PID, that QID or PID, else the original string
 41 | #'@examples
 42 | #'url_to_id("http://www.wikidata.org/entity/Q42")
 43 | #'@export
 44 | url_to_id <- function (x){
 45 |   hits <- is.wdURL(x)  # vectorised test; a vector inside `if()` is an error from R 4.2.0
 46 |   if(any(hits)){x[hits] <- sub("^.*/", "", x[hits])}
 47 |   x
 48 | }
 49 | 
 50 | #Generic input checker. Needs additional stuff for property-based querying
 51 | #because namespaces are weird, yo. - Ironholds
 52 | #'@title Generic input checker
 53 | #'@description Utility function to handle namespaces. Used by \code{get_item} and \code{get_property}
 54 | #'@param input vector of strings to check
 55 | #'@param substitution string to put in front of every element that consists only of digits
 56 | #'@return the input, with \code{substitution} prepended to each digits-only element
 57 | #'@export
 58 | check_input <- function(input, substitution){
 59 |   # elements made up of digits only are the ones still missing their prefix
 60 |   digits_only <- grepl("^\\d+$",input)
 61 |   if(!any(digits_only)){return(input)}
 62 |   input[digits_only] <- paste0(substitution, input[digits_only])
 63 |   input
 64 | }
 65 | 
 66 | 
 67 | # -------- Format converters --------
 68 | # Simple functions to convert plain text descriptions into their most likely QID/PIDs
 69 | #'@title Convert an input to a item QID.
 70 | #'@description Convert an input string to the most likely item 
 71 | #'\href{https://www.wikidata.org/wiki/Q43649390}{QID}.
 72 | #'@param x a vector, data frame, or tibble of strings representing wikidata items
 73 | #'@return if the inputted string is a valid QID, return the string.
 74 | #'If the inputted string matches an item label, return its QID.
 75 | #'If the inputted string matches multiple labels of multiple items, return the QID of the first hit.
 76 | #'@examples
 77 | #'# if input string is a valid QID
 78 | #'as_qid("Q42")
 79 | #'# if input string matches multiple item labels
 80 | #'as_qid("Douglas Adams")
 81 | #'# if input string matches a single unique label
 82 | #'as_qid("Douglas Adams and the question of arterial blood pressure in mammals")
 83 | #'@export
 84 | as_qid <- function(x){ # resolves every element of x; the result is a tibble
 85 |   as_qid_nest1 <- function(x){ # handles one column
 86 |     as_qid_nest2 <- function(x){ # handles one value
 87 |       if(is.qid(x)|is.date(x)|is.quot(x)|is.na(x)|is.null(x)|is.empty(x)|is.createx(x)|is.create(x)|is.last(x)){
 88 |         x # QIDs, dates, quoted strings, NA, "" and CREATE/LAST keywords pass through unchanged
 89 |       }else{
 90 |         temp <- find_item(x,limit = 100) # anything else is looked up with find_item()
 91 |         if(length(temp)==0){
 92 |           out <- NA
 93 |           message (paste0("no sufficiently close match for \"",x,"\". Returned \"NA\"."))
 94 |         }else{
 95 |           toinclude    <- sapply(temp,function(temp,x){temp$label==x},x) # results whose label equals x exactly
 96 |           toinclude[1] <- TRUE # the first result is always kept, exact or not
 97 |           temp         <- temp[toinclude]
 98 |           out          <- temp[[1]]$id # the answer is the id of the first kept result ...
 99 |           names(out)   <- temp[[1]]$label # ... named with that result's label
100 |           if(x!=temp[[1]]$label){message(paste0(
101 |             "Inexact match for \"",x,
102 |             "\", closest match = ",temp[[1]]$label,
103 |             " (",out,") "))}
104 |           if(length(temp)>1){ # several results were kept: list each id and description
105 |             message(paste0(
106 |             "Multiple exact matches for \"",x,"\""))
107 |             message(paste0(
108 |               "  match ",1:length(temp),
109 |               " = (",sapply(temp,function(temp){temp$id}),
110 |               ") ",sapply(temp,function(temp){temp$description}),
111 |               "\n"))}
112 |           }
113 |         out
114 |       }
115 |     }
116 |     out <- unlist(lapply(x,as_qid_nest2)) # one result per element of the column
117 |     out
118 |   }
119 |   output <- bind_cols(lapply(tibble(x),as_qid_nest1)) # one output column per input column
120 |   return(output)
121 | }
122 | 
123 | #'@title Convert an input to a property PID
124 | #'@description Convert an input string to the most likely property PID
125 | #'@param x a vector, data frame, or tibble of strings representing Wikidata properties
126 | #'@return If the inputted string is a valid PID, return the string.
127 | #'If the inputted string matches a property label, return its PID.
128 | #'If the inputted string matches multiple labels of multiple properties, return the PID of the first hit.
129 | #'@examples
130 | #'# if input string is a valid PID
131 | #'as_pid("P50")
132 | #'# if input string matches multiple item labels
133 | #'as_pid("author")
134 | #'# if input string matches a single unique label
135 | #'as_pid("Scopus author ID")
136 | #'@export
137 | as_pid <- function(x){ # resolves every element of x; the result is a tibble
138 |   as_pid_nest1 <- function(x){ # handles one column
139 |     as_pid_nest2 <- function(x){ # handles one value
140 |       if(is.pid(x)|is.date(x)|is.quot(x)|is.na(x)|is.null(x)|is.empty(x)|is.special(x)){
141 |         x # PIDs, dates, quoted strings, NA, "" and is.special() values pass through unchanged
142 |       }else{
143 |         temp <- find_property(x,limit = 2) # anything else is looked up with find_property()
144 |         if(length(temp)==0){
145 |             out <- NA
146 |             message (paste0("no sufficiently close match for \"",x,"\". Returned \"NA\"."))
147 |           }else{
148 |             out        <- temp[[1]]$id # the first result wins ...
149 |             names(out) <- temp[[1]]$label # ... and the answer is named with its label
150 |             if(x!=temp[[1]]$label){message(paste0(
151 |               "Inexact match for \"",x,
152 |               "\", closest match = ",temp[[1]]$label,
153 |               " (",out,")."))}
154 |             }
155 |         out
156 |       }
157 |     }
158 |     out <- unlist(lapply(x,as_pid_nest2)) # one result per element of the column
159 |     out
160 |   }
161 |   output <- bind_cols(lapply(tibble(x),as_pid_nest1)) # one output column per input column
162 |   return(output)
163 | }
164 | 
165 | #'@title Convert an input to a source property SID
166 | #'@description Convert an input string to the most likely source SID
167 | #' (equivalent to PID.)
168 | #'@param x a vector, data frame, or tibble of strings representing Wikidata 
169 | #'source properties.
170 | #'@return if the inputted string is a valid SID or PID, return it in SID form.
171 | #'If the inputted string matches a property label, return its SID.
172 | #'If the inputted string matches multiple labels of multiple properties, 
173 | #'return the SID of the first hit. If nothing matches, return \code{NA}.
174 | #'@examples
175 | #'# if input string is a valid SID
176 | #'as_sid("S854")
177 | #'# if input string matches multiple item labels
178 | #'as_sid("URL")
179 | #'# if input string matches a single unique label
180 | #'as_sid("Reference URL")
181 | #'@export
182 | as_sid <- function(x){
183 |   as_sid_nest2 <- function(x){
184 |     if(is.date(x)|is.quot(x)|is.na(x)|is.empty(x)){
185 |       x                                   # not a property: pass through unchanged
186 |     }else if(is.sid(x)|is.pid(x)){
187 |       sub("^P","S",x,ignore.case = TRUE)  # is.sid() also accepts the P-form, so always hand back "S..."
188 |     }else{
189 |       temp <- find_property(x)
190 |       if(length(temp)>0){return(sub("^P","S",temp[[1]]$id))}
191 |       message(paste0("no sufficiently close match for \"",x,"\". Returned \"NA\"."))
192 |       NA
193 |     }
194 |   }
195 |   # apply element-wise to every column, then reassemble the columns as a tibble
196 |   as_sid_nest1 <- function(x){unlist(lapply(x,as_sid_nest2))}
197 |   bind_cols(lapply(tibble(x),as_sid_nest1))
198 | }
199 | 
200 | #'@title Add quotations marks
201 | #'@description Add escaped quotation marks around strings that need them ready for 
202 | #'submission to an API.
203 | #'@param x a vector, data frame, or tibble of strings
204 | #'@param format either "tibble" / "csv" to use plain quotation marks (default), 
205 | #'or "api" / "website" to use '\%22'
206 | #'@return A tibble of items inside of escaped quotation marks,
207 | #'unless an item is already in escaped quotation marks or is a 
208 | #'\href{https://www.wikidata.org/wiki/Q43649390}{QID}, 
209 | #'in which case it is returned unchanged. 
210 | #'@examples
211 | #'as_quot("text")
212 | #'@export
213 | as_quot <- function(x,format="tibble"){
214 |   if(is.null(x)){return(NULL)}
215 |   # pick the quotation mark that goes with the requested output format
216 |   if(format=="api"|format=="website"){
217 |     q_mark <- '%22'
218 |   }else if(format=="tibble"|format=="csv"){
219 |     q_mark <- '"'
220 |   }
221 |   # quote one value unless it is a QID, already quoted, a date, NA, empty or numeric
222 |   quote_one <- function(value){
223 |     leave_as_is <- is.qid(value)|is.quot(value)|is.date(value)|is.na(value)|is.empty(value)|is.numeric(value)
224 |     if(leave_as_is){
225 |       as.character(value)
226 |     }else{
227 |       paste0(q_mark,as.character(value),q_mark)
228 |     }
229 |   }
230 |   quote_column <- function(column){unlist(lapply(column,quote_one))}
231 |   quoted <- lapply(tibble(x),quote_column)
232 |   bind_cols(quoted)
233 | }
234 | 
235 | #'@title Extract an identifier from a Wikidata URL
236 | #'@description Convert a URL ending in an identifier (returned by SPARQL queries)
237 | #'to just the plain identifier (QID or PID).
238 | #'@details The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID) 
239 | #'is the unique identifier (UID) used in Wikidata.
240 | #'@param x A vector of strings representing Wikidata URLs.
241 | #'@return QID or PID.
242 | #'@examples
243 | #'url_to_id("http://www.wikidata.org/Q42")
244 | #'@export
245 | url_to_id <- function(x){
246 |   pieces <- sapply(x,stringr::str_split,pattern = "/|:"); sapply(pieces,tail,1) # keep the last "/"- or ":"-separated piece
247 | }
248 | 
249 | 
250 | # -------- Wikidata object manipulation --------
251 | #'@title Extract claims from returned item data
252 | #'@description extract claim information from data returned using
253 | #'\code{\link{get_item}}.
254 | #'@param items a list of one or more Wikidata items returned with
255 | #'\code{\link{get_item}}.
256 | #'@param claims a vector of claims (in the form "P321", "P12") to look for
257 | #'and extract.
258 | #'@return a list containing one sub-list for each entry in \code{items},
259 | #'and (below that) the found data for each claim. In the event a claim
260 | #'cannot be found for an item, an \code{NA} will be returned
261 | #'instead.
262 | #'@examples
263 | #'# Get item data
264 | #'adams_data <- get_item("42")
265 | #'# Get claim data
266 | #'claims <- extract_claims(adams_data, "P31")
267 | #'@export
268 | extract_claims <- function (items,
269 |                             claims){
270 |   # every requested claim is first passed through as_pid()
271 |   claims <- sapply(claims,as_pid)
272 |   # the first entry of item$claims named `claim`, or NA when there is none
273 |   claim_from_item <- function(claim, item){
274 |     position <- which(names(item$claims) == claim)
275 |     if (length(position) == 0){
276 |       return(NA)
277 |     }
278 |     item$claims[[position[1]]]
279 |   }
280 |   lapply(items, function(item){lapply(claims, claim_from_item, item = item)})
281 | }
282 | 
283 | #'@title List properties of a Wikidata item
284 | #'@description for a downloaded wikidata item, list the properties of all statements
285 | #'@param item a list of one or more Wikidata items returned with
286 | #'\code{\link{get_item}}.
287 | #'@param names a boolean for whether to also return property names (English labels),
288 | #'or just P numbers.
289 | #'@return a list containing one character vector for each entry in \code{item},
290 | #'holding the PIDs of the properties used in that item's claims. If
291 | #'\code{names = TRUE}, each PID is named with the English label of its property
292 | #'(\code{NA} where there is no English label).
293 | #'@examples
294 | #'# Get item data
295 | #'adams_data <- get_item("42")
296 | #'# List the properties used in its statements
297 | #'properties <- list_properties(adams_data)
298 | #'@export
299 | list_properties <- function (item,
300 |                              names=FALSE){
301 |   # base::names is spelled out because the `names` argument shadows it here
302 |   properties.p <- lapply(lapply(item,"[[","claims"),base::names)
303 |   if(!names){return(properties.p)}
304 |   english_label <- function(p){lab <- p$labels$en$value; if(is.null(lab)){NA_character_}else{lab}}
305 |   # label each item's PID vector; the outer list only has one entry per item
306 |   lapply(properties.p,function(pids){if(length(pids)==0){return(pids)}
307 |     stats::setNames(pids,vapply(get_property(pids),english_label,character(1)))})
308 | }
309 | 
310 | #Note: This one isn't very well named. not really the property names, more the predicate names, but you get the idea
311 | #'@title Get names of properties
312 | #'@description For a claim or set of claims, return the names of the properties  
313 | #'@param properties a claims list from \code{extract_claims}
314 | #'@return tibble of labels for each property for a set of claims
315 | #'@export
316 | get_names_from_properties <- function(properties){
317 |   # shorthand for "take element `key` from every entry of x"
318 |   pluck_each <- function(x,key){lapply(x,"[[",key)}
319 |   
320 |   # for one element of `properties`: the mainsnak value ids of its entries,
321 |   # named by the first element of each entry's mainsnak property
322 |   value_ids <- function(entries){
323 |     mainsnaks  <- pluck_each(entries,"mainsnak")
324 |     ids        <- pluck_each(pluck_each(pluck_each(mainsnaks,"datavalue"),"value"),"id")
325 |     names(ids) <- pluck_each(pluck_each(mainsnaks,"property"),1)
326 |     ids
327 |   }
328 |   
329 |   # for the result of one get_item() call: English label values, named by id
330 |   english_labels <- function(fetched){
331 |     found        <- pluck_each(pluck_each(pluck_each(fetched,"labels"),"en"),"value")
332 |     names(found) <- pluck_each(fetched,"id")
333 |     found
334 |   }
335 |   
336 |   property_values.qid <- lapply(properties,value_ids)
337 |   property_values.q   <- lapply(property_values.qid,function(ids){lapply(ids,get_item)})
338 |   property_names      <- lapply(property_values.q,function(items){lapply(items,english_labels)})
339 |   # flatten each set of labels and tabulate it with enframe()
340 |   lapply(lapply(property_names,unlist),enframe,name = "QID")
341 | }
342 | 
343 | 
344 | #'@title Filter QIDs
345 | #'@description For a QID or vector of QIDs, remove ones that match a particular statement
346 | #'(e.g. remove all that are instances of academic publications or books).
347 | #'@details The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID) 
348 | #'is the unique identifier (UID) used in Wikidata.
349 | #'@param ids QIDs to check
350 | #'@param property property to check (default = P31 to filter on "instance of")
351 | #'@param filter values of that property to use to filter out
352 | #'(default = Q737498, Q5633421, Q7725634, Q13442814, and Q18918145 to remove academic
353 | #'publications or books)
354 | #'@param message message to return (useful for disambiguate_QIDs function)
355 | #'@return a vector of QIDs that do not match the property filter
356 | #'@examples 
357 | #' \dontrun{
358 | #' # Filter three items called "Earth Science" to show only those that aren't
359 | #' # books, journals or journal articles
360 | #' filter_qids(ids = c("Q96695546","Q8008","Q58966429"),
361 | #'             property = "P31",
362 | #'             filter = c("Q737498","Q5633421","Q7725634","Q13442814","Q18918145"))
363 | #' }
364 | #'@export
365 | filter_qids <- function (ids,
366 |                          property = "P31",
367 |                          filter = c("Q737498",
368 |                                     "Q5633421",
369 |                                     "Q7725634",
370 |                                     "Q13442814",
371 |                                     "Q18918145"),
372 |                          message=NULL){
373 |   # Returned when no id survives the filter (or when no ids were supplied)
374 |   no_match <- tibble(qid=NA,
375 |                      label=NA,
376 |                      desc="No current matching Wikidata item")
377 |   # Nothing to look up: `1:length(ids)` would otherwise iterate over c(1,0)
378 |   if(length(ids)==0){
379 |     return(no_match)
380 |   }
381 |   # Prefer the English entry of a labels/descriptions list, then the first
382 |   # entry in any language, then the supplied fallback text
383 |   pick_text <- function(entries,fallback){
384 |     if(length(entries)==0){
385 |       fallback
386 |     }else if(!is.null(entries$en$value)){
387 |       entries$en$value
388 |     }else{
389 |       entries[[1]]$value
390 |     }
391 |   }
392 |   out <- NULL
393 |   pb <- progress_bar$new(total  = length(ids),
394 |                          format = paste0(message,":bar :percent eta::eta"),
395 |                          width  = 75,
396 |                          show_after = 0)
397 |   if(is.null(property)|is.null(filter)){
398 |     # No filter requested: describe every id from its find_item() result
399 |     for (i in seq_along(ids)){
400 |       pb$tick()
401 |       qid   <- ids[i]
402 |       item  <- find_item(qid,limit=1)
403 |       label <- item[[1]]$label
404 |       if(length(item[[1]]$description)>0){
405 |         desc <- item[[1]]$description
406 |       }else{
407 |         desc <- "no description"
408 |       }
409 |       out <- bind_rows(out,tibble(qid=qid,label=label,desc=desc))
410 |     }
411 |   }else{
412 |     # Filter requested: fetch every item and test the values of `property`
413 |     for (i in seq_along(ids)){
414 |       pb$tick()
415 |       qid  <- ids[i]
416 |       item <- get_item(qid)
417 |       # Values of `property` on this item ("other" when it has none)
418 |       P31  <- item[[1]]$claims[[property]]$mainsnak$datavalue$value$id
419 |       if(all(is.null(P31))){P31<-"other"}
420 |       # Keep the item only if none of its values is in the filter list
421 |       if(!any(P31 %in% filter)){
422 |         label <- pick_text(item[[1]]$labels,"no label")
423 |         desc  <- pick_text(item[[1]]$descriptions,"no description")
424 |         out   <- bind_rows(out,tibble(qid=qid,label=label,desc=desc))
425 |       }
426 |     }
427 |   }
428 |   if(is.null(out)){
429 |     out <- no_match
430 |   }
431 |   return(out)
432 | }
433 | 
434 | 
435 | # -------- Misc. string manipulation --------
436 | #'@title Format short form person names
437 | #'@description Converting names into first initial and surname, or just initials
438 | #'@param x a vector of people's names as strings
439 | #'@param format a vector of strings of either "FLast" or "FL" to indicate the output format
440 | #'@return the inputted name strings with first names shortened based on the
441 | #'selected format.
442 | #'@export
443 | initials <- function(x,format="FLast"){
444 |   # "FLast" -> first letter plus last word; anything else -> first letters only
445 |   if (format!="FLast"){
446 |     return(gsub("(.)\\S* *", "\\1", x))
447 |   }
448 |   gsub("^([A-Za-z]).* ([A-Za-z]*)", "\\1 \\2", x)
449 | }
450 | 
451 | #'@title Remove special characters
452 | #'@description Special characters can otherwise mess up wikidata read-writes
453 | #'@param x a data frame or tibble whose columns are checked for special characters
454 | #'@return a tibble in which special characters are replaced with their closest
455 | #'plain (ASCII) match; all-numeric columns become numeric, all others factors.
456 | #'@export
457 | unspecial <- function(x){
458 |   out <- x
459 |   for(i in seq_len(ncol(x))){ # seq_len(): no iterations when x has no columns
460 |     out[[i]] <- iconv(x[[i]],to = 'ASCII//TRANSLIT')
461 |     if(Hmisc::all.is.numeric(x[[i]])){
462 |       out[[i]] <- as.numeric(out[[i]])
463 |     }else{
464 |       out[[i]] <- as.factor(out[[i]])
465 |     } 
466 |   }
467 |   return(as_tibble(out))
468 | }
469 | 
470 | #'@title Extract a paragraph of text
471 | #'@description Return the nth paragraph of a section of text
472 | #'Useful for extracting information from Wikipedia or other wikimarkup text
473 | #'@param text the input text as a string
474 | #'@param para number indicating which paragraph(s) to return (default=1)
475 | #'@param templ an optional string specifying a mediawikitemplate within
476 | #'which to restrict the search restrict search 
477 | #'@return the nth paragraph of the input text.
478 | #'@export
479 | extract_para <- function(text,
480 |                          para=1,
481 |                          templ=NULL){
482 |   # reduce each match to its value: drop the lead-in up to "=" and the closing "|"
483 |   extract_para_nest1 <- function(x,y){
484 |     out <- lapply(x,gsub,pattern=".*= *| *\\|",replacement="")
485 |     names(out) <- y
486 |     return(out)
487 |   }
488 |   tosearch <- gsub("( |\\\\n|\\\\t)+"," ",text)
489 |   if(!is.null(templ)){
490 |     # normalise the name only here: gsub() on NULL gives character(0), so
491 |     # normalising before this test made it TRUE even without a template
492 |     templ     <- gsub(" ","_",templ)
493 |     templates <- regmatches(tosearch, gregexpr("\\{(?:[^{}]+|(?R))*+\\}",
494 |                                                tosearch, perl=TRUE, ignore.case=TRUE))[[1]]
495 |     templates <- paste0(gsub(" ","_",substr(templates,1,regexpr(" *\\|| *\\}",templates)-1)),
496 |                         substr(templates,regexpr("*\\||*\\}",templates),nchar(templates)))
497 |     tosearch  <- unlist(str_extract_all(templates,
498 |                                        paste0("(?i)\\{\\{ *?",templ,".*?\\}\\}")))
499 |     # name the hits only when there are any (naming an empty vector errors)
500 |     if(length(tosearch)>0){names(tosearch) <- paste0(templ,"_",seq_along(tosearch))}
501 |   }
502 |   
503 |   match_paras <- lapply(tosearch,
504 |                         str_extract_all,
505 |                         paste0("\\| *?",para," *?=.*?\\|"))
506 |   match_exact <- lapply(match_paras,extract_para_nest1,para)
507 |   return(match_exact)
508 | }
509 | 
510 | #'@title "CREATE" rows 
511 | #'@description Add in empty lines for QuickStatements CREATE rows that mint new QIDs.
512 | #'This is a slightly messy quirk of the QuickStatements format that mints new QIDs via a line
513 | #'containing only "CREATE", so this function is a way to approximate that behavior in a tibble
514 | #'@param items a vector, data frame or tibble of items (which may or may not contain the keyword "CREATE")
515 | #'@param vector a vector of properties or values which may be expanded based on the items vector
516 | #'@return if the vector is NULL, return NULL. Otherwise, if the "CREATE" keyword appears in the
517 | #'items vector, insert blank strings at those positions in the vector.
518 | #'@export
519 | createrows <- function(items,vector){
520 |   if(is.null(vector)){
521 |     return(NULL)
522 |   }
523 |   if(any(items=="CREATE",na.rm = TRUE)){
524 |     #expand vector to full length if just intending to repeat a single value;
525 |     #NA items count as ordinary (non-"CREATE") rows so the count is never NA
526 |     if(length(unlist(vector))==1){
527 |       vector <- rep(vector,sum(is.na(items)|items!="CREATE"))
528 |     }
529 |     vector <- tibble(vector)
530 |     newQID <- which(items=="CREATE")
531 |     val    <- bind_rows(vector,tibble(data.frame(array("",dim=c(length(newQID),ncol(vector)),dimnames = list(NULL,colnames(vector))))))
532 |     id     <- c(1:nrow(vector), newQID-seq_along(newQID)+0.5)
533 |     out    <- tibble(val[order(id),])
534 |     return(out)
535 |   }else{
536 |     return(tibble(vector))
537 |   }
538 | }
539 | 
540 | #'@title "CREATE" rows from tidy format
541 | #'@description Add in QuickStatements CREATE rows that mint new QIDs from tidy input data.
542 | #'New items are created by any item that starts with the text "CREATE" followed
543 | #'by any unique ID.
544 | #'@param QS.tib a tibble of items, values and properties (optionally qualifiers and sources).
545 | #'@return a tibble, with items that start with "CREATE" followed by any unique text causing the
546 | #'addition of a "Create" line above, being replaced with "LAST" in the QuickStatements 
547 | #'format to create new QIDs.
548 | #'@export
549 | createrows.tidy <- function(QS.tib){
550 |   #insert 'CREATE' blankrows above first instance of 'CREATExyz'
551 |   newQID <- which(!duplicated(QS.tib[,1])&sapply(QS.tib[,1],is.createx)) # rows where a 'CREATExyz' id first appears
552 |   val    <- rbind(QS.tib, array("",dim=c(length(newQID),ncol(QS.tib)),dimnames = list(newQID,names(QS.tib))) ) # append one all-"" row per new id
553 |   id     <- c(seq_along(t(QS.tib)[1,]), newQID-0.5) # sort key: n - 0.5 places a blank row just above row n
554 |   out    <- val[order(id),]
555 |   
556 |   #replace 'CREATEXYZ' with 'LAST'
557 |   out[sapply(out[,1],is.createx),1] <- "LAST"
558 |   
559 |   #replace new empty rows with 'CREATE' row
560 |   out[apply(is.empty(out),all,MARGIN=1),1] <- "CREATE" # rows in which every cell is ""
561 |   return(out)
562 | }
563 | 


--------------------------------------------------------------------------------
/R/writes.R:
--------------------------------------------------------------------------------
  1 | # -------- Writes --------
  2 | 
  3 | #'@title Write statements to Wikidata
  4 | #'@description Upload data to Wikidata, including creating items,
  5 | #'adding statements to existing items (via the quickstatements format and API).
  6 | #'
  7 | #'@param items a vector of strings indicating the items to which to add statements (as QIDs or labels).
  8 | #'Note: if labels are provided, and multiple items match, the first matching item will be used
  9 | #'(see \code{as_qid} function), so use with caution.
 10 | #'New QIDs can be created by using the "CREATE_xyz", where "_xyz" is any unique string.
 11 | #'Using the same id will add additional statements to those new items 
 12 | #'@param properties a vector of strings indicating the properties to add as statements (as PIDs or labels).
 13 | #'Note: if labels are provided, and multiple items match, the first matching item will be used
 14 | #'(see \code{as_pid} function), so use with caution.
 15 | #'Four special properties can also be used: labels, aliases, descriptions and sitelinks.
 16 | #'See [this link](https://www.wikidata.org/wiki/Help:QuickStatements#Adding_labels,_aliases,_descriptions_and_sitelinks) for the syntax.
 17 | #'@param values a vector of strings indicating the values to add as statements (as QIDs or strings).
 18 | #'Note: if strings are provided, they will be treated as plain text.
 19 | #'@param qual.properties a vector, data frame, or tibble of strings indicating the properties to add as qualifiers to statements (as PIDs or labels).
 20 | #'Note: if labels are provided, and multiple items match, the first matching item will be used
 21 | #'(see \code{as_pid} function), so use with caution.
 22 | #'@param qual.values a vector, data frame, or tibble of strings indicating the values to add as statements (as QIDs or strings).
 23 | #'Note: if strings are provided, they will be treated as plain text.
 24 | #'@param src.properties a vector, data frame, or tibble of strings indicating the properties to add as reference sources to statements (as SIDs or labels).
 25 | #'Note: if labels are provided, and multiple items match, the first matching item will be used
 26 | #'(see \code{as_sid} function), so use with caution.
 27 | #'@param src.values a vector, data frame, or tibble of strings indicating the values to add reference sources to statements (as QIDs or strings).
 28 | #'Note: if strings are provided, they will be treated as plain text.
 29 | #'@param remove a vector of booleans for each statement indicating whether it should
 30 | #'be removed from the item rather than added (default = FALSE)
 31 | #'@param format output format as a string. Options include:
 32 | #' \describe{
 33 | #'   \item{tibble}{easiest format for further manipulation in R}
 34 | #'   \item{csv}{can be copy-pasted to [the QuickStatements website](https://quickstatements.toolforge.org/) (or manipulated in a spreadsheet program)}
 35 | #'   \item{api}{a url that can be copy-pasted into a web browser, or automatically submitted (see \code{api.submit} parameter)}
 36 | #'   \item{website}{open a [QuickStatements](https://quickstatements.toolforge.org/) web browser window summarizing the edits to be made to Wikidata)}
 37 | #' }
 38 | #'@param api.username a string indicating your Wikimedia username 
 39 | #'@param api.token a string indicating your api token (the unique identifier that you can find listed at [your user page](https://quickstatements.toolforge.org/#/user))
 40 | #'@param api.format a string indicating which version of the quickstatement format used to submit the api (default = "v1")
 41 | #'@param api.batchname a string used to create a named batch (listed at [your batch history page](https://quickstatements.toolforge.org/#/batches)) and tag in the edit summaries
 42 | #'@param api.submit boolean indicating whether to submit instruction directly to wikidata (else returns the URL that can be copy-pasted into a web browser)
 43 | #'
 44 | #'@return data formatted to upload to wikidata (via QuickStatements),
 45 | #'optionally also directly uploaded to wikidata (see \code{format} parameter). 
 46 | #'
 47 | #'@examples
 48 | #'# Add a statement to the "Wikidata sandbox" item (Q4115189)
 49 | #'# to say that it is an "instance of" (P31) of Q1 (the universe).
 50 | #'# The instruction will submit directly to wikidata via the API
 51 | #'# (if you include your Wikimedia username and token)
 52 | #'
 53 | #' \dontrun{write_wikidata(items        = "Wikidata Sandbox",
 54 | #'                properties   = "instance of",
 55 | #'                values       = "Q1",
 56 | #'                format       = "api",
 57 | #'                api.username = "myusername", 
 58 | #'                api.token    = "mytoken"
 59 | #'                )}
 60 | #'#note: 
 61 | #'
 62 | #'@export
 63 | 
 64 | write_wikidata <- function(items,
 65 |                            properties      = NULL,
 66 |                            values          = NULL,
 67 |                            qual.properties = NULL,
 68 |                            qual.values     = NULL,
 69 |                            src.properties  = NULL,
 70 |                            src.values      = NULL,
 71 |                            remove          = FALSE,
 72 |                            format          = "tibble",
 73 |                            api.username    = NULL,
 74 |                            api.token       = NULL, # Find yours from [your user page](https://tools.wmflabs.org/quickstatements/#/user)
 75 |                            api.format      = "v1",
 76 |                            api.batchname   = NULL,
 77 |                            api.submit      = TRUE
 78 | ){
 79 |   # Assembles an Item/Prop/Value table (plus optional qualifier and source columns), then returns, prints, opens or submits it according to `format`
 80 |   # Check if username and token provided
 81 |   if(format=="api"){
 82 |     if(is.null(api.username)){stop("Enter your Wikimedia username")}
 83 |     if(is.null(api.token))   {stop("Enter your api.token (Find yours at https://tools.wmflabs.org/quickstatements/#/user)")}
 84 |   }
 85 |   
 86 |   # Place all the quickstatements variables into a list; each supplied argument becomes a tibble, unsupplied ones stay NULL
 87 |   QS <- list(items           = items,
 88 |              properties      = properties,
 89 |              values          = values,
 90 |              qual.properties = qual.properties,
 91 |              qual.values     = qual.values,
 92 |              src.properties  = src.properties,
 93 |              src.values      = src.values)
 94 |   QS <- lapply(QS,function(x){if(!is.null(x)){tibble(x)}})
 95 | 
 96 |   # If new QIDs are being created via the "CREATE" keyword, need to insert blank lines across the other parameters to align correctly into rows
 97 |   # This is the most similar to the standard quickstatements method, though the "CREATExyz" method is preferred (see createrows.tidy function later)
 98 |   QS$properties      <- createrows(QS$items,QS$properties)
 99 |   QS$values          <- createrows(QS$items,QS$values)
100 |   QS$qual.properties <- createrows(QS$items,QS$qual.properties)
101 |   QS$qual.values     <- createrows(QS$items,QS$qual.values)
102 |   QS$src.properties  <- createrows(QS$items,QS$src.properties)
103 |   QS$src.values      <- createrows(QS$items,QS$src.values)
104 |   
105 |   # If same number of rows as the rowmax, do nothing
106 |   # If only one row, repeat it rowmax times
107 |   # If wrong number of rows, stop with an error message
108 |   rowcount <- unlist(lapply(QS,nrow))
109 |   rowmax   <- max(rowcount)
110 |   stoprun  <- FALSE
111 |   # length(unique()) instead of var(): var() is NA when only one row count exists, which would make if() fail
112 |   if(length(unique(rowcount))>1){
113 |     for (x in seq_along(QS)){
114 |       if(is.null(nrow(QS[[x]]))){
115 |         QS[[x]] <- slice(tibble(QS[[x]]),rep(1:n(), each=rowmax))
116 |       }else if (nrow(QS[[x]])==1){ 
117 |         QS[[x]] <- slice(tibble(QS[[x]]),rep(1:n(), each=rowmax)) 
118 |       }else if(nrow(QS[[x]])==rowmax){ 
119 |         QS[[x]] <- QS[[x]]
120 |       }else{
121 |         stoprun<-TRUE # keep looping so every offending column gets its own warning before stopping
122 |         warning(paste0("Not all quickstatement columns have equal rows: ",
123 |                        nrow(QS$items)," items (including ",
124 |                        sum(is.create(unlist(QS$items)))," new QIDs to CREATE) were provided, but ",
125 |                        names(QS)[x],
126 |                        " has ",
127 |                        nrow(QS[[x]]),
128 |                        " rows (expecting ",
129 |                        nrow(QS$items),
130 |                        ")."))
131 |       }
132 |     }
133 |   }
134 |   if(stoprun){stop("Therefore stopping")}
135 |   
136 |   # Convert items to QIDs where possible and prefix those flagged by `remove` with "-"
137 |   QS$items           <- as_qid(QS$items)
138 |   QS$items[remove,]  <- paste0("-",unlist(QS$items[remove,]))
139 |   
140 |   # Convert properties to PIDs where possible, unless special functions (such as labels and aliases)
141 |   QS$properties      <- as_pid(QS$properties)
142 |   
143 |   # Convert values to QIDs where possible, unless property is expecting a string
144 |   QS$values          <- tibble(QS$values)
145 |   if(any(sapply(QS$properties,check.PID.WikibaseItem))){
146 |     QS$values[sapply(QS$properties,check.PID.WikibaseItem),] <- as_qid(QS$values[sapply(QS$properties,check.PID.WikibaseItem),])
147 |   }
148 |   QS$values          <- as_quot(QS$values,format)
149 |   
150 |   # Check if multiple values and properties supplied for each item
151 |   if(!is.null(dim(QS$properties))){
152 |     if(any(dim(QS$properties) != dim(QS$values))){ # any(): a mismatch in either rows or columns is an error
153 |       stop("multiple properties and values supplied for each item, but number of properties and values don't match")
154 |     }
155 |     QS$items      <- tibble(rep(unlist(QS$items),each=ncol(QS$properties)))
156 |     QS$properties <- tibble(as.vector(t(QS$properties)))
157 |     QS$values     <- tibble(as.vector(t(QS$values)))
158 |   }
159 |   
160 |   # Convert first three columns into tibble (tibbulate?)
161 |   colnames(QS$items)      <- "Item"
162 |   colnames(QS$properties) <- "Prop"
163 |   colnames(QS$values)     <- "Value"
164 |   
165 |   QS.tib <- bind_cols(QS$items,
166 |                       QS$properties,
167 |                       QS$values)  
168 | 
169 |   # optionally, append columns for qualifier properties and qualifier values for those statements
170 |   if(!is.null(QS$qual.properties)||!is.null(QS$qual.values)){
171 |     QS$qual.properties <- as_pid(QS$qual.properties)
172 |     QS$qual.values     <- as_quot(QS$qual.values,format)
173 |     
174 |     # if no value, clear property 
175 |     QS$qual.properties[QS$qual.values==""|is.na(QS$qual.values)] <- NA
176 | 
177 |     colnames(QS$qual.properties) <- paste0("qual.prop.",1:ncol(QS$qual.properties))
178 |     colnames(QS$qual.values)     <- paste0("qual.value.",1:ncol(QS$qual.values))
179 |     
180 |     QSq <- list(QS$qual.properties,
181 |                 QS$qual.values)
182 |     # every element must have the same number of rows (or length), i.e. zero variance
183 |     QSq.check  <- var(sapply(c(QS,QSq),function(x){if(is.null(dim(x))){length(x)}else{nrow(x)}}))==0
184 |     if(!QSq.check){stop("Incorrect number of qualifiers provided. If no qualifers needed for a statement, use NA or \"\".")}
185 |     # interleave the columns as prop.1, value.1, prop.2, value.2, ...
186 |     QS.qual.tib <- as_tibble(cbind(QSq[[1]],QSq[[2]])[,c(rbind(1:ncol(QSq[[1]]),ncol(QSq[[1]])+1:ncol(QSq[[2]])))])
187 |     
188 |     QS.tib <- tibble(QS.tib,
189 |                      QS.qual.tib)
190 |   }
191 |   
192 |   # optionally, append columns for source properties and source values for those statements
193 |   if(!is.null(src.properties)||!is.null(src.values)){
194 |     QS$src.properties <- as_sid(QS$src.properties)
195 |     QS$src.values     <- as_quot(QS$src.values,format)
196 | 
197 |     # if no value, clear property 
198 |     QS$src.properties[QS$src.values==""|is.na(QS$src.values)] <- NA
199 | 
200 |     colnames(QS$src.properties) <- paste0("src.prop.",1:ncol(QS$src.properties))
201 |     colnames(QS$src.values)     <- paste0("src.values.",1:ncol(QS$src.values))
202 |     
203 |     QSs <- list(QS$src.properties,
204 |                 QS$src.values)
205 |     QSs.check  <- var(sapply(c(QS,QSs),function(x){if(is.null(dim(x))){length(x)}else{nrow(x)}}))==0
206 |     if(!QSs.check){stop("incorrect number of sources provided")}
207 |     # interleave the columns as prop.1, value.1, prop.2, value.2, ...
208 |     QS.src.tib <- as_tibble(cbind(QSs[[1]],QSs[[2]])[,c(rbind(1:ncol(QSs[[1]]),ncol(QSs[[1]])+1:ncol(QSs[[2]])))])
209 |     
210 |     QS.tib <- tibble(QS.tib,
211 |                      QS.src.tib)
212 |   }
213 |   
214 |   # if new QIDs are being created via tidy "CREATExyz" keywords, need to insert CREATE lines above and replace subsequent "CREATExyz" with "LAST"
215 |   QS.tib <- createrows.tidy(QS.tib)
216 |   
217 |   # remove any impossible rows (value is NA), then blank out the remaining NAs
218 |   if(nrow(QS.tib)!=1){
219 |     QS.tib <- QS.tib[!is.na(QS.tib$Value),]
220 |     QS.tib <- as_tibble(lapply(QS.tib,function(x){replace_na(as.character(x),"")})) # column-wise: apply() would collapse a single remaining row into a vector
221 |   }
222 |   
223 |   # format up the output
224 |   if (format=="csv"){
225 |     write.table(QS.tib,quote = FALSE,row.names = FALSE,sep = ",") # printed to the console; the function then returns NULL invisibly
226 |   }
227 |   
228 |   if (format=="tibble"){
229 |     return(QS.tib)
230 |   }
231 |   
232 |   if (format=="website"){
233 |     api.temp1 <- format_tsv(QS.tib, col_names = FALSE, quote_escape = "none")
234 |     api.temp2 <- gsub("\t", "%7C",api.temp1)       # Replace TAB with "%7C"
235 |     api.temp3 <- gsub("%7C(%7C)+","%7C",api.temp2) # Replace multiple tabs (from missing values) with a single tab (to distinguish from newlines)
236 |     api.temp4 <- gsub("\n", "%7C%7C",api.temp3)    # Replace end-of-line with "%7C%7C"
237 |     api.temp5 <- gsub(" ",  "%20",api.temp4)       # Replace space with "%20"
238 |     api.temp6 <- gsub("\\+","%2B",api.temp5)       # Replace plus with "%2B"
239 |     api.data  <- gsub("/",  "%2F",api.temp6)       # Replace slash with "%2F"
240 |     
241 |     url <- paste0("https://quickstatements.toolforge.org/#/v1=","&data=%7C%7C",api.data)
242 |     
243 |     if(api.submit){ 
244 |       browseURL(url)
245 |     }else{
246 |       return(url)
247 |     }
248 |   }
249 |   
250 |   if (format=="api"){
251 |     api.temp1 <- format_tsv(QS.tib, col_names = FALSE, quote_escape = "none")
252 |     api.temp2 <- gsub("%22","\"",api.temp1) # kludge to fix as_quote issues: turn "%22" back into a literal quote for the POST body
253 |     api.data  <- gsub("%2F","/",api.temp2) # kludge to fix as_date issues: turn "%2F" back into a literal slash
254 |     
255 |     if (api.submit){
256 |       POST(url="https://tools.wmflabs.org/quickstatements/api.php",
257 |                body = list(action    = "import",
258 |                            submit    = "1",
259 |                            format    = api.format,
260 |                            batchname = api.batchname,
261 |                            username  = api.username,
262 |                            token     = api.token,
263 |                            data      = api.data)
264 |            )
265 |       browseURL("https://quickstatements.toolforge.org/#/batches")
266 |     }else{
267 |       api.temp1 <- format_tsv(QS.tib, col_names = FALSE, quote_escape = "none")
268 |       api.temp2 <- gsub("\t", "%7C",api.temp1)       # Replace TAB with "%7C"
269 |       api.temp3 <- gsub("%7C(%7C)+","%7C",api.temp2) # Replace multiple tabs (from missing values) with a single tab (to distinguish from newlines)
270 |       api.temp4 <- gsub("\n", "%7C%7C",api.temp3)    # Replace end-of-line with "%7C%7C"
271 |       api.temp5 <- gsub(" ",  "%20",api.temp4)       # Replace space with "%20"
272 |       api.temp6 <- gsub("\\+","%2B",api.temp5)       # Replace plus with "%2B"
273 |       api.data  <- gsub("/",  "%2F",api.temp6)       # Replace slash with "%2F"
274 |       url <- paste0("https://tools.wmflabs.org/quickstatements/api.php",
275 |                     "?action=",    "import",
276 |                     "&submit=",    "1",
277 |                     "&format=",    api.format,
278 |                     "&batchname=", api.batchname,
279 |                     "&username=",  api.username,
280 |                     "&token=",     api.token,
281 |                     "&data=%7C%7C",api.data)
282 |       return(url)
283 |     }
284 |   }
285 | }
286 | 


--------------------------------------------------------------------------------
/R/writes_wikibase.R:
--------------------------------------------------------------------------------
  1 | #' @title Write statements to any Wikibase instance
  2 | #' @description Upload data to a Wikibase instance, including creating items,
  3 | #' adding statements to existing items (via the quickstatements format and API).
  4 | #'
  5 | #' @param items a vector of strings indicating the items to which to add statements (as QIDs).
  6 | #' Note: In contrast to \code{write_wikidata}, this function takes no labels as input, just QIDs.
  7 | #' New QIDs can be created by using the "CREATE_xyz", where "_xyz" is any unique string.
  8 | #' Using the same id will add additional statements to those new items
  9 | #' @param properties a vector of strings indicating the properties to add as statements (as PIDs).
 10 | #' Note: In contrast to \code{write_wikidata}, this function takes no labels as input, just PIDs.
 11 | #' Four special properties can also be used: labels, aliases, descriptions and sitelinks.
 12 | #' See [this link](https://www.wikidata.org/wiki/Help:QuickStatements#Adding_labels,_aliases,_descriptions_and_sitelinks) for the syntax.
 13 | #' @param values a vector of strings indicating the values to add as statements (as QIDs).
 14 | #' Note: if strings are provided, they will be treated as plain text.
 15 | #' @param qual.properties a vector, data frame, or tibble of strings indicating the properties to add as qualifiers to statements (as PIDs).
 16 | #' @param qual.values a vector, data frame, or tibble of strings indicating the values to add as statements (as QIDs or strings).
 17 | #' Note: if strings are provided, they will be treated as plain text.
 18 | #' @param src.properties a vector, data frame, or tibble of strings indicating the properties to add as reference sources to statements (as SIDs or labels).
 19 | #' Note: if labels are provided, and multiple items match, the first matching item will be used
 20 | #' (see \code{as_sid} function), so use with caution.
 21 | #' @param src.values a vector, data frame, or tibble of strings indicating the values to add reference sources to statements (as QIDs or strings).
 22 | #' Note: if strings are provided, they will be treated as plain text.
 23 | #' @param remove a vector of booleans for each statement indicating whether it should
 24 | #' be removed from the item rather than added (default = FALSE)
 25 | #' @param format output format as a string. Options include:
 26 | #' \describe{
 27 | #'   \item{tibble}{easiest format for further manipulation in R}
 28 | #'   \item{csv}{can be copy-pasted to the Wikibase QuickStatements website (or manipulated in a spreadsheet program). In contrast to the write_wikidata function, the delimiter is `tab`, because QuickStatements expects tab-separated data}
 29 | #'   \item{api}{a url that can be copy-pasted into a web browser, or automatically submitted (see \code{api.submit} parameter)}
 30 | #'   \item{website}{open a [QuickStatements](https://quickstatements.toolforge.org/) web browser window summarizing the edits to be made to Wikidata)}
 31 | #' }
 32 | #' @param format.csv.file path to save the csv file. If none is provided, then printed to console.
 33 | #' @param api.username a string indicating your Wikimedia username
 34 | #' @param api.token a string indicating your api token (the unique identifier that you can find listed at [your user page](https://quickstatements.toolforge.org/#/user))
 35 | #' @param api.format a string indicating which version of the quickstatement format used to submit the api (default = "v1")
 36 | #' @param api.batchname a string used to create a named batch (listed at [your batch history page](https://quickstatements.toolforge.org/#/batches)) and tag in the edit summaries
 37 | #' @param api.submit boolean indicating whether to submit instruction directly to wikidata (else returns the URL that can be copy-pasted into a web browser)
 38 | #' @param quickstatements.url url to access quickstatements of the corresponding Wikibase instance. It must end with a trailing slash, because "api.php" is appended to it directly.
 39 | #' @param coordinate_pid PID of a geocoordinates; need to have a different formatting
 40 | #'
 41 | #' @return data formatted to upload to Wikidata (via QuickStatements),
 42 | #' optionally also directly uploaded to Wikidata (see \code{format} parameter).
 43 | #'
 44 | #' @examples
 45 | #' # Add a statement to the "Wikidata sandbox" item (Q4115189)
 46 | #' # to say that it is an "instance of" (P31) of Q1 (the universe).
 47 | #' # The instruction will submit directly to Wikidata via the API
 48 | #' # (if you include your Wikibase/Wikimedia username and token)
 49 | #'
 50 | #' \dontrun{
 51 | #' write_wikibase(
 52 | #'   items = "Q24",
 53 | #'   properties = "P2",
 54 | #'   values = "Q8",
 55 | #'   format = "api",
 56 | #'   api.username = "myusername",
 57 | #'   api.token = "mytoken",
 58 | #'   api.submit = TRUE,
 59 | #'   quickstatements.url = "https://quickstatements.example.org/"
 60 | #' )
 61 | #' }
 62 | #' # note:
 63 | #'
 64 | #' @export
 65 | 
 66 | write_wikibase <- function(items,
 67 |                            properties = NULL,
 68 |                            values = NULL,
 69 |                            qual.properties = NULL,
 70 |                            qual.values = NULL,
 71 |                            src.properties = NULL,
 72 |                            src.values = NULL,
 73 |                            remove = FALSE,
 74 |                            format = "tibble",
 75 |                            format.csv.file = NULL,
 76 |                            api.username = NULL,
 77 |                            api.token = NULL, # Find yours from [your user page](https://tools.wmflabs.org/quickstatements/#/user)
 78 |                            api.format = "v1",
 79 |                            api.batchname = NULL,
 80 |                            api.submit = TRUE,
 81 |                            quickstatements.url = NULL,
 82 |                            coordinate_pid = NULL) {
 83 |   # Every api/website link is built by pasting onto quickstatements.url, so fail early instead of producing a relative, unusable URL
 84 |   if (format %in% c("api", "website") && is.null(quickstatements.url)) stop("Enter the quickstatements.url of your Wikibase instance")
 85 |   if (format == "api") {
 86 |     if (is.null(api.username)) {
 87 |       stop("Enter your Wikimedia username")
 88 |     }
 89 |     if (is.null(api.token)) {
 90 |       stop("Enter your api.token (Find yours at https://tools.wmflabs.org/quickstatements/#/user)")
 91 |     }
 92 |   }
 93 | 
 94 |   # Place all the quickstatements variables into a list
 95 |   QS <- list(
 96 |     items = items,
 97 |     properties = properties,
 98 |     values = values,
 99 |     qual.properties = qual.properties,
100 |     qual.values = qual.values,
101 |     src.properties = src.properties,
102 |     src.values = src.values
103 |   )
104 |   # each supplied argument becomes a tibble; unsupplied ones stay NULL
105 |   QS <- lapply(QS, function(x) {
106 |     if (!is.null(x)) {
107 |       tibble(x)
108 |     }
109 |   })
110 | 
111 |   # If new QIDs are being created via the "CREATE" keyword, need to insert blank lines across the other parameters to align correctly into rows
112 |   # This is the most similar to the standard quickstatements method, though the "CREATExyz" method is preferred (see createrows.tidy function later)
113 |   QS$properties <- createrows(QS$items, QS$properties)
114 |   QS$values <- createrows(QS$items, QS$values)
115 |   QS$qual.properties <- createrows(QS$items, QS$qual.properties)
116 |   QS$qual.values <- createrows(QS$items, QS$qual.values)
117 |   QS$src.properties <- createrows(QS$items, QS$src.properties)
118 |   QS$src.values <- createrows(QS$items, QS$src.values)
119 | 
120 |   # If same number of rows as the rowmax, do nothing
121 |   # If only one row, repeat it rowmax times
122 |   # If wrong number of rows, stop with an error message
123 |   rowcount <- unlist(lapply(QS, nrow))
124 |   rowmax <- max(rowcount)
125 |   stoprun <- FALSE
126 |   # length(unique()) instead of var(): var() is NA when only one row count exists, which would make if() fail
127 |   if (length(unique(rowcount)) > 1) {
128 |     for (x in seq_along(QS)) {
129 |       n_x <- nrow(QS[[x]]) # NULL if this entry is still NULL (argument not supplied)
130 |       if (is.null(n_x) || n_x == rowmax) next
131 |       if (n_x == 1) {
132 |         QS[[x]] <- slice(QS[[x]], rep(1:n(), each = rowmax))
133 |       } else {
134 |         stoprun <- TRUE # keep looping so every offending column gets its own warning before stopping
135 |         warning(paste0(
136 |           "Not all quickstatement columns have equal rows: ",
137 |           nrow(QS$items), " items (including ",
138 |           sum(is.create(unlist(QS$items))), " new QIDs to CREATE) were provided, but ",
139 |           names(QS)[x],
140 |           " has ",
141 |           nrow(QS[[x]]),
142 |           " rows (expecting ",
143 |           nrow(QS$items),
144 |           ")."
145 |         ))
146 |       }
147 |     }
148 |   }
149 |   if (stoprun) {
150 |     stop("Therefore stopping")
151 |   }
152 | 
153 |   # Prefix the items flagged by `remove` with "-" (no label-to-QID conversion is done in this function)
154 | 
155 |   QS$items[remove, ] <- paste0("-", unlist(QS$items[remove, ]))
156 | 
157 |   # Format the values with as_quot() (no label-to-QID conversion is done in this function)
158 |   QS$values <- tibble(QS$values)
159 |   QS$values <- as_quot(QS$values, format)
160 | 
161 |   # Convert first three columns into tibble (tibbulate?)
162 |   colnames(QS$items) <- "Item"
163 |   colnames(QS$properties) <- "Prop"
164 |   colnames(QS$values) <- "Value"
165 | 
166 |   QS.tib <- bind_cols(
167 |     QS$items,
168 |     QS$properties,
169 |     QS$values
170 |   )
171 | 
172 |   # optionally, append columns for qualifier properties and qualifier values for those statements
173 |   if (!is.null(QS$qual.properties) || !is.null(QS$qual.values)) {
174 |     QS$qual.properties <- as_pid(QS$qual.properties)
175 |     QS$qual.values <- as_quot(QS$qual.values, format)
176 | 
177 |     colnames(QS$qual.properties) <- paste0("Qual.prop.", 1:ncol(QS$qual.properties))
178 |     colnames(QS$qual.values) <- paste0("Qual.value.", 1:ncol(QS$qual.values))
179 | 
180 |     QSq <- list(
181 |       QS$qual.properties,
182 |       QS$qual.values
183 |     )
184 |     QSq.check <- var(sapply(c(QS, QSq), function(x) {
185 |       if (is.null(dim(x))) {
186 |         length(x)
187 |       } else {
188 |         nrow(x)
189 |       }
190 |     })) == 0
191 |     if (!QSq.check) {
192 |       stop("Incorrect number of qualifiers provided. If no qualifers needed for a statement, use NA or \"\".")
193 |     }
194 |     # interleave the columns as prop.1, value.1, prop.2, value.2, ...
195 |     QS.qual.tib <- as_tibble(cbind(QSq[[1]], QSq[[2]])[, c(rbind(1:ncol(QSq[[1]]), ncol(QSq[[1]]) + 1:ncol(QSq[[2]])))])
196 | 
197 |     QS.tib <- tibble(
198 |       QS.tib,
199 |       QS.qual.tib
200 |     )
201 |   }
202 | 
203 |   # optionally, append columns for source properties and source values for those statements
204 |   if (!is.null(src.properties) || !is.null(src.values)) {
205 |     QS$src.properties <- as_sid(QS$src.properties)
206 |     QS$src.values <- as_quot(QS$src.values, format)
207 | 
208 |     colnames(QS$src.properties) <- paste0("Src.prop.", 1:ncol(QS$src.properties))
209 |     colnames(QS$src.values) <- paste0("Src.values.", 1:ncol(QS$src.values))
210 | 
211 |     QSs <- list(
212 |       QS$src.properties,
213 |       QS$src.values
214 |     )
215 |     QSs.check <- var(sapply(c(QS, QSs), function(x) {
216 |       if (is.null(dim(x))) {
217 |         length(x)
218 |       } else {
219 |         nrow(x)
220 |       }
221 |     })) == 0
222 |     if (!QSs.check) {
223 |       stop("incorrect number of sources provided")
224 |     }
225 |     # interleave the columns as prop.1, value.1, prop.2, value.2, ...
226 |     QS.src.tib <- as_tibble(cbind(QSs[[1]], QSs[[2]])[, c(rbind(1:ncol(QSs[[1]]), ncol(QSs[[1]]) + 1:ncol(QSs[[2]])))])
227 | 
228 |     QS.tib <- tibble(
229 |       QS.tib,
230 |       QS.src.tib
231 |     )
232 |   }
233 | 
234 |   # if new QIDs are being created via tidy "CREATExyz" keywords, need to insert CREATE lines above and replace subsequent "CREATExyz" with "LAST"
235 |   QS.tib <- createrows.tidy(QS.tib)
236 | 
237 |   # remove quotes, if PID is coordinates
238 |   if (!is.null(coordinate_pid)) {
239 |     QS.tib$Value <- ifelse(QS.tib$Prop == coordinate_pid, gsub('\"', '', QS.tib$Value), QS.tib$Value)
240 |   }
241 | 
242 |   # output: csv goes to the given file (without a header row) or is printed to the console (with one)
243 |   if (format == "csv") {
244 |     if(!is.null(format.csv.file)) {
245 |       write.table(QS.tib, file = format.csv.file, quote = FALSE, row.names = FALSE, sep = "\t", col.names = FALSE)
246 |     } else {
247 |       write.table(QS.tib, quote = FALSE, row.names = FALSE, sep = "\t") 
248 |     }
249 |   }
250 |   # format up the output
251 |   if (format == "tibble") {
252 |     return(QS.tib)
253 |   }
254 |   if (format %in% c("api", "website")) {
255 |     api.temp1 <- format_tsv(QS.tib, col_names = FALSE)
256 |     api.temp2 <- gsub("\t", "%7C", api.temp1) # Replace TAB with "%7C"
257 |     api.temp3 <- gsub("\n", "%7C%7C", api.temp2) # Replace end-of-line with "%7C%7C"
258 |     api.temp4 <- gsub(" ", "%20", api.temp3) # Replace space with "%20"
259 |     api.temp5 <- gsub("\\+", "%2B", api.temp4) # Replace plus with "%2B"
260 |     api.data <- gsub("/", "%2F", api.temp5) # Replace slash with "%2F"
261 | 
262 |     if (format == "api") {
263 |       if (is.null(api.token)) {
264 |         stop(paste0("API token needed. Find yours at ", quickstatements.url, "#/user"))
265 |       }
266 |       url <- paste0(
267 |         quickstatements.url, "api.php",
268 |         "?action=", "import",
269 |         "&submit=", "1",
270 |         "&format=", api.format,
271 |         "&batchname=", api.batchname,
272 |         "&username=", api.username,
273 |         "&token=", api.token,
274 |         "&data=", api.data
275 |       )
276 |     }
277 |     if (format == "website") {
278 |       # not working with v2
279 |       url <- paste0(
280 |         quickstatements.url, "#/v1=",
281 |         "&data=", api.data
282 |       )
283 |     }
284 |     if (api.submit) {
285 |       browseURL(url)
286 |     } else {
287 |       return(url)
288 |     }
289 |   }
290 | }
291 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | # # ----------- Validation checks -----------
 2 | # 
 3 | # # Below are the queries used to generate sysdata.rda within the R package
 4 | # # Ideally run these on startup or something? The parameters will not change frequently.
 5 | # # Useful for checking whether quickstatements inputs will be valid to warn early.
 6 | # 
 7 | # .onAttach <- function(){
 8 | # 
 9 | #   message('Updating key variables from wikidata (estimated time <1 min)')
10 | # 
11 | #   # Valid reference source properties
12 | #   message(' ... Checking valid reference source properties')
13 | #   sparql_query <- 'SELECT ?Wikidata_property_to_indicate_a_source ?Wikidata_property_to_indicate_a_sourceLabel WHERE {
14 | #                       SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
15 | #                       ?Wikidata_property_to_indicate_a_source wdt:P31 wd:Q18608359.
16 | #                    }'
17 | #   SID.valid <- query_wikidata(sparql_query)
18 | # 
19 | #   # The required data type for each property
20 | #   message(' ... Checking required data type for each property')
21 | #   sparql_query <- 'SELECT ?property ?propertyLabel ?wbtype WHERE {
22 | #                       SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
23 | #                       ?property rdf:type               wikibase:Property.
24 | #                       ?property wikibase:propertyType  ?wbtype.
25 | #                    }'
26 | #   PID.datatype <- query_wikidata(sparql_query)
27 | #   PID.datatype$wbtype <- gsub("ontology#","",PID.datatype$wbtype)
28 | # 
29 | #   # The expected regex match for each property
30 | #   message(' ... Checking expected regex match for each property')
31 | #   # Those with a 'format as a regular expression' (P1793) listed as a qualifier of their 'property constraint' (P2302)
32 | #   sparql_query1 <- 'SELECT DISTINCT ?Wikidata_property ?Wikidata_propertyLabel ?fmt WHERE {
33 | #                       SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
34 | #                       ?Wikidata_property wdt:P31/wdt:P279* wd:Q18616576.
35 | #                       ?Wikidata_property p:P2302 [pq:P1793 ?fmt].
36 | #                     }'
37 | #   # Those with a 'format as a regular expression' (P1793) only listed as a property statement
38 | #   sparql_query2 <- 'SELECT DISTINCT ?Wikidata_property ?Wikidata_propertyLabel ?fmt WHERE {
39 | #                       SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
40 | #                       ?Wikidata_property wdt:P31/wdt:P279* wd:Q18616576.
41 | #                       ?Wikidata_property wdt:P1793 ?fmt
42 | #                       MINUS{?Wikidata_property p:P2302 [pq:P1793 ?fmtmain]}.
43 | #                    }'
44 | #   PID.constraint  <- add_row(query_wikidata(sparql_query1),
45 | #                              query_wikidata(sparql_query2))
46 | # 
47 | # 
48 | #   # Language abbreviations
49 | #   message(' ... Checking language abbreviations')
50 | #   sparql_query <- 'SELECT ?abbrev WHERE {
51 | #                       ?language wdt:P305 ?abbrev.
52 | #                    }'
53 | #   lang.abbrev <- query_wikidata(sparql_query)
54 | # 
55 | #   # Language abbreviations for current wikis
56 | #   message(' ... Checking language abbreviations for current wikis')
57 | #   sparql_query <- 'SELECT ?abbrev WHERE {
58 | #                       ?Wikipedia_language_edition wdt:P31 wd:Q10876391.
59 | #                       ?Wikipedia_language_edition wdt:P424 ?abbrev.
60 | #                    }'
61 | #   lang.abbrev.wiki <- query_wikidata(sparql_query)
62 | # 
63 | #   # Wikimedia abbreviations for current wikis
64 | #   message(' ... Checking Wikimedia abbreviations for current wikis')
65 | #   sparql_query <- 'SELECT ?abbrev WHERE {
66 | #                       ?Wiki_edition wdt:P1800 ?abbrev.
67 | #                    }'
68 | #   abbrev.wiki <- query_wikidata(sparql_query)
69 | # 
70 | #   # #example
71 | #   # grep(as.matrix(PID.constraint[PID.constraint$Wikidata_property=="P968","fmt"]),
72 | #   #      "mailto:t.shafee@gmail.com",
73 | #   #      perl=TRUE)
74 | #   assign(x = "WD.globalvar",
75 | #          envir = .GlobalEnv,
76 | #          value = list(SID.valid        = SID.valid,
77 | #                       PID.datatype     = PID.datatype,
78 | #                       PID.constraint   = PID.constraint,
79 | #                       lang.abbrev      = lang.abbrev,
80 | #                       lang.abbrev.wiki = lang.abbrev.wiki,
81 | #                       abbrev.wiki      = abbrev.wiki)
82 | #   )
83 | # 
84 | #   message('Update complete (data saved as WD.globalvar)')
85 | # }
86 | 
87 | 
88 | # # Below used to save as system data within an R package
89 | # save(list="WD.globalvar",file="R//sysdata.rda", compress = "xz")


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | WikidataR
  2 | =========
  3 | 
  4 | A combined R package for reading, writing and handling Wikidata semantic data (via APIs).
  5 | 
  6 | __Authors:__ [Thomas Shafee](https://github.com/TS404) (aut., maint.), [Os Keyes](https://github.com/Ironholds) (aut., cre.)  
  7 | __License:__ [MIT](https://opensource.org/licenses/MIT)  
  8 | __Status:__ Stable
  9 | 
 10 | Description
 11 | ======
 12 | WikidataR includes functions to:
 13 | - read from Wikidata (single or multiple items or properties)
 14 | - query Wikidata (retrieving all items that match a set of criteria via the [Wikidata SPARQL query service](https://query.wikidata.org))
 15 | - write to Wikidata (adding new items or statements via [QuickStatements](https://tools.wmflabs.org/quickstatements)) 
 16 | - Handle and manipulate Wikidata objects (as lists and tibbles)
 17 | For details on how to best use it, see the examples below.
 18 | 
 19 | Installation
 20 | ======
 21 | 
 22 | To download the stable version of WikidataR from CRAN:
 23 | 
 24 |     install.packages(c("WikidataR","WikidataQueryServiceR"))
 25 |     
 26 | To get the most current development version from Github:
 27 | 
 28 |     install.packages("devtools")
 29 |     devtools::install_github("ts404/WikidataR")
 30 |     
 31 | Examples
 32 | ======
 33 | ### Search Wikidata to see if an item exists (example: pharmaceuticals)
 34 | For cases where you don't already know the QID of an item or the PID of a property, you can search wikidata by name. Note that some search terms will return multiple possible items. You can also specify a language (defaults to English).
 35 | 
 36 | ``` r
 37 | find_item("Paracetamol")
 38 | find_property("medical condition treated")
 39 | ```
 40 | Which returns the lists: 
 41 | 
 42 | ```
 43 |     acetaminophen (Q57055) - common drug for pain and fever  
 44 |     Paracetamol (Q36716177) - scientific article published on July 1980  
 45 |     Paracetamol (Q54982056) - musical group  
 46 |     ...
 47 | ```
 48 | 
 49 | and
 50 | ```
 51 |     medical condition treated (P2175) - disease that this pharmaceutical drug, procedure, or therapy is used to treat 
 52 | ```
 53 | Elements within those lists include basic information from wikidata (ID, description, labels). The QID or PID can then be used to get the full data for the item (see below).
 54 | 
 55 | ### Convert between identifiers
 56 | Wikidata is an excellent thesaurus for different identifiers. For example, it is possible to convert from any identifier to Wikidata QIDs, or between different identifiers:
 57 | ``` r
 58 | qid_from_identifier('ISBN-13','978-0-262-53817-6')
 59 | identifier_from_identifier('ORCID iD','IMDb ID',c('0000-0002-7865-7235','0000-0003-1079-5604'))
 60 | ```
 61 | Which returns the lists: 
 62 | ```
 63 |     978-0-262-53817-6 Q102035721 Wikipedia @ 20: Stories of an Incomplete Revolution
 64 | ```
 65 | and
 66 | ```
 67 |     # A tibble: 2 x 2
 68 |       value               return   
 69 |       <chr>               <fct>    
 70 |     1 0000-0002-7865-7235 nm2118834
 71 |     2 0000-0003-1079-5604 nm1821217
 72 | ```
 73 | 
 74 | ### Get full items from Wikidata (example: journal articles)
 75 | In this example, we search for three articles using their DOIs ([P356](https://www.wikidata.org/wiki/Property:P356)), find their QIDs, download their full Wikidata entries, and then extract the "main topics" (note that the property's label can be used in place of its PID).
 76 | 
 77 | ``` r
 78 | article.qid      <- qid_from_DOI(c('10.15347/WJM/2017.007','10.15347/WJM/2019.001','10.15347/WJM/2019.007'))
 79 | article.q        <- get_item(article.qid)
 80 | article.topics.p <- extract_claims(article.q, "main topic")
 81 | get_names_from_properties(article.topics.p)
 82 | ```
 83 | Which returns a tibble for each of the journal articles, listing the main topics of each and their QIDs.
 84 | ```
 85 |     $`10.15347/WJM/2017.007`
 86 |     # A tibble: 1 x 2
 87 |       QID          value    
 88 |       <chr>        <chr>    
 89 |     1 P921.Q164778 rotavirus
 90 | 
 91 |     $`10.15347/WJM/2019.001`
 92 |     # A tibble: 2 x 2
 93 |       QID            value                               
 94 |       <chr>          <chr>                               
 95 |     1 P921.Q15989108 Western African Ebola virus epidemic
 96 |     2 P921.Q10538943 Ebola virus                         
 97 | 
 98 |     $`10.15347/WJM/2019.007`
 99 |     # A tibble: 2 x 2
100 |       QID            value                          
101 |       <chr>          <chr>                          
102 |     1 P921.Q1820650  readability                    
103 |     2 P921.Q16235120 health information on Wikipedia
104 | ```
105 | 
106 | ### Query Wikidata with complex searches (example: movie genres)
107 | 
108 | In this example, we search Wikidata for any items that are an "instance of" ([P31](https://www.wikidata.org/wiki/Property:P31)) "film" ([Q11424](https://www.wikidata.org/wiki/Q11424)) that has the label "The Cabin in the Woods" ([Q45394](https://www.wikidata.org/wiki/Q45394)), and ask for the item's genres ([P136](https://www.wikidata.org/wiki/Property:P136)).
109 | 
110 | ``` r
111 | query_wikidata('SELECT DISTINCT
112 |   ?genre ?genreLabel
113 | WHERE {
114 |   ?film wdt:P31 wd:Q11424.
115 |   ?film rdfs:label "The Cabin in the Woods"@en.
116 |   ?film wdt:P136 ?genre.
117 |   SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
118 | }')
119 | ```
120 | Which returns a tibble:
121 | ```
122 |     # A tibble: 6 x 2
123 |       genre                                   genreLabel          
124 |       <chr>                                   <chr>               
125 |     1 http://www.wikidata.org/entity/Q3072049 zombie film         
126 |     2 http://www.wikidata.org/entity/Q471839  science fiction film
127 |     3 http://www.wikidata.org/entity/Q859369  comedy-drama        
128 |     4 http://www.wikidata.org/entity/Q1342372 monster film        
129 |     5 http://www.wikidata.org/entity/Q853630  slasher film        
130 |     6 http://www.wikidata.org/entity/Q224700  comedy horror    
131 | ```
132 | 
133 | For more example SPARQL queries, see [this page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) on [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page).
134 | 
135 | `query_wikidata()` can accept multiple queries, returning a (potentially named) list of data frames. If the vector of SPARQL queries is named, the results will inherit those names.
136 | 
137 | #### Links for learning SPARQL  
138 | 
139 | -   [A beginner-friendly course for SPARQL](https://www.wikidata.org/wiki/Wikidata:A_beginner-friendly_course_for_SPARQL)
140 | -   Building a SPARQL query: [Museums on Instagram](https://www.wikidata.org/wiki/Help:SPARQL/Building_a_query/Museums_on_Instagram)
141 | -   [SPARQL Query Examples](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) for WDQS
142 | -   [Using SPARQL to access Linked Open Data](https://programminghistorian.org/lessons/graph-databases-and-SPARQL) by Matthew Lincoln
143 | -   Interesting or illustrative [SPARQL queries](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries) for Wikidata
144 | -   Wikidata [2016 SPARQL Workshop](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/2016_SPARQL_Workshop)
145 | -   [Wikidata SPARQL Query video tutorial](https://www.youtube.com/watch?v=1jHoUkj_mKw) by Navino Evans
146 | -   *[Learning SPARQL](http://www.learningsparql.com/)* by Bob DuCharme
147 | -   [WDQS User Manual](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual)
148 | 
149 | ### Write to Wikidata (example: paintings)  
150 | In this example we'll write directly to wikidata via the [QuickStatements](https://tools.wmflabs.org/quickstatements) format.
151 | ``` r
152 | write_wikidata(items      = c("Q4115189","Q13406268"),
153 |                properties = "author",
154 |                values     = c("Q762","Q41406"),
155 |                format     = "api",
156 |                api.username = "myusername", # Enter your Wikimedia username here
157 |                api.token  = "" #REDACTED# Find yours from https://tools.wmflabs.org/quickstatements/#/user
158 |                )
159 | ```
160 | Results in the statements being directly added to wikidata under your username via the API.  
161 | > The Mona Lisa (Q12418) has the Creator (P170) of Leonardo da Vinci (Q762)  
162 | > The Scream (Q471379) has the Creator (P170) of Edvard Munch (Q41406)  
163 | 
164 | Alternatively, you can print via <code>format="tibble"</code> and paste into the [QuickStatements](https://tools.wmflabs.org/quickstatements) website.
165 | 
166 | ### Combining all of the above (example: journal articles)
167 | The example below finds all articles in a journal, works out the URL for their peer reviews, and writes those URLs into those articles' wikidata items.
168 | ``` r
169 | sparql_query <- 'SELECT ?Article ?ArticleLabel ?JLabel ?T ?peer_review_URL WHERE {
170 |   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
171 |   ?Article wdt:P1433 wd:Q24657325.
172 |   OPTIONAL { ?Article wdt:P1433 ?J. }
173 |   OPTIONAL { ?Article wdt:P1476 ?T. }
174 |   OPTIONAL { ?Article wdt:P7347 ?peer_review_URL. }}
175 | LIMIT 10000'
176 | articles.qr <- as_tibble(query_wikidata(sparql_query))
177 | articles.qr <- articles.qr[articles.qr$peer_review_URL=="",] #omit those with review URLs listed
178 | review.URLs <- paste0('https://en.wikiversity.org/wiki/Talk:',
179 |                       articles.qr$JLabel,
180 |                       "/",
181 |                       articles.qr$T
182 |                      )
183 | review.URLs <- gsub(" ","_",review.URLs)
184 | 
185 | write_wikidata(items      = sapply(sapply(articles.qr$Article,pattern = "/",stringr::str_split),tail,1),
186 |                properties = "Peer review URL",
187 |                values     = review.URLs,
188 |                format     = "tibble",
189 |                )
190 |                   
191 | write_wikidata(items        = sapply(sapply(articles.qr$Article,pattern = "/",stringr::str_split),tail,1),
192 |                properties   = "Peer review URL",
193 |                values       = review.URLs,
194 |                format       = "api",
195 |                api.username = "myusername", 
196 |                api.token    = "" #REDACTED# Find yours from https://tools.wmflabs.org/quickstatements/#/user
197 |                )
198 | ```
199 | ### Acknowledgements
200 | This package combines and builds on the utilities of Os Keyes' [WikidataR](https://github.com/Ironholds/WikidataR), Christian Graul's
201 | [rwikidata](https://github.com/chgrl/rwikidata), Mikhail Popov's [WikidataQueryServiceR](https://github.com/wikimedia/WikidataQueryServiceR), and Serena Signorelli's [QueryWikidataR](https://github.com/serenasignorelli/QueryWikidataR) packages. It also uses Magnus Manske's [QuickStatements](https://github.com/magnusmanske/quickstatements) tool.
202 | 


--------------------------------------------------------------------------------
/WikidataR.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: XeLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source --resave-data=best
18 | PackageCheckArgs: --as-cran
19 | PackageRoxygenize: rd,collate,namespace,vignette
20 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Test environments
 2 | * local R installation, R 3.6.3
 3 | * ubuntu 16.04 (on travis-ci), R 3.6.3
 4 | * win-builder (devel)
 5 | 
 6 | ## R CMD check results
 7 | 
 8 | 0 errors | 0 warnings | 1 note
 9 | 
10 | * This is a new release of WikidataR, previously maintained by Os Keyes (github.com/Ironholds)
11 | * They have consented to hand maintainership over to me (github.com/TS404).
12 | 


--------------------------------------------------------------------------------
/inst/extdata/WD.globalvar.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TS404/WikidataR/d7873b6b80a951130e9c4ec5c17068bd4898fb6e/inst/extdata/WD.globalvar.RDS


--------------------------------------------------------------------------------
/man/WD.globalvar.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \name{WD.globalvar}
 4 | \alias{WD.globalvar}
 5 | \title{Global variables for Wikidata properties}
 6 | \format{
 7 | A list of tibbles documenting key property constraints from Wikidata  
 8 | \describe{
 9 |   \item{SID.valid}{valid reference source properties}
10 |   \item{PID.datatype}{required data type for each property}
11 |   \item{PID.constraint}{expected regex match for each property}
12 |   \item{lang.abbrev}{language abbreviations}
13 |   \item{lang.abbrev.wiki}{language abbreviations for current wikis}
14 |   \item{abbrev.wiki}{Wikimedia abbreviations for current wikis}
15 |   ...
16 | }
17 | }
18 | \description{
19 | A dataset of Wikidata global variables.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/WikidataR.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/WikidataR.R
 3 | \docType{package}
 4 | \name{WikidataR}
 5 | \alias{WikidataR}
 6 | \alias{WikidataR-package}
 7 | \title{API client library for Wikidata}
 8 | \description{
 9 | This package serves as an API client for reading and writing
10 | to and from \href{https://www.wikidata.org/wiki/Wikidata:Main_Page}{Wikidata}, (including 
11 | via the \href{https://quickstatements.toolforge.org/}{QuickStatements} format),
12 | as well as for reading from \href{https://www.wikipedia.org}{Wikipedia}.
13 | }
14 | \seealso{
15 | \code{\link{get_random}} for selecting a random item or property,
16 | \code{\link{get_item}} for a /specific/ item or property, or \code{\link{find_item}}
17 | for using search functionality to pull out item or property IDs where the descriptions
18 | or aliases match a particular search term.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/as_pid.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{as_pid}
 4 | \alias{as_pid}
 5 | \title{Convert an input to a property PID}
 6 | \usage{
 7 | as_pid(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a vector, data frame, or tibble of strings representing Wikidata properties}
11 | }
12 | \value{
13 | If the inputted string is a valid PID, return the string.
14 | If the inputted string matches a property label, return its PID.
15 | If the inputted string matches multiple labels of multiple properties, return the PID of the first hit.
16 | }
17 | \description{
18 | Convert an input string to the most likely property PID
19 | }
20 | \examples{
21 | # if input string is a valid PID
22 | as_pid("P50")
23 | # if input string matches multiple property labels
24 | as_pid("author")
25 | # if input string matches a single unique label
26 | as_pid("Scopus author ID")
27 | }
28 | 


--------------------------------------------------------------------------------
/man/as_qid.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{as_qid}
 4 | \alias{as_qid}
 5 | \title{Convert an input to an item QID.}
 6 | \usage{
 7 | as_qid(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a vector, data frame, or tibble of strings representing wikidata items}
11 | }
12 | \value{
13 | if the inputted string is a valid QID, return the string.
14 | If the inputted string matches an item label, return its QID.
15 | If the inputted string matches multiple labels of multiple items, return the QID of the first hit.
16 | }
17 | \description{
18 | Convert an input string to the most likely item 
19 | \href{https://www.wikidata.org/wiki/Q43649390}{QID}.
20 | }
21 | \examples{
22 | # if input string is a valid QID
23 | as_qid("Q42")
24 | # if input string matches multiple item labels
25 | as_qid("Douglas Adams")
26 | # if input string matches a single unique label
27 | as_qid("Douglas Adams and the question of arterial blood pressure in mammals")
28 | }
29 | 


--------------------------------------------------------------------------------
/man/as_quot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{as_quot}
 4 | \alias{as_quot}
 5 | \title{Add quotation marks}
 6 | \usage{
 7 | as_quot(x, format = "tibble")
 8 | }
 9 | \arguments{
10 | \item{x}{a vector, data frame, or tibble of strings}
11 | 
12 | \item{format}{either "tibble" / "csv" to use plain quotation marks (default), 
13 | or "api" / "website" to use '\%22'}
14 | }
15 | \value{
16 | A tibble of items inside of escaped quotation marks,
17 | unless an item is already in escaped quotation marks or is a 
18 | \href{https://www.wikidata.org/wiki/Q43649390}{QID}, 
19 | in which case it is returned unchanged.
20 | }
21 | \description{
22 | Add escaped quotation marks around strings that need them ready for 
23 | submission to an API.
24 | }
25 | \examples{
26 | as_quot("text")
27 | }
28 | 


--------------------------------------------------------------------------------
/man/as_sid.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{as_sid}
 4 | \alias{as_sid}
 5 | \title{Convert an input to a source property SID}
 6 | \usage{
 7 | as_sid(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a vector, data frame, or tibble of strings representing Wikidata 
11 | source properties.}
12 | }
13 | \value{
14 | if the inputted string is a valid SID, return the string.
15 | If the inputted string matches a property label, return its SID.
16 | If the inputted string matches multiple labels of multiple properties, 
17 | return the SID of the first hit.
18 | }
19 | \description{
20 | Convert an input string to the most likely source SID
21 | (equivalent to PID.)
22 | }
23 | \examples{
24 | # if input string is a valid SID
25 | as_sid("S854")
26 | # if input string matches multiple property labels
27 | as_sid("URL")
28 | # if input string matches a single unique label
29 | as_sid("Reference URL")
30 | }
31 | 


--------------------------------------------------------------------------------
/man/check_input.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{check_input}
 4 | \alias{check_input}
 5 | \title{Generic input checker}
 6 | \usage{
 7 | check_input(input, substitution)
 8 | }
 9 | \arguments{
10 | \item{input}{string to check}
11 | 
12 | \item{substitution}{string for what's been looked for}
13 | }
14 | \value{
15 | boolean indicating whether the checked string contains a match for the substitution string
16 | }
17 | \description{
18 | Utility function to handle namespaces. Used by \code{get_item} and \code{get_property}
19 | }
20 | 


--------------------------------------------------------------------------------
/man/createrows.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{createrows}
 4 | \alias{createrows}
 5 | \title{"CREATE" rows}
 6 | \usage{
 7 | createrows(items, vector)
 8 | }
 9 | \arguments{
10 | \item{items}{a vector, data frame or tibble of items (which may or may not contain the keyword "CREATE")}
11 | 
12 | \item{vector}{a vector of properties or values which may be expanded based on the items vector}
13 | }
14 | \value{
15 | if the vector is NULL, return NULL. Otherwise, if the "CREATE" keyword appears in the
16 | items vector, insert blank strings at those positions in the vector.
17 | }
18 | \description{
19 | Add in empty lines for QuickStatements CREATE rows that mint new QIDs.
20 | This is a slightly messy quirk of the QuickStatements format that mints new QIDs via a line
21 | containing only "CREATE", so this function is a way to approximate that behavior in a tibble
22 | }
23 | 


--------------------------------------------------------------------------------
/man/createrows.tidy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{createrows.tidy}
 4 | \alias{createrows.tidy}
 5 | \title{"CREATE" rows from tidy format}
 6 | \usage{
 7 | createrows.tidy(QS.tib)
 8 | }
 9 | \arguments{
10 | \item{QS.tib}{a tibble of items, values and properties (optionally qualifiers and sources).}
11 | }
12 | \value{
13 | a tibble, with items that start with "CREATE" followed by any unique text causing the
14 | addition of a "Create" line above, being replaced with "LAST" in the QuickStatements 
15 | format to create new QIDs.
16 | }
17 | \description{
18 | Add in QuickStatements CREATE rows that mint new QIDs from tidy input data.
19 | New items are created by any item that starts with the text "CREATE" followed
20 | by any unique ID.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/disambiguate_QIDs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/disambiguators.R
 3 | \name{disambiguate_QIDs}
 4 | \alias{disambiguate_QIDs}
 5 | \title{Disambiguate QIDs}
 6 | \usage{
 7 | disambiguate_QIDs(
 8 |   list,
 9 |   variablename = "variables",
10 |   variableinfo = NULL,
11 |   filter_property = NULL,
12 |   filter_variable = NULL,
13 |   filter_firsthit = FALSE,
14 |   Q_min = NULL,
15 |   auto_create = FALSE,
16 |   limit = 10
17 | )
18 | }
19 | \arguments{
20 | \item{list}{a list or vector of text strings to find potential QID matches to.
21 | Can also be a list of lists (see examples)}
22 | 
23 | \item{variablename}{type of items in the list that are being disambiguated (used in messages)}
24 | 
25 | \item{variableinfo}{additional information about items that are being disambiguated (used in messages)}
26 | 
27 | \item{filter_property}{property to filter on (e.g. "P31" to filter on "instance of")}
28 | 
29 | \item{filter_variable}{values of that property to use to filter out (e.g. "Q571" to filter out books)}
30 | 
31 | \item{filter_firsthit}{apply filter to the first match presented or only if alternatives requested?
32 | (default = FALSE, note: true is slower if filter not needed on most matches)}
33 | 
34 | \item{Q_min}{return only possible hits with QIDs above the provided value}
35 | 
36 | \item{auto_create}{if no match found, automatically assign "CREATE"}
37 | 
38 | \item{limit}{number of alternative possible Wikidata items to present if multiple potential matches}
39 | }
40 | \value{
41 | a vector of:
42 | \describe{
43 |   \item{QID}{Selected QID (for when an appropriate Wikidata match exists)}
44 |   \item{CREATE}{Mark that a new Wikidata item should be created (for when no appropriate Wikidata match yet exists)}
45 |   \item{NA}{Mark that no Wikidata item is needed}
46 |   \item{STOP}{Mark that the process was halted at this point (so that output can be used as input to the function later)}
47 | }
48 | }
49 | \description{
50 | Interactive function that presents alternative possible QID matches for a list of text
51 | strings and provides options for choosing between alternatives, rejecting all presented alternatives,
52 | or creating new items. Useful in cases where a list of text strings may have either missing Wikidata items
53 | or multiple alternative potential matches that need to be manually disambiguated. Can also be used on
54 | lists of lists (see examples). For long lists of items, the process can be stopped partway through and
55 | the returned vector will indicate where the process was stopped.
56 | }
57 | \examples{
58 | \dontrun{
59 | #Disambiguating possible QID matches for these music genres
60 | #Results should be:
61 | # "Q22731" as the first match
62 | # "Q147538" as the first match
63 | # "Q3947" as the second alternative match
64 | disambiguate_QIDs(list=c("Rock","Pop","House"),
65 |                  variablename="music genre")
66 | 
67 | #Disambiguating possible QID matches for these three words, but not the music genres
68 | #This will take longer as the filtering step is slower
69 | #Results should be:
70 | # "Q22731" (the material) as the first match
71 | # "Q147538" (the soft drink) as the second alternative match
72 | # "Q3947" (the building) as the first match
73 | disambiguate_QIDs(list=c("Rock","Pop","House"),
74 |                  filter_property="instance of",
75 |                  filter_variable="music genre",
76 |                  filter_firsthit=TRUE,
77 |                  variablename="concept, not the music genre")
78 | 
79 | #Disambiguating possible QID matches for the multiple expertise of
80 | #these three people as list of lists
81 | disambiguate_QIDs(list=list(alice=list("physics","chemistry","maths"),
82 |                            barry=list("history"),
83 |                            clair=list("law","genetics","ethics")),
84 |                  variablename="expertise")
85 | }
86 | }
87 | 


--------------------------------------------------------------------------------
/man/extract_claims.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{extract_claims}
 4 | \alias{extract_claims}
 5 | \title{Extract claims from returned item data}
 6 | \usage{
 7 | extract_claims(items, claims)
 8 | }
 9 | \arguments{
10 | \item{items}{a list of one or more Wikidata items returned with
11 | \code{\link{get_item}}.}
12 | 
13 | \item{claims}{a vector of claims (in the form "P321", "P12") to look for
14 | and extract.}
15 | }
16 | \value{
17 | a list containing one sub-list for each entry in \code{items},
18 | and (below that) the found data for each claim. In the event a claim
19 | cannot be found for an item, an \code{NA} will be returned
20 | instead.
21 | }
22 | \description{
23 | extract claim information from data returned using
24 | \code{\link{get_item}}.
25 | }
26 | \examples{
27 | # Get item data
28 | adams_data <- get_item("42")
29 | # Get claim data
30 | claims <- extract_claims(adams_data, "P31")
31 | }
32 | 


--------------------------------------------------------------------------------
/man/extract_para.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{extract_para}
 4 | \alias{extract_para}
 5 | \title{Extract a paragraph of text}
 6 | \usage{
 7 | extract_para(text, para = 1, templ = NULL)
 8 | }
 9 | \arguments{
10 | \item{text}{the input text as a string}
11 | 
12 | \item{para}{number indicating which paragraph(s) to return (default=1)}
13 | 
14 | \item{templ}{an optional string specifying a MediaWiki template within
15 | which to restrict the search}
16 | }
17 | \value{
18 | the nth paragraph of the input text.
19 | }
20 | \description{
21 | Return the nth paragraph of a section of text
22 | Useful for extracting information from Wikipedia or other wikimarkup text
23 | }
24 | 


--------------------------------------------------------------------------------
/man/filter_qids.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{filter_qids}
 4 | \alias{filter_qids}
 5 | \title{Filter QIDs}
 6 | \usage{
 7 | filter_qids(
 8 |   ids,
 9 |   property = "P31",
10 |   filter = c("Q737498", "Q5633421", "Q7725634", "Q13442814", "Q18918145"),
11 |   message = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{ids}{QIDs to check}
16 | 
17 | \item{property}{property to check (default = P31 to filter on "instance of")}
18 | 
19 | \item{filter}{values of that property to use to filter out
20 | (default = Q737498, Q5633421, Q7725634, Q13442814, and Q18918145 to remove academic
21 | publications or books)}
22 | 
23 | \item{message}{message to return (useful for disambiguate_QIDs function)}
24 | }
25 | \value{
26 | a vector of QIDs that do not match the property filter
27 | }
28 | \description{
29 | For a QID or vector of QIDs, remove ones that match a particular statement
30 | (e.g. remove all that are instances of academic publications or books).
31 | }
32 | \details{
33 | The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID) 
34 | is the unique identifier (UID) used in Wikidata.
35 | }
36 | \examples{
37 | \dontrun{
38 | # Filter three items called "Earth Science" to show only those that aren't
39 | # books, journals or journal articles
40 | filter_qids(ids = c("Q96695546","Q8008","Q58966429"),
41 |             property = "P31",
42 |             filter = c("Q737498","Q5633421","Q7725634","Q13442814","Q18918145"))
43 | }
44 | }
45 | 


--------------------------------------------------------------------------------
/man/find_item.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gets.R
 3 | \name{find_item}
 4 | \alias{find_item}
 5 | \alias{find_property}
 6 | \title{Search for Wikidata items or properties that match a search term}
 7 | \usage{
 8 | find_item(
 9 |   search_term,
10 |   language = "en",
11 |   limit = 10,
12 |   response_language = "en",
13 |   ...
14 | )
15 | 
16 | find_property(
17 |   search_term,
18 |   language = "en",
19 |   response_language = "en",
20 |   limit = 10
21 | )
22 | }
23 | \arguments{
24 | \item{search_term}{A term to search for.}
25 | 
26 | \item{language}{The language to return the labels and descriptions in; this should
27 | consist of an ISO language code. Defaults to \code{"en"}.}
28 | 
29 | \item{limit}{The number of results to return; set to \code{10} by default.}
30 | 
31 | \item{\\dots}{further arguments to pass to \code{\link[httr:GET]{httr::GET}}.}
32 | }
33 | \value{
34 | A list containing the result of the query.
35 | }
36 | \description{
37 | \code{find_item} and \code{find_property} allow you to retrieve a set
38 | of Wikidata items or properties where the aliases or descriptions match a particular
39 | search term.  As with other \code{WikidataR} code, custom print methods are available;
40 | use \code{\link{str}} to manipulate and see the underlying structure of the data.
41 | }
42 | \examples{
43 | 
44 | #Check for entries relating to Douglas Adams in some way
45 | adams_items <- find_item("Douglas Adams")
46 | 
47 | #Check for properties involving the peerage
48 | peerage_props <- find_property("peerage")
49 | 
50 | }
51 | \seealso{
52 | \code{\link{get_random}} for selecting a random item or property,
53 | or \code{\link{get_item}} for selecting a specific item or property.
54 | }
55 | 


--------------------------------------------------------------------------------
/man/get_example.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gets.R
 3 | \name{get_example}
 4 | \alias{get_example}
 5 | \title{Get an example SPARQL query from Wikidata}
 6 | \usage{
 7 | get_example(example_name)
 8 | }
 9 | \arguments{
10 | \item{example_name}{the names of the examples as they appear on
11 | \href{https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples}{this page}}
12 | }
13 | \value{
14 | The SPARQL query as a character vector.
15 | }
16 | \description{
17 | Gets the specified example(s) from the
18 |   \href{https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples}{SPARQL query service examples page}
19 |   using \href{https://www.wikidata.org/w/api.php}{Wikidata's MediaWiki API}.
20 | }
21 | \details{
22 | If you are planning on extracting multiple examples, please provide
23 |   all the names as a single vector for efficiency.
24 | }
25 | \examples{
26 | \dontrun{
27 | sparql_query <- get_example(c("Cats", "Horses"))
28 | query_wikidata(sparql_query)
29 | # returns a named list with two data frames
30 | # one called "Cats" and one called "Horses"
31 | sparql_query <- get_example("Largest cities with female mayor")
32 | cat(sparql_query)
33 | query_wikidata(sparql_query)
34 | }
35 | }
36 | \seealso{
37 | \code{\link{query_wikidata}}
38 | }
39 | 


--------------------------------------------------------------------------------
/man/get_geo_box.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/geo.R
 3 | \name{get_geo_box}
 4 | \alias{get_geo_box}
 5 | \title{Get geographic entities based on a bounding box}
 6 | \usage{
 7 | get_geo_box(
 8 |   first_city_code,
 9 |   first_corner,
10 |   second_city_code,
11 |   second_corner,
12 |   language = "en",
13 |   ...
14 | )
15 | }
16 | \arguments{
17 | \item{first_city_code}{a Wikidata item, or series of items, to use for
18 | one corner of the bounding box.}
19 | 
20 | \item{first_corner}{the direction of \code{first_city_code} relative
21 | to \code{city} (eg "NorthWest", "SouthEast").}
22 | 
23 | \item{second_city_code}{a Wikidata item, or series of items, to use for
24 | one corner of the bounding box.}
25 | 
26 | \item{second_corner}{the direction of \code{second_city_code} relative
27 | to \code{city} (eg "NorthWest", "SouthEast").}
28 | 
29 | \item{language}{the two-letter language code to use for the name
30 | of the item. "en" by default.}
31 | 
32 | \item{\\dots}{further arguments to pass to \code{\link[httr:GET]{httr::GET}}.}
33 | }
34 | \value{
35 | a data.frame of 5 columns:
36 | \itemize{
37 |  \item{item}{ the Wikidata identifier of each object associated with
38 |  \code{entity}.}
39 |  \item{name}{ the name of the item, if available, in the requested language. If it
40 |  is not available, \code{NA} will be returned instead.}
41 |  \item{latitude}{ the latitude of \code{item}}
42 |  \item{longitude}{ the longitude of \code{item}}
43 |  \item{entity}{ the entity the item is associated with (necessary for multi-entity
44 |  queries).}
45 | }
46 | }
47 | \description{
48 | \code{get_geo_box} retrieves all geographic entities in
49 | Wikidata that fall between a bounding box between two existing items
50 | with geographic attributes (usually cities).
51 | }
52 | \examples{
53 | # Simple bounding box
54 | \donttest{bruges_box <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest")}
55 | 
56 | # Custom language
57 | \donttest{bruges_box_fr <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest",
58 |                                         language = "fr")}
59 | 
60 | }
61 | \seealso{
62 | \code{\link{get_geo_entity}} for using an unrestricted search or simple radius,
63 | rather than a bounding box.
64 | }
65 | 


--------------------------------------------------------------------------------
/man/get_geo_entity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/geo.R
 3 | \name{get_geo_entity}
 4 | \alias{get_geo_entity}
 5 | \title{Retrieve geographic information from Wikidata}
 6 | \usage{
 7 | get_geo_entity(entity, language = "en", radius = NULL, limit = 100, ...)
 8 | }
 9 | \arguments{
10 | \item{entity}{a Wikidata item (\code{Q...}) or series of items, to check
11 | for associated geo-tagged items.}
12 | 
13 | \item{language}{the two-letter language code to use for the name
14 | of the item. "en" by default, because we're imperialist
15 | anglocentric westerners.}
16 | 
17 | \item{radius}{optionally, a radius (in kilometers) around \code{entity}
18 | to restrict the search to.}
19 | 
20 | \item{limit}{the maximum number of results to return.}
21 | 
22 | \item{\\dots}{further arguments to pass to \code{\link[httr:GET]{httr::GET}}.}
23 | }
24 | \value{
25 | a data.frame of 5 columns:
26 | \itemize{
27 |  \item{item}{ the Wikidata identifier of each object associated with
28 |  \code{entity}.}
29 |  \item{name}{ the name of the item, if available, in the requested language. If it
30 |  is not available, \code{NA} will be returned instead.}
31 |  \item{latitude}{ the latitude of \code{item}}
32 |  \item{longitude}{ the longitude of \code{item}}
33 |  \item{entity}{ the entity the item is associated with (necessary for multi-entity
34 |  queries).}
35 | }
36 | }
37 | \description{
38 | \code{get_geo_entity} retrieves the item ID, latitude
39 | and longitude of any object with geographic data associated with \emph{another}
40 | object with geographic data (example: all the locations around/near/associated with
41 | a city).
42 | }
43 | \examples{
44 | # All entities
45 | \donttest{sf_locations <- get_geo_entity("Q62")}
46 | 
47 | # Entities with French, rather than English, names
48 | \donttest{sf_locations <- get_geo_entity("Q62", language = "fr")}
49 | 
50 | # Entities within 1km
51 | \donttest{sf_close_locations <- get_geo_entity("Q62", radius = 1)}
52 | 
53 | # Multiple entities
54 | \donttest{multi_entity <- get_geo_entity(entity = c("Q62", "Q64"))}
55 | 
56 | }
57 | \seealso{
58 | \code{\link{get_geo_box}} for using a bounding box
59 | rather than an unrestricted search or simple radius.
60 | }
61 | 


--------------------------------------------------------------------------------
/man/get_item.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gets.R
 3 | \name{get_item}
 4 | \alias{get_item}
 5 | \alias{get_property}
 6 | \title{Retrieve specific Wikidata items or properties}
 7 | \usage{
 8 | get_item(id, ...)
 9 | 
10 | get_property(id, ...)
11 | }
12 | \arguments{
13 | \item{id}{the ID number(s) of the item or property you're looking for. This can be in
14 | various formats; either a numeric value ("200"), the full name ("Q200") or
15 | even with an included namespace ("Property:P10") - the function will format
16 | it appropriately. This function is vectorized and will happily accept
17 | multiple IDs.}
18 | 
19 | \item{\\dots}{further arguments to pass to \code{\link[httr:GET]{httr::GET}}.}
20 | }
21 | \description{
22 | \code{get_item} and \code{get_property} allow you to retrieve the data associated
23 | with individual Wikidata items and properties, respectively. As with
24 | other \code{WikidataR} code, custom print methods are available; use \code{\link{str}}
25 | to manipulate and see the underlying structure of the data.
26 | }
27 | \examples{
28 | 
29 | #Retrieve a specific item
30 | adams_metadata <- get_item("42")
31 | 
32 | #Retrieve a specific property
33 | object_is_child <- get_property("P40")
34 | 
35 | }
36 | \seealso{
37 | \code{\link{get_random}} for selecting a random item or property,
38 | or \code{\link{find_item}} for using search functionality to pull out
39 | item or property IDs where the descriptions or aliases match a particular
40 | search term.
41 | }
42 | 


--------------------------------------------------------------------------------
/man/get_names_from_properties.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{get_names_from_properties}
 4 | \alias{get_names_from_properties}
 5 | \title{Get names of properties}
 6 | \usage{
 7 | get_names_from_properties(properties)
 8 | }
 9 | \arguments{
10 | \item{properties}{a claims list from \code{extract_claims}}
11 | }
12 | \value{
13 | tibble of labels for each property for a set of claims
14 | }
15 | \description{
16 | For a claim or set of claims, return the names of the properties
17 | }
18 | 


--------------------------------------------------------------------------------
/man/get_random.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gets.R
 3 | \name{get_random_item}
 4 | \alias{get_random_item}
 5 | \alias{get_random}
 6 | \alias{get_random_property}
 7 | \title{Retrieve randomly-selected Wikidata items or properties}
 8 | \usage{
 9 | get_random_item(limit = 1, ...)
10 | 
11 | get_random_property(limit = 1, ...)
12 | }
13 | \arguments{
14 | \item{limit}{how many random items to return. 1 by default, but can be higher.}
15 | 
16 | \item{\\dots}{arguments to pass to \code{\link[httr:GET]{httr::GET}}.}
17 | }
18 | \description{
19 | \code{get_random_item} and \code{get_random_property} allow you to retrieve the data
20 | associated with randomly-selected Wikidata items and properties, respectively. As with
21 | other \code{WikidataR} code, custom print methods are available; use \code{\link{str}}
22 | to manipulate and see the underlying structure of the data.
23 | }
24 | \examples{
25 | \dontrun{
26 | #Random item
27 | random_item <- get_random_item()
28 | 
29 | #Random property
30 | random_property <- get_random_property()
31 | }
32 | }
33 | \seealso{
34 | \code{\link{get_item}} for selecting a specific item or property,
35 | or \code{\link{find_item}} for using search functionality to pull out
36 | item or property IDs where the descriptions or aliases match a particular
37 | search term.
38 | }
39 | 


--------------------------------------------------------------------------------
/man/identifier_from_identifier.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/queries.R
 3 | \name{identifier_from_identifier}
 4 | \alias{identifier_from_identifier}
 5 | \title{Identifier from identifier}
 6 | \usage{
 7 | identifier_from_identifier(
 8 |   property = "ORCID iD",
 9 |   return = "IMDb ID",
10 |   value = "0000-0002-7865-7235"
11 | )
12 | }
13 | \arguments{
14 | \item{property}{The identifier property to search (for caveats, see \code{as_pid})}
15 | 
16 | \item{return}{The identifier property to convert to}
17 | 
18 | \item{value}{The identifier value to match.}
19 | }
20 | \value{
21 | A vector of identifiers corresponding to identifiers submitted.
22 | }
23 | \description{
24 | Convert unique identifiers to other unique identifiers.
25 | }
26 | \examples{
27 | identifier_from_identifier(property ='ORCID iD',
28 |                            return = 'IMDb ID',
29 |                            value = c('0000-0002-7865-7235','0000-0003-1079-5604')
30 |                            )
31 | }
32 | 


--------------------------------------------------------------------------------
/man/initials.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{initials}
 4 | \alias{initials}
 5 | \title{Format short form person names}
 6 | \usage{
 7 | initials(x, format = "FLast")
 8 | }
 9 | \arguments{
10 | \item{x}{a vector of people's names as strings}
11 | 
12 | \item{format}{a vector of strings of either "FLast" or "FL" to indicate the output format}
13 | }
14 | \value{
15 | the inputted name strings with first names shortened based on the
16 | selected format.
17 | }
18 | \description{
19 | Converting names into first initial and surname, or just initials
20 | }
21 | 


--------------------------------------------------------------------------------
/man/list_properties.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{list_properties}
 4 | \alias{list_properties}
 5 | \title{List properties of a Wikidata item}
 6 | \usage{
 7 | list_properties(item, names = FALSE)
 8 | }
 9 | \arguments{
10 | \item{item}{a list of one or more Wikidata items returned with
11 | \code{\link{get_item}}.}
12 | 
13 | \item{names}{a boolean for whether to return property names, or just P numbers
14 | and extract.}
15 | }
16 | \value{
17 | a list containing one sub-list for each entry in \code{items},
18 | and (below that) the found data for each claim. In the event a claim
19 | cannot be found for an item, an \code{NA} will be returned
20 | instead.
21 | }
22 | \description{
23 | for a downloaded wikidata item, list the properties of all statements
24 | }
25 | \examples{
26 | # Get item data
27 | adams_data <- get_item("42")
28 | # Get claim data
29 | claims <- extract_claims(adams_data, "P31")
30 | }
31 | 


--------------------------------------------------------------------------------
/man/print.find_item.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/prints.R
 3 | \name{print.find_item}
 4 | \alias{print.find_item}
 5 | \title{Print method for find_item}
 6 | \usage{
 7 | \method{print}{find_item}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{find_item object with search results}
11 | 
12 | \item{\dots}{Arguments to be passed to methods}
13 | }
14 | \description{
15 | print found items.
16 | }
17 | 


--------------------------------------------------------------------------------
/man/print.find_property.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/prints.R
 3 | \name{print.find_property}
 4 | \alias{print.find_property}
 5 | \title{Print method for find_property}
 6 | \usage{
 7 | \method{print}{find_property}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{find_property object with search results}
11 | 
12 | \item{\dots}{Arguments to be passed to methods}
13 | }
14 | \description{
15 | print found properties.
16 | }
17 | 


--------------------------------------------------------------------------------
/man/print.wikidata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/prints.R
 3 | \name{print.wikidata}
 4 | \alias{print.wikidata}
 5 | \title{Print method for Wikidata objects}
 6 | \usage{
 7 | \method{print}{wikidata}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{Wikidata object from get_item, get_random_item, get_property or get_random_property}
11 | 
12 | \item{\dots}{Arguments to be passed to methods}
13 | }
14 | \description{
15 | print found objects generally.
16 | }
17 | \seealso{
18 | get_item, get_random_item, get_property or get_random_property
19 | }
20 | 


--------------------------------------------------------------------------------
/man/qid_from_DOI.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/schol.R
 3 | \name{qid_from_DOI}
 4 | \alias{qid_from_DOI}
 5 | \title{QID from DOI}
 6 | \usage{
 7 | qid_from_DOI(DOI = "10.15347/WJM/2019.001")
 8 | }
 9 | \arguments{
10 | \item{DOI}{digital object identifiers submitted as strings}
11 | }
12 | \value{
13 | vector of QIDs corresponding to DOIs submitted
14 | }
15 | \description{
16 | simple converter from DOIs to QIDs (for items in Wikidata)
17 | }
18 | 


--------------------------------------------------------------------------------
/man/qid_from_ORCID.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/schol.R
 3 | \name{qid_from_ORCID}
 4 | \alias{qid_from_ORCID}
 5 | \title{QID from ORCID}
 6 | \usage{
 7 | qid_from_ORCID(ORCID = "0000-0002-2298-7593")
 8 | }
 9 | \arguments{
10 | \item{ORCID}{ORCID identifiers submitted as strings}
11 | }
12 | \value{
13 | vector of QIDs corresponding to ORCIDs submitted
14 | }
15 | \description{
16 | simple converter from ORCIDs to QIDs (for items in wikidata)
17 | }
18 | 


--------------------------------------------------------------------------------
/man/qid_from_identifier.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/queries.R
 3 | \name{qid_from_identifier}
 4 | \alias{qid_from_identifier}
 5 | \title{QID from identifier}
 6 | \usage{
 7 | qid_from_identifier(
 8 |   property = "DOI",
 9 |   value = c("10.15347/WJM/2019.001", "10.15347/WJM/2020.002")
10 | )
11 | }
12 | \arguments{
13 | \item{property}{The identifier property to search (for caveats, see \code{as_pid}.)}
14 | 
15 | \item{value}{The identifier value to match.}
16 | }
17 | \value{
18 | A vector of QIDs corresponding to identifiers submitted.
19 | }
20 | \description{
21 | Convert unique identifiers to QIDs (for items in Wikidata).
22 | }
23 | \details{
24 | The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID) is the unique identifier (UID) 
25 | used in Wikidata.
26 | }
27 | \examples{
28 | qid_from_identifier('ISBN-13','978-0-262-53817-6')
29 | }
30 | 


--------------------------------------------------------------------------------
/man/qid_from_name.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/schol.R
 3 | \name{qid_from_name}
 4 | \alias{qid_from_name}
 5 | \title{QID from label name}
 6 | \usage{
 7 | qid_from_name(name = "Thomas Shafee", limit = 100, format = "vector")
 8 | }
 9 | \arguments{
10 | \item{name}{name labels submitted as strings}
11 | 
12 | \item{limit}{if multiple QIDs match each submitted name, how many to return}
13 | 
14 | \item{format}{output format ('vector' to return a simple vector, or 'list' to return a nested list)}
15 | }
16 | \value{
17 | vector of QIDs corresponding to names submitted. Note: some names may return multiple QIDs.
18 | }
19 | \description{
20 | simple converter from label names to QIDs (for items in wikidata).
21 | Essentially a simplification of \code{find_item}
22 | }
23 | 


--------------------------------------------------------------------------------
/man/query_wikidata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/queries.R
 3 | \name{query_wikidata}
 4 | \alias{query_wikidata}
 5 | \title{Send one or more SPARQL queries to WDQS}
 6 | \usage{
 7 | query_wikidata(sparql_query, format = "tibble", ...)
 8 | }
 9 | \arguments{
10 | \item{sparql_query}{SPARQL query (can be a vector of queries)}
11 | 
12 | \item{format}{\code{tibble} (default) returns a pure character data frame,
13 | \code{simple} returns a pure character vector, while
14 | \code{smart} fetches JSON-formatted data and returns a tibble with datetime
15 | columns converted to \code{POSIXct}.}
16 | 
17 | \item{\\dots}{Additional parameters to supply to \code{\link[httr:POST]{httr::POST}}.}
18 | }
19 | \value{
20 | A \code{tibble} or \code{vector}. Note: QID values will be returned as QIDs, rather than URLs.
21 | }
22 | \description{
23 | Makes a POST request to Wikidata Query Service SPARQL endpoint.
24 | }
25 | \section{Query limits}{
26 | 
27 | There is a hard query deadline configured which is set to 60 seconds. There
28 | are also the following limits: \itemize{
29 | \item One client (user agent + IP) is allowed 60 seconds of processing time each
30 |   60 seconds
31 | \item One client is allowed 30 error queries per minute }
32 | See the \href{https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits}{query limits section}
33 | in the Wikidata Query Service User Manual for more information.
34 | }
35 | 
36 | \examples{
37 | # R's versions and release dates:
38 | sparql_query <- 'SELECT DISTINCT
39 |   ?softwareVersion ?publicationDate
40 |   WHERE {
41 |     BIND(wd:Q206904 AS ?R)
42 |     ?R p:P348 [
43 |       ps:P348 ?softwareVersion;
44 |       pq:P577 ?publicationDate
45 |     ] .
46 | }'
47 | query_wikidata(sparql_query)
48 | 
49 | \dontrun{
50 | # "smart" format converts all datetime columns to POSIXct
51 | query_wikidata(sparql_query, format = "smart")
52 | }
53 | }
54 | 


--------------------------------------------------------------------------------
/man/searcher.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gets.R
 3 | \name{searcher}
 4 | \alias{searcher}
 5 | \title{Convert an input to an item QID}
 6 | \usage{
 7 | searcher(search_term, language, limit, response_language, type, ...)
 8 | }
 9 | \arguments{
10 | \item{search_term}{a term to search for.}
11 | 
12 | \item{language}{the language to conduct the search in; this should
13 | consist of an ISO language code. Set to "en" by default.}
14 | 
15 | \item{limit}{the number of results to return; set to 10 by default.}
16 | 
17 | \item{response_language}{the language to return the labels and descriptions in; this should
18 | consist of an ISO language code. Set to "en" by default.}
19 | 
20 | \item{type}{type of wikidata object to return (default = "item")}
21 | 
22 | \item{\\dots}{Additional parameters to supply to \code{\link[httr:POST]{httr::POST}}.}
23 | }
24 | \value{
25 | If the inputted string matches an item label, return its QID.
26 | If the inputted string matches multiple labels of multiple items, return the QID of the first hit.
27 | If the inputted string is already a QID, return the string.
28 | }
29 | \description{
30 | Convert an input string to the most likely item QID
31 | }
32 | \examples{
33 | # if input string is a valid QID
34 | as_qid("Q42")
35 | # if input string matches multiple item labels
36 | as_qid("Douglas Adams")
37 | # if input string matches a single unique label
38 | as_qid("Douglas Adams and the question of arterial blood pressure in mammals")
39 | }
40 | 


--------------------------------------------------------------------------------
/man/sparql_query.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/queries.R
 3 | \name{sparql_query}
 4 | \alias{sparql_query}
 5 | \title{Download full Wikidata items matching a SPARQL query}
 6 | \usage{
 7 | sparql_query(query, ...)
 8 | }
 9 | \arguments{
10 | \item{query}{The SPARQL query as a string}
11 | 
12 | \item{\\dots}{Additional parameters to supply to \code{\link[httr:POST]{httr::POST}}.}
13 | }
14 | \value{
15 | a download of the full wikidata objects formatted as a nested json list
16 | }
17 | \description{
18 | Utility wrapper for the Wikidata SPARQL endpoint to download items.
19 | Used by \code{get_geo_entity} and \code{get_geo_box}.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/unspecial.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{unspecial}
 4 | \alias{unspecial}
 5 | \title{Remove special characters}
 6 | \usage{
 7 | unspecial(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a vector of strings to check for special characters}
11 | }
12 | \value{
13 | the inputted strings with special characters replaced with
14 | closest match plain characters.
15 | }
16 | \description{
17 | Special characters can otherwise mess up wikidata read-writes
18 | }
19 | 


--------------------------------------------------------------------------------
/man/url_to_id.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{url_to_id}
 4 | \alias{url_to_id}
 5 | \title{Extract an identifier from a wikidata URL}
 6 | \usage{
 7 | url_to_id(x)
 8 | 
 9 | url_to_id(x)
10 | }
11 | \arguments{
12 | \item{x}{A vector of strings representing Wikidata URLs.}
13 | }
14 | \value{
15 | if the URL ends in a QID or PID, return that PID or QID, else return the original string
16 | 
17 | QID or PID.
18 | }
19 | \description{
20 | Convert a URL ending in an identifier (returned by SPARQL queries) to just
21 | the plain identifier (QID or PID).
22 | 
23 | Convert a URL ending in an identifier (returned by SPARQL queries)
24 | to just the plain identifier (QID or PID).
25 | }
26 | \details{
27 | The \href{https://www.wikidata.org/wiki/Q43649390}{Wikidata Q identifier} (QID) 
28 | is the unique identifier (UID) used in Wikidata.
29 | }
30 | \examples{
31 | url_to_id("http://www.wikidata.org/entity/42")
32 | url_to_id("http://www.wikidata.org/Q42")
33 | }
34 | 


--------------------------------------------------------------------------------
/man/wd_query.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/queries.R
 3 | \name{wd_query}
 4 | \alias{wd_query}
 5 | \title{Download a Wikidata item}
 6 | \usage{
 7 | wd_query(title, ...)
 8 | }
 9 | \arguments{
10 | \item{title}{The Wikidata item or property as a string.}
11 | 
12 | \item{\\dots}{Additional parameters to supply to \code{\link[httr:POST]{httr::POST}}.}
13 | }
14 | \value{
15 | A downloaded full wikidata object (item or property) formatted as a 
16 | nested json list.
17 | }
18 | \description{
19 | Utility wrapper for Wikidata API to download item.
20 | Used by \code{get_item} and \code{get_property}.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/wd_rand_query.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/queries.R
 3 | \name{wd_rand_query}
 4 | \alias{wd_rand_query}
 5 | \title{Download random Wikidata items}
 6 | \usage{
 7 | wd_rand_query(ns, limit, ...)
 8 | }
 9 | \arguments{
10 | \item{ns}{string indicating namespace, most commonly "Main" for QID items, "Property" 
11 | for PID properties.}
12 | 
13 | \item{limit}{How many random objects to return.}
14 | 
15 | \item{\\dots}{Additional parameters to supply to \code{\link[httr:POST]{httr::POST}}.}
16 | }
17 | \value{
18 | Downloaded full wikidata objects (items or properties) formatted 
19 | as nested json lists.
20 | }
21 | \description{
22 | Utility wrapper for Wikidata API to download random items. 
23 | Used by \code{get_random_item} and \code{get_random_property}.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/write_wikibase.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/writes_wikibase.R
  3 | \name{write_wikibase}
  4 | \alias{write_wikibase}
  5 | \title{Write statements to any Wikibase instance}
  6 | \usage{
  7 | write_wikibase(
  8 |   items,
  9 |   properties = NULL,
 10 |   values = NULL,
 11 |   qual.properties = NULL,
 12 |   qual.values = NULL,
 13 |   src.properties = NULL,
 14 |   src.values = NULL,
 15 |   remove = FALSE,
 16 |   format = "tibble",
 17 |   format.csv.file = NULL,
 18 |   api.username = NULL,
 19 |   api.token = NULL,
 20 |   api.format = "v1",
 21 |   api.batchname = NULL,
 22 |   api.submit = TRUE,
 23 |   quickstatements.url = NULL,
 24 |   coordinate_pid = NULL
 25 | )
 26 | }
 27 | \arguments{
 28 | \item{items}{a vector of strings indicating the items to which to add statements (as QIDs or labels).
 29 | Note: In contrast to \code{write_wikidata}, this function takes no labels as input, just QIDs.
 30 | New QIDs can be created by using the "CREATE_xyz", where "_xyz" is any unique string.
 31 | Using the same id will add additional statements to those new items}
 32 | 
 33 | \item{properties}{a vector of strings indicating the properties to add as statements (as PIDs or labels).
 34 | Note: In contrast to \code{write_wikidata}, this function takes no labels as input, just PIDs.
 35 | Four special properties can also be used: labels, aliases, descriptions and sitelinks.
 36 | See \href{https://www.wikidata.org/wiki/Help:QuickStatements#Adding_labels,_aliases,_descriptions_and_sitelinks}{this link} for the syntax.}
 37 | 
 38 | \item{values}{a vector of strings indicating the values to add as statements (as QIDs).
 39 | Note: if strings are provided, they will be treated as plain text.}
 40 | 
 41 | \item{qual.properties}{a vector, data frame, or tibble of strings indicating the properties to add as qualifiers to statements (as PIDs).}
 42 | 
 43 | \item{qual.values}{a vector, data frame, or tibble of strings indicating the values to add as statements (as QIDs or strings).
 44 | Note: if strings are provided, they will be treated as plain text.}
 45 | 
 46 | \item{src.properties}{a vector, data frame, or tibble of strings indicating the properties to add as reference sources to statements (as SIDs or labels).
 47 | Note: if labels are provided, and multiple items match, the first matching item will be used
 48 | (see \code{as_sid} function), so use with caution.}
 49 | 
 50 | \item{src.values}{a vector, data frame, or tibble of strings indicating the values to add reference sources to statements (as QIDs or strings).
 51 | Note: if strings are provided, they will be treated as plain text.}
 52 | 
 53 | \item{remove}{a vector of booleans, one for each statement, indicating whether it should
 54 | be removed from the item rather than added (default = FALSE)}
 55 | 
 56 | \item{format}{output format as a string. Options include:
 57 | \describe{
 58 |   \item{tibble}{easiest format for further manipulation in R}
 59 |   \item{csv}{can be copy-pasted to the Wikibase QuickStatements website (or manipulated in a spreadsheet program). In contrast to the write_wikidata function, the delimiter is `tab`, because QuickStatements expects tab-separated data}
 60 |   \item{api}{a url that can be copy-pasted into a web browser, or automatically submitted (see \code{api.submit} parameter)}
 61 |   \item{website}{open a [QuickStatements](https://quickstatements.toolforge.org/) web browser window summarizing the edits to be made to Wikidata}
 62 | }}
 63 | 
 64 | \item{format.csv.file}{path to save the csv file. If none is provided, then printed to console.}
 65 | 
 66 | \item{api.username}{a string indicating your Wikimedia username}
 67 | 
 68 | \item{api.token}{a string indicating your api token (the unique identifier that you can find listed at [your user page](https://quickstatements.toolforge.org/#/user))}
 69 | 
 70 | \item{api.format}{a string indicating which version of the quickstatement format used to submit the api (default = "v1")}
 71 | 
 72 | \item{api.batchname}{a string used to create a named batch (listed at [your batch history page](https://quickstatements.toolforge.org/#/batches)) and to tag the edit summaries}
 73 | 
 74 | \item{api.submit}{boolean indicating whether to submit the instruction directly to wikidata (else returns the URL that can be copy-pasted into a web browser)}
 75 | 
 76 | \item{quickstatements.url}{url to access quickstatements of the corresponding Wikibase instance.}
 77 | 
 78 | \item{coordinate_pid}{PID of a geocoordinate property; values of this property need different formatting}
 79 | }
 80 | \value{
 81 | data formatted to upload to Wikidata (via quickstatements),
 82 | optionally also directly uploaded to Wikidata (see \code{format} parameter).
 83 | }
 84 | \description{
 85 | Upload data to a Wikibase instance, including creating items,
 86 | adding statements to existing items (via the quickstatements format and API).
 87 | }
 88 | \examples{
 89 | # Add a statement to item Q24 saying that its property P2 has the value Q8
 90 | # (these IDs are placeholders; use the IDs of your own Wikibase instance).
 91 | # The instruction will be submitted directly to the Wikibase instance via the API
 92 | # (if you include your Wikibase/Wikimedia username and token)
 93 | 
 94 | \dontrun{
 95 | write_wikibase(
 96 |   items = "Q24",
 97 |   properties = "P2",
 98 |   values = "Q8",
 99 |   format = "api",
100 |   api.username = "myusername",
101 |   api.token = "mytoken",
102 |   api.submit = TRUE,
103 |   quickstatements.url = NULL
104 | )
105 | }
106 | # note:
107 | 
108 | }
109 | 


--------------------------------------------------------------------------------
/man/write_wikidata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/writes.R
 3 | \name{write_wikidata}
 4 | \alias{write_wikidata}
 5 | \title{Write statements to Wikidata}
 6 | \usage{
 7 | write_wikidata(
 8 |   items,
 9 |   properties = NULL,
10 |   values = NULL,
11 |   qual.properties = NULL,
12 |   qual.values = NULL,
13 |   src.properties = NULL,
14 |   src.values = NULL,
15 |   remove = FALSE,
16 |   format = "tibble",
17 |   api.username = NULL,
18 |   api.token = NULL,
19 |   api.format = "v1",
20 |   api.batchname = NULL,
21 |   api.submit = TRUE
22 | )
23 | }
24 | \arguments{
25 | \item{items}{a vector of strings indicating the items to which to add statements (as QIDs or labels).
26 | Note: if labels are provided, and multiple items match, the first matching item will be used
27 | (see \code{as_qid} function), so use with caution.
28 | New QIDs can be created by using the "CREATE_xyz", where "_xyz" is any unique string.
29 | Using the same id will add additional statements to those new items}
30 | 
31 | \item{properties}{a vector of strings indicating the properties to add as statements (as PIDs or labels).
32 | Note: if labels are provided, and multiple items match, the first matching item will be used
33 | (see \code{as_pid} function), so use with caution.
34 | Four special properties can also be used: labels, aliases, descriptions and sitelinks.
35 | See [this link](https://www.wikidata.org/wiki/Help:QuickStatements#Adding_labels,_aliases,_descriptions_and_sitelinks) for the syntax.}
36 | 
37 | \item{values}{a vector of strings indicating the values to add as statements (as QIDs or strings).
38 | Note: if strings are provided, they will be treated as plain text.}
39 | 
40 | \item{qual.properties}{a vector, data frame, or tibble of strings indicating the properties to add as qualifiers to statements (as PIDs or labels).
41 | Note: if labels are provided, and multiple items match, the first matching item will be used
42 | (see \code{as_pid} function), so use with caution.}
43 | 
44 | \item{qual.values}{a vector, data frame, or tibble of strings indicating the values to add as qualifiers to statements (as QIDs or strings).
45 | Note: if strings are provided, they will be treated as plain text.}
46 | 
47 | \item{src.properties}{a vector, data frame, or tibble of strings indicating the properties to add as reference sources to statements (as SIDs or labels).
48 | Note: if labels are provided, and multiple items match, the first matching item will be used
49 | (see \code{as_sid} function), so use with caution.}
50 | 
51 | \item{src.values}{a vector, data frame, or tibble of strings indicating the values to add as reference sources to statements (as QIDs or strings).
52 | Note: if strings are provided, they will be treated as plain text.}
53 | 
54 | \item{remove}{a vector of booleans, one for each statement, indicating whether it should
55 | be removed from the item rather than added (default = FALSE)}
56 | 
57 | \item{format}{output format as a string. Options include:
58 | \describe{
59 |   \item{tibble}{easiest format for further manipulation in R}
60 |   \item{csv}{can be copy-pasted to [the QuickStatements website](https://quickstatements.toolforge.org/) (or manipulated in a spreadsheet program)}
61 |   \item{api}{a url that can be copy-pasted into a web browser, or automatically submitted (see \code{api.submit} parameter)}
62 |   \item{website}{open a [QuickStatements](https://quickstatements.toolforge.org/) web browser window summarizing the edits to be made to Wikidata}
63 | }}
64 | 
65 | \item{api.username}{a string indicating your Wikimedia username}
66 | 
67 | \item{api.token}{a string indicating your api token (the unique identifier that you can find listed at [your user page](https://quickstatements.toolforge.org/#/user))}
68 | 
69 | \item{api.format}{a string indicating which version of the quickstatement format used to submit the api (default = "v1")}
70 | 
71 | \item{api.batchname}{a string used to create a named batch (listed at [your batch history page](https://quickstatements.toolforge.org/#/batches)) and to tag the edit summaries}
72 | 
73 | \item{api.submit}{boolean indicating whether to submit the instruction directly to wikidata (else returns the URL that can be copy-pasted into a web browser)}
74 | }
75 | \value{
76 | data formatted to upload to wikidata (via quickstatements),
77 | optionally also directly uploaded to wikidata (see \code{format} parameter).
78 | }
79 | \description{
80 | Upload data to Wikidata, including creating items,
81 | adding statements to existing items (via the quickstatements format and API).
82 | }
83 | \examples{
84 | # Add a statement to the "Wikidata sandbox" item (Q4115189)
85 | # to say that it is an "instance of" (P31) of Q1 (the universe).
86 | # The instruction will submit directly to wikidata via the API
87 | # (if you include your Wikimedia username and token)
88 | 
89 | \dontrun{write_wikidata(items        = "Wikidata Sandbox",
90 |                properties   = "instance of",
91 |                values       = "Q1",
92 |                format       = "api",
93 |                api.username = "myusername", 
94 |                api.token    = "mytoken"
95 |                )}
96 | #note: 
97 | 
98 | }
99 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(WikidataR)
3 | # Run the WikidataR package's testthat suite (the test files under tests/testthat/).
4 | test_check("WikidataR")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test_geo.R:
--------------------------------------------------------------------------------
 1 | testthat::context("Geographic queries")
 2 | # The tests below are commented out: the Wikidata query service has timeout issues that cause them to fail.
 3 | 
 4 | 
 5 | # testthat::test_that("Simple entity-based geo lookups work", {
 6 | #   field_names <- c("item", "name", "latitutde", "longitude", "entity")
 7 | #   sf_locations <- get_geo_entity("Q62")
 8 | #   testthat::expect_true(is.data.frame(sf_locations))
 9 | #   testthat::expect_true(all(field_names == names(sf_locations)))
10 | #   testthat::expect_true(unique(sf_locations$entity) == "Q62")
11 | # })
12 | 
13 | # testthat::test_that("Language-variant entity-based geo lookups work", {
14 | #   field_names <- c("item", "name", "latitutde", "longitude", "entity")
15 | #   sf_locations <- get_geo_entity("Q62", language = "fr")
16 | #   testthat::expect_true(is.data.frame(sf_locations))
17 | #   testthat::expect_true(all(field_names == names(sf_locations)))
18 | #   testthat::expect_true(unique(sf_locations$entity) == "Q62")
19 | # })
20 | 
21 | # testthat::test_that("Radius restricted entity-based geo lookups work", {
22 | #   field_names <- c("item", "name", "latitutde", "longitude", "entity")
23 | #   sf_locations <- get_geo_entity("Q62", radius = 1)
24 | #   testthat::expect_true(is.data.frame(sf_locations))
25 | #   testthat::expect_true(all(field_names == names(sf_locations)))
26 | #   testthat::expect_true(unique(sf_locations$entity) == "Q62")
27 | # })
28 | 
29 | # testthat::test_that("multi-entity geo lookups work", {
30 | #   field_names <- c("item", "name", "latitutde", "longitude", "entity")
31 | #   sf_locations <- get_geo_entity(c("Q62", "Q64"), radius = 1)
32 | #   testthat::expect_true(is.data.frame(sf_locations))
33 | #   testthat::expect_true(all(field_names == names(sf_locations)))
34 | #   testthat::expect_equal(length(unique(sf_locations$entity)), 2)
35 | # })
36 | 
37 | # testthat::test_that("Simple bounding lookups work", {
38 | #   field_names <- c("item", "name", "latitutde", "longitude")
39 | #   bruges_box <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest")
40 | #   testthat::expect_true(is.data.frame(bruges_box))
41 | #   testthat::expect_true(all(field_names == names(bruges_box)))
42 | # })
43 | 
44 | # testthat::test_that("Language-variant bounding lookups work", {
45 | #   field_names <- c("item", "name", "latitutde", "longitude")
46 | #   bruges_box <- get_geo_box("Q12988", "NorthEast", "Q184287", "SouthWest",
47 | #                             language = "fr")
48 | #   testthat::expect_true(is.data.frame(bruges_box))
49 | #   testthat::expect_true(all(field_names == names(bruges_box)))
50 | # })


--------------------------------------------------------------------------------
/tests/testthat/test_gets.R:
--------------------------------------------------------------------------------
 1 | context("Direct Wikidata get functions")
 2 | # Each test passes when the call completes without raising an error.
 3 | test_that("A specific item can be retrieved with an entire item code", {
 4 |   expect_error(get_item("Q100"), NA)
 5 | })
 6 | # Same item, but the identifier is passed without its "Q" prefix.
 7 | test_that("A specific item can be retrieved with a partial item code", {
 8 |   expect_error(get_item("100"), NA)
 9 | })
10 | # Property identifier passed with the "Property:" namespace prefix.
11 | test_that("A specific property can be retrieved with an entire prop code + namespace", {
12 |   expect_error(get_property("Property:P10"), NA)
13 | })
14 | # Property identifier passed as a full code but without the namespace prefix.
15 | test_that("A specific property can be retrieved with an entire prop code", {
16 |   expect_error(get_property("P10"), NA)
17 | })
18 | 
19 | # Property identifier passed without its "P" prefix.
20 | test_that("A specific property can be retrieved with a partial prop code", {
21 |   expect_error(get_property("10"), NA)
22 | })
23 | 
24 | test_that("A randomly-selected item can be retrieved",{
25 |   expect_error(get_random_item(), NA)
26 | })
27 | 
28 | test_that("A randomly-selected property can be retrieved",{
29 |   expect_error(get_random_property(), NA)
30 | })


--------------------------------------------------------------------------------
/tests/testthat/test_search.R:
--------------------------------------------------------------------------------
 1 | context("Search functions")
 2 | # Each test passes when the search call completes without raising an error.
 3 | test_that("English-language search works",{
 4 |   expect_error(find_item("Wonder Girls", "en"), NA)
 5 | })
 6 | # Same search term, with "es" passed as the second argument instead of "en".
 7 | test_that("Non-English-language search works",{
 8 |   expect_error(find_item("Wonder Girls", "es"), NA)
 9 | })
10 | 
11 | test_that("Property search works",{
12 |   expect_error(find_property("Music", "en"), NA)
13 | })


--------------------------------------------------------------------------------