├── .Rbuildignore
├── .gitignore
├── .lintr
├── CONDUCT.md
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── R
├── WikidataQueryServiceR-package.R
├── deprecated.R
├── http.R
├── query.R
├── utils.R
└── zzz.R
├── README.Rmd
├── README.md
├── WDQS.Rproj
├── cran-comments.md
├── man
├── WDQSR-deprecated.Rd
├── WikidataQueryServiceR-package.Rd
├── get_example.Rd
└── query_wikidata.Rd
└── tests
├── testthat.R
└── testthat
└── test-query.R
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^man-roxygen$
4 | ^data-raw$
5 | ^cran-comments\.md$
6 | ^README.Rmd
7 | ^CONDUCT\.md$
8 | ^README_cache$
9 | ^\.lintr$
10 | ^CRAN-RELEASE$
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 |
5 | # Session Data files
6 | .RData
7 |
8 | # Example code in package build process
9 | *-Ex.R
10 |
11 | # Output files from R CMD build
12 | /*.tar.gz
13 |
14 | # Output files from R CMD check
15 | /*.Rcheck/
16 |
17 | # RStudio files
18 | .Rproj.user/
19 |
20 | # produced vignettes
21 | vignettes/*.html
22 | vignettes/*.pdf
23 |
24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
25 | .httr-oauth
26 |
27 | # knitr and R markdown default cache directories
28 | /*_cache/
29 | /cache/
30 |
31 | # Temporary files created by R markdown
32 | *.utf8.md
33 | *.knit.md
34 | .Rproj.user
35 |
--------------------------------------------------------------------------------
/.lintr:
--------------------------------------------------------------------------------
1 | linters: with_defaults(line_length_linter(120), object_usage_linter = NULL, closed_curly_linter = NULL, open_curly_linter = NULL, spaces_left_parentheses_linter = NULL, camel_case_linter = NULL)
2 | exclusions: list("R/zzz.R", "R/WikidataQueryServiceR-package.R")
3 | exclude: "# Exclude Linting"
4 | exclude_start: "# Begin Exclude Linting"
5 | exclude_end: "# End Exclude Linting"
6 |
--------------------------------------------------------------------------------
/CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | Examples of behavior that contributes to creating a positive environment include:
10 |
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 |
17 | Examples of unacceptable behavior by participants include:
18 |
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 |
25 | ## Our Responsibilities
26 |
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 |
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | ## Scope
32 |
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 |
35 | ## Enforcement
36 |
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at mikhail@wikimedia.org. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 |
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 |
41 | ## Attribution
42 |
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
44 |
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/
47 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: WikidataQueryServiceR
2 | Title: API Client Library for 'Wikidata Query Service'
3 | Version: 1.0.0
4 | Date: 2020-07-27
5 | Authors@R: c(
6 | person("Mikhail", "Popov", email = "mikhail@wikimedia.org",
7 | role = c("aut", "cre"), comment = "@bearloga on Twitter"),
8 | person("Wikimedia Foundation", role = "cph")
9 | )
10 | Description: An API client for the 'Wikidata Query Service'
11 |     <https://query.wikidata.org/>.
12 | Depends:
13 | R (>= 3.1.2)
14 | Imports:
15 | httr (>= 1.2.1),
16 | dplyr (>= 1.0.0),
17 | jsonlite (>= 1.2),
18 | WikipediR (>= 1.5.0),
19 | ratelimitr (>= 0.4.1),
20 | purrr (>= 0.3.4),
21 | readr (>= 1.3.1),
22 | rex (>= 1.2.0)
23 | Suggests:
24 | testthat (>= 2.3.0),
25 | lintr (>= 2.0.1)
26 | URL: https://github.com/wikimedia/WikidataQueryServiceR
27 | BugReports: https://github.com/wikimedia/WikidataQueryServiceR/issues
28 | License: MIT + file LICENSE
29 | Encoding: UTF-8
30 | LazyData: true
31 | Roxygen: list(markdown = TRUE)
32 | RoxygenNote: 7.1.1
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2016
2 | COPYRIGHT HOLDER: Wikimedia Foundation
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(get_example)
4 | export(query_wikidata)
5 | export(scrape_example)
6 | import(ratelimitr)
7 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # WikidataQueryServiceR 1.0.0
2 |
3 | * Fixed example retrieval (was broken due to translation wikitext markers)
4 | * Rate-limiting ([#11](https://github.com/bearloga/WikidataQueryServiceR/issues/11))
5 | * Using tidyverse family of packages (tibble, dplyr, purrr, readr)
6 | * Various improvements and modernizations
7 |
8 | # WikidataQueryServiceR 0.1.1
9 |
10 | ## Changes
11 |
12 | * Updated documentation to use Markdown thanks to [roxygen2 6.0.0](https://blog.rstudio.org/2017/02/01/roxygen2-6-0-0/)
13 | * Added more links for learning SPARQL in context of Wikidata
14 | (see `help("WDQS", package = "WikidataQueryServiceR")`)
15 |
16 | ## Bug fixes
17 |
18 | * Fixed a bug with JSON-formatted results ([#3](https://github.com/bearloga/WikidataQueryServiceR/issues/3))
19 |
20 | # WikidataQueryServiceR 0.1.0
21 |
22 | * Initial CRAN release:
23 | - Support for multiple SPARQL queries
24 | - Support for scraping example SPARQL queries via Suggested packages
25 |
--------------------------------------------------------------------------------
/R/WikidataQueryServiceR-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | #' @aliases WDQS
3 | #' @details [Wikidata Query Service](https://www.mediawiki.org/wiki/Wikidata_query_service)
4 | #' is maintained by the [Wikimedia Foundation](https://wikimediafoundation.org/).
5 | #' @section Resources:
6 | #' - [A beginner-friendly course for SPARQL](https://www.wikidata.org/wiki/Wikidata:A_beginner-friendly_course_for_SPARQL)
7 | #' - Building a SPARQL query: [Museums on Instagram](https://www.wikidata.org/wiki/Help:SPARQL/Building_a_query/Museums_on_Instagram)
8 | #' - [SPARQL Query Examples](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) for WDQS
9 | #' - [Using SPARQL to access Linked Open Data](http://programminghistorian.org/lessons/graph-databases-and-SPARQL)
10 | #' by Matthew Lincoln
11 | #' - Interesting or illustrative [SPARQL queries](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries)
12 | #' for Wikidata
13 | #' - Wikidata [2016 SPARQL Workshop](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/2016_SPARQL_Workshop)
14 | #' - [Wikidata SPARQL Query video tutorial](https://www.youtube.com/watch?v=1jHoUkj_mKw)
15 | #' by Navino Evans
16 | #' - _[Learning SPARQL](http://www.learningsparql.com/)_ by Bob DuCharme
17 | #' - [WDQS User Manual](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual)
18 | #' - [Quick intro to WDQS & SPARQL](https://github.com/bearloga/wmf/blob/master/presentations/talks/Cascadia\%20R\%20Conference\%202017/presentation.md#wikidata-query-service-wdqs)
19 | #' from [my Cascadia R Conference 2017 talk](https://github.com/bearloga/wmf/tree/master/presentations/talks/Cascadia\%20R\%20Conference\%202017)
20 | "_PACKAGE"
21 |
22 | # The following block is used by usethis to automatically manage
23 | # roxygen namespace tags. Modify with care!
24 | ## usethis namespace: start
25 | ## usethis namespace: end
26 | NULL
27 |
--------------------------------------------------------------------------------
/R/deprecated.R:
--------------------------------------------------------------------------------
1 | #' @title Deprecated functions
2 | #' @description Why did I have to go and make things so deprecated?
3 | #' @name WDQSR-deprecated
4 | NULL
5 |
6 | #' @describeIn WDQSR-deprecated superseded by [get_example], which retrieves
7 | #'   the examples page through [WikipediR::page_content]
8 | #' @inheritParams get_example
9 | #' @param ... not used; accepted only so that existing calls keep working
10 | #' @export
11 | scrape_example <- function(example_name, ...) {
12 |   # Emit the standard deprecation warning, then delegate to the replacement.
13 |   .Deprecated(new = "get_example")
14 |   get_example(example_name)
15 | }
16 |
--------------------------------------------------------------------------------
/R/http.R:
--------------------------------------------------------------------------------
1 | # Creates a rate-limited function for sending SPARQL queries to WDQS.
2 | #
3 | # The returned function takes a single SPARQL query string plus `...`
4 | # (forwarded to httr::POST(), e.g. httr::add_headers()) and returns the httr
5 | # response object. It is wrapped with ratelimitr so that it is invoked at most
6 | # 30 times per 60 seconds. Every call to wdqs_requester() builds a new limiter,
7 | # so callers that send several queries should create one requester and reuse it.
8 | #' @import ratelimitr
9 | wdqs_requester <- function() {
10 |   req <- function(query, ...) {
11 |     httr::POST(
12 |       url = "https://query.wikidata.org/sparql",
13 |       # Send the query as a form-encoded body (`query=...`) rather than in the
14 |       # URL so that long queries do not run into URL length limits.
15 |       body = list(query = query),
16 |       encode = "form",
17 |       httr::user_agent("https://github.com/bearloga/WikidataQueryServiceR"),
18 |       ...
19 |     )
20 |   }
21 |   limit_rate(req, rate(n = 30, period = 60))
22 | }
23 |
--------------------------------------------------------------------------------
/R/query.R:
--------------------------------------------------------------------------------
1 | #' @title Send one or more SPARQL queries to WDQS
2 | #' @description Makes a POST request to Wikidata Query Service SPARQL endpoint.
3 | #' @param sparql_query SPARQL query (can be a vector of queries)
4 | #' @param format "simple" requests CSV and parses it with [readr::read_csv()],
5 | #'   while "smart" fetches JSON-formatted data and returns a data frame with
6 | #'   datetime columns converted to `POSIXct`
7 | #' @return A tibble data frame if a single query was provided; otherwise a list
8 | #'   of tibbles (one per query) that carries the names of `sparql_query`, if any
9 | #' @examples
10 | #' sparql_query <- "SELECT
11 | #'   ?softwareVersion ?publicationDate
12 | #' WHERE {
13 | #'   BIND(wd:Q206904 AS ?R)
14 | #'   ?R p:P348 [
15 | #'     ps:P348 ?softwareVersion;
16 | #'     pq:P577 ?publicationDate
17 | #'   ] .
18 | #' }"
19 | #' query_wikidata(sparql_query)
20 | #'
21 | #' \dontrun{
22 | #' query_wikidata(sparql_query, format = "smart")
23 | #' }
24 | #' @section Query limits:
25 | #' There is a hard query deadline configured which is set to 60 seconds. There
26 | #' are also following limits:
27 | #' - One client (user agent + IP) is allowed 60 seconds of processing time each
28 | #'   60 seconds
29 | #' - One client is allowed 30 error queries per minute
30 | #'
31 | #' See [query limits section](https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits)
32 | #' in the WDQS user manual for more information.
33 | #' @seealso [get_example]
34 | #' @export
35 | query_wikidata <- function(sparql_query, format = c("simple", "smart")) {
36 |   format <- format[1]
37 |   if (!format %in% c("simple", "smart")) {
38 |     stop("`format` must be either \"simple\" or \"smart\"")
39 |   }
40 |   # Build the rate-limited requester once so that all queries in this call
41 |   # share one limiter; a requester created per query is only ever called once
42 |   # and therefore never throttles anything.
43 |   rate_limited_query <- wdqs_requester()
44 |   output <- lapply(sparql_query, function(sparql_query) {
45 |     if (format == "simple") {
46 |       response <- rate_limited_query(sparql_query, httr::add_headers(Accept = "text/csv"))
47 |       httr::stop_for_status(response)
48 |       if (httr::http_type(response) != "text/csv") {
49 |         stop("returned response is not formatted as a CSV")
50 |       }
51 |       content <- httr::content(response, as = "text", encoding = "UTF-8")
52 |       return(readr::read_csv(content))
53 |     }
54 |     response <- rate_limited_query(sparql_query, httr::add_headers(Accept = "application/sparql-results+json"))
55 |     httr::stop_for_status(response)
56 |     # Fail with a clear message instead of an "object 'temp' not found" error
57 |     # further down when the service answers with something other than JSON.
58 |     if (httr::http_type(response) != "application/sparql-results+json") {
59 |       stop("returned response is not formatted as a JSON")
60 |     }
61 |     content <- httr::content(response, as = "text", encoding = "UTF-8")
62 |     temp <- jsonlite::fromJSON(content, simplifyVector = FALSE)
63 |     bindings <- temp$results$bindings
64 |     if (length(bindings) == 0) {
65 |       # No rows: return an empty tibble that still has the requested columns.
66 |       return(dplyr::as_tibble(
67 |         matrix(
68 |           character(),
69 |           nrow = 0, ncol = length(temp$head$vars),
70 |           dimnames = list(c(), unlist(temp$head$vars))
71 |         )
72 |       ))
73 |     }
74 |     data_frame <- purrr::map_dfr(bindings, function(binding) {
75 |       purrr::map_chr(binding, ~ .x$value)
76 |     })
77 |     # Identify datetime columns by name across all rows. Looking only at the
78 |     # first row and selecting by position fails when that row lacks a variable
79 |     # that later rows have (e.g. with OPTIONAL patterns).
80 |     xsd_datetime <- "http://www.w3.org/2001/XMLSchema#dateTime"
81 |     datetime_columns <- unique(unlist(lapply(bindings, function(binding) {
82 |       is_datetime <- vapply(
83 |         binding,
84 |         function(cell) identical(cell[["datatype"]], xsd_datetime),
85 |         logical(1)
86 |       )
87 |       names(binding)[is_datetime]
88 |     })))
89 |     for (column in datetime_columns) {
90 |       data_frame[[column]] <- as.POSIXct(
91 |         data_frame[[column]],
92 |         format = "%Y-%m-%dT%H:%M:%SZ", tz = "GMT"
93 |       )
94 |     }
95 |     data_frame
96 |   })
97 |   if (length(output) == 1) {
98 |     return(output[[1]])
99 |   }
100 |   # Named queries yield a named list; unnamed queries yield an unnamed one.
101 |   names(output) <- names(sparql_query)
102 |   output
103 | }
104 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 | #' @title Get an example SPARQL query from Wikidata
2 | #' @description Gets the specified example(s) from
3 | #'   [SPARQL query service examples page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
4 | #'   using [Wikidata's MediaWiki API](https://www.wikidata.org/w/api.php).
5 | #' @details If you are planning on extracting multiple examples, please provide
6 | #'   all the names as a single vector for efficiency.
7 | #' @param example_name the names of the examples as they appear on
8 | #'   [this page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
9 | #' @return A list of SPARQL queries (each a single character string), named
10 | #'   after `example_name`. An error is raised if an example or its query
11 | #'   template cannot be found on the page.
12 | #' @examples
13 | #' \dontrun{
14 | #' sparql_query <- get_example(c("Cats", "Horses"))
15 | #' query_wikidata(sparql_query)
16 | #' # returns a named list with two data frames
17 | #' # one called "Cats" and one called "Horses"
18 | #'
19 | #' sparql_query <- get_example("Largest cities with female mayor")
20 | #' cat(sparql_query[[1]])
21 | #' query_wikidata(sparql_query)
22 | #' }
23 | #' @seealso [query_wikidata]
24 | #' @export
25 | get_example <- function(example_name) {
26 |   content <- WikipediR::page_content(
27 |     domain = "www.wikidata.org",
28 |     page_name = "Wikidata:SPARQL query service/queries/examples",
29 |     as_wikitext = TRUE
30 |   )
31 |   wikitext <- strsplit(content$parse$wikitext$`*`, "\n")[[1]]
32 |   wikitext <- wikitext[wikitext != ""]
33 |   # These scans do not depend on the example name, so do them only once.
34 |   template_lines <- which(grepl("{{SPARQL", wikitext, fixed = TRUE))
35 |   closing_lines <- which(grepl("}}", wikitext, fixed = TRUE))
36 |   examples <- purrr::map(example_name, function(example_name) {
37 |     # NOTE(review): the optional groups `()?` below are empty and match nothing;
38 |     # presumably they were meant to allow translation markers around the
39 |     # heading text -- confirm against the live examples page.
40 |     regex <- paste0(
41 |       "^={2,}\\s?()?\\s?",
42 |       rex::escape(example_name),
43 |       "\\s?()?\\s?={2,}$"
44 |     )
45 |     heading_line <- which(grepl(regex, wikitext, fixed = FALSE))
46 |     if (length(heading_line) == 0) {
47 |       stop("could not find an example named \"", example_name, "\"", call. = FALSE)
48 |     }
49 |     # If the same heading occurs more than once, use its first occurrence.
50 |     heading_line <- heading_line[1]
51 |     # The query is the first {{SPARQL template after the heading; it runs up
52 |     # to (but not including) the first later line that contains "}}".
53 |     start_line <- template_lines[template_lines > heading_line][1]
54 |     end_line <- closing_lines[closing_lines > start_line][1]
55 |     if (is.na(start_line) || is.na(end_line)) {
56 |       stop("could not find a SPARQL query template for example \"", example_name, "\"", call. = FALSE)
57 |     }
58 |     query <- paste0(wikitext[start_line:(end_line - 1)], collapse = "\n")
59 |     # Strip the template opening so that only the SPARQL text remains.
60 |     sub("^\\s*\\{\\{SPARQL2?\\n?\\|query\\=", "", query)
61 |   })
62 |   names(examples) <- example_name
63 |   examples
64 | }
65 |
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
1 | # Package attach hook: shows a startup message pointing users to ?WDQS.
2 | .onAttach <- function(libname, pkgname) {
3 |   startup_note <- "See ?WDQS for resources on Wikidata Query Service and SPARQL"
4 |   packageStartupMessage(startup_note)
5 | }
6 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "WikidataQueryServiceR"
3 | output:
4 | github_document:
5 | toc: true
6 | toc_depth: 3
7 | ---
8 |
9 | ```{r setup, include=FALSE}
10 | knitr::opts_chunk$set(echo = TRUE)
11 | # install.packages("printr", type = "source", repos = c("https://yihui.name/xran", CRAN = "https://cran.rstudio.com"))
12 | library(printr)
13 | ```
14 |
15 | [](http://www.repostatus.org/#active)
16 | [](https://cran.r-project.org/package=WikidataQueryServiceR)
17 | [](https://cran.r-project.org/package=WikidataQueryServiceR)
18 | [](https://opensource.org/licenses/MIT)
19 |
20 | This is an R wrapper for the [Wikidata Query Service (WDQS)](https://www.mediawiki.org/wiki/Wikidata_query_service) which provides a way for tools to query [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) via [SPARQL](https://en.wikipedia.org/wiki/SPARQL) (see the beta at https://query.wikidata.org/). It is written in and for R, and was inspired by Os Keyes' [WikipediR](https://github.com/Ironholds/WikipediR) and [WikidataR](https://github.com/Ironholds/WikidataR) packages.
21 |
22 | __Author:__ Mikhail Popov (Wikimedia Foundation)
23 | __License:__ [MIT](http://opensource.org/licenses/MIT)
24 | __Status:__ Active
25 |
26 | ## Installation
27 |
28 | ```R
29 | install.packages("WikidataQueryServiceR")
30 | ```
31 |
32 | To install the development version:
33 |
34 | ```R
35 | # install.packages("remotes")
36 | remotes::install_github("wikimedia/WikidataQueryServiceR@main")
37 | ```
38 |
39 | ## Usage
40 |
41 | ```{r load}
42 | library(WikidataQueryServiceR)
43 | ```
44 |
45 | You submit SPARQL queries using the `query_wikidata()` function.
46 |
47 | ### Example: fetching genres of a particular movie
48 |
49 | In this example, we find an "instance of" ([P31](https://www.wikidata.org/wiki/Property:P31)) "film" ([Q11424](https://www.wikidata.org/wiki/Q11424)) that has the label "The Cabin in the Woods" ([Q45394](https://www.wikidata.org/wiki/Q45394)), get its genres ([P136](https://www.wikidata.org/wiki/Property:P136)), and then use [WDQS label service](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual#Label_service) to return the genre labels.
50 |
51 | ```{r wdqs_example, cache=TRUE}
52 | query_wikidata('SELECT DISTINCT
53 | ?genre ?genreLabel
54 | WHERE {
55 | ?film wdt:P31 wd:Q11424.
56 | ?film rdfs:label "The Cabin in the Woods"@en.
57 | ?film wdt:P136 ?genre.
58 | SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
59 | }')
60 | ```
61 |
62 | For more example SPARQL queries, see [this page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) on [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page).
63 |
64 | `query_wikidata()` can accept multiple queries, returning a (potentially named) list of data frames. If the vector of SPARQL queries is named, the results will inherit those names.
65 |
66 | ### Fetching queries from Wikidata's examples page
67 |
68 | The package provides a [WikipediR](https://github.com/Ironholds/WikipediR/)-based function for getting SPARQL queries from the [WDQS examples page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples).
69 |
70 | ```{r get_examples, cache=TRUE}
71 | sparql_query <- get_example(c("Cats", "How many states this US state borders"))
72 | ```
73 | ```{r, eval=FALSE}
74 | sparql_query[["How many states this US state borders"]]
75 | ```
76 | ```{r, echo=FALSE, results='asis'}
77 | cat("```SPARQL\n", sparql_query[["How many states this US state borders"]], "\n```")
78 | ```
79 |
80 | Now we can run all extracted SPARQL queries:
81 |
82 | ```{r run_examples, cache=TRUE, dependson='get_examples'}
83 | results <- query_wikidata(sparql_query)
84 | lapply(results, dim)
85 | head(results$`How many states this US state borders`)
86 | ```
87 |
88 | ## Links for learning SPARQL
89 |
90 | - [A beginner-friendly course for SPARQL](https://www.wikidata.org/wiki/Wikidata:A_beginner-friendly_course_for_SPARQL)
91 | - Building a SPARQL query: [Museums on Instagram](https://www.wikidata.org/wiki/Help:SPARQL/Building_a_query/Museums_on_Instagram)
92 | - [SPARQL Query Examples](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) for WDQS
93 | - [Using SPARQL to access Linked Open Data](http://programminghistorian.org/lessons/graph-databases-and-SPARQL) by Matthew Lincoln
94 | - Interesting or illustrative [SPARQL queries](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries) for Wikidata
95 | - Wikidata [2016 SPARQL Workshop](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/2016_SPARQL_Workshop)
96 | - [Wikidata SPARQL Query video tutorial](https://www.youtube.com/watch?v=1jHoUkj_mKw) by Navino Evans
97 | - _[Learning SPARQL](http://www.learningsparql.com/)_ by Bob DuCharme
98 | - [WDQS User Manual](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual)
99 |
100 | ## Additional Information
101 |
102 | Please note that this project is released with a [Contributor Code of Conduct](https://github.com/bearloga/WikidataQueryServiceR/blob/master/CONDUCT.md). By participating in this project you agree to abide by its terms.
103 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | WikidataQueryServiceR
2 | ================
3 |
4 | - [Installation](#installation)
5 | - [Usage](#usage)
6 | - [Example: fetching genres of a particular
7 | movie](#example-fetching-genres-of-a-particular-movie)
8 | - [Fetching queries from Wikidata’s examples
9 | page](#fetching-queries-from-wikidatas-examples-page)
10 | - [Links for learning SPARQL](#links-for-learning-sparql)
11 | - [Additional Information](#additional-information)
12 |
13 | [](http://www.repostatus.org/#inactive)
16 | [](https://cran.r-project.org/package=WikidataQueryServiceR)
17 | [](https://cran.r-project.org/package=WikidataQueryServiceR)
19 | [](https://opensource.org/licenses/MIT)
21 |
22 | This is an R wrapper for the [Wikidata Query Service
23 | (WDQS)](https://www.mediawiki.org/wiki/Wikidata_query_service) which
24 | provides a way for tools to query
25 | [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) via
26 | [SPARQL](https://en.wikipedia.org/wiki/SPARQL) (see the beta at
27 | <https://query.wikidata.org/>). It is written in and for R, and was
28 | inspired by Os Keyes’
29 | [WikipediR](https://github.com/Ironholds/WikipediR) and
30 | [WikidataR](https://github.com/Ironholds/WikidataR) packages.
31 |
32 | **Author:** Mikhail Popov (Wikimedia Foundation)
**License:**
33 | [MIT](http://opensource.org/licenses/MIT)
**Status:** Active
34 |
35 | ## Installation
36 |
37 | ``` r
38 | install.packages("WikidataQueryServiceR")
39 | ```
40 |
41 | To install the development version:
42 |
43 | ``` r
44 | # install.packages("remotes")
45 | remotes::install_github("wikimedia/WikidataQueryServiceR@main")
46 | ```
47 |
48 | ## Usage
49 |
50 | ``` r
51 | library(WikidataQueryServiceR)
52 | ```
53 |
54 | ## See ?WDQS for resources on Wikidata Query Service and SPARQL
55 |
56 | You submit SPARQL queries using the `query_wikidata()` function.
57 |
58 | ### Example: fetching genres of a particular movie
59 |
60 | In this example, we find an “instance of”
61 | ([P31](https://www.wikidata.org/wiki/Property:P31)) “film”
62 | ([Q11424](https://www.wikidata.org/wiki/Q11424)) that has the label “The
63 | Cabin in the Woods” ([Q45394](https://www.wikidata.org/wiki/Q45394)),
64 | get its genres ([P136](https://www.wikidata.org/wiki/Property:P136)),
65 | and then use [WDQS label
66 | service](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual#Label_service)
67 | to return the genre labels.
68 |
69 | ``` r
70 | query_wikidata('SELECT DISTINCT
71 | ?genre ?genreLabel
72 | WHERE {
73 | ?film wdt:P31 wd:Q11424.
74 | ?film rdfs:label "The Cabin in the Woods"@en.
75 | ?film wdt:P136 ?genre.
76 | SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
77 | }')
78 | ```
79 |
80 | | genre | genreLabel |
81 | | :---------------------------------------- | :------------------- |
82 | | | zombie film |
83 | | | science fiction film |
84 | | | comedy-drama |
85 | | | monster film |
86 | | | slasher film |
87 | | | comedy horror |
88 |
89 | For more example SPARQL queries, see [this
90 | page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
91 | on [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page).
92 |
93 | `query_wikidata()` can accept multiple queries, returning a (potentially
94 | named) list of data frames. If the vector of SPARQL queries is named,
95 | the results will inherit those names.
96 |
97 | ### Fetching queries from Wikidata’s examples page
98 |
99 | The package provides a
100 | [WikipediR](https://github.com/Ironholds/WikipediR/)-based function for
101 | getting SPARQL queries from the [WDQS examples
102 | page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples).
103 |
104 | ``` r
105 | sparql_query <- get_example(c("Cats", "How many states this US state borders"))
106 | ```
107 |
108 | ``` r
109 | sparql_query[["How many states this US state borders"]]
110 | ```
111 |
112 | ``` sparql
113 |
114 | SELECT ?state ?stateLabel ?borders
115 | WHERE
116 | {
117 | {
118 | SELECT ?state (COUNT(?otherState) as ?borders)
119 | WHERE
120 | {
121 | ?state wdt:P31 wd:Q35657 .
122 | ?otherState wdt:P47 ?state .
123 | ?otherState wdt:P31 wd:Q35657 .
124 | }
125 | GROUP BY ?state
126 | }
127 | SERVICE wikibase:label {
128 | bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
129 | }
130 | }
131 | ORDER BY DESC(?borders)
132 | ```
133 |
134 | Now we can run all extracted SPARQL queries:
135 |
136 | ``` r
137 | results <- query_wikidata(sparql_query)
138 | lapply(results, dim)
139 | ```
140 |
141 | ## $Cats
142 | ## [1] 147 2
143 | ##
144 | ## $`How many states this US state borders`
145 | ## [1] 48 3
146 |
147 | ``` r
148 | head(results$`How many states this US state borders`)
149 | ```
150 |
151 | | state | stateLabel | borders |
152 | | :------------------------------------- | :----------- | ------: |
153 | | | Tennessee | 8 |
154 | | | Missouri | 8 |
155 | | | Colorado | 7 |
156 | | | Kentucky | 7 |
157 | | | Pennsylvania | 6 |
158 | | | South Dakota | 6 |
159 |
160 | ## Links for learning SPARQL
161 |
162 | - [A beginner-friendly course for
163 | SPARQL](https://www.wikidata.org/wiki/Wikidata:A_beginner-friendly_course_for_SPARQL)
164 | - Building a SPARQL query: [Museums on
165 | Instagram](https://www.wikidata.org/wiki/Help:SPARQL/Building_a_query/Museums_on_Instagram)
166 | - [SPARQL Query
167 | Examples](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
168 | for WDQS
169 | - [Using SPARQL to access Linked Open
170 | Data](http://programminghistorian.org/lessons/graph-databases-and-SPARQL)
171 | by Matthew Lincoln
172 | - Interesting or illustrative [SPARQL
173 | queries](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries)
174 | for Wikidata
175 | - Wikidata [2016 SPARQL
176 | Workshop](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/2016_SPARQL_Workshop)
177 | - [Wikidata SPARQL Query video
178 | tutorial](https://www.youtube.com/watch?v=1jHoUkj_mKw) by Navino
179 | Evans
180 | - *[Learning SPARQL](http://www.learningsparql.com/)* by Bob DuCharme
181 | - [WDQS User
182 | Manual](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual)
183 |
184 | ## Additional Information
185 |
186 | Please note that this project is released with a [Contributor Code of
187 | Conduct](https://github.com/bearloga/WikidataQueryServiceR/blob/master/CONDUCT.md).
188 | By participating in this project you agree to abide by its terms.
189 |
--------------------------------------------------------------------------------
/WDQS.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: knitr
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageCheckArgs: --as-cran
22 | PackageRoxygenize: rd,collate,namespace,vignette
23 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | # WikidataQueryServiceR 1.0.0
2 |
3 | ## Test environments
4 | * local macOS install (R 4.0.1, R-devel)
5 | * win-builder (R-release, R-oldrelease)
6 |
7 | ## R CMD check results
8 | There were no ERRORs or WARNINGs or NOTEs.
9 |
--------------------------------------------------------------------------------
/man/WDQSR-deprecated.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/deprecated.R
3 | \name{WDQSR-deprecated}
4 | \alias{WDQSR-deprecated}
5 | \alias{scrape_example}
6 | \title{Deprecated functions}
7 | \usage{
8 | scrape_example(example_name, ...)
9 | }
10 | \arguments{
11 | \item{example_name}{the names of the examples as they appear on
12 | \href{https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples}{this page}}
13 |
14 | \item{...}{ignored (kept for backwards-compatibility)}
15 | }
16 | \description{
17 | Why did I have to go and make things so deprecated?
18 | }
19 | \section{Functions}{
20 | \itemize{
21 | \item \code{scrape_example}: use \link{get_example} instead which employs \link[WikipediR:page_content]{WikipediR::page_content}
22 | }}
23 |
24 |
--------------------------------------------------------------------------------
/man/WikidataQueryServiceR-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/WikidataQueryServiceR-package.R
3 | \docType{package}
4 | \name{WikidataQueryServiceR-package}
5 | \alias{WikidataQueryServiceR-package}
6 | \alias{_PACKAGE}
7 | \alias{WDQS}
8 | \title{WikidataQueryServiceR: API Client Library for 'Wikidata Query Service'}
9 | \description{
10 | An API client for the 'Wikidata Query Service'.
11 |
12 | }
13 | \details{
14 | \href{https://www.mediawiki.org/wiki/Wikidata_query_service}{Wikidata Query Service}
15 | is maintained by the \href{https://wikimediafoundation.org/}{Wikimedia Foundation}.
16 | }
17 | \section{Resources}{
18 |
19 | \itemize{
20 | \item \href{https://www.wikidata.org/wiki/Wikidata:A_beginner-friendly_course_for_SPARQL}{A beginner-friendly course for SPARQL}
21 | \item Building a SPARQL query: \href{https://www.wikidata.org/wiki/Help:SPARQL/Building_a_query/Museums_on_Instagram}{Museums on Instagram}
22 | \item \href{https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples}{SPARQL Query Examples} for WDQS
23 | \item \href{http://programminghistorian.org/lessons/graph-databases-and-SPARQL}{Using SPARQL to access Linked Open Data}
24 | by Matthew Lincoln
25 | \item Interesting or illustrative \href{https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries}{SPARQL queries}
26 | for Wikidata
27 | \item Wikidata \href{https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/2016_SPARQL_Workshop}{2016 SPARQL Workshop}
28 | \item \href{https://www.youtube.com/watch?v=1jHoUkj_mKw}{Wikidata SPARQL Query video tutorial}
29 | by Navino Evans
30 | \item \emph{\href{http://www.learningsparql.com/}{Learning SPARQL}} by Bob DuCharme
31 | \item \href{https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual}{WDQS User Manual}
32 | \item \href{https://github.com/bearloga/wmf/blob/master/presentations/talks/Cascadia\%20R\%20Conference\%202017/presentation.md#wikidata-query-service-wdqs}{Quick intro to WDQS & SPARQL}
33 | from \href{https://github.com/bearloga/wmf/tree/master/presentations/talks/Cascadia\%20R\%20Conference\%202017}{my Cascadia R Conference 2017 talk}
34 | }
35 | }
36 |
37 | \seealso{
38 | Useful links:
39 | \itemize{
40 | \item \url{https://github.com/wikimedia/WikidataQueryServiceR}
41 | \item Report bugs at \url{https://github.com/wikimedia/WikidataQueryServiceR/issues}
42 | }
43 |
44 | }
45 | \author{
46 | \strong{Maintainer}: Mikhail Popov \email{mikhail@wikimedia.org} (@bearloga on Twitter)
47 |
48 | Other contributors:
49 | \itemize{
50 | \item Wikimedia Foundation [copyright holder]
51 | }
52 |
53 | }
54 | \keyword{internal}
55 |
--------------------------------------------------------------------------------
/man/get_example.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{get_example}
4 | \alias{get_example}
5 | \title{Get an example SPARQL query from Wikidata}
6 | \usage{
7 | get_example(example_name)
8 | }
9 | \arguments{
10 | \item{example_name}{the names of the examples as they appear on
11 | \href{https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples}{this page}}
12 | }
13 | \value{
14 | The SPARQL query as a character vector.
15 | }
16 | \description{
17 | Gets the specified example(s) from
18 | \href{https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples}{SPARQL query service examples page}
19 | using \href{https://www.wikidata.org/w/api.php}{Wikidata's MediaWiki API}.
20 | }
21 | \details{
22 | If you are planning on extracting multiple examples, please provide
23 | all the names as a single vector for efficiency.
24 | }
25 | \examples{
26 | \dontrun{
27 | sparql_query <- get_example(c("Cats", "Horses"))
28 | query_wikidata(sparql_query)
29 | # returns a named list with two data frames
30 | # one called "Cats" and one called "Horses"
31 |
32 | sparql_query <- get_example("Largest cities with female mayor")
33 | cat(sparql_query)
34 | query_wikidata(sparql_query)
35 | }
36 | }
37 | \seealso{
38 | \link{query_wikidata}
39 | }
40 |
--------------------------------------------------------------------------------
/man/query_wikidata.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/query.R
3 | \name{query_wikidata}
4 | \alias{query_wikidata}
5 | \title{Send one or more SPARQL queries to WDQS}
6 | \usage{
7 | query_wikidata(sparql_query, format = c("simple", "smart"))
8 | }
9 | \arguments{
10 | \item{sparql_query}{SPARQL query (can be a vector of queries)}
11 |
12 | \item{format}{"simple" uses CSV and returns pure character data frame, while
13 | "smart" fetches JSON-formatted data and returns a data frame with datetime
14 | columns converted to \code{POSIXct}}
15 | }
16 | \value{
17 | A tibble data frame
18 | }
19 | \description{
20 | Makes a POST request to Wikidata Query Service SPARQL endpoint.
21 | }
22 | \section{Query limits}{
23 |
24 | There is a hard query deadline of 60 seconds. The following limits also
25 | apply:
26 | \itemize{
27 | \item One client (user agent + IP) is allowed 60 seconds of processing time each
28 | 60 seconds
29 | \item One client is allowed 30 error queries per minute.
30 | See \href{https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits}{query limits section}
31 | in the WDQS user manual for more information.
32 | }
33 | }
34 |
35 | \examples{
36 | sparql_query <- "SELECT
37 | ?softwareVersion ?publicationDate
38 | WHERE {
39 | BIND(wd:Q206904 AS ?R)
40 | ?R p:P348 [
41 | ps:P348 ?softwareVersion;
42 | pq:P577 ?publicationDate
43 | ] .
44 | }"
45 | query_wikidata(sparql_query)
46 |
47 | \dontrun{
48 | query_wikidata(sparql_query, format = "smart")
49 | }
50 | }
51 | \seealso{
52 | \link{get_example}
53 | }
54 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(WikidataQueryServiceR)
3 |
4 | test_check("WikidataQueryServiceR")
5 |
--------------------------------------------------------------------------------
/tests/testthat/test-query.R:
--------------------------------------------------------------------------------
1 | context("Querying")
2 | # Tests query_wikidata() in both output formats ("simple" and "smart") using one SPARQL query about entity wd:Q206904.
3 | query <- "SELECT DISTINCT
4 | ?softwareVersion ?publicationDate
5 | WHERE {
6 | BIND(wd:Q206904 AS ?R)
7 | ?R p:P348 [
8 | ps:P348 ?softwareVersion;
9 | pq:P577 ?publicationDate
10 | ] .
11 | }"
12 | # Both requests run once at file level with messages suppressed; the results are shared by every test_that() block below.
13 | suppressMessages({
14 | simple_results <- query_wikidata(query, format = "simple")
15 | smart_results <- query_wikidata(query, format = "smart")
16 | })
17 | # Each format must return a data frame whose columns are exactly the two variables selected in the query, in order.
18 | test_that("data", {
19 | expect_s3_class(simple_results, "data.frame")
20 | expect_s3_class(smart_results, "data.frame")
21 | expect_equal(names(simple_results), c("softwareVersion", "publicationDate"))
22 | expect_equal(names(smart_results), c("softwareVersion", "publicationDate"))
23 | })
24 | # The "smart" format is documented to convert datetime columns to POSIXct; check that on publicationDate.
25 | test_that("date formatting", {
26 | expect_s3_class(smart_results$publicationDate, "POSIXct")
27 | })
28 | # Only row counts are compared across formats; cell values are not checked for equality.
29 | test_that("simple ~= smart", {
30 | expect_equal(nrow(simple_results), nrow(smart_results))
31 | })
32 |
--------------------------------------------------------------------------------