Skip to content
This repository was archived by the owner on Jan 12, 2026. It is now read-only.

Commit e4cc518

Browse files
authored
Merge pull request #7 from bearloga/wikipedir-switch
WikipediR and POST
2 parents d7c7c11 + 8b89c73 commit e4cc518

14 files changed

Lines changed: 324 additions & 150 deletions

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@
55
^cran-comments\.md$
66
^README.Rmd
77
^CONDUCT\.md$
8+
^README_cache$

DESCRIPTION

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: WikidataQueryServiceR
22
Title: API Client Library for 'Wikidata Query Service'
3-
Version: 0.1.1
3+
Version: 0.2.0.9000
44
Date: 2017-04-28
55
Authors@R: c(
66
person("Mikhail", "Popov", email = "mikhail@wikimedia.org",
@@ -14,10 +14,8 @@ Depends:
1414
Imports:
1515
httr (>= 1.2.1),
1616
dplyr (>= 0.5.0),
17-
jsonlite (>= 1.2)
18-
Suggests:
19-
rvest (>= 0.3.2),
20-
urltools (>= 1.6.0)
17+
jsonlite (>= 1.2),
18+
WikipediR (>= 1.5.0)
2119
URL: https://github.com/bearloga/WikidataQueryServiceR
2220
BugReports: https://github.com/bearloga/WikidataQueryServiceR/issues
2321
License: MIT + file LICENSE

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export(get_example)
34
export(query_wikidata)
45
export(scrape_example)

R/deprecated.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#' @title Deprecated functions
2+
#' @description Why did I have to go and make things so deprecated?
3+
#' @name WDQSR-deprecated
4+
NULL
5+
6+
#' @inheritParams get_example
7+
#' @param ... ignored (kept for backwards-compatibility)
8+
#' @describeIn WDQSR-deprecated use [get_example] instead, which employs [WikipediR::page_content]
9+
#' @export
10+
scrape_example <- function(example_name, ...) {
11+
.Deprecated("get_example")
12+
return(get_example(example_name))
13+
}

R/query.R

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
#' @title Send one or more SPARQL queries to WDQS
2-
#' @description Makes a GET request to Wikidata Query Service SPARQL endpoint.
2+
#' @description Makes a POST request to Wikidata Query Service SPARQL endpoint.
33
#' @param sparql_query SPARQL query (can be a vector of queries)
44
#' @param format "simple" uses CSV and returns pure character data frame, while
55
#' "smart" fetches JSON-formatted data and returns a data frame with datetime
66
#' columns converted to `POSIXct`
7-
#' @param ... Additional parameters to supply to [httr::GET()]
7+
#' @param ... Additional parameters to supply to [httr::POST]
88
#' @return A `data.frame`
99
#' @examples
1010
#' # R's versions and release dates:
@@ -23,14 +23,15 @@
2323
#' # "smart" format converts all datetime columns to POSIXct
2424
#' query_wikidata(sparql_query, format = "smart")
2525
#' }
26+
#' @seealso [get_example]
2627
#' @export
2728
query_wikidata <- function(sparql_query, format = c("simple", "smart"), ...) {
2829
if (!format[1] %in% c("simple", "smart")) {
2930
stop("`format` must be either \"simple\" or \"smart\"")
3031
}
3132
output <- lapply(sparql_query, function(sparql_query) {
3233
if (format[1] == "simple") {
33-
response <- httr::GET(
34+
response <- httr::POST(
3435
url = "https://query.wikidata.org/sparql",
3536
query = list(query = sparql_query),
3637
httr::add_headers(Accept = "text/csv"),

R/utils.R

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,38 @@
1-
#' @title Scrape an example SPARQL query from Wikidata
2-
#' @description Scrapes [SPARQL query service examples page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
3-
#' for specified example(s). Requires rvest and urltools packages.
4-
#' @details If you are planning on scraping multiple examples, please provide
5-
#' all the names as a single vector.
6-
#' @param example_name The names of the examples as they appear on
1+
#' @title Get an example SPARQL query from Wikidata
2+
#' @description Gets the specified example(s) from
3+
#' [SPARQL query service examples page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
4+
#' using [Wikidata's MediaWiki API](https://www.wikidata.org/w/api.php).
5+
#' @details If you are planning on extracting multiple examples, please provide
6+
#' all the names as a single vector for efficiency.
7+
#' @param example_name the names of the examples as they appear on
78
#' [this page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
8-
#' @param ... Additional `httr` configurations passed to `rvest`
99
#' @return The SPARQL query as a character vector.
1010
#' @examples
1111
#' \dontrun{
12-
#' sparql_query <- scrape_example(c("Cats", "Horses"))
12+
#' sparql_query <- get_example(c("Cats", "Horses"))
1313
#' query_wikidata(sparql_query)
1414
#' # returns a named list with two data frames
1515
#' # one called "Cats" and one called "Horses"
1616
#'
17-
#' sparql_query <- scrape_example("Largest cities with female mayor")
17+
#' sparql_query <- get_example("Largest cities with female mayor")
1818
#' cat(sparql_query)
1919
#' query_wikidata(sparql_query)
2020
#' }
21+
#' @seealso [query_wikidata]
2122
#' @export
22-
scrape_example <- function(example_name, ...) {
23-
if (requireNamespace("rvest", quietly = TRUE) && requireNamespace("urltools", quietly = TRUE)) {
24-
html <- rvest::html_session("https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples",
25-
httr::user_agent("https://github.com/bearloga/WikidataQueryServiceR"), ...)
26-
return(vapply(example_name, function(example_name) {
27-
try_it <- rvest::html_node(html, xpath = paste0("//span[contains(text(), '", example_name, "') and @class='mw-headline']/following::p[descendant::a]/a"))
28-
href <- rvest::html_attr(try_it, "href")
29-
if (is.na(href)) {
30-
warning("could not find a query for example \"", example_name, "\"")
31-
return(invisible(NULL))
32-
}
33-
sparql_query <- urltools::url_decode(sub("//query.wikidata.org/#", "", href, fixed = TRUE))
34-
return(paste0(paste("#", example_name), "\n", sparql_query, collapse = "\n"))
35-
}, ""))
36-
} else {
37-
stop("\"rvest\" and \"urltools\" packages required for web-scraping")
38-
}
23+
get_example <- function(example_name) {
24+
content <- WikipediR::page_content(
25+
domain = "www.wikidata.org",
26+
page_name = "Wikidata:SPARQL query service/queries/examples",
27+
as_wikitext = TRUE
28+
)
29+
wiki <- strsplit(content$parse$wikitext$`*`, "\n")[[1]]
30+
wiki <- wiki[wiki != ""]
31+
return(vapply(example_name, function(example_name) {
32+
heading_line <- which(grepl(paste0("^===\\s?", example_name, "\\s?===$"), wiki, fixed = FALSE))
33+
start_line <- which(grepl("{{SPARQL", wiki[(heading_line + 1):length(wiki)], fixed = TRUE))[1]
34+
end_line <- which(grepl("}}", wiki[(heading_line + start_line + 1):length(wiki)], fixed = TRUE))[1]
35+
query <- paste0(wiki[(heading_line + start_line):(heading_line + start_line + end_line - 1)], collapse = "\n")
36+
return(sub("^\\s*\\{\\{SPARQL2?\\n?\\|query\\=", "", query))
37+
}, ""))
3938
}

R/wdqs.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@
2020
#' - [WDQS User Manual](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual)
2121
#' @aliases WDQS
2222
#' @docType package
23-
#' @name WDQS-package
23+
#' @name WDQSR-package
2424
NULL

README.Rmd

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
---
2+
title: "WikidataQueryServiceR"
3+
output:
4+
github_document:
5+
toc: true
6+
toc_depth: 3
7+
---
8+
9+
```{r setup, include=FALSE}
10+
knitr::opts_chunk$set(echo = TRUE)
11+
# install.packages("printr", type = "source", repos = c("https://yihui.name/xran", CRAN = "https://cran.rstudio.com"))
12+
library(printr)
13+
```
14+
15+
[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
16+
[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/WikidataQueryServiceR)](https://cran.r-project.org/package=WikidataQueryServiceR)
17+
[![CRAN Total Downloads](https://cranlogs.r-pkg.org/badges/grand-total/WikidataQueryServiceR)](https://cran.r-project.org/package=WikidataQueryServiceR)
18+
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
19+
20+
This is an R wrapper for the [Wikidata Query Service (WDQS)](https://www.mediawiki.org/wiki/Wikidata_query_service) which provides a way for tools to query [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) via [SPARQL](https://en.wikipedia.org/wiki/SPARQL) (see the beta at https://query.wikidata.org/). It is written in and for R, and was inspired by Oliver Keyes' [WikipediR](https://github.com/Ironholds/WikipediR) and [WikidataR](https://github.com/Ironholds/WikidataR) packages.
21+
22+
__Author:__ Mikhail Popov (Wikimedia Foundation)<br/>
23+
__License:__ [MIT](http://opensource.org/licenses/MIT)<br/>
24+
__Status:__ Active
25+
26+
## Installation
27+
28+
```R
29+
install.packages("WikidataQueryServiceR")
30+
```
31+
32+
To install the development version:
33+
34+
```R
35+
# install.packages(c("devtools", "httr", "dplyr", "jsonlite", "WikipediR"))
36+
devtools::install_github("bearloga/WikidataQueryServiceR")
37+
```
38+
39+
## Usage
40+
41+
```{r load}
42+
library(WikidataQueryServiceR)
43+
```
44+
45+
You submit SPARQL queries using the `query_wikidata()` function.
46+
47+
### Example: fetching genres of a particular movie
48+
49+
In this example, we find an "instance of" ([P31](https://www.wikidata.org/wiki/Property:P31)) "film" ([Q11424](https://www.wikidata.org/wiki/Q11424)) that has the label "The Cabin in the Woods" ([Q45394](https://www.wikidata.org/wiki/Q45394)), get its genres ([P136](https://www.wikidata.org/wiki/Property:P136)), and then use [WDQS label service](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual#Label_service) to return the genre labels.
50+
51+
```{r wdqs_example, cache=TRUE}
52+
query_wikidata('SELECT DISTINCT
53+
?genre ?genreLabel
54+
WHERE {
55+
?film wdt:P31 wd:Q11424.
56+
?film rdfs:label "The Cabin in the Woods"@en.
57+
?film wdt:P136 ?genre.
58+
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
59+
}')
60+
```
61+
62+
For more example SPARQL queries, see [this page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) on [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page).
63+
64+
`query_wikidata()` can accept multiple queries, returning a (potentially named) list of data frames. If the vector of SPARQL queries is named, the results will inherit those names.
65+
66+
### Fetching queries from Wikidata's examples page
67+
68+
The package provides a [WikipediR](https://github.com/Ironholds/WikipediR/)-based function for getting SPARQL queries from the [WDQS examples page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples).
69+
70+
```{r get_examples, cache=TRUE}
71+
sparql_query <- get_example(c("Cats", "Horses", "Largest cities with female mayor"))
72+
```
73+
```{r, eval=FALSE}
74+
sparql_query[["Largest cities with female mayor"]]
75+
```
76+
```{r, echo=FALSE, results='asis'}
77+
cat("```SPARQL\n", sparql_query[["Largest cities with female mayor"]], "\n```")
78+
```
79+
80+
Now we can run all three extracted SPARQL queries and get back three data.frames:
81+
82+
```{r run_examples, cache=TRUE, dependson='get_examples'}
83+
results <- query_wikidata(sparql_query)
84+
results$`Largest cities with female mayor`[, c("cityLabel", "mayorLabel")]
85+
```
86+
87+
## Links for learning SPARQL
88+
89+
- [A beginner-friendly course for SPARQL](https://www.wikidata.org/wiki/Wikidata:A_beginner-friendly_course_for_SPARQL)
90+
- Building a SPARQL query: [Museums on Instagram](https://www.wikidata.org/wiki/Help:SPARQL/Building_a_query/Museums_on_Instagram)
91+
- [SPARQL Query Examples](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) for WDQS
92+
- [Using SPARQL to access Linked Open Data](http://programminghistorian.org/lessons/graph-databases-and-SPARQL) by Matthew Lincoln
93+
- Interesting or illustrative [SPARQL queries](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries) for Wikidata
94+
- Wikidata [2016 SPARQL Workshop](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/2016_SPARQL_Workshop)
95+
- [Wikidata SPARQL Query video tutorial](https://www.youtube.com/watch?v=1jHoUkj_mKw) by Navino Evans
96+
- _[Learning SPARQL](http://www.learningsparql.com/)_ by Bob DuCharme
97+
- [WDQS User Manual](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual)
98+
99+
## Additional Information
100+
101+
Please note that this project is released with a [Contributor Code of Conduct](https://github.com/bearloga/WikidataQueryServiceR/blob/master/CONDUCT.md). By participating in this project you agree to abide by its terms.

0 commit comments

Comments
 (0)