wikimedia
diff --git a/‎.Rbuildignore‎
Lines changed: 1 addition & 0 deletions b/‎.Rbuildignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 3 additions & 5 deletions b/‎DESCRIPTION‎
Lines changed: 3 additions & 5 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 1 addition & 0 deletions b/‎NAMESPACE‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/deprecated.R‎
Lines changed: 13 additions & 0 deletions b/‎R/deprecated.R‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎R/query.R‎
Lines changed: 4 additions & 3 deletions b/‎R/query.R‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎R/utils.R‎
Lines changed: 25 additions & 26 deletions b/‎R/utils.R‎
Lines changed: 25 additions & 26 deletions
diff --git a/‎R/wdqs.R‎
Lines changed: 1 addition & 1 deletion b/‎R/wdqs.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.Rmd‎
Lines changed: 101 additions & 0 deletions b/‎README.Rmd‎
Lines changed: 101 additions & 0 deletions
@@ -5,3 +5,4 @@
 ^cran-comments\.md$
 ^README.Rmd
 ^CONDUCT\.md$
+^README_cache$
@@ -1,6 +1,6 @@
 Package: WikidataQueryServiceR
 Title: API Client Library for 'Wikidata Query Service'
-Version: 0.1.1
+Version: 0.2.0.9000
 Date: 2017-04-28
 Authors@R: c(
     person("Mikhail", "Popov", email = "mikhail@wikimedia.org",
@@ -14,10 +14,8 @@ Depends:
 Imports:
     httr (>= 1.2.1),
     dplyr (>= 0.5.0),
-    jsonlite (>= 1.2)
-Suggests:
-    rvest (>= 0.3.2),
-    urltools (>= 1.6.0)
+    jsonlite (>= 1.2),
+    WikipediR (>= 1.5.0)
 URL: https://github.com/bearloga/WikidataQueryServiceR
 BugReports: https://github.com/bearloga/WikidataQueryServiceR/issues
 License: MIT + file LICENSE
 
@@ -1,4 +1,5 @@
 # Generated by roxygen2: do not edit by hand
 
+export(get_example)
 export(query_wikidata)
 export(scrape_example)
@@ -0,0 +1,13 @@
+#' @title Deprecated functions
+#' @description Why did I have to go and make things so deprecated?
+#' @name WDQSR-deprecated
+NULL
+
+#' @inheritParams get_example
+#' @param ... ignored (kept for backwards-compatibility)
+#' @describeIn WDQSR-deprecated use [get_example] instead, which employs [WikipediR::page_content]
+#' @export
+scrape_example <- function(example_name, ...) {
+  # `...` is accepted only so that existing call sites keep working; it is
+  # never forwarded. Emit the deprecation notice, then delegate.
+  .Deprecated(new = "get_example")
+  get_example(example_name)
+}
@@ -1,10 +1,10 @@
 #' @title Send one or more SPARQL queries to WDQS
-#' @description Makes a GET request to Wikidata Query Service SPARQL endpoint.
+#' @description Makes a POST request to Wikidata Query Service SPARQL endpoint.
 #' @param sparql_query SPARQL query (can be a vector of queries)
 #' @param format "simple" uses CSV and returns pure character data frame, while
 #'   "smart" fetches JSON-formatted data and returns a data frame with datetime
 #'   columns converted to `POSIXct`
-#' @param ... Additional parameters to supply to [httr::GET()]
+#' @param ... Additional parameters to supply to [httr::POST]
 #' @return A `data.frame`
 #' @examples
 #' # R's versions and release dates:
@@ -23,14 +23,15 @@
 #' # "smart" format converts all datetime columns to POSIXct
 #' query_wikidata(sparql_query, format = "smart")
 #' }
+#' @seealso [get_example]
 #' @export
 query_wikidata <- function(sparql_query, format = c("simple", "smart"), ...) {
   if (!format[1] %in% c("simple", "smart")) {
     stop("`format` must be either \"simple\" or \"smart\"")
   }
   output <- lapply(sparql_query, function(sparql_query) {
     if (format[1] == "simple") {
-      response <- httr::GET(
+      response <- httr::POST(
         url = "https://query.wikidata.org/sparql",
         query = list(query = sparql_query),
         httr::add_headers(Accept = "text/csv"),
 
@@ -1,39 +1,38 @@
-#' @title Scrape an example SPARQL query from Wikidata
-#' @description Scrapes [SPARQL query service examples page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
-#'   for specified example(s). Requires rvest and urltools packages.
-#' @details If you are planning on scraping multiple examples, please provide
-#'   all the names as a single vector.
-#' @param example_name The names of the examples as they appear on
+#' @title Get an example SPARQL query from Wikidata
+#' @description Gets the specified example(s) from
+#'   [SPARQL query service examples page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
+#'   using [Wikidata's MediaWiki API](https://www.wikidata.org/w/api.php).
+#' @details If you are planning on extracting multiple examples, please provide
+#'   all the names as a single vector for efficiency.
+#' @param example_name the names of the examples as they appear on
 #'   [this page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples)
-#' @param ... Additional `httr` configurations passed to `rvest`
 #' @return The SPARQL query as a character vector.
 #' @examples
 #' \dontrun{
-#' sparql_query <- scrape_example(c("Cats", "Horses"))
+#' sparql_query <- get_example(c("Cats", "Horses"))
 #' query_wikidata(sparql_query)
 #' # returns a named list with two data frames
 #' # one called "Cats" and one called "Horses"
 #'
-#' sparql_query <- scrape_example("Largest cities with female mayor")
+#' sparql_query <- get_example("Largest cities with female mayor")
 #' cat(sparql_query)
 #' query_wikidata(sparql_query)
 #' }
+#' @seealso [query_wikidata]
 #' @export
-scrape_example <- function(example_name, ...) {
-  if (requireNamespace("rvest", quietly = TRUE) && requireNamespace("urltools", quietly = TRUE)) {
-    html <- rvest::html_session("https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples",
-                                httr::user_agent("https://github.com/bearloga/WikidataQueryServiceR"), ...)
-    return(vapply(example_name, function(example_name) {
-      try_it <- rvest::html_node(html, xpath = paste0("//span[contains(text(), '", example_name, "') and @class='mw-headline']/following::p[descendant::a]/a"))
-      href <- rvest::html_attr(try_it, "href")
-      if (is.na(href)) {
-        warning("could not find a query for example \"", example_name, "\"")
-        return(invisible(NULL))
-      }
-      sparql_query <- urltools::url_decode(sub("//query.wikidata.org/#", "", href, fixed = TRUE))
-      return(paste0(paste("#", example_name), "\n", sparql_query, collapse = "\n"))
-    }, ""))
-  } else {
-    stop("\"rvest\" and \"urltools\" packages required for web-scraping")
-  }
+get_example <- function(example_name) {
+  # Download the wikitext of the examples page once; every requested example
+  # is then looked up in this single copy.
+  content <- WikipediR::page_content(
+    domain = "www.wikidata.org",
+    page_name = "Wikidata:SPARQL query service/queries/examples",
+    as_wikitext = TRUE
+  )
+  wiki <- strsplit(content$parse$wikitext$`*`, "\n")[[1]]
+  wiki <- wiki[wiki != ""]
+  return(vapply(example_name, function(example_name) {
+    # \Q...\E (PCRE) makes the example name match literally, so names with
+    # regex metacharacters such as "?", "(" or "+" are found correctly.
+    # (A name that itself contains "\E" is not supported.)
+    heading_pattern <- paste0("^===\\s?\\Q", example_name, "\\E\\s?===$")
+    heading_line <- grep(heading_pattern, wiki, perl = TRUE)
+    if (length(heading_line) == 0) {
+      stop("could not find an example named \"", example_name, "\"", call. = FALSE)
+    }
+    # If the heading appears more than once, use its first occurrence.
+    heading_line <- heading_line[1]
+    # Negative seq_len() indexing is safe even when the heading is the last
+    # line (it yields an empty vector instead of a descending sequence).
+    start_line <- grep("{{SPARQL", wiki[-seq_len(heading_line)], fixed = TRUE)[1]
+    if (is.na(start_line)) {
+      stop("could not find a query for example \"", example_name, "\"", call. = FALSE)
+    }
+    # The closing "}}" is searched for on the lines after the one that opens
+    # the template; the line containing it is not part of the query.
+    end_line <- grep("}}", wiki[-seq_len(heading_line + start_line)], fixed = TRUE)[1]
+    if (is.na(end_line)) {
+      stop("could not find the end of the query for example \"", example_name, "\"", call. = FALSE)
+    }
+    first <- heading_line + start_line
+    query <- paste0(wiki[first:(first + end_line - 1)], collapse = "\n")
+    # Strip the template opener so only the SPARQL text remains.
+    return(sub("^\\s*\\{\\{SPARQL2?\\n?\\|query\\=", "", query))
+  }, ""))
 }
@@ -20,5 +20,5 @@
 #' - [WDQS User Manual](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual)
 #' @aliases WDQS
 #' @docType package
-#' @name WDQS-package
+#' @name WDQSR-package
 NULL
@@ -0,0 +1,101 @@
+---
+title: "WikidataQueryServiceR"
+output:
+  github_document:
+    toc: true
+    toc_depth: 3
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+# install.packages("printr", type = "source", repos = c("https://yihui.name/xran", CRAN = "https://cran.rstudio.com"))
+library(printr)
+```
+
+[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
+[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/WikidataQueryServiceR)](https://cran.r-project.org/package=WikidataQueryServiceR)
+[![CRAN Total Downloads](https://cranlogs.r-pkg.org/badges/grand-total/WikidataQueryServiceR)](https://cran.r-project.org/package=WikidataQueryServiceR)
+[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
+
+This is an R wrapper for the [Wikidata Query Service (WDQS)](https://www.mediawiki.org/wiki/Wikidata_query_service) which provides a way for tools to query [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) via [SPARQL](https://en.wikipedia.org/wiki/SPARQL) (see the beta at https://query.wikidata.org/). It is written in and for R, and was inspired by Oliver Keyes' [WikipediR](https://github.com/Ironholds/WikipediR) and [WikidataR](https://github.com/Ironholds/WikidataR) packages.
+
+__Author:__ Mikhail Popov (Wikimedia Foundation)<br/> 
+__License:__ [MIT](http://opensource.org/licenses/MIT)<br/>
+__Status:__ Active
+
+## Installation
+
+```R
+install.packages("WikidataQueryServiceR")
+```
+    
+To install the development version:
+
+```R
+# install.packages(c("devtools", "httr", "dplyr", "jsonlite", "WikipediR"))
+devtools::install_github("bearloga/WikidataQueryServiceR")
+```
+
+## Usage
+
+```{r load}
+library(WikidataQueryServiceR)
+```
+
+You submit SPARQL queries using the `query_wikidata()` function.
+
+### Example: fetching genres of a particular movie
+
+In this example, we find an "instance of" ([P31](https://www.wikidata.org/wiki/Property:P31)) "film" ([Q11424](https://www.wikidata.org/wiki/Q11424)) that has the label "The Cabin in the Woods" ([Q45394](https://www.wikidata.org/wiki/Q45394)), get its genres ([P136](https://www.wikidata.org/wiki/Property:P136)), and then use [WDQS label service](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual#Label_service) to return the genre labels.
+
+```{r wdqs_example, cache=TRUE}
+query_wikidata('SELECT DISTINCT
+  ?genre ?genreLabel
+WHERE {
+  ?film wdt:P31 wd:Q11424.
+  ?film rdfs:label "The Cabin in the Woods"@en.
+  ?film wdt:P136 ?genre.
+  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
+}')
+```
+
+For more example SPARQL queries, see [this page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) on [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page).
+
+`query_wikidata()` can accept multiple queries, returning a (potentially named) list of data frames. If the vector of SPARQL queries is named, the results will inherit those names.
+
+### Fetching queries from Wikidata's examples page
+
+The package provides a [WikipediR](https://github.com/Ironholds/WikipediR/)-based function for getting SPARQL queries from the [WDQS examples page](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples).
+
+```{r get_examples, cache=TRUE}
+sparql_query <- get_example(c("Cats", "Horses", "Largest cities with female mayor"))
+```
+```{r, eval=FALSE}
+sparql_query[["Largest cities with female mayor"]]
+```
+```{r, echo=FALSE, results='asis'}
+cat("```SPARQL\n", sparql_query[["Largest cities with female mayor"]], "\n```")
+```
+
+Now we can run all three extracted SPARQL queries and get back three data.frames:
+
+```{r run_examples, cache=TRUE, dependson='get_examples'}
+results <- query_wikidata(sparql_query)
+results$`Largest cities with female mayor`[, c("cityLabel", "mayorLabel")]
+```
+
+## Links for learning SPARQL
+
+- [A beginner-friendly course for SPARQL](https://www.wikidata.org/wiki/Wikidata:A_beginner-friendly_course_for_SPARQL)
+- Building a SPARQL query: [Museums on Instagram](https://www.wikidata.org/wiki/Help:SPARQL/Building_a_query/Museums_on_Instagram)
+- [SPARQL Query Examples](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples) for WDQS
+- [Using SPARQL to access Linked Open Data](http://programminghistorian.org/lessons/graph-databases-and-SPARQL) by Matthew Lincoln
+- Interesting or illustrative [SPARQL queries](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries) for Wikidata
+- Wikidata [2016 SPARQL Workshop](https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/2016_SPARQL_Workshop)
+- [Wikidata SPARQL Query video tutorial](https://www.youtube.com/watch?v=1jHoUkj_mKw) by Navino Evans
+- _[Learning SPARQL](http://www.learningsparql.com/)_ by Bob DuCharme
+- [WDQS User Manual](https://www.mediawiki.org/wiki/Wikidata_query_service/User_Manual)
+
+## Additional Information
+
+Please note that this project is released with a [Contributor Code of Conduct](https://github.com/bearloga/WikidataQueryServiceR/blob/master/CONDUCT.md). By participating in this project you agree to abide by its terms.