Skip to content

Commit

Permalink
Work in progress:
Browse files Browse the repository at this point in the history
- adds a c() method to concat rdf objects
- rdf_query() now coerces return type if it recognizes the data URI and can match it to an R type (a few XMLSchema types are recognized, otherwise defaults to character string).
- drop support for trig, which the redland bindings fail to recognize
- fix support for turtle by changing the mime-type passed to raptor
- rdf_add now coerces object to string type automatically
- experimental work to serialize arbitrary data.frames to RDF
  • Loading branch information
cboettig committed Feb 2, 2018
1 parent 6540874 commit 1e6b457
Show file tree
Hide file tree
Showing 12 changed files with 312 additions and 29 deletions.
9 changes: 6 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Description: The Resource Description Framework, or 'RDF' is a widely used
The 'rdflib' package provides a friendly and concise user interface
for performing common tasks on 'RDF' data, such as reading, writing
and converting between the various serializations of 'RDF' data,
including 'rdfxml', 'turtle', 'nquads', 'ntriples', 'trig', and 'json-ld';
including 'rdfxml', 'turtle', 'nquads', 'ntriples', and 'json-ld';
creating new 'RDF' graphs, and performing graph queries using 'SPARQL'.
This package wraps the low level 'redland' R package which
provides direct bindings to the 'redland' C library. Additionally,
Expand All @@ -38,8 +38,11 @@ Suggests: magrittr,
httr,
xml2,
jqr,
lubridate,
DT,
tidyverse,
readr,
dplyr,
lubridate,
DT
tidyr,
tibble
VignetteBuilder: knitr
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method(c,rdf)
S3method(format,rdf)
S3method(print,rdf)
export(rdf)
Expand All @@ -17,6 +18,7 @@ importClassesFrom(redland,World)
importFrom(jsonld,jsonld_compact)
importFrom(jsonld,jsonld_expand)
importFrom(jsonld,jsonld_to_rdf)
importFrom(methods,as)
importFrom(methods,new)
importFrom(utils,download.file)
importMethodsFrom(redland,addStatement)
Expand Down
38 changes: 21 additions & 17 deletions R/rdf.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ print.rdf <- function(x, ...){
#'
#' @param doc path, URL, or literal string of the rdf document to parse
#' @param format rdf serialization format of the doc,
#' one of "rdfxml", "nquads", "ntriples", "trig", "turtle"
#' one of "rdfxml", "nquads", "ntriples", "turtle"
#' or "jsonld"
#' @param ... additional parameters (not implemented)
#'
Expand All @@ -68,7 +68,6 @@ rdf_parse <- function(doc,
format = c("rdfxml",
"nquads",
"ntriples",
"trig",
"turtle",
"jsonld"),
...){
Expand Down Expand Up @@ -151,7 +150,6 @@ rdf_serialize <- function(rdf,
format = c("rdfxml",
"nquads",
"ntriples",
"trig",
"turtle",
"jsonld"),
namespace = NULL,
Expand Down Expand Up @@ -234,16 +232,6 @@ rdf_query <- function(rdf, query, ...){
rectangularize_query_results(out)
}

# Reshape flat, named query results into a data.frame.
#
# `out` is indexed by name; every distinct name becomes one column. Each cell
# is converted to character and the `"value"^^<type>` wrapper is stripped so
# that only the bare value remains. Strings that do not match that shape are
# passed through unchanged.
rectangularize_query_results <- function(out) {
  strip_typing <- function(literal) {
    gsub('\"(([^\\^])+)\"\\^*.*', "\\1", literal)
  }

  columns <- unique(names(out))
  cells <- lapply(columns, function(column) {
    strip_typing(as.character(out[names(out) == column]))
  })
  names(cells) <- columns

  as.data.frame(cells, stringsAsFactors = FALSE)
}

#' Add RDF Triples
#'
Expand All @@ -254,7 +242,7 @@ rectangularize_query_results <- function(out){
#' @param predicate character string containing the predicate
#' @param object character string containing the object
#' @param subjectType the Node type of the subject, i.e. "blank", "uri"
#' @param objectType the Node type of the object, i.e. "blank", "uri"
#' @param objectType the Node type of the object, i.e. "blank", "uri", "literal"
#' @param datatype_uri the datatype URI to associate with a object literal value
#'
#' @return the updated RDF graph (rdf object).
Expand All @@ -271,29 +259,45 @@ rectangularize_query_results <- function(out){
#' subject="http://www.dajobe.org/",
#' predicate="http://purl.org/dc/elements/1.1/language",
#' object="en")
#'
#' ## blank nodes should be declared as such:
#' rdf_add(rdf, "", "http://schema.org/jobTitle", "Professor",
#' subjectType = "blank")
#'
rdf_add <- function(rdf, subject, predicate, object,
subjectType = as.character(NA),
objectType = as.character(NA),
datatype_uri = as.character(NA)){
stmt <- new("Statement", world = rdf$world,
subject, predicate, object,
subject, predicate, as.character(object),
subjectType, objectType, datatype_uri)
addStatement(rdf$model, stmt)

## rdf object is a list of pointers, modified in pass-by-reference
invisible(rdf)
}

#' Concatenate rdf Objects
#'
#' Every object is rendered to text with `format()`, the pieces are joined
#' with newlines, and the combined text is parsed again as N-Quads.
#' Note: this assumes absolute URIs for subject and predicate.
#'
#' @method c rdf
#' @export
#' @param ... objects to be concatenated
c.rdf <- function(...) {
  graphs <- list(...)
  serialized <- lapply(graphs, format)
  rdf_parse(paste(serialized, collapse = "\n"), "nquads")
}

# Must match parser name & q 1.0 mimetype listed at:
# http://librdf.org/raptor/api/raptor-formats-types-by-parser.html
# 3 turtle options listed but only text/turtle works.
rdf_mimetypes <- c("nquads" = "text/x-nquads",
"ntriples" = "application/n-triples",
"rdfxml" = "application/rdf+xml",
"trig" = "application/x-trig",
"turtle" = "application/turtle")
"turtle" = "text/turtle")

# application/x-turtle & text/turtle also ok
# trig not working right now, not clear why



Expand Down
37 changes: 37 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

## Utilities to coerce return type, if recognized

# Map an XML Schema datatype URI to the name of an R class.
#
# `x` is a single string such as "<http://www.w3.org/2001/XMLSchema#decimal>".
# The URI is first shortened to its "xs:TYPE" form and then looked up.
# Anything that is not one of the recognised types (including strings that are
# not datatype URIs at all) maps to "character".
#
# Note: `switch()` uses the first matching label, so "xs:string" can only map
# to one class. It maps to "character"; a factor cannot be recovered from the
# datatype alone.
r_class <- function(x) {
  switch(gsub("<http://www.w3.org/2001/XMLSchema#(.*)>", "xs:\\1", x),
    "xs:decimal" = "numeric",
    "xs:string" = "character",
    "xs:boolean" = "logical",
    "xs:integer" = "integer",
    "xs:date" = "Date",
    "xs:dateTime" = "POSIXct",
    "character"
  )
}
# Extract the bare value from strings shaped like `"value"^^<type>`.
# Strings that do not match that shape are returned unchanged.
get_values <- function(x) {
  literal_pattern <- '\"(([^\\^])+)\"\\^*(.*)'
  gsub(literal_pattern, "\\1", x)
}
# Extract the part that follows the quoted value (and any `^` separators) in
# strings shaped like `"value"^^<type>`, e.g. the `<type>` token.
# The result is "" when nothing follows the value; strings that do not match
# that shape are returned unchanged.
#
# The value is returned directly rather than through a throwaway assignment,
# so the result is visible when the function is called at top level.
get_types <- function(x) {
  gsub('\"(([^\\^])+)\"\\^*(.*)', "\\3", x)
}


# Coerce a character vector of query results to an R type chosen from the
# datatype attached to each element.
#
# Values and datatypes are split apart with get_values() / get_types(), and
# each datatype is mapped to an R class name with r_class().
#
# The result is meant to become one data.frame column, so it must have a single
# type: when every element maps to the same R class the whole vector is coerced
# to that class; otherwise (mixed classes, or empty input) the bare values are
# returned as character.
#
# "Date" and "POSIXct" are converted with as.Date() / as.POSIXct() instead of
# methods::as().
# NOTE(review): the POSIXct branch assumes `YYYY-MM-DDTHH:MM:SS` lexical form
# and reads it as UTC; timezone offsets are not interpreted -- confirm against
# the data actually returned by queries.
#' @importFrom methods as
type_by_datauri <- function(x) {
  values <- get_values(x)
  r_types <- vapply(get_types(x), r_class, character(1), USE.NAMES = FALSE)

  target <- unique(r_types)
  if (length(target) != 1L) {
    return(as.character(values))
  }

  switch(target,
    "Date" = as.Date(values),
    "POSIXct" = as.POSIXct(values, format = "%Y-%m-%dT%H:%M:%OS", tz = "UTC"),
    as(values, target)
  )
}


# Reshape flat, named query results into a data.frame.
#
# `out` is indexed by name; every distinct name becomes one column. The
# entries sharing a name are converted to character and handed to
# type_by_datauri(), which produces the column's values.
rectangularize_query_results <- function(out) {
  columns <- unique(names(out))
  cells <- lapply(columns, function(column) {
    type_by_datauri(as.character(out[names(out) == column]))
  })
  names(cells) <- columns

  as.data.frame(cells, stringsAsFactors = FALSE)
}
69 changes: 69 additions & 0 deletions inst/examples/as_rdf.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# S3 generic: dispatches on the class of `df` to an `as_rdf.<class>` method.
# `base_uri` is passed on to the method.
as_rdf <- function(df, base_uri = NULL) {
  UseMethod("as_rdf")
}

# Serialize a data.frame into an rdf graph (experimental).
#
# Every cell becomes one triple:
#   subject   = base_uri + row number
#   predicate = base_uri + column name
#   object    = the cell value as a string, typed with the XML Schema datatype
#               that xs_class() reports for the column.
#
# df:       a data.frame.
# base_uri: prefix for subjects and predicates. When NULL, a prefix such as
#           "iris:" is derived from the letters of the expression passed as
#           `df`.
# Returns the rdf object that the triples were added to.
as_rdf.data.frame <- function(df, base_uri = NULL) {

  if (is.null(base_uri)) {
    # deparse() may return several strings for a long expression; collapse
    # them so that exactly one prefix is produced.
    df_label <- paste(deparse(substitute(df)), collapse = "")
    base_uri <- paste0(gsub("[^a-zA-Z]", "", df_label), ":")
  }
  # Subjects are always built as base_uri + row id, i.e. they are URIs
  # whether or not the caller supplied the prefix.
  subjectType <- "uri"

  # Convert every column to character up front. Gathering columns of mixed
  # types would otherwise coerce them with a warning and drop attributes
  # (e.g. a Date column would be reduced to its underlying number).
  values <- df
  values[] <- lapply(df, as.character)

  x <- tibble::rowid_to_column(values, "subject")
  x <- tidyr::gather(x, key = predicate, value = object, -subject)

  ## Add a column for the data type
  col_classes <- data.frame(
    predicate = names(df),
    datatype = vapply(df, xs_class, character(1)),
    stringsAsFactors = FALSE  # datatype URIs must stay plain strings
  )
  x <- dplyr::inner_join(x, col_classes, by = "predicate")

  # Build the URIs once, outside the loop.
  subjects <- paste0(base_uri, as.character(x$subject))
  predicates <- paste0(base_uri, x$predicate)

  graph <- rdf()
  for (i in seq_along(subjects)) {
    graph <- rdf_add(graph,
                     subject = subjects[[i]],
                     predicate = predicates[[i]],
                     object = as.character(x$object[[i]]),
                     subjectType = subjectType,
                     datatype_uri = x$datatype[[i]])
  }
  graph
}

# Map an R vector to the URI of the XML Schema datatype used to serialize it.
#
# The lookup uses the first entry of class(x) that is recognised, so objects
# with more than one class (e.g. POSIXct, whose class is
# c("POSIXct", "POSIXt")) are handled instead of making switch() fail on a
# length-2 selector. Classes that are not recognised fall back to xs:string,
# so the function always returns exactly one string.
xs_class <- function(x) {
  known <- c(
    "numeric" = "xs:decimal",
    "character" = "xs:string",
    "factor" = "xs:string",
    "logical" = "xs:boolean",
    "integer" = "xs:integer",
    "Date" = "xs:date",
    "POSIXct" = "xs:dateTime"
  )

  # intersect() keeps the order of its first argument, i.e. class precedence.
  hits <- intersect(class(x), names(known))
  xs_type <- if (length(hits) > 0) known[[hits[1]]] else "xs:string"

  gsub("^xs:", "http://www.w3.org/2001/XMLSchema#", xs_type)
}


# Placeholder method for lists: not implemented yet, always returns NULL.
as_rdf.list <- function(x) NULL

## Example: serialize two data.frames, merge the graphs, and query the result.

# rownames_to_column() is called with its namespace so that the example does
# not depend on tibble or magrittr being attached.
# The variable name matters: as_rdf() derives the "cars:" prefix from it.
cars <- tibble::rownames_to_column(mtcars[1:4, 1:4], "Model")


x1 <- as_rdf(iris)
x2 <- as_rdf(cars)
# Named `graph` so the rdf() constructor is not shadowed.
graph <- c(x1, x2)


sparql <-
'SELECT ?Sepal_Length ?Sepal_Width ?Petal_Length ?Petal_Width ?Species
WHERE {
?s <iris:Sepal.Width> ?Sepal_Width .
?s <iris:Sepal.Length> ?Sepal_Length .
?s <iris:Petal.Width> ?Petal_Width .
?s <iris:Petal.Length> ?Petal_Length .
?s <iris:Species> ?Species .
}'
tmp <- rdf_query(graph, sparql)
16 changes: 16 additions & 0 deletions man/c.rdf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/rdf_add.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/rdf_parse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/rdf_serialize.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion paper.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ the familiar data table or rectangle of relational databases.
The `rdflib` package provides a friendly and concise user interface
for performing common tasks on RDF data, such as reading, writing
and converting between the various serializations of RDF data,
including `rdfxml`, `turtle`, `nquads`, `ntriples`, `trig`, and `json-ld`;
including `rdfxml`, `turtle`, `nquads`, `ntriples`, and `json-ld`;
creating new `rdf` graphs, and performing graph queries using SPARQL [@SPARQL; @W3C_SPARQL].
This package wraps the low level `redland` R package [@redland] which
provides direct bindings to the redland C library. Additionally,
Expand Down
41 changes: 40 additions & 1 deletion tests/testthat/test-rdf.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ testthat::test_that("we can initialize add triples to rdf graph", {
testthat::expect_is(x, "rdf")
})

testthat::test_that("we can parse and serialize json-ld", {
testthat::test_that("we can add, parse and serialize json-ld", {
#x <- rdf_parse(doc)
x <- rdf()
x <- rdf_add(x,
Expand Down Expand Up @@ -79,6 +79,30 @@ testthat::test_that("we can parse and serialize json-ld", {

})

# Round-trip tests: parse the sample document, serialize it to the given
# format, and parse the serialized file back in.
# NOTE(review): `doc` and `out` are not defined in these tests; presumably
# they are set earlier in the test file -- confirm.
testthat::test_that("we can parse and serialize nquads", {
  x <- rdf_parse(doc)
  rdf_serialize(x, out, "nquads")
  roundtrip <- rdf_parse(out, "nquads")
  testthat::expect_is(roundtrip, "rdf")
})
testthat::test_that("we can parse and serialize ntriples", {
  x <- rdf_parse(doc)
  rdf_serialize(x, out, "ntriples")
  roundtrip <- rdf_parse(out, "ntriples")
  testthat::expect_is(roundtrip, "rdf")
})
testthat::test_that("we can parse and serialize turtle", {
  x <- rdf_parse(doc)
  rdf_serialize(x, out, "turtle")
  roundtrip <- rdf_parse(out, "turtle")
  testthat::expect_is(roundtrip, "rdf")
})
testthat::test_that("we can parse and serialize rdfxml", {
  x <- rdf_parse(doc)
  rdf_serialize(x, out, "rdfxml")
  roundtrip <- rdf_parse(out, "rdfxml")
  testthat::expect_is(roundtrip, "rdf")
})


testthat::test_that("we can parse from a url", {
Expand All @@ -90,5 +114,20 @@ testthat::test_that("we can parse from a url", {
})


# rdf_parse() is given the document text itself rather than a path or URL.
testthat::test_that("we can parse from a string", {
  nquads_text <-
'
_:b0 <http://schema.org/jobTitle> "Professor" .
_:b0 <http://schema.org/name> "Jane Doe" .
_:b0 <http://schema.org/telephone> "(425) 123-4567" .
_:b0 <http://schema.org/url> <http://www.janedoe.com> .
_:b0 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
'
  parsed <- rdf_parse(nquads_text, "nquads")
  testthat::expect_is(parsed, "rdf")
})


unlink(out)

Loading

0 comments on commit 1e6b457

Please sign in to comment.