From 909b5e946e552d11bcc6ddb106234e5b387ecae9 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Tue, 7 May 2024 11:50:18 -0700 Subject: [PATCH 01/25] first pass not working --- R/class-yarn.R | 6 +- R/to_md.R | 129 ++++++++++++++++++++++++++++ tests/testthat/_snaps/class-yarn.md | 32 +++++++ tests/testthat/test-class-yarn.R | 13 +++ 4 files changed, 178 insertions(+), 2 deletions(-) diff --git a/R/class-yarn.R b/R/class-yarn.R index 10721c9..2b1c2a2 100644 --- a/R/class-yarn.R +++ b/R/class-yarn.R @@ -96,6 +96,8 @@ yarn <- R6::R6Class("yarn", #' @description show the markdown contents on the screen #' + #' @param lines a subset of elements to show. Defaults to `TRUE`, which + #' shows all lines of the output. This can be either logical or numeric. #' @param stylesheet_path path to the xsl stylesheet to convert XML to markdown. #' @return a character vector with one line for each line in the output #' @examples @@ -104,8 +106,8 @@ yarn <- R6::R6Class("yarn", #' ex2$head(5) #' ex2$tail(5) #' ex2$show() - show = function(stylesheet_path = stylesheet() ) { - show_user(private$md_lines(stylesheet = stylesheet_path)) + show = function(lines = TRUE, stylesheet_path = stylesheet()) { + show_user(private$md_lines(stylesheet = stylesheet_path)[lines]) }, #' @description show the head of the markdown contents on the screen diff --git a/R/to_md.R b/R/to_md.R index 07bdc6e..89c0f24 100644 --- a/R/to_md.R +++ b/R/to_md.R @@ -127,3 +127,132 @@ to_info <- function(code_block){ xml2::xml_set_attr(code_block, "info", info) } + +#' @examples +#' path <- system.file("extdata", "example1.md", package = "tinkr") +#' y <- tinkr::yarn$new(path) +#' items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) +#' links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) +#' md_fragment(items) +#' md_fragment(links) +md_fragment <- function(nodelist) { + parents <- purrr::map(nodelist, xml2::xml_parent) + donor <- xml2::read_xml(commonmark::markdown_xml("")) + gparents <- add_children_to_donor(donor, parents, nodelist) + add_grandparents(donor, gparents) + return(copy_xml(donor)) +} + +copy_and_isolate <- function(nodelist) { + + doc <- copy_xml(xml2::xml_root(nodelist)) + path <- xml2::xml_path(nodelist) + tim <- as.character(as.integer(Sys.time())) + purrr::walk(path, label_nodes, doc = doc, label = tim) + xpath <- paste0(".//node()[@label='",tim,"']") + labelled <- xml2::xml_find_all(doc, xpath) + purrr::walk(labelled, isolate_labelled) + return(doc) +} + +label_nodes <- function(xpath, doc, label = "save") { + xml2::xml_set_attr( + xml2::xml_find_all(doc, xpath, ns = md_ns()), + "label", label) +} +isolate_labelled <- function(node) { + sibs <- xml2::xml_siblings(node) + lab <- xml2::xml_attr(node, "label") + nolab <- xml2::xml_attr(sibs, "label") != lab + if (any(nolab)) { + xml2::xml_remove(sibs[nolab]) + } +} + + +add_children_to_donor <- function(donor, parents, children) { + old_parent <- NULL + new_parent <- NULL + gparents <- list() + # loop over all the parents + for (i in seq(parents)) { + # when we have not encountered the current parent + if (!identical(old_parent, parents[[i]])) { + # set it to the old_parent + old_parent <- parents[[i]] + # record its grandparent + grandparent <- xml2::xml_parent(old_parent) + if (is_root(grandparent)) { + grandparent <- NULL + } + # add grandparent to the list + gparents <- c(gparents, list(grandparent)) + # insert the old parent as a child of the donor document, + # returning the new node + new_parent <- insert_child(old_parent, donor) + } + # if the new_parent contains a parent, then add the child to the parent + if (!is.null(new_parent)) { + xml2::xml_add_child(new_parent, children[[i]]) + } + } + # return the grandparents for further processing + names(gparents) <- seq_along(gparents) + return(gparents) +} + +add_grandparents <- function(donor, grandparents) { + if (all(vapply(grandparents, is.null, logical(1)))) { + return(donor) + } + children <- xml2::xml_children(donor) + ggparents <- grandparents + old_grandparent <- NULL + this_parent <- NULL + for (i in seq(children)) { + if (is.null(grandparents[[i]]) || is_root(grandparents[[i]])) { + next + } + # when the grandparents are identical, then we need add the current parent + if (identical(old_grandparent, grandparents[[i]])) { + xml2::xml_add_parent(children[[i]], this_parent) + } else { + # otherwise, we need to insert a new parent + insert_parent(grandparents[[i]], children[[i]]) + this_parent <- xml2::xml_parent(children[[i]]) + # because we've recorded a new parent, we need to move up and + # capture the + old_grandparent <- grandparents[[i]] + ggp <- xml2::xml_parent(grandparents[[i]]) + if (is_root(ggp)) { + ggp <- NULL + } + ggparents[[i]] <- ggp + } + } + return(add_grandparents(donor, ggparents)) +} + +insert_child <- function(from, to) { + insert_node(from, to, type = "child") +} + +insert_parent <- function(from, to) { + insert_node(from, to, type = "parent") +} + +insert_node <- function(from, to, type = "child") { + if (type == "child") { + this_parent <- xml2::xml_add_child(to, xml2::xml_name(from)) + } else { + this_parent <- xml2::xml_add_parent(to, xml2::xml_name(from)) + } + purrr::imap(xml2::xml_attrs(from), + function(x, i) xml2::xml_set_attr(this_parent, i, x) + ) + return(this_parent) +} + +is_root <- function(node) { + xml2::xml_name(node) == "document" +} diff --git a/tests/testthat/_snaps/class-yarn.md b/tests/testthat/_snaps/class-yarn.md index 07c298b..98fea84 100644 --- a/tests/testthat/_snaps/class-yarn.md +++ b/tests/testthat/_snaps/class-yarn.md @@ -64,6 +64,38 @@ blabla +--- + + Code + show_user(res_11 <- y1$show(11:20), TRUE) + Output + + ## R Markdown + + This is an ~~R Markdown document~~. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see [http://rmarkdown.rstudio.com](http://rmarkdown.rstudio.com). + + When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: + + ```{r, eval=TRUE, echo=TRUE} + summary(cars) + ``` + +--- + + Code + show_user(res_1 <- y1$show(1:10), TRUE) + Output + --- + title: "Untitled" + author: "M. Salmon" + date: "September 6, 2018" + output: html_document + --- + + ```{r setup, include=FALSE, eval=TRUE} + knitr::opts_chunk$set(echo = TRUE) + ``` + --- Code diff --git a/tests/testthat/test-class-yarn.R b/tests/testthat/test-class-yarn.R index 5ec3e67..611ff32 100644 --- a/tests/testthat/test-class-yarn.R +++ b/tests/testthat/test-class-yarn.R @@ -24,8 +24,21 @@ test_that("yarn show, head, and tail methods work", { expect_snapshot(show_user(res <- y1$show(), TRUE)) expect_type(res, "character") + # the head method is identical to subsetting 10 lines + expect_snapshot(show_user(res_11 <- y1$show(11:20), TRUE)) + expect_length(res_11, 10) %>% + expect_identical(res[11:20]) %>% + expect_type("character") + + # a subset from the top has 10 lines + expect_snapshot(show_user(res_1 <- y1$show(1:10), TRUE)) + expect_length(res_1, 10) %>% + expect_type("character") + + # the head method is identical to subsetting 10 lines expect_snapshot(show_user(res <- y1$head(10), TRUE)) expect_length(res, 10) %>% + expect_identical(res_1) %>% expect_type("character") expect_snapshot(show_user(res <- y1$tail(11), TRUE)) From 81e028da15d2c5cfdb86d7c7557f871b8aee392e Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Wed, 8 May 2024 11:17:21 -0700 Subject: [PATCH 02/25] add show methods --- DESCRIPTION | 2 +- NAMESPACE | 3 + NEWS.md | 8 + R/class-yarn.R | 16 +- R/show.R | 158 +++++++++++++++++ R/to_md.R | 130 +------------- man/protect_unescaped.Rd | 2 +- man/show.Rd | 45 +++++ man/yarn.Rd | 5 +- tests/testthat/_snaps/show.md | 311 ++++++++++++++++++++++++++++++++++ tests/testthat/test-show.R | 33 ++++ 11 files changed, 566 insertions(+), 147 deletions(-) create mode 100644 R/show.R create mode 100644 man/show.Rd create mode 100644 tests/testthat/_snaps/show.md create mode 100644 tests/testthat/test-show.R diff --git a/DESCRIPTION b/DESCRIPTION index 1c487b2..298b9ff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,5 +55,5 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 93033f3..871ac17 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,9 @@ export(find_between) export(md_ns) export(protect_curly) export(protect_math) +export(show_bare) +export(show_context) +export(show_list) export(stylesheet) export(to_md) export(to_xml) diff --git a/NEWS.md b/NEWS.md index 45d39ff..0b55cf6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,13 @@ # tinkr 0.2.0.9000 +## NEW FEATURES + +* `show_list()`, `show_bare()`, and `show_context()` will show the markdown + content of a node, nodelist, or list of nodes without needing to print the + entire document. +* `yarn$show()` method now gains the `lines` parameter, which allows you to + subset the output by the lines of text. + ## BUG FIX * Inline math with single characters will no longer cause an error (issue: #101, diff --git a/R/class-yarn.R b/R/class-yarn.R index 2b1c2a2..7956b98 100644 --- a/R/class-yarn.R +++ b/R/class-yarn.R @@ -208,21 +208,7 @@ yarn <- R6::R6Class("yarn", encoding = "UTF-8", # converts the document to markdown and separates the output into lines md_lines = function(path = NULL, stylesheet = NULL) { - if (is.null(stylesheet)) { - md <- to_md(self, path) - } else { - md <- to_md(self, path, stylesheet) - } - if (!is.null(path) && !is.null(stylesheet)) { - return(md) - } - # Make sure that the yaml is not sitting on top of the first markdown line - if (length(md) == 2) { - md[1] <- paste0(md[1], "\n") - } - f <- textConnection(md) - on.exit(close(f)) - readLines(f) + print_lines(self, path = path, stylesheet = stylesheet) } ) ) diff --git a/R/show.R b/R/show.R new file mode 100644 index 0000000..d79f2a2 --- /dev/null +++ b/R/show.R @@ -0,0 +1,158 @@ +#' Display a node or nodelist as markdown +#' +#' When inspecting the results of an XPath query, displaying the text often +#' @param nodelist an object of class `xml_nodelist` OR `xml_node` OR a list of +#' either. +#' @return a character vector, displayed to the screen. +#' @examples +#' path <- system.file("extdata", "example1.md", package = "tinkr") +#' y <- tinkr::yarn$new(path, sourcepos = TRUE) +#' items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) +#' links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) +#' code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns()) +#' blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) +#' +#' # show a list of items +#' show_list(links[1:10]) +#' show_list(code[1:10]) +#' show_list(blocks[1:2]) +#' +#' # show the items in the structure of the document +#' show_bare(items) +#' show_bare(links) +#' +#' # show the items with context markers ([...]) in the structure of the document +#' show_context(links[20:31]) +#' show_context(code[1:10]) +#' @rdname show +#' @export +show_list <- function(nodelist) { + res <- isolate_nodes(nodelist, type = "list") + return(show_user(print_lines(res$doc))) +} + +#' @rdname show +#' @export +show_bare <- function(nodelist) { + res <- isolate_nodes(nodelist) + return(show_user(print_lines(res$doc))) +} + +#' @rdname show +#' @export +show_context <- function(nodelist) { + res <- isolate_nodes(nodelist) + res <- add_isolation_context(nodelist, res) + return(show_user(print_lines(res$doc))) +} + +isolate_nodes <- function(nodelist, type = "context") { + switch(type, + "context" = isolate_nodes_in_context(nodelist), + "list" = isolate_nodes_a_la_carte(nodelist), + ) +} + +isolate_nodes_a_la_carte <- function(nodelist) { + doc <- xml2::read_xml(commonmark::markdown_xml("")) + # if we get a single node, make sure it's in a list + if (inherits(nodelist, "xml_node")) { + nodelist <- list(nodelist) + } + for (node in nodelist) { + parent <- xml2::xml_add_child(doc, "paragraph") + if (inherits(node, "xml_node")) { + xml2::xml_add_child(parent, node) + } else { + purrr::walk(node, function(n) xml2::xml_add_child(parent, n)) + } + } + return(list(doc = doc, key = NULL)) +} + +provision_isolation <- function(nodelist) { + # create a copy of our document + doc <- copy_xml(xml2::xml_root(nodelist)) + # get the path to the currently labelled nodes so we can isolate them + # in the copy + # This will return one path statement per node + path <- xml2::xml_path(nodelist) + # label new nodes with unique timestamp + tim <- as.character(as.integer(Sys.time())) + purrr::walk(path, label_nodes, doc = doc, label = tim) + + # find the unlabelled nodes + predicate <- sprintf("@label=%s", tim) + xpth <- sprintf("not(descendant::*[%s]) and not(ancestor::*[%s]) and not(%s)", + predicate, predicate, predicate + ) + rents <- xml2::xml_find_all(doc, sprintf("//node()[%s]", xpth)) + return(list(doc = doc, key = tim, parents = rents)) + +} + +isolate_nodes_in_context <- function(nodelist) { + res <- provision_isolation(nodelist) + xml2::xml_remove(res$parents) + return(list(doc = res$doc, key = res$key)) +} + +add_isolation_context <- function(nodelist, isolated) { + pretext <- xml2::xml_find_lgl(nodelist, + "boolean(count(preceding-sibling::*)!=0)" + ) + postext <- xml2::xml_find_lgl(nodelist, + "boolean(count(following-sibling::*)!=0)" + ) + xpath <- sprintf(".//node()[@label=%s]", isolated$key) + labelled <- xml2::xml_find_all(isolated$doc, xpath) + purrr::walk(labelled[pretext], function(node) { + xml2::xml_add_sibling(node, .where = "before", + "text", "[...] ", asis = "true" + ) + }) + purrr::walk(labelled[postext], function(node) { + xml2::xml_add_sibling(node, .where = "after", + "text", " [...]", asis = "true" + ) + }) + return(isolated) +} + +print_lines <- function(xml, path = NULL, stylesheet = NULL) { + if (inherits(xml, "xml_document")) { + xml <- list(yaml = "", body = xml) + } + if (is.null(stylesheet)) { + md <- to_md(xml, path) + } else { + md <- to_md(xml, path, stylesheet) + } + if (!is.null(path) && !is.null(stylesheet)) { + return(md) + } + # Make sure that the yaml is not sitting on top of the first markdown line + if (length(md) == 2) { + md[1] <- paste0(md[1], "\n") + } + f <- textConnection(md) + on.exit(close(f)) + readLines(f) +} + +label_nodes <- function(xpath, doc, label = "save") { + xml2::xml_set_attr( + xml2::xml_find_all(doc, xpath, ns = md_ns()), + "label", label) +} + +add_context_siblings <- function(node, where = "after") { + xml2::xml_add_sibling(node, .where = "after", + "text", " [...] ", asis = "true" + ) + xml2::xml_add_sibling(node, .where = "before", + "text", "[...] ", asis = "true" + ) +} + + diff --git a/R/to_md.R b/R/to_md.R index 89c0f24..473dc1e 100644 --- a/R/to_md.R +++ b/R/to_md.R @@ -35,7 +35,7 @@ #' to_md(yaml_xml_list, newmd) #' # file.edit("newmd.md") #' file.remove(newmd) -#' +#' to_md <- function(yaml_xml_list, path = NULL, stylesheet_path = stylesheet()){ # duplicate document to avoid overwriting @@ -128,131 +128,3 @@ to_info <- function(code_block){ xml2::xml_set_attr(code_block, "info", info) } -#' @examples -#' path <- system.file("extdata", "example1.md", package = "tinkr") -#' y <- tinkr::yarn$new(path) -#' items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) -#' links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) -#' md_fragment(items) -#' md_fragment(links) -md_fragment <- function(nodelist) { - parents <- purrr::map(nodelist, xml2::xml_parent) - donor <- xml2::read_xml(commonmark::markdown_xml("")) - gparents <- add_children_to_donor(donor, parents, nodelist) - add_grandparents(donor, gparents) - return(copy_xml(donor)) -} - -copy_and_isolate <- function(nodelist) { - - doc <- copy_xml(xml2::xml_root(nodelist)) - path <- xml2::xml_path(nodelist) - tim <- as.character(as.integer(Sys.time())) - purrr::walk(path, label_nodes, doc = doc, label = tim) - xpath <- paste0(".//node()[@label='",tim,"']") - labelled <- xml2::xml_find_all(doc, xpath) - purrr::walk(labelled, isolate_labelled) - return(doc) -} - -label_nodes <- function(xpath, doc, label = "save") { - xml2::xml_set_attr( - xml2::xml_find_all(doc, xpath, ns = md_ns()), - "label", label) -} -isolate_labelled <- function(node) { - sibs <- xml2::xml_siblings(node) - lab <- xml2::xml_attr(node, "label") - nolab <- xml2::xml_attr(sibs, "label") != lab - if (any(nolab)) { - xml2::xml_remove(sibs[nolab]) - } -} - - -add_children_to_donor <- function(donor, parents, children) { - old_parent <- NULL - new_parent <- NULL - gparents <- list() - # loop over all the parents - for (i in seq(parents)) { - # when we have not encountered the current parent - if (!identical(old_parent, parents[[i]])) { - # set it to the old_parent - old_parent <- parents[[i]] - # record its grandparent - grandparent <- xml2::xml_parent(old_parent) - if (is_root(grandparent)) { - grandparent <- NULL - } - # add grandparent to the list - gparents <- c(gparents, list(grandparent)) - # insert the old parent as a child of the donor document, - # returning the new node - new_parent <- insert_child(old_parent, donor) - } - # if the new_parent contains a parent, then add the child to the parent - if (!is.null(new_parent)) { - xml2::xml_add_child(new_parent, children[[i]]) - } - } - # return the grandparents for further processing - names(gparents) <- seq_along(gparents) - return(gparents) -} - -add_grandparents <- function(donor, grandparents) { - if (all(vapply(grandparents, is.null, logical(1)))) { - return(donor) - } - children <- xml2::xml_children(donor) - ggparents <- grandparents - old_grandparent <- NULL - this_parent <- NULL - for (i in seq(children)) { - if (is.null(grandparents[[i]]) || is_root(grandparents[[i]])) { - next - } - # when the grandparents are identical, then we need add the current parent - if (identical(old_grandparent, grandparents[[i]])) { - xml2::xml_add_parent(children[[i]], this_parent) - } else { - # otherwise, we need to insert a new parent - insert_parent(grandparents[[i]], children[[i]]) - this_parent <- xml2::xml_parent(children[[i]]) - # because we've recorded a new parent, we need to move up and - # capture the - old_grandparent <- grandparents[[i]] - ggp <- xml2::xml_parent(grandparents[[i]]) - if (is_root(ggp)) { - ggp <- NULL - } - ggparents[[i]] <- ggp - } - } - return(add_grandparents(donor, ggparents)) -} - -insert_child <- function(from, to) { - insert_node(from, to, type = "child") -} - -insert_parent <- function(from, to) { - insert_node(from, to, type = "parent") -} - -insert_node <- function(from, to, type = "child") { - if (type == "child") { - this_parent <- xml2::xml_add_child(to, xml2::xml_name(from)) - } else { - this_parent <- xml2::xml_add_parent(to, xml2::xml_name(from)) - } - purrr::imap(xml2::xml_attrs(from), - function(x, i) xml2::xml_set_attr(this_parent, i, x) - ) - return(this_parent) -} - -is_root <- function(node) { - xml2::xml_name(node) == "document" -} diff --git a/man/protect_unescaped.Rd b/man/protect_unescaped.Rd index 6d31652..44a9de4 100644 --- a/man/protect_unescaped.Rd +++ b/man/protect_unescaped.Rd @@ -79,7 +79,7 @@ md <- yarn$new(f, sourcepos = TRUE, unescaped = FALSE) md$show() if (requireNamespace("withr")) { lines <- readLines(f)[-length(md$yaml)] -lnks <- withr::with_namespace("tinkr", +lnks <- withr::with_namespace("tinkr", protect_unescaped(body = md$body, txt = lines)) md$body <- lnks md$show() diff --git a/man/show.Rd b/man/show.Rd new file mode 100644 index 0000000..a58f511 --- /dev/null +++ b/man/show.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/show.R +\name{show_list} +\alias{show_list} +\alias{show_bare} +\alias{show_context} +\title{Display a node or nodelist as markdown} +\usage{ +show_list(nodelist) + +show_bare(nodelist) + +show_context(nodelist) +} +\arguments{ +\item{nodelist}{an object of class \code{xml_nodelist} OR \code{xml_node} OR a list of +either.} +} +\value{ +a character vector, displayed to the screen. +} +\description{ +When inspecting the results of an XPath query, displaying the text often +} +\examples{ +path <- system.file("extdata", "example1.md", package = "tinkr") +y <- tinkr::yarn$new(path, sourcepos = TRUE) +items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) +links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) +code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns()) +blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) + +# show a list of items +show_list(links[1:10]) +show_list(code[1:10]) +show_list(blocks[1:2]) + +# show the items in the structure of the document +show_bare(items) +show_bare(links) + +# show the items with context markers ([...]) in the structure of the document +show_context(links[20:31]) +show_context(code[1:10]) +} diff --git a/man/yarn.Rd b/man/yarn.Rd index c872c3b..e56eefb 100644 --- a/man/yarn.Rd +++ b/man/yarn.Rd @@ -251,12 +251,15 @@ unlink(tmp) \subsection{Method \code{show()}}{ show the markdown contents on the screen \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{yarn$show(stylesheet_path = stylesheet())}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{yarn$show(lines = TRUE, stylesheet_path = stylesheet())}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ +\item{\code{lines}}{a subset of elements to show. Defaults to \code{TRUE}, which +shows all lines of the output. This can be either logical or numeric.} + \item{\code{stylesheet_path}}{path to the xsl stylesheet to convert XML to markdown.} } \if{html}{\out{
}} diff --git a/tests/testthat/_snaps/show.md b/tests/testthat/_snaps/show.md new file mode 100644 index 0000000..24c47ad --- /dev/null +++ b/tests/testthat/_snaps/show.md @@ -0,0 +1,311 @@ +# show_list() will isolate elements + + Code + show_user(show_list(links), force = TRUE) + Output + + + [second post of the series where we obtained data from + eBird](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) + + [the fourth post of the + series](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) + + [previous post + of the series](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) + + [(`glue::glue_collapse(species, sep = ", ", last = " and ")`)](https://twitter.com/LucyStats/status/1031938964796657665?s=19) + + [`taxize`](https://github.com/ropensci/taxize) + + [`spocc`](https://github.com/ropensci/spocc) + + [`fulltext`](https://github.com/ropensci/fulltext) + + ["Investigating the impact of media on demand for wildlife: A case + study of Harry Potter and the UK trade in + owls"](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0182368) + + [`cites`](https://github.com/ecohealthalliance/cites/) + + [`rcites`](https://ibartomeus.github.io/rcites/) + + [`wordcloud` + package](https://cran.r-project.org/web/packages/wordcloud/index.html) + + [`wordcloud2` + package](https://github.com/Lchiffon/wordcloud2) + + [from + Phylopic](http://phylopic.org/image/6209c9be-060e-4d7f-bc74-a75f3ccf4629/) + + [DataONE](https://www.dataone.org/) + + [searching + DataONE + vignette](https://github.com/DataONEorg/rdataone/blob/master/vignettes/searching-dataone.Rmd) + + [download data + vignette](https://github.com/DataONEorg/rdataone/blob/master/vignettes/download-data.Rmd) + + [`europepmc`](https://github.com/ropensci/europepmc) + + [`jstor`](https://github.com/ropensci/jstor) + + [`suppdata`](https://github.com/ropensci/suppdata) + + [much + more](https://ropensci.org/packages/) + + [`dataone` + package](https://github.com/DataONEorg/rdataone) + + [`rfigshare`](https://github.com/ropensci/rfigshare) + + [Figshare](https://figshare.com/) + + [`EML` package](https://github.com/ropensci/EML) + + [unconf + `dataspice` project](https://github.com/ropenscilabs/dataspice) + + [here](https://ropensci.org/packages/) + + [How to identify spots for birding using open geographical + data](https://ropensci.org/blog/2018/08/14/where-to-bird/) + + [How to obtain bird occurrence data in + R](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) + + [How to extract text from old natural history + drawings](https://ropensci.org/blog/2018/08/28/birds-ocr/) + + [How to complement an occurrence dataset with taxonomy and trait + information](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) + + [our friendly discussion + forum](https://discuss.ropensci.org/c/usecases) + + +--- + + Code + show_user(show_list(code[1:10]), force = TRUE) + Output + + + `glue::glue_collapse(species, sep = ", ", last = " and ")` + + `taxize` + + `spocc` + + `fulltext` + + `fulltext` + + `tidytext` + + `dplyr::bind_rows` + + `fulltext` + + `cites` + + `rcites` + + +--- + + Code + show_user(show_list(blocks[1:2]), force = TRUE) + Output + + + ```r + # polygon for filtering + landkreis_konstanz <- osmdata::getbb("Landkreis Konstanz", + format_out = "sf_polygon") + crs <- sf::st_crs(landkreis_konstanz) + + # get and filter data + f_out_ebd <- "ebird/ebd_lk_konstanz.txt" + + library("magrittr") + + ebd <- auk::read_ebd(f_out_ebd) %>% + sf::st_as_sf(coords = c("longitude", "latitude"), + crs = crs) + + in_indices <- sf::st_within(ebd, landkreis_konstanz) + + ebd <- dplyr::filter(ebd, lengths(in_indices) > 0) + + ebd <- as.data.frame(ebd) + + ebd <- dplyr::filter(ebd, approved, lubridate::year(observation_date) > 2010) + ``` + + + ```r + species <- ebd %>% + dplyr::count(common_name, sort = TRUE) %>% + head(n = 50) %>% + dplyr::pull(common_name) + ``` + + + +# show context will provide context for the elements + + Code + show_user(show_bare(items), force = TRUE) + Output + + + - study the results of such queries (e.g. meta studies of number of, + say, versions by datasets) + + - or find data to integrate to a new study. If you want to *download* + data from DataONE, refer to the [download data + vignette](https://github.com/DataONEorg/rdataone/blob/master/vignettes/download-data.Rmd). + + - [How to identify spots for birding using open geographical + data](https://ropensci.org/blog/2018/08/14/where-to-bird/). + Featuring `opencage` for geocoding, `bbox` for bounding box + creation, `osmdata` for OpenStreetMap's Overpass API querying, + `osmplotr` for map drawing using OpenStreetMap's data. + + - [How to obtain bird occurrence data in + R](https://ropensci.org/blog/2018/08/21/birds-radolfzell/). + Featuring `rebird` for interaction with the eBird's API, and `auk` + for munging of the whole eBird dataset. + + - [How to extract text from old natural history + drawings](https://ropensci.org/blog/2018/08/28/birds-ocr/). + Featuring `magick` for image manipulation, `tesseract` for Optical + Character Recognition, `cld2` and `cld3` for language detection, and + `taxize::gnr_resolve` for taxonomic name resolution. + + - [How to complement an occurrence dataset with taxonomy and trait + information](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/). + Featuring `taxize`, taxonomic toolbelt for R, and `traits`, + providing access to species traits data. + + - How to query the scientific literature and scientific open data + repositories. This is the post you've just read! + + +--- + + Code + show_user(show_bare(links), force = TRUE) + Output + + + [second post of the series where we obtained data from + eBird](https://ropensci.org/blog/2018/08/21/birds-radolfzell/)[the fourth post of the + series](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) + + [previous post + of the series](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) + + [(`glue::glue_collapse(species, sep = ", ", last = " and ")`)](https://twitter.com/LucyStats/status/1031938964796657665?s=19) + + [`taxize`](https://github.com/ropensci/taxize)[`spocc`](https://github.com/ropensci/spocc)[`fulltext`](https://github.com/ropensci/fulltext) + + ["Investigating the impact of media on demand for wildlife: A case + study of Harry Potter and the UK trade in + owls"](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0182368)[`cites`](https://github.com/ecohealthalliance/cites/)[`rcites`](https://ibartomeus.github.io/rcites/) + + [`wordcloud` + package](https://cran.r-project.org/web/packages/wordcloud/index.html) + + [`wordcloud2` + package](https://github.com/Lchiffon/wordcloud2)[from + Phylopic](http://phylopic.org/image/6209c9be-060e-4d7f-bc74-a75f3ccf4629/) + + [DataONE](https://www.dataone.org/) + + [searching + DataONE + vignette](https://github.com/DataONEorg/rdataone/blob/master/vignettes/searching-dataone.Rmd) + + - [download data + vignette](https://github.com/DataONEorg/rdataone/blob/master/vignettes/download-data.Rmd) + + [`europepmc`](https://github.com/ropensci/europepmc)[`jstor`](https://github.com/ropensci/jstor)[`suppdata`](https://github.com/ropensci/suppdata)[much + more](https://ropensci.org/packages/) + + [`dataone` + package](https://github.com/DataONEorg/rdataone)[`rfigshare`](https://github.com/ropensci/rfigshare)[Figshare](https://figshare.com/)[`EML` package](https://github.com/ropensci/EML)[unconf + `dataspice` project](https://github.com/ropenscilabs/dataspice) + + [here](https://ropensci.org/packages/) + + - [How to identify spots for birding using open geographical + data](https://ropensci.org/blog/2018/08/14/where-to-bird/) + + - [How to obtain bird occurrence data in + R](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) + + - [How to extract text from old natural history + drawings](https://ropensci.org/blog/2018/08/28/birds-ocr/) + + - [How to complement an occurrence dataset with taxonomy and trait + information](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) + + [our friendly discussion + forum](https://discuss.ropensci.org/c/usecases) + + +--- + + Code + show_user(show_context(links[20:31]), force = TRUE) + Output + + + [...] [much + more](https://ropensci.org/packages/) [...] + + [...] [`dataone` + package](https://github.com/DataONEorg/rdataone) [...][...] [`rfigshare`](https://github.com/ropensci/rfigshare) [...][...] [Figshare](https://figshare.com/) [...][...] [`EML` package](https://github.com/ropensci/EML) [...][...] [unconf + `dataspice` project](https://github.com/ropenscilabs/dataspice) [...] + + [...] [here](https://ropensci.org/packages/) [...] + + - [How to identify spots for birding using open geographical + data](https://ropensci.org/blog/2018/08/14/where-to-bird/) [...] + + - [How to obtain bird occurrence data in + R](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) [...] + + - [How to extract text from old natural history + drawings](https://ropensci.org/blog/2018/08/28/birds-ocr/) [...] + + - [How to complement an occurrence dataset with taxonomy and trait + information](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) [...] + + [...] [our friendly discussion + forum](https://discuss.ropensci.org/c/usecases) [...] + + +--- + + Code + show_user(show_context(code[1:10]), force = TRUE) + Output + + + [[...] `glue::glue_collapse(species, sep = ", ", last = " and ")` [...]](https://twitter.com/LucyStats/status/1031938964796657665?s=19) + + [`taxize`](https://github.com/ropensci/taxize)[`spocc`](https://github.com/ropensci/spocc)[`fulltext`](https://github.com/ropensci/fulltext) + + [...] `fulltext` [...][...] `tidytext` [...] + + [...] `dplyr::bind_rows` [...][...] `fulltext` [...] + + [`cites`](https://github.com/ecohealthalliance/cites/)[`rcites`](https://ibartomeus.github.io/rcites/) + + diff --git a/tests/testthat/test-show.R b/tests/testthat/test-show.R new file mode 100644 index 0000000..9e85a20 --- /dev/null +++ b/tests/testthat/test-show.R @@ -0,0 +1,33 @@ +test_that("show_list() will isolate elements", { + + path <- system.file("extdata", "example1.md", package = "tinkr") + y <- tinkr::yarn$new(path, sourcepos = TRUE) + items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) + links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) + code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns()) + blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) + # show a list of items + expect_snapshot(show_user(show_list(links), force = TRUE)) + expect_snapshot(show_user(show_list(code[1:10]), force = TRUE)) + expect_snapshot(show_user(show_list(blocks[1:2]), force = TRUE)) + +}) + + +test_that("show context will provide context for the elements", { + path <- system.file("extdata", "example1.md", package = "tinkr") + y <- tinkr::yarn$new(path, sourcepos = TRUE) + items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) + links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) + code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns()) + blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) + + # show the items in the structure of the document + expect_snapshot(show_user(show_bare(items), force = TRUE)) + expect_snapshot(show_user(show_bare(links), force = TRUE)) + # show the items with context markers ([...]) in the structure of the document + expect_snapshot(show_user(show_context(links[20:31]), force = TRUE)) + expect_snapshot(show_user(show_context(code[1:10]), force = TRUE)) + +}) + From da21d6a751d54e2db00075f2b23f057d106cf7cd Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Wed, 8 May 2024 16:06:45 -0700 Subject: [PATCH 03/25] add show_censor() --- NAMESPACE | 1 + NEWS.md | 6 +-- R/show.R | 63 +++++++++++++++++++++++----- man/show.Rd | 11 ++++- tests/testthat/_snaps/show.md | 78 +++++++++++++++++++++++++++++++++++ tests/testthat/test-show.R | 28 +++++++++++++ 6 files changed, 172 insertions(+), 15 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 871ac17..5cdb8af 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(md_ns) export(protect_curly) export(protect_math) export(show_bare) +export(show_censor) export(show_context) export(show_list) export(stylesheet) diff --git a/NEWS.md b/NEWS.md index 0b55cf6..fdbf7f0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,9 +2,9 @@ ## NEW FEATURES -* `show_list()`, `show_bare()`, and `show_context()` will show the markdown - content of a node, nodelist, or list of nodes without needing to print the - entire document. +* `show_list()`, `show_bare()`, `show_context()`, and `show_censor()` will show + the markdown content of a node, nodelist, or list of nodes without needing to + print the entire document. * `yarn$show()` method now gains the `lines` parameter, which allows you to subset the output by the lines of text. diff --git a/R/show.R b/R/show.R index d79f2a2..1e5dc17 100644 --- a/R/show.R +++ b/R/show.R @@ -17,13 +17,19 @@ #' show_list(code[1:10]) #' show_list(blocks[1:2]) #' -#' # show the items in the structure of the document +#' # show the items in their local structure #' show_bare(items) #' show_bare(links) #' #' # show the items with context markers ([...]) in the structure of the document #' show_context(links[20:31]) #' show_context(code[1:10]) +#' +#' # show the items in the full document censored: +#' show_censor(links) +#' # you can set the mark to censor by using the `tinkr.censor option` +#' option(tinkr.censor = ".") +#' show_censor(links) #' @rdname show #' @export show_list <- function(nodelist) { @@ -34,20 +40,30 @@ show_list <- function(nodelist) { #' @rdname show #' @export show_bare <- function(nodelist) { - res <- isolate_nodes(nodelist) + res <- isolate_nodes(nodelist, type = "context") return(show_user(print_lines(res$doc))) } #' @rdname show #' @export show_context <- function(nodelist) { - res <- isolate_nodes(nodelist) + res <- isolate_nodes(nodelist, type = "context") res <- add_isolation_context(nodelist, res) return(show_user(print_lines(res$doc))) } +#' @rdname show +#' @export +show_censor <- function(nodelist) { + res <- isolate_nodes(nodelist, type = "censor") + return(show_user(print_lines(res$doc))) +} + + + isolate_nodes <- function(nodelist, type = "context") { switch(type, + "censor" = isolate_nodes_censor(nodelist), "context" = isolate_nodes_in_context(nodelist), "list" = isolate_nodes_a_la_carte(nodelist), ) @@ -70,6 +86,22 @@ isolate_nodes_a_la_carte <- function(nodelist) { return(list(doc = doc, key = NULL)) } + +isolate_nodes_in_context <- function(nodelist) { + res <- provision_isolation(nodelist) + xml2::xml_remove(res$parents) + return(list(doc = res$doc, key = res$key)) +} + +isolate_nodes_censor <- function(nodelist) { + res <- provision_isolation(nodelist) + censor_attr(res$parents, "destination") + censor_attr(res$parents, "title") + txt <- xml2::xml_find_all(res$parents, ".//text()") + xml2::xml_set_text(txt, censor(xml2::xml_text(txt))) + return(list(doc = res$doc, key = res$key)) +} + provision_isolation <- function(nodelist) { # create a copy of our document doc <- copy_xml(xml2::xml_root(nodelist)) @@ -91,18 +123,13 @@ provision_isolation <- function(nodelist) { } -isolate_nodes_in_context <- function(nodelist) { - res <- provision_isolation(nodelist) - xml2::xml_remove(res$parents) - return(list(doc = res$doc, key = res$key)) -} - add_isolation_context <- function(nodelist, isolated) { + sib <- sprintf("sibling::*[1][not(@label=%s)]", isolated$key) pretext <- xml2::xml_find_lgl(nodelist, - "boolean(count(preceding-sibling::*)!=0)" + sprintf("boolean(count(preceding-%s)!=0)", sib) ) postext <- xml2::xml_find_lgl(nodelist, - "boolean(count(following-sibling::*)!=0)" + sprintf("boolean(count(following-%s)!=0)", sib) ) xpath <- sprintf(".//node()[@label=%s]", isolated$key) labelled <- xml2::xml_find_all(isolated$doc, xpath) @@ -119,6 +146,20 @@ add_isolation_context <- function(nodelist, isolated) { return(isolated) } + +censor_attr <- function(nodes, attr) { + attrs <- xml2::xml_attr(nodes, attr) + nomiss <- !is.na(attrs) + xml2::xml_set_attr(nodes[nomiss], attr, + censor(attrs[nomiss]) + ) +} + +censor <- function(x) { + item <- getOption("tinkr.censor", default = "\u2587") + gsub("[^[:space:]]", item, x, perl = TRUE) +} + print_lines <- function(xml, path = NULL, stylesheet = NULL) { if (inherits(xml, "xml_document")) { xml <- list(yaml = "", body = xml) diff --git a/man/show.Rd b/man/show.Rd index a58f511..6605b42 100644 --- a/man/show.Rd +++ b/man/show.Rd @@ -4,6 +4,7 @@ \alias{show_list} \alias{show_bare} \alias{show_context} +\alias{show_censor} \title{Display a node or nodelist as markdown} \usage{ show_list(nodelist) @@ -11,6 +12,8 @@ show_list(nodelist) show_bare(nodelist) show_context(nodelist) + +show_censor(nodelist) } \arguments{ \item{nodelist}{an object of class \code{xml_nodelist} OR \code{xml_node} OR a list of @@ -35,11 +38,17 @@ show_list(links[1:10]) show_list(code[1:10]) show_list(blocks[1:2]) -# show the items in the structure of the document +# show the items in their local structure show_bare(items) show_bare(links) # show the items with context markers ([...]) in the structure of the document show_context(links[20:31]) show_context(code[1:10]) + +# show the items in the full document censored: +show_censor(links) +# you can set the mark to censor by using the `tinkr.censor option` +option(tinkr.censor = ".") +show_censor(links) } diff --git a/tests/testthat/_snaps/show.md b/tests/testthat/_snaps/show.md index 24c47ad..64d733f 100644 --- a/tests/testthat/_snaps/show.md +++ b/tests/testthat/_snaps/show.md @@ -156,6 +156,84 @@ +# show_censor() will censor elements + + Code + show_user(lnks[1:10], force = TRUE) + Output + + + ▇▇ ▇▇▇ [second post of the series where we obtained data from + eBird](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) ▇▇ + ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇▇▇ ▇▇ ▇▇▇ ▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇ + ▇▇▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇▇ ▇▇ + [the fourth post of the + series](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/)▇ ▇▇▇▇ + ▇▇ ▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇ ▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇ ▇▇▇▇▇ ▇▇ *▇▇▇▇▇▇▇▇▇▇ + ▇▇▇▇*▇ ▇▇ ▇▇▇▇ ▇▇▇▇▇ ▇▇ ▇▇▇▇ ▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇ ▇▇▇▇ + +--- + + Code + show_user(tail(cd, 20), force = TRUE) + Output + + - [▇▇▇ ▇▇ ▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇ + ▇▇▇▇▇▇▇▇](▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇)▇ + ▇▇▇▇▇▇▇▇▇ `magick` ▇▇▇ ▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇▇▇▇ `tesseract` ▇▇▇ ▇▇▇▇▇▇▇ + ▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇▇▇ `cld2` ▇▇▇ `cld3` ▇▇▇ ▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇ + `taxize::gnr_resolve` ▇▇▇ ▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇▇ + + - [▇▇▇ ▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇ + ▇▇▇▇▇▇▇▇▇▇▇](▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇)▇ + ▇▇▇▇▇▇▇▇▇ `taxize`▇ ▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇ ▇▇▇ `traits`▇ + ▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇ ▇▇▇▇▇▇ ▇▇▇▇▇ + + - ▇▇▇ ▇▇ ▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇ + ▇▇▇▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇ ▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇ + + ▇▇▇▇▇▇ ▇ ▇▇▇▇▇ ▇▇▇ ▇▇▇▇ ▇▇▇▇▇ *▇▇▇* ▇▇▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇▇ + ▇▇▇▇▇ ▇▇▇ ▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇ ▇▇▇ ▇▇ ▇▇▇▇▇ ▇▇▇▇▇ ▇▇▇▇ ▇▇▇ ▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇▇ + ▇▇▇▇▇▇▇▇ ▇▇ ▇ ▇▇▇▇▇▇ ▇▇ ▇▇▇ ▇▇▇ [▇▇▇ ▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇ + ▇▇▇▇▇](▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇)▇ ▇▇▇▇▇ ▇▇▇▇▇▇▇▇ + + +--- + + Code + show_user(blks[19:48], force = TRUE) + Output + ... ...... .......... .. ... ...... . .... .. .... .... .... ... ... + .... ...... ...... + + ```r + # polygon for filtering + landkreis_konstanz <- osmdata::getbb("Landkreis Konstanz", + format_out = "sf_polygon") + crs <- sf::st_crs(landkreis_konstanz) + + # get and filter data + f_out_ebd <- "ebird/ebd_lk_konstanz.txt" + + library("magrittr") + + ebd <- auk::read_ebd(f_out_ebd) %>% + sf::st_as_sf(coords = c("longitude", "latitude"), + crs = crs) + + in_indices <- sf::st_within(ebd, landkreis_konstanz) + + ebd <- dplyr::filter(ebd, lengths(in_indices) > 0) + + ebd <- as.data.frame(ebd) + + ebd <- dplyr::filter(ebd, approved, lubridate::year(observation_date) > 2010) + ``` + + ... ... .... .. ........... .. ..... .... ... ... .. ....... ........ + ... .... ...... + + # show context will provide context for the elements Code diff --git a/tests/testthat/test-show.R b/tests/testthat/test-show.R index 9e85a20..31a7f37 100644 --- a/tests/testthat/test-show.R +++ b/tests/testthat/test-show.R @@ -14,6 +14,34 @@ test_that("show_list() will isolate elements", { }) +test_that("show_censor() will censor elements", { + path <- system.file("extdata", "example1.md", package = "tinkr") + y <- tinkr::yarn$new(path, sourcepos = TRUE) + items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) + links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) + code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns()) + blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) + # give us the original for comparison + orig <- y$show() + n <- length(orig) - length(y$yaml) + 1 + lnks <- show_censor(links) + cd <- show_censor(code) + # the censor option can be adjusted + withr::local_options(list(tinkr.censor = ".")) + blks <- show_censor(blocks) + + # the length of the documents are identical + expect_length(lnks, n) + expect_length(cd, n) + expect_length(blks, n) + + expect_snapshot(show_user(lnks[1:10], force = TRUE)) + expect_snapshot(show_user(tail(cd, 20), force = TRUE)) + expect_snapshot(show_user(blks[19:48], force = TRUE)) +}) + + + test_that("show context will provide context for the elements", { path <- system.file("extdata", "example1.md", package = "tinkr") y <- tinkr::yarn$new(path, sourcepos = TRUE) From e1723dd31abe1039d78d3a7329ae59ac7affe671 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Wed, 8 May 2024 16:08:38 -0700 Subject: [PATCH 04/25] update documentation --- R/show.R | 2 +- man/show.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/show.R b/R/show.R index 1e5dc17..18459bf 100644 --- a/R/show.R +++ b/R/show.R @@ -28,7 +28,7 @@ #' # show the items in the full document censored: #' show_censor(links) #' # you can set the mark to censor by using the `tinkr.censor option` -#' option(tinkr.censor = ".") +#' options(tinkr.censor = ".") #' show_censor(links) #' @rdname show #' @export diff --git a/man/show.Rd b/man/show.Rd index 6605b42..654cc31 100644 --- a/man/show.Rd +++ b/man/show.Rd @@ -49,6 +49,6 @@ show_context(code[1:10]) # show the items in the full document censored: show_censor(links) # you can set the mark to censor by using the `tinkr.censor option` -option(tinkr.censor = ".") +options(tinkr.censor = ".") show_censor(links) } From 311de29f334657e7e2fddc4d2e6816c64e7e7d4b Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Wed, 8 May 2024 16:21:13 -0700 Subject: [PATCH 05/25] fix tests for windows I forgot that old windows don't like the UTF-8 stuff --- tests/testthat/_snaps/show.md | 44 +++++++++++++++++------------------ tests/testthat/test-show.R | 4 ++-- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/testthat/_snaps/show.md b/tests/testthat/_snaps/show.md index 64d733f..1392ced 100644 --- a/tests/testthat/_snaps/show.md +++ b/tests/testthat/_snaps/show.md @@ -163,14 +163,14 @@ Output - ▇▇ ▇▇▇ [second post of the series where we obtained data from - eBird](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) ▇▇ - ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇▇▇ ▇▇ ▇▇▇ ▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇ - ▇▇▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇▇ ▇▇ + .. ... [second post of the series where we obtained data from + eBird](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) .. + .......... .... ..... .... ........ .. ... ...... .. .......... ... .. + ............ .... ......... .... .... ......... ... ..... ........... .. [the fourth post of the - series](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/)▇ ▇▇▇▇ - ▇▇ ▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇ ▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇ ▇▇▇▇▇ ▇▇ *▇▇▇▇▇▇▇▇▇▇ - ▇▇▇▇*▇ ▇▇ ▇▇▇▇ ▇▇▇▇▇ ▇▇ ▇▇▇▇ ▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇ ▇▇▇▇ + series](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/). .... + .. ..... .. ....... ..... ... .......... .. ..... ..... .. *.......... + ....*. .. .... ..... .. .... ..... ... .......... .......... ... .. .... --- @@ -178,24 +178,24 @@ show_user(tail(cd, 20), force = TRUE) Output - - [▇▇▇ ▇▇ ▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇ - ▇▇▇▇▇▇▇▇](▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇)▇ - ▇▇▇▇▇▇▇▇▇ `magick` ▇▇▇ ▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇▇▇▇ `tesseract` ▇▇▇ ▇▇▇▇▇▇▇ - ▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇▇▇ `cld2` ▇▇▇ `cld3` ▇▇▇ ▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇ - `taxize::gnr_resolve` ▇▇▇ ▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇▇ + - [... .. ....... .... .... ... ....... ....... + ........](...............................................). + ......... `magick` ... ..... ............. `tesseract` ... ....... + ......... ............ `cld2` ... `cld3` ... ........ .......... ... + `taxize::gnr_resolve` ... ......... .... ........... - - [▇▇▇ ▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇ - ▇▇▇▇▇▇▇▇▇▇▇](▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇)▇ - ▇▇▇▇▇▇▇▇▇ `taxize`▇ ▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇ ▇▇▇ `traits`▇ - ▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇ ▇▇▇▇▇▇ ▇▇▇▇▇ + - [... .. .......... .. .......... ....... .... ........ ... ..... + ...........](.......................................................). + ......... `taxize`. ......... ........ ... .. ... `traits`. + ......... ...... .. ....... ...... ..... - - ▇▇▇ ▇▇ ▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇ - ▇▇▇▇▇▇▇▇▇▇▇▇▇ ▇▇▇▇ ▇▇ ▇▇▇ ▇▇▇▇ ▇▇▇▇▇▇ ▇▇▇▇ ▇▇▇▇▇ + - ... .. ..... ... .......... .......... ... .......... .... .... + ............. .... .. ... .... ...... .... ..... - ▇▇▇▇▇▇ ▇ ▇▇▇▇▇ ▇▇▇ ▇▇▇▇ ▇▇▇▇▇ *▇▇▇* ▇▇▇▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇▇▇ - ▇▇▇▇▇ ▇▇▇ ▇▇▇▇ ▇▇▇ ▇▇▇▇▇▇ ▇▇▇ ▇▇ ▇▇▇▇▇ ▇▇▇▇▇ ▇▇▇▇ ▇▇▇ ▇▇▇▇▇ ▇▇ ▇▇▇▇▇▇▇▇ - ▇▇▇▇▇▇▇▇ ▇▇ ▇ ▇▇▇▇▇▇ ▇▇ ▇▇▇ ▇▇▇ [▇▇▇ ▇▇▇▇▇▇▇▇ ▇▇▇▇▇▇▇▇▇▇ - ▇▇▇▇▇](▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇)▇ ▇▇▇▇▇ ▇▇▇▇▇▇▇▇ + ...... . ..... ... .... ..... *...* ........ .. ....... ... ........ + ..... ... .... ... ...... ... .. ..... ..... .... ... ..... .. ........ + ........ .. . ...... .. ... ... [... ........ .......... + .....](.......................................). ..... ........ --- diff --git a/tests/testthat/test-show.R b/tests/testthat/test-show.R index 31a7f37..059812a 100644 --- a/tests/testthat/test-show.R +++ b/tests/testthat/test-show.R @@ -24,10 +24,10 @@ test_that("show_censor() will censor elements", { # give us the original for comparison orig <- y$show() n <- length(orig) - length(y$yaml) + 1 - lnks <- show_censor(links) - cd <- show_censor(code) # the censor option can be adjusted withr::local_options(list(tinkr.censor = ".")) + lnks <- show_censor(links) + cd <- show_censor(code) blks <- show_censor(blocks) # the length of the documents are identical From ed3e7e313e7e663c3b4b7c806c875cbb01364a45 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 9 May 2024 10:17:46 -0700 Subject: [PATCH 06/25] add to_md_vec --- NAMESPACE | 1 + NEWS.md | 2 ++ R/to_md.R | 41 ++++++++++++++++++++++++++-------- man/to_md.Rd | 33 +++++++++++++++++++-------- tests/testthat/_snaps/to_md.md | 17 ++++++++++++++ tests/testthat/test-to_md.R | 37 ++++++++++++++++++++++++++++++ 6 files changed, 113 insertions(+), 18 deletions(-) create mode 100644 tests/testthat/_snaps/to_md.md diff --git a/NAMESPACE b/NAMESPACE index 5cdb8af..e279e3a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ export(show_context) export(show_list) export(stylesheet) export(to_md) +export(to_md_vec) export(to_xml) export(yarn) importFrom(R6,R6Class) diff --git a/NEWS.md b/NEWS.md index fdbf7f0..95614ac 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ ## NEW FEATURES +* `to_md_vec()` takes an xml node or nodelist and returns a character vector of + the markdown produced. * `show_list()`, `show_bare()`, `show_context()`, and `show_censor()` will show the markdown content of a node, nodelist, or list of nodes without needing to print the entire document. diff --git a/R/to_md.R b/R/to_md.R index 473dc1e..9574751 100644 --- a/R/to_md.R +++ b/R/to_md.R @@ -13,7 +13,9 @@ #' the path to your XSL stylesheet as argument. #' #' -#' @return the converted document, invisibly. +#' @return +#' - `to_md()`: `\[character\]` the converted document, invisibly as a character vector containing two elements: the yaml list and the markdown body. +#' - `to_md_vec()`: `\[character\]` the markdown representation of each node. #' #' @export #' @@ -21,18 +23,25 @@ #' path <- system.file("extdata", "example1.md", package = "tinkr") #' yaml_xml_list <- to_xml(path) #' names(yaml_xml_list) -#' library("magrittr") +#' # extract the level 3 headers from the body +#' headers3 <- xml2::xml_find_all( +#' yaml_xml_list$body, +#' xpath = './/md:heading[@level="3"]', +#' ns = md_ns() +#' ) +#' # show the headers +#' print(h3 <- to_md_vec(headers3)) #' # transform level 3 headers into level 1 headers -#' body <- yaml_xml_list$body -#' body %>% -#' xml2::xml_find_all(xpath = './/d1:heading', -#' xml2::xml_ns(.)) %>% -#' .[xml2::xml_attr(., "level") == "3"] -> headers3 +#' # NOTE: these nodes are still associated with the document and this is done +#' # in place. #' xml2::xml_set_attr(headers3, "level", 1) -#' yaml_xml_list$body <- body +#' # preview the new headers +#' print(h1 <- to_md_vec(headers3)) #' # save back and have a look #' newmd <- tempfile("newmd", fileext = ".md") -#' to_md(yaml_xml_list, newmd) +#' res <- to_md(yaml_xml_list, newmd) +#' # show that it works +#' regmatches(res[[2]], gregexpr(h1[1], res[[2]], fixed = TRUE)) #' # file.edit("newmd.md") #' file.remove(newmd) #' @@ -57,6 +66,20 @@ to_md <- function(yaml_xml_list, path = NULL, stylesheet_path = stylesheet()){ invisible(md_out) } +#' @rdname to_md +#' @export +#' @param nodelist an object of `xml_nodelist` or `xml_node` +to_md_vec <- function(nodelist) { + if (inherits(nodelist, "xml_node")) { + nodelist <- list(nodelist) + } + nodes <- lapply(nodelist, function(i) { + print_lines(isolate_nodes(i, "list")$doc) + }) + trimws(vapply(nodes, paste, character(1), collapse = "\n")) +} + + # convert body and yaml to markdown text given a stylesheet transform_to_md <- function(body, yaml, stylesheet) { body <- xslt::xml_xslt(body, stylesheet = stylesheet) diff --git a/man/to_md.Rd b/man/to_md.Rd index 6390e0b..8223162 100644 --- a/man/to_md.Rd +++ b/man/to_md.Rd @@ -2,9 +2,12 @@ % Please edit documentation in R/to_md.R \name{to_md} \alias{to_md} +\alias{to_md_vec} \title{Write YAML and XML back to disk as (R)Markdown} \usage{ to_md(yaml_xml_list, path = NULL, stylesheet_path = stylesheet()) + +to_md_vec(nodelist) } \arguments{ \item{yaml_xml_list}{result from a call to \code{\link[=to_xml]{to_xml()}} and editing.} @@ -14,9 +17,14 @@ any file, but will still produce the conversion and pass the output as a character vector.} \item{stylesheet_path}{path to the XSL stylesheet} + +\item{nodelist}{an object of \code{xml_nodelist} or \code{xml_node}} } \value{ -the converted document, invisibly. +\itemize{ +\item \code{to_md()}: \verb{\[character\]} the converted document, invisibly as a character vector containing two elements: the yaml list and the markdown body. +\item \code{to_md_vec()}: \verb{\[character\]} the markdown representation of each node. +} } \description{ Write YAML and XML back to disk as (R)Markdown @@ -32,18 +40,25 @@ the path to your XSL stylesheet as argument. path <- system.file("extdata", "example1.md", package = "tinkr") yaml_xml_list <- to_xml(path) names(yaml_xml_list) -library("magrittr") +# extract the level 3 headers from the body +headers3 <- xml2::xml_find_all( + yaml_xml_list$body, + xpath = './/md:heading[@level="3"]', + ns = md_ns() +) +# show the headers +print(h3 <- to_md_vec(headers3)) # transform level 3 headers into level 1 headers -body <- yaml_xml_list$body -body \%>\% - xml2::xml_find_all(xpath = './/d1:heading', - xml2::xml_ns(.)) \%>\% - .[xml2::xml_attr(., "level") == "3"] -> headers3 +# NOTE: these nodes are still associated with the document and this is done +# in place. xml2::xml_set_attr(headers3, "level", 1) -yaml_xml_list$body <- body +# preview the new headers +print(h1 <- to_md_vec(headers3)) # save back and have a look newmd <- tempfile("newmd", fileext = ".md") -to_md(yaml_xml_list, newmd) +res <- to_md(yaml_xml_list, newmd) +# show that it works +regmatches(res[[2]], gregexpr(h1[1], res[[2]], fixed = TRUE)) # file.edit("newmd.md") file.remove(newmd) diff --git a/tests/testthat/_snaps/to_md.md b/tests/testthat/_snaps/to_md.md new file mode 100644 index 0000000..e48e282 --- /dev/null +++ b/tests/testthat/_snaps/to_md.md @@ -0,0 +1,17 @@ +# to_md_vec() returns a vector of the same length as the nodelist + + Code + show_user(to_md_vec(blocks[5:6]), force = TRUE) + Output + ```r + get_papers <- ratelimitr::limit_rate(.get_papers, + rate = ratelimitr::rate(1, 2)) + + all_papers <- purrr::map_df(species, get_papers) + + nrow(all_papers) + ``` + ``` + ## [1] 522 + ``` + diff --git a/tests/testthat/test-to_md.R b/tests/testthat/test-to_md.R index 803e534..75da3af 100644 --- a/tests/testthat/test-to_md.R +++ b/tests/testthat/test-to_md.R @@ -132,6 +132,8 @@ test_that("to_md does not break tables", { expect_snapshot_file(newtable) }) + + test_that("code chunks can be inserted on round trip", { tmpdir <- withr::local_tempdir("newdir") path <- system.file("extdata", "example2.Rmd", package = "tinkr") @@ -174,3 +176,38 @@ test_that("links that start lines are not escaped", { expect_equal(actual, expected) }) + + +test_that("to_md_vec() returns a vector of the same length as the nodelist", { + + path <- system.file("extdata", "example1.md", package = "tinkr") + y <- tinkr::yarn$new(path, sourcepos = TRUE) + items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) + links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) + code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns()) + blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) + # no tables + tables <- xml2::xml_find_all(y$body, ".//md:table", tinkr::md_ns()) + + # each item is a character vector of equal length to the nodelist + expect_length(to_md_vec(items), length(items)) %>% + expect_type("character") + expect_length(to_md_vec(links), length(links)) %>% + expect_type("character") + expect_length(to_md_vec(code), length(code)) %>% + expect_type("character") + expect_length(to_md_vec(blocks), length(blocks)) %>% + expect_type("character") + expect_length(to_md_vec(tables), 0) %>% + expect_type("character") + + # single elements work as well + expect_length(to_md_vec(blocks[[1]]), 1) %>% + expect_type("character") + + # the output is as expected + expect_snapshot(show_user(to_md_vec(blocks[5:6]), force = TRUE)) + + +}) + From 7d1e0f4aa2c8f8b96edf230f4f8e1f26f3d21a43 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 9 May 2024 10:29:58 -0700 Subject: [PATCH 07/25] update show method function names --- NAMESPACE | 3 +-- R/show.R | 32 ++++++++++++++------------------ man/show.Rd | 27 +++++++++++++++------------ tests/testthat/_snaps/show.md | 10 +++++----- tests/testthat/test-show.R | 10 +++++----- 5 files changed, 40 insertions(+), 42 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e279e3a..7ada2c8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,9 +4,8 @@ export(find_between) export(md_ns) export(protect_curly) export(protect_math) -export(show_bare) +export(show_block) export(show_censor) -export(show_context) export(show_list) export(stylesheet) export(to_md) diff --git a/R/show.R b/R/show.R index 18459bf..199feb6 100644 --- a/R/show.R +++ b/R/show.R @@ -3,7 +3,8 @@ #' When inspecting the results of an XPath query, displaying the text often #' @param nodelist an object of class `xml_nodelist` OR `xml_node` OR a list of #' either. -#' @return a character vector, displayed to the screen. +#' @return a character vector, invisibly. The result of these functions are +#' displayed to the screen #' @examples #' path <- system.file("extdata", "example1.md", package = "tinkr") #' y <- tinkr::yarn$new(path, sourcepos = TRUE) @@ -18,18 +19,15 @@ #' show_list(blocks[1:2]) #' #' # show the items in their local structure -#' show_bare(items) -#' show_bare(links) +#' show_block(items) +#' show_block(links, mark = TRUE) #' -#' # show the items with context markers ([...]) in the structure of the document -#' show_context(links[20:31]) -#' show_context(code[1:10]) -#' #' # show the items in the full document censored: #' show_censor(links) #' # you can set the mark to censor by using the `tinkr.censor option` #' options(tinkr.censor = ".") #' show_censor(links) +#' @seealso [to_md_vec()] to get a vector of these elements in isolation. #' @rdname show #' @export show_list <- function(nodelist) { @@ -38,17 +36,17 @@ show_list <- function(nodelist) { } #' @rdname show +#' @param mark \[bool\] When `TRUE` markers (`[...]`) are added to replace +#' nodes that come before or after the islated nodes. Defaults to `FALSE`, +#' which only shows the isolated nodes in their respective blocks. Note that +#' the default state may cause nodes within the same block to appear adjacent +#' to each other. #' @export -show_bare <- function(nodelist) { - res <- isolate_nodes(nodelist, type = "context") - return(show_user(print_lines(res$doc))) -} - -#' @rdname show -#' @export -show_context <- function(nodelist) { +show_block <- function(nodelist, mark = FALSE) { res <- isolate_nodes(nodelist, type = "context") - res <- add_isolation_context(nodelist, res) + if (mark) { + res <- add_isolation_context(nodelist, res) + } return(show_user(print_lines(res$doc))) } @@ -59,8 +57,6 @@ show_censor <- function(nodelist) { return(show_user(print_lines(res$doc))) } - - isolate_nodes <- function(nodelist, type = "context") { switch(type, "censor" = isolate_nodes_censor(nodelist), diff --git a/man/show.Rd b/man/show.Rd index 654cc31..93af7f5 100644 --- a/man/show.Rd +++ b/man/show.Rd @@ -2,25 +2,29 @@ % Please edit documentation in R/show.R \name{show_list} \alias{show_list} -\alias{show_bare} -\alias{show_context} +\alias{show_block} \alias{show_censor} \title{Display a node or nodelist as markdown} \usage{ show_list(nodelist) -show_bare(nodelist) - -show_context(nodelist) +show_block(nodelist, mark = FALSE) show_censor(nodelist) } \arguments{ \item{nodelist}{an object of class \code{xml_nodelist} OR \code{xml_node} OR a list of either.} + +\item{mark}{[bool] When \code{TRUE} markers (\verb{[...]}) are added to replace +nodes that come before or after the islated nodes. Defaults to \code{FALSE}, +which only shows the isolated nodes in their respective blocks. Note that +the default state may cause nodes within the same block to appear adjacent +to each other.} } \value{ -a character vector, displayed to the screen. +a character vector, invisibly. The result of these functions are +displayed to the screen } \description{ When inspecting the results of an XPath query, displaying the text often @@ -39,12 +43,8 @@ show_list(code[1:10]) show_list(blocks[1:2]) # show the items in their local structure -show_bare(items) -show_bare(links) - -# show the items with context markers ([...]) in the structure of the document -show_context(links[20:31]) -show_context(code[1:10]) +show_block(items) +show_block(links, mark = TRUE) # show the items in the full document censored: show_censor(links) @@ -52,3 +52,6 @@ show_censor(links) options(tinkr.censor = ".") show_censor(links) } +\seealso{ +\code{\link[=to_md_vec]{to_md_vec()}} to get a vector of these elements in isolation. +} diff --git a/tests/testthat/_snaps/show.md b/tests/testthat/_snaps/show.md index 1392ced..85c45b1 100644 --- a/tests/testthat/_snaps/show.md +++ b/tests/testthat/_snaps/show.md @@ -234,10 +234,10 @@ ... .... ...... -# show context will provide context for the elements +# show_block() will provide context for the elements Code - show_user(show_bare(items), force = TRUE) + show_user(show_block(items), force = TRUE) Output @@ -277,7 +277,7 @@ --- Code - show_user(show_bare(links), force = TRUE) + show_user(show_block(links), force = TRUE) Output @@ -340,7 +340,7 @@ --- Code - show_user(show_context(links[20:31]), force = TRUE) + show_user(show_block(links[20:31], mark = TRUE), force = TRUE) Output @@ -372,7 +372,7 @@ --- Code - show_user(show_context(code[1:10]), force = TRUE) + show_user(show_block(code[1:10], mark = TRUE), force = TRUE) Output diff --git a/tests/testthat/test-show.R b/tests/testthat/test-show.R index 059812a..8a04915 100644 --- a/tests/testthat/test-show.R +++ b/tests/testthat/test-show.R @@ -42,7 +42,7 @@ test_that("show_censor() will censor elements", { -test_that("show context will provide context for the elements", { +test_that("show_block() will provide context for the elements", { path <- system.file("extdata", "example1.md", package = "tinkr") y <- tinkr::yarn$new(path, sourcepos = TRUE) items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) @@ -51,11 +51,11 @@ test_that("show context will provide context for the elements", { blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) # show the items in the structure of the document - expect_snapshot(show_user(show_bare(items), force = TRUE)) - expect_snapshot(show_user(show_bare(links), force = TRUE)) + expect_snapshot(show_user(show_block(items), force = TRUE)) + expect_snapshot(show_user(show_block(links), force = TRUE)) # show the items with context markers ([...]) in the structure of the document - expect_snapshot(show_user(show_context(links[20:31]), force = TRUE)) - expect_snapshot(show_user(show_context(code[1:10]), force = TRUE)) + expect_snapshot(show_user(show_block(links[20:31], mark = TRUE), force = TRUE)) + expect_snapshot(show_user(show_block(code[1:10], mark = TRUE), force = TRUE)) }) From ecfc4b2a8adecc3ea4fa9971333c8b102356a5be Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 9 May 2024 10:41:09 -0700 Subject: [PATCH 08/25] bump news --- NEWS.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 95614ac..6835aea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,9 +4,9 @@ * `to_md_vec()` takes an xml node or nodelist and returns a character vector of the markdown produced. -* `show_list()`, `show_bare()`, `show_context()`, and `show_censor()` will show - the markdown content of a node, nodelist, or list of nodes without needing to - print the entire document. +* `show_list()`, `show_block()`, and `show_context()` will show the markdown + content of a node, nodelist, or list of nodes without needing to print the + entire document. * `yarn$show()` method now gains the `lines` parameter, which allows you to subset the output by the lines of text. From 269c5f753a5385d88b659c731cacc04a93ea2ae6 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 9 May 2024 10:42:13 -0700 Subject: [PATCH 09/25] fix news mistake --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 6835aea..033346f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ * `to_md_vec()` takes an xml node or nodelist and returns a character vector of the markdown produced. -* `show_list()`, `show_block()`, and `show_context()` will show the markdown +* `show_list()`, `show_block()`, and `show_censor()` will show the markdown content of a node, nodelist, or list of nodes without needing to print the entire document. * `yarn$show()` method now gains the `lines` parameter, which allows you to From bb0cccf668d65d5c70e33f204a642b728838664f Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 9 May 2024 10:55:29 -0700 Subject: [PATCH 10/25] update snaps --- tests/testthat/_snaps/show.md | 8 ++++---- tests/testthat/test-show.R | 12 ++++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/testthat/_snaps/show.md b/tests/testthat/_snaps/show.md index 85c45b1..e5d2677 100644 --- a/tests/testthat/_snaps/show.md +++ b/tests/testthat/_snaps/show.md @@ -237,7 +237,7 @@ # show_block() will provide context for the elements Code - show_user(show_block(items), force = TRUE) + show_user(b_items, force = TRUE) Output @@ -277,7 +277,7 @@ --- Code - show_user(show_block(links), force = TRUE) + show_user(b_links, force = TRUE) Output @@ -340,7 +340,7 @@ --- Code - show_user(show_block(links[20:31], mark = TRUE), force = TRUE) + show_user(b_links, force = TRUE) Output @@ -372,7 +372,7 @@ --- Code - show_user(show_block(code[1:10], mark = TRUE), force = TRUE) + show_user(b_code, force = TRUE) Output diff --git a/tests/testthat/test-show.R b/tests/testthat/test-show.R index 8a04915..3a8db85 100644 --- a/tests/testthat/test-show.R +++ b/tests/testthat/test-show.R @@ -51,11 +51,15 @@ test_that("show_block() will provide context for the elements", { blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) # show the items in the structure of the document - expect_snapshot(show_user(show_block(items), force = TRUE)) - expect_snapshot(show_user(show_block(links), force = TRUE)) + b_items <- show_block(items) + b_links <- show_block(links) + expect_snapshot(show_user(b_items, force = TRUE)) + expect_snapshot(show_user(b_links, force = TRUE)) # show the items with context markers ([...]) in the structure of the document - expect_snapshot(show_user(show_block(links[20:31], mark = TRUE), force = TRUE)) - expect_snapshot(show_user(show_block(code[1:10], mark = TRUE), force = TRUE)) + b_links <- show_block(links[20:31], mark = TRUE) + b_code <- show_block(code[1:10], mark = TRUE) + expect_snapshot(show_user(b_links, force = TRUE)) + expect_snapshot(show_user(b_code, force = TRUE)) }) From fcbfbd63651158d4ab70aa22faf7dea5e17c1510 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 9 May 2024 11:23:45 -0700 Subject: [PATCH 11/25] remove dead code; test missing line --- R/show.R | 15 ++++----------- tests/testthat/_snaps/show.md | 20 ++++++++++++++++++++ tests/testthat/test-show.R | 13 +++++++++++++ 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/R/show.R b/R/show.R index 199feb6..fafad4a 100644 --- a/R/show.R +++ b/R/show.R @@ -76,7 +76,10 @@ isolate_nodes_a_la_carte <- function(nodelist) { if (inherits(node, "xml_node")) { xml2::xml_add_child(parent, node) } else { - purrr::walk(node, function(n) xml2::xml_add_child(parent, n)) + purrr::walk(node, function(n) { + xml2::xml_add_child(parent, n) + xml2::xml_add_child(parent, "softbreak") + }) } } return(list(doc = doc, key = NULL)) @@ -183,13 +186,3 @@ label_nodes <- function(xpath, doc, label = "save") { "label", label) } -add_context_siblings <- function(node, where = "after") { - xml2::xml_add_sibling(node, .where = "after", - "text", " [...] ", asis = "true" - ) - xml2::xml_add_sibling(node, .where = "before", - "text", "[...] ", asis = "true" - ) -} - - diff --git a/tests/testthat/_snaps/show.md b/tests/testthat/_snaps/show.md index e5d2677..dadab94 100644 --- a/tests/testthat/_snaps/show.md +++ b/tests/testthat/_snaps/show.md @@ -156,6 +156,26 @@ +# show_list() will isolate groups of elements + + Code + show_user(show_list(list(links[1:3], links[4:5])), force = TRUE) + Output + + + [second post of the series where we obtained data from + eBird](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) + [the fourth post of the + series](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) + [previous post + of the series](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) + + + [(`glue::glue_collapse(species, sep = ", ", last = " and ")`)](https://twitter.com/LucyStats/status/1031938964796657665?s=19) + [`taxize`](https://github.com/ropensci/taxize) + + + # show_censor() will censor elements Code diff --git a/tests/testthat/test-show.R b/tests/testthat/test-show.R index 3a8db85..37ecb26 100644 --- a/tests/testthat/test-show.R +++ b/tests/testthat/test-show.R @@ -13,6 +13,19 @@ test_that("show_list() will isolate elements", { }) +test_that("show_list() will isolate groups of elements", { + + path <- system.file("extdata", "example1.md", package = "tinkr") + y <- tinkr::yarn$new(path, sourcepos = TRUE) + items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) + links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) + code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns()) + blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) + # show a list of items + expect_snapshot(show_user(show_list(list(links[1:3], links[4:5])), force = TRUE)) + +}) + test_that("show_censor() will censor elements", { path <- system.file("extdata", "example1.md", package = "tinkr") From 615b6a9da46346b3d30e1270bfd44308f1889e30 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 23 May 2024 07:48:45 -0700 Subject: [PATCH 12/25] update test --- tests/testthat/_snaps/class-yarn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/_snaps/class-yarn.md b/tests/testthat/_snaps/class-yarn.md index af1b0f8..f689981 100644 --- a/tests/testthat/_snaps/class-yarn.md +++ b/tests/testthat/_snaps/class-yarn.md @@ -72,7 +72,7 @@ ## R Markdown - This is an ~~R Markdown document~~. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see [http://rmarkdown.rstudio.com](http://rmarkdown.rstudio.com). + This is an ~~R Markdown document~~. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: From 385c5b07129d583b48b6703bcc0523299e111e48 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 23 May 2024 09:55:01 -0700 Subject: [PATCH 13/25] add an example for the show family --- inst/extdata/show-example.md | 50 ++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 inst/extdata/show-example.md diff --git a/inst/extdata/show-example.md b/inst/extdata/show-example.md new file mode 100644 index 0000000..0dc47ba --- /dev/null +++ b/inst/extdata/show-example.md @@ -0,0 +1,50 @@ +--- +title: 'Example of the show methods' +--- + +## Links + +### Relative + +Here are some [relative links](#links) and [anchor links]. + +### Images + +![kittens are cute](https://loremflickr.com/320/240){alt='a random picture of a kitten'} + +## Lists + +- kittens + - are + - super + - cute + - have + - teef + - murder mittens +- brains + - are + - wrinkly + +## Code + +Here is an example of the `utils::strcapture()` function + +```r +sourcepos <- c("2:1-2:33", "4:1-7:7") +pattern <- "([[:digit:]]+):([[:digit:]]+)-([[:digit:]]+):([[:digit:]]+)" +proto <- data.frame( + linestart = integer(), colstart = integer(), + lineend = integer(), colend = integer() +) +utils::strcapture(pattern, sourcepos, proto) +``` + +## Math + +Inline math can be written as $y = mx + b$ while block math should be: + +$$ +y = mx + b +$$ + +[anchor links]: https://example.com/anchor From b8293b85659646f7ad177a0097c7c8d3a3b5966f Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 23 May 2024 09:55:36 -0700 Subject: [PATCH 14/25] include "rel" attribute for censorship --- R/show.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/show.R b/R/show.R index fafad4a..e453869 100644 --- a/R/show.R +++ b/R/show.R @@ -96,6 +96,7 @@ isolate_nodes_censor <- function(nodelist) { res <- provision_isolation(nodelist) censor_attr(res$parents, "destination") censor_attr(res$parents, "title") + censor_attr(res$parents, "rel") txt <- xml2::xml_find_all(res$parents, ".//text()") xml2::xml_set_text(txt, censor(xml2::xml_text(txt))) return(list(doc = res$doc, key = res$key)) From 6bde9340ee1efb54f495fab37f7871963075d38e Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 23 May 2024 09:57:12 -0700 Subject: [PATCH 15/25] add warning for yarn class new pattern --- R/class-yarn.R | 13 +++++++++++++ R/show.R | 5 +++-- tests/testthat/test-class-yarn.R | 19 +++++++++++++++++++ tests/testthat/test-show.R | 6 +++++- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/R/class-yarn.R b/R/class-yarn.R index 7956b98..7239313 100644 --- a/R/class-yarn.R +++ b/R/class-yarn.R @@ -107,6 +107,19 @@ yarn <- R6::R6Class("yarn", #' ex2$tail(5) #' ex2$show() show = function(lines = TRUE, stylesheet_path = stylesheet()) { + if (is.character(lines) && length(lines) == 1 && file.exists(lines)) { + # when using {tinkr} < 0.3.0 + stylesheet_path <- lines + lines <- TRUE + the_call <- match.call() + the_call$stylesheet_path <- the_call$lines + the_call$lines <- NULL + new_call <- capture.output(print(the_call)) + warning( + "In {tinkr} 0.3.0, the $show() method gained the `lines` argument as the first argument.\n", + sprintf("To remove this warning, use: `%s`", new_call), + call. = FALSE) + } show_user(private$md_lines(stylesheet = stylesheet_path)[lines]) }, diff --git a/R/show.R b/R/show.R index e453869..949a56a 100644 --- a/R/show.R +++ b/R/show.R @@ -156,8 +156,9 @@ censor_attr <- function(nodes, attr) { } censor <- function(x) { - item <- getOption("tinkr.censor", default = "\u2587") - gsub("[^[:space:]]", item, x, perl = TRUE) + regex <- getOption("tinkr.censor.regex", default = "[^[:space:]]") + mark <- getOption("tinkr.censor.mark", default = "\u2587") + gsub(regex, mark, x, perl = TRUE) } print_lines <- function(xml, path = NULL, stylesheet = NULL) { diff --git a/tests/testthat/test-class-yarn.R b/tests/testthat/test-class-yarn.R index 611ff32..e35a0be 100644 --- a/tests/testthat/test-class-yarn.R +++ b/tests/testthat/test-class-yarn.R @@ -47,6 +47,25 @@ test_that("yarn show, head, and tail methods work", { }) + +test_that("yarn show method will warn if using positional stylesheet", { + + path <- system.file("extdata", "table.md", package = "tinkr") + y1 <- yarn$new(path) + expect_no_warning({ + md_show <- y1$show(TRUE) + }) + expect_no_warning({ + md_show1 <- y1$show(stylesheet_path = stylesheet()) + }) + suppressWarnings({ + expect_warning(md_show2 <- y1$show(stylesheet())) + }) + expect_identical(md_show, md_show2) + +}) + + test_that("yarn can be created from Rmarkdown", { pathrmd <- system.file("extdata", "example2.Rmd", package = "tinkr") y1 <- yarn$new(pathrmd) diff --git a/tests/testthat/test-show.R b/tests/testthat/test-show.R index 37ecb26..59b4cb7 100644 --- a/tests/testthat/test-show.R +++ b/tests/testthat/test-show.R @@ -38,7 +38,11 @@ test_that("show_censor() will censor elements", { orig <- y$show() n <- length(orig) - length(y$yaml) + 1 # the censor option can be adjusted - withr::local_options(list(tinkr.censor = ".")) + withr::local_options(list( + tinkr.censor.mark = ".", + tinkr.censor.regex = "[^[:space:]]" + ) + ) lnks <- show_censor(links) cd <- show_censor(code) blks <- show_censor(blocks) From 0cc72f4561b47d3778b8805e5a8d7a427fbc045f Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 23 May 2024 09:58:04 -0700 Subject: [PATCH 16/25] update show tests to use example doc --- tests/testthat/_snaps/show.md | 469 +++++++++++++++------------------- tests/testthat/test-show.R | 57 +++-- 2 files changed, 247 insertions(+), 279 deletions(-) diff --git a/tests/testthat/_snaps/show.md b/tests/testthat/_snaps/show.md index dadab94..b74081c 100644 --- a/tests/testthat/_snaps/show.md +++ b/tests/testthat/_snaps/show.md @@ -5,356 +5,317 @@ Output - [second post of the series where we obtained data from - eBird](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) + [relative links](#links) - [the fourth post of the - series](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) + [anchor links] - [previous post - of the series](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) + [anchor links]: https://example.com/anchor - [(`glue::glue_collapse(species, sep = ", ", last = " and ")`)](https://twitter.com/LucyStats/status/1031938964796657665?s=19) - [`taxize`](https://github.com/ropensci/taxize) - - [`spocc`](https://github.com/ropensci/spocc) - - [`fulltext`](https://github.com/ropensci/fulltext) + +--- + + Code + show_user(show_list(code), force = TRUE) + Output - ["Investigating the impact of media on demand for wildlife: A case - study of Harry Potter and the UK trade in - owls"](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0182368) - [`cites`](https://github.com/ecohealthalliance/cites/) + `utils::strcapture()` - [`rcites`](https://ibartomeus.github.io/rcites/) + +--- + + Code + show_user(show_list(blocks), force = TRUE) + Output - [`wordcloud` - package](https://cran.r-project.org/web/packages/wordcloud/index.html) - [`wordcloud2` - package](https://github.com/Lchiffon/wordcloud2) + ```r + sourcepos <- c("2:1-2:33", "4:1-7:7") + pattern <- "([[:digit:]]+):([[:digit:]]+)-([[:digit:]]+):([[:digit:]]+)" + proto <- data.frame( + linestart = integer(), colstart = integer(), + lineend = integer(), colend = integer() + ) + utils::strcapture(pattern, sourcepos, proto) + ``` - [from - Phylopic](http://phylopic.org/image/6209c9be-060e-4d7f-bc74-a75f3ccf4629/) - [DataONE](https://www.dataone.org/) + +# show_list() will isolate groups of elements + + Code + show_user(show_list(list(links, headings)), force = TRUE) + Output - [searching - DataONE - vignette](https://github.com/DataONEorg/rdataone/blob/master/vignettes/searching-dataone.Rmd) - [download data - vignette](https://github.com/DataONEorg/rdataone/blob/master/vignettes/download-data.Rmd) + [relative links](#links) + [anchor links] + [anchor links]: https://example.com/anchor - [`europepmc`](https://github.com/ropensci/europepmc) - [`jstor`](https://github.com/ropensci/jstor) - [`suppdata`](https://github.com/ropensci/suppdata) + ## Links - [much - more](https://ropensci.org/packages/) - [`dataone` - package](https://github.com/DataONEorg/rdataone) + ### Relative - [`rfigshare`](https://github.com/ropensci/rfigshare) - [Figshare](https://figshare.com/) + ### Images - [`EML` package](https://github.com/ropensci/EML) - [unconf - `dataspice` project](https://github.com/ropenscilabs/dataspice) + ## Lists - [here](https://ropensci.org/packages/) - [How to identify spots for birding using open geographical - data](https://ropensci.org/blog/2018/08/14/where-to-bird/) + ## Code - [How to obtain bird occurrence data in - R](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) - [How to extract text from old natural history - drawings](https://ropensci.org/blog/2018/08/28/birds-ocr/) + ## Math - [How to complement an occurrence dataset with taxonomy and trait - information](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) - [our friendly discussion - forum](https://discuss.ropensci.org/c/usecases) ---- +# show_censor() will censor elements Code - show_user(show_list(code[1:10]), force = TRUE) + show_user(lnks, force = TRUE) Output - `glue::glue_collapse(species, sep = ", ", last = " and ")` + ## ..... - `taxize` + ### ........ - `spocc` + .... ... .... [relative links](#links) ... [anchor links]. - `fulltext` + ### ...... - `fulltext` + ![....... ... ....](...............................)....... ...... ....... .. . ........ - `tidytext` + ## ..... - `dplyr::bind_rows` + - ....... + - ... + - ..... + - .... + - .... + - .... + - ...... ....... + - ...... + - ... + - ....... - `fulltext` + ## .... - `cites` + .... .. .. ....... .. ... `...................` ........ + + ```r + ......... .. ............. .......... + ....... .. ............................................................. + ..... .. ........... + ......... . .......... ........ . .......... + ....... . .......... ...... . ......... + . + .......................... .......... ...... + ``` + + ## .... + + ...... .... ... .. ....... .. .. . .. . .. ..... ..... .... ...... ... + + .. + . . .. . . + .. + + [anchor links]: https://example.com/anchor - `rcites` --- Code - show_user(show_list(blocks[1:2]), force = TRUE) + show_user(cd, force = TRUE) Output - ```r - # polygon for filtering - landkreis_konstanz <- osmdata::getbb("Landkreis Konstanz", - format_out = "sf_polygon") - crs <- sf::st_crs(landkreis_konstanz) + ## ..... - # get and filter data - f_out_ebd <- "ebird/ebd_lk_konstanz.txt" + ### ........ - library("magrittr") + .... ... .... [........ .....](......) ... [...... .....]. - ebd <- auk::read_ebd(f_out_ebd) %>% - sf::st_as_sf(coords = c("longitude", "latitude"), - crs = crs) + ### ...... - in_indices <- sf::st_within(ebd, landkreis_konstanz) + ![....... ... ....](...............................)....... ...... ....... .. . ........ - ebd <- dplyr::filter(ebd, lengths(in_indices) > 0) + ## ..... - ebd <- as.data.frame(ebd) + - ....... + - ... + - ..... + - .... + - .... + - .... + - ...... ....... + - ...... + - ... + - ....... - ebd <- dplyr::filter(ebd, approved, lubridate::year(observation_date) > 2010) - ``` + ## .... + .... .. .. ....... .. ... `utils::strcapture()` ........ ```r - species <- ebd %>% - dplyr::count(common_name, sort = TRUE) %>% - head(n = 50) %>% - dplyr::pull(common_name) + ......... .. ............. .......... + ....... .. ............................................................. + ..... .. ........... + ......... . .......... ........ . .......... + ....... . .......... ...... . ......... + . + .......................... .......... ...... ``` + ## .... - -# show_list() will isolate groups of elements - - Code - show_user(show_list(list(links[1:3], links[4:5])), force = TRUE) - Output - - - [second post of the series where we obtained data from - eBird](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) - [the fourth post of the - series](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) - [previous post - of the series](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) + ...... .... ... .. ....... .. .. . .. . .. ..... ..... .... ...... ... + .. + . . .. . . + .. - [(`glue::glue_collapse(species, sep = ", ", last = " and ")`)](https://twitter.com/LucyStats/status/1031938964796657665?s=19) - [`taxize`](https://github.com/ropensci/taxize) + [...... .....]: .......................... -# show_censor() will censor elements - - Code - show_user(lnks[1:10], force = TRUE) - Output - - - .. ... [second post of the series where we obtained data from - eBird](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) .. - .......... .... ..... .... ........ .. ... ...... .. .......... ... .. - ............ .... ......... .... .... ......... ... ..... ........... .. - [the fourth post of the - series](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/). .... - .. ..... .. ....... ..... ... .......... .. ..... ..... .. *.......... - ....*. .. .... ..... .. .... ..... ... .......... .......... ... .. .... - --- Code - show_user(tail(cd, 20), force = TRUE) + show_user(blks, force = TRUE) Output - - [... .. ....... .... .... ... ....... ....... - ........](...............................................). - ......... `magick` ... ..... ............. `tesseract` ... ....... - ......... ............ `cld2` ... `cld3` ... ........ .......... ... - `taxize::gnr_resolve` ... ......... .... ........... - - [... .. .......... .. .......... ....... .... ........ ... ..... - ...........](.......................................................). - ......... `taxize`. ......... ........ ... .. ... `traits`. - ......... ...... .. ....... ...... ..... + ## ..... - - ... .. ..... ... .......... .......... ... .......... .... .... - ............. .... .. ... .... ...... .... ..... + ### ........ - ...... . ..... ... .... ..... *...* ........ .. ....... ... ........ - ..... ... .... ... ...... ... .. ..... ..... .... ... ..... .. ........ - ........ .. . ...... .. ... ... [... ........ .......... - .....](.......................................). ..... ........ + .... ... .... [........ .....](......) ... [...... .....]. - ---- - - Code - show_user(blks[19:48], force = TRUE) - Output - ... ...... .......... .. ... ...... . .... .. .... .... .... ... ... - .... ...... ...... - - ```r - # polygon for filtering - landkreis_konstanz <- osmdata::getbb("Landkreis Konstanz", - format_out = "sf_polygon") - crs <- sf::st_crs(landkreis_konstanz) + ### ...... - # get and filter data - f_out_ebd <- "ebird/ebd_lk_konstanz.txt" + ![....... ... ....](...............................)....... ...... ....... .. . ........ - library("magrittr") + ## ..... - ebd <- auk::read_ebd(f_out_ebd) %>% - sf::st_as_sf(coords = c("longitude", "latitude"), - crs = crs) + - ....... + - ... + - ..... + - .... + - .... + - .... + - ...... ....... + - ...... + - ... + - ....... - in_indices <- sf::st_within(ebd, landkreis_konstanz) + ## .... - ebd <- dplyr::filter(ebd, lengths(in_indices) > 0) + .... .. .. ....... .. ... `...................` ........ - ebd <- as.data.frame(ebd) - - ebd <- dplyr::filter(ebd, approved, lubridate::year(observation_date) > 2010) + ```r + sourcepos <- c("2:1-2:33", "4:1-7:7") + pattern <- "([[:digit:]]+):([[:digit:]]+)-([[:digit:]]+):([[:digit:]]+)" + proto <- data.frame( + linestart = integer(), colstart = integer(), + lineend = integer(), colend = integer() + ) + utils::strcapture(pattern, sourcepos, proto) ``` - ... ... .... .. ........... .. ..... .... ... ... .. ....... ........ - ... .... ...... - - -# show_block() will provide context for the elements - - Code - show_user(b_items, force = TRUE) - Output - - - - study the results of such queries (e.g. meta studies of number of, - say, versions by datasets) - - - or find data to integrate to a new study. If you want to *download* - data from DataONE, refer to the [download data - vignette](https://github.com/DataONEorg/rdataone/blob/master/vignettes/download-data.Rmd). + ## .... - - [How to identify spots for birding using open geographical - data](https://ropensci.org/blog/2018/08/14/where-to-bird/). - Featuring `opencage` for geocoding, `bbox` for bounding box - creation, `osmdata` for OpenStreetMap's Overpass API querying, - `osmplotr` for map drawing using OpenStreetMap's data. + ...... .... ... .. ....... .. .. . .. . .. ..... ..... .... ...... ... - - [How to obtain bird occurrence data in - R](https://ropensci.org/blog/2018/08/21/birds-radolfzell/). - Featuring `rebird` for interaction with the eBird's API, and `auk` - for munging of the whole eBird dataset. + .. + . . .. . . + .. - - [How to extract text from old natural history - drawings](https://ropensci.org/blog/2018/08/28/birds-ocr/). - Featuring `magick` for image manipulation, `tesseract` for Optical - Character Recognition, `cld2` and `cld3` for language detection, and - `taxize::gnr_resolve` for taxonomic name resolution. + [...... .....]: .......................... - - [How to complement an occurrence dataset with taxonomy and trait - information](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/). - Featuring `taxize`, taxonomic toolbelt for R, and `traits`, - providing access to species traits data. - - - How to query the scientific literature and scientific open data - repositories. This is the post you've just read! ---- +# tinkr.censor.regex can adjust for symbols Code - show_user(b_links, force = TRUE) + show_user(itms, force = TRUE) Output - [second post of the series where we obtained data from - eBird](https://ropensci.org/blog/2018/08/21/birds-radolfzell/)[the fourth post of the - series](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) + ## Links - [previous post - of the series](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) + ### Relative - [(`glue::glue_collapse(species, sep = ", ", last = " and ")`)](https://twitter.com/LucyStats/status/1031938964796657665?s=19) + Here are some [relative links](#links) and [anchor links]. - [`taxize`](https://github.com/ropensci/taxize)[`spocc`](https://github.com/ropensci/spocc)[`fulltext`](https://github.com/ropensci/fulltext) + ### Images - ["Investigating the impact of media on demand for wildlife: A case - study of Harry Potter and the UK trade in - owls"](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0182368)[`cites`](https://github.com/ecohealthalliance/cites/)[`rcites`](https://ibartomeus.github.io/rcites/) + ![kittens are cute](https://loremflickr.com/320/240){alt='a random picture of a kitten'} - [`wordcloud` - package](https://cran.r-project.org/web/packages/wordcloud/index.html) + ## Lists - [`wordcloud2` - package](https://github.com/Lchiffon/wordcloud2)[from - Phylopic](http://phylopic.org/image/6209c9be-060e-4d7f-bc74-a75f3ccf4629/) + - kittens + - are + - super + - cute + - have + - teef + - murder mittens + - brains + - are + - wrinkly - [DataONE](https://www.dataone.org/) + ## Code - [searching - DataONE - vignette](https://github.com/DataONEorg/rdataone/blob/master/vignettes/searching-dataone.Rmd) + Here is an example of the `utils::strcapture()` function - - [download data - vignette](https://github.com/DataONEorg/rdataone/blob/master/vignettes/download-data.Rmd) + ```r + AAAAAAAAA <- A("A:A-A:AA", "A:A-A:A") + AAAAAAA <- "([[:AAAAA:]]+):([[:AAAAA:]]+)-([[:AAAAA:]]+):([[:AAAAA:]]+)" + AAAAA <- AAAA.AAAAA( + AAAAAAAAA = AAAAAAA(), AAAAAAAA = AAAAAAA(), + AAAAAAA = AAAAAAA(), AAAAAA = AAAAAAA() + ) + AAAAA::AAAAAAAAAA(AAAAAAA, AAAAAAAAA, AAAAA) + ``` - [`europepmc`](https://github.com/ropensci/europepmc)[`jstor`](https://github.com/ropensci/jstor)[`suppdata`](https://github.com/ropensci/suppdata)[much - more](https://ropensci.org/packages/) + ## Math - [`dataone` - package](https://github.com/DataONEorg/rdataone)[`rfigshare`](https://github.com/ropensci/rfigshare)[Figshare](https://figshare.com/)[`EML` package](https://github.com/ropensci/EML)[unconf - `dataspice` project](https://github.com/ropenscilabs/dataspice) + Inline math can be written as $y = mx + b$ while block math should be: - [here](https://ropensci.org/packages/) + $$ + y = mx + b + $$ - - [How to identify spots for birding using open geographical - data](https://ropensci.org/blog/2018/08/14/where-to-bird/) + [anchor links]: https://example.com/anchor - - [How to obtain bird occurrence data in - R](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) - - [How to extract text from old natural history - drawings](https://ropensci.org/blog/2018/08/28/birds-ocr/) + +# show_block() will provide context for the elements + + Code + show_user(b_items, force = TRUE) + Output - - [How to complement an occurrence dataset with taxonomy and trait - information](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) - [our friendly discussion - forum](https://discuss.ropensci.org/c/usecases) + - kittens + - are + - super + - cute + - have + - teef + - murder mittens + - brains + - are + - wrinkly --- @@ -364,46 +325,32 @@ Output - [...] [much - more](https://ropensci.org/packages/) [...] + [relative links](#links)[anchor links] - [...] [`dataone` - package](https://github.com/DataONEorg/rdataone) [...][...] [`rfigshare`](https://github.com/ropensci/rfigshare) [...][...] [Figshare](https://figshare.com/) [...][...] [`EML` package](https://github.com/ropensci/EML) [...][...] [unconf - `dataspice` project](https://github.com/ropenscilabs/dataspice) [...] + [anchor links]: https://example.com/anchor - [...] [here](https://ropensci.org/packages/) [...] - - [How to identify spots for birding using open geographical - data](https://ropensci.org/blog/2018/08/14/where-to-bird/) [...] + +--- + + Code + show_user(bmark_links, force = TRUE) + Output - - [How to obtain bird occurrence data in - R](https://ropensci.org/blog/2018/08/21/birds-radolfzell/) [...] - - [How to extract text from old natural history - drawings](https://ropensci.org/blog/2018/08/28/birds-ocr/) [...] + [...] [relative links](#links) [...][...] [anchor links] [...] - - [How to complement an occurrence dataset with taxonomy and trait - information](https://ropensci.org/blog/2018/09/04/birds-taxo-traits/) [...] + [anchor links]: https://example.com/anchor - [...] [our friendly discussion - forum](https://discuss.ropensci.org/c/usecases) [...] --- Code - show_user(b_code, force = TRUE) + show_user(bmark_code, force = TRUE) Output - [[...] `glue::glue_collapse(species, sep = ", ", last = " and ")` [...]](https://twitter.com/LucyStats/status/1031938964796657665?s=19) - - [`taxize`](https://github.com/ropensci/taxize)[`spocc`](https://github.com/ropensci/spocc)[`fulltext`](https://github.com/ropensci/fulltext) - - [...] `fulltext` [...][...] `tidytext` [...] - - [...] `dplyr::bind_rows` [...][...] `fulltext` [...] - - [`cites`](https://github.com/ecohealthalliance/cites/)[`rcites`](https://ibartomeus.github.io/rcites/) + [...] `utils::strcapture()` [...] diff --git a/tests/testthat/test-show.R b/tests/testthat/test-show.R index 59b4cb7..e0e922c 100644 --- a/tests/testthat/test-show.R +++ b/tests/testthat/test-show.R @@ -1,34 +1,32 @@ test_that("show_list() will isolate elements", { - path <- system.file("extdata", "example1.md", package = "tinkr") + path <- system.file("extdata", "show-example.md", package = "tinkr") y <- tinkr::yarn$new(path, sourcepos = TRUE) - items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) + headings <- xml2::xml_find_all(y$body, ".//md:heading", tinkr::md_ns()) code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns()) blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) # show a list of items expect_snapshot(show_user(show_list(links), force = TRUE)) - expect_snapshot(show_user(show_list(code[1:10]), force = TRUE)) - expect_snapshot(show_user(show_list(blocks[1:2]), force = TRUE)) + expect_snapshot(show_user(show_list(code), force = TRUE)) + expect_snapshot(show_user(show_list(blocks), force = TRUE)) }) test_that("show_list() will isolate groups of elements", { - path <- system.file("extdata", "example1.md", package = "tinkr") + path <- system.file("extdata", "show-example.md", package = "tinkr") y <- tinkr::yarn$new(path, sourcepos = TRUE) - items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) - code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns()) - blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) + headings <- xml2::xml_find_all(y$body, ".//md:heading", tinkr::md_ns()) # show a list of items - expect_snapshot(show_user(show_list(list(links[1:3], links[4:5])), force = TRUE)) + expect_snapshot(show_user(show_list(list(links, headings)), force = TRUE)) }) test_that("show_censor() will censor elements", { - path <- system.file("extdata", "example1.md", package = "tinkr") + path <- system.file("extdata", "show-example.md", package = "tinkr") y <- tinkr::yarn$new(path, sourcepos = TRUE) items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) @@ -52,15 +50,38 @@ test_that("show_censor() will censor elements", { expect_length(cd, n) expect_length(blks, n) - expect_snapshot(show_user(lnks[1:10], force = TRUE)) - expect_snapshot(show_user(tail(cd, 20), force = TRUE)) - expect_snapshot(show_user(blks[19:48], force = TRUE)) + expect_snapshot(show_user(lnks, force = TRUE)) + expect_snapshot(show_user(cd, force = TRUE)) + expect_snapshot(show_user(blks, force = TRUE)) +}) + +test_that("tinkr.censor.regex can adjust for symbols", { + path <- system.file("extdata", "show-example.md", package = "tinkr") + y <- tinkr::yarn$new(path, sourcepos = TRUE) + items <- xml2::xml_find_all(y$body, ".//node()[not(self::md:code_block)]", + tinkr::md_ns()) + + # the censor option can be adjusted + withr::local_options(list( + tinkr.censor.mark = "A", + tinkr.censor.regex = "[^[:space:][:punct:]]" + ) + ) + itms <- show_censor(items) + + # the length of the documents are identical + # give us the original for comparison + orig <- y$show() + n <- length(orig) - length(y$yaml) + 1 + expect_length(itms, n) + + expect_snapshot(show_user(itms, force = TRUE)) }) test_that("show_block() will provide context for the elements", { - path <- system.file("extdata", "example1.md", package = "tinkr") + path <- system.file("extdata", "show-example.md", package = "tinkr") y <- tinkr::yarn$new(path, sourcepos = TRUE) items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) @@ -73,10 +94,10 @@ test_that("show_block() will provide context for the elements", { expect_snapshot(show_user(b_items, force = TRUE)) expect_snapshot(show_user(b_links, force = TRUE)) # show the items with context markers ([...]) in the structure of the document - b_links <- show_block(links[20:31], mark = TRUE) - b_code <- show_block(code[1:10], mark = TRUE) - expect_snapshot(show_user(b_links, force = TRUE)) - expect_snapshot(show_user(b_code, force = TRUE)) + bmark_links <- show_block(links, mark = TRUE) + bmark_code <- show_block(code, mark = TRUE) + expect_snapshot(show_user(bmark_links, force = TRUE)) + expect_snapshot(show_user(bmark_code, force = TRUE)) }) From 9af96199f2ee35295c861e2e218fd2a6d556c405 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 23 May 2024 10:09:17 -0700 Subject: [PATCH 17/25] update show documentation --- R/show.R | 30 ++++++++++++++++++++++-------- man/show.Rd | 30 ++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/R/show.R b/R/show.R index 949a56a..e18a15a 100644 --- a/R/show.R +++ b/R/show.R @@ -6,27 +6,41 @@ #' @return a character vector, invisibly. The result of these functions are #' displayed to the screen #' @examples -#' path <- system.file("extdata", "example1.md", package = "tinkr") +#' path <- system.file("extdata", "show-example.md", package = "tinkr") #' y <- tinkr::yarn$new(path, sourcepos = TRUE) +#' y$protect_math()$protect_curly() #' items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) +#' imgs <- xml2::xml_find_all(y$body, ".//md:image | .//node()[@curly]", +#' tinkr::md_ns()) #' links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) #' code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns()) #' blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) #' #' # show a list of items -#' show_list(links[1:10]) -#' show_list(code[1:10]) -#' show_list(blocks[1:2]) +#' show_list(links) +#' show_list(code) +#' show_list(blocks) #' #' # show the items in their local structure #' show_block(items) #' show_block(links, mark = TRUE) #' -#' # show the items in the full document censored: -#' show_censor(links) -#' # you can set the mark to censor by using the `tinkr.censor option` -#' options(tinkr.censor = ".") +#' # show the items in the full document censored (everything but whitespace): +#' show_censor(imgs) +#' +#' # You can also adjust the censorship parameters. There are two paramters +#' # available: the mark, which chooses what character you want to use to +#' # replace characters (default: `\u2587`); and the regex which specifies +#' # characters to replace (default: `[^[:space:]]`, which replaces all +#' # non-whitespace characters. +#' # +#' # The following will replace everything that is not a whitespace +#' # or punctuation character with "o" for a very ghostly document +#' op <- options() +#' options(tinkr.censor.regex = "[^[:space:][:punct:]]") +#' options(tinkr.censor.mark = "o") #' show_censor(links) +#' options(op) #' @seealso [to_md_vec()] to get a vector of these elements in isolation. #' @rdname show #' @export diff --git a/man/show.Rd b/man/show.Rd index 93af7f5..1fa185c 100644 --- a/man/show.Rd +++ b/man/show.Rd @@ -30,27 +30,41 @@ displayed to the screen When inspecting the results of an XPath query, displaying the text often } \examples{ -path <- system.file("extdata", "example1.md", package = "tinkr") +path <- system.file("extdata", "show-example.md", package = "tinkr") y <- tinkr::yarn$new(path, sourcepos = TRUE) +y$protect_math()$protect_curly() items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) +imgs <- xml2::xml_find_all(y$body, ".//md:image | .//node()[@curly]", + tinkr::md_ns()) links <- xml2::xml_find_all(y$body, ".//md:link", tinkr::md_ns()) code <- xml2::xml_find_all(y$body, ".//md:code", tinkr::md_ns()) blocks <- xml2::xml_find_all(y$body, ".//md:code_block", tinkr::md_ns()) # show a list of items -show_list(links[1:10]) -show_list(code[1:10]) -show_list(blocks[1:2]) +show_list(links) +show_list(code) +show_list(blocks) # show the items in their local structure show_block(items) show_block(links, mark = TRUE) -# show the items in the full document censored: -show_censor(links) -# you can set the mark to censor by using the `tinkr.censor option` -options(tinkr.censor = ".") +# show the items in the full document censored (everything but whitespace): +show_censor(imgs) + +# You can also adjust the censorship parameters. There are two paramters +# available: the mark, which chooses what character you want to use to +# replace characters (default: `\u2587`); and the regex which specifies +# characters to replace (default: `[^[:space:]]`, which replaces all +# non-whitespace characters. +# +# The following will replace everything that is not a whitespace +# or punctuation character with "o" for a very ghostly document +op <- options() +options(tinkr.censor.regex = "[^[:space:][:punct:]]") +options(tinkr.censor.mark = "o") show_censor(links) +options(op) } \seealso{ \code{\link[=to_md_vec]{to_md_vec()}} to get a vector of these elements in isolation. From 674430cf47be5193bc8e3f4245c29a3e53067bc7 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 23 May 2024 10:43:13 -0700 Subject: [PATCH 18/25] add stylesheet path to to_md_vec() --- R/to_md.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/to_md.R b/R/to_md.R index 9574751..1d89cac 100644 --- a/R/to_md.R +++ b/R/to_md.R @@ -69,12 +69,12 @@ to_md <- function(yaml_xml_list, path = NULL, stylesheet_path = stylesheet()){ #' @rdname to_md #' @export #' @param nodelist an object of `xml_nodelist` or `xml_node` -to_md_vec <- function(nodelist) { +to_md_vec <- function(nodelist, stylesheet_path = stylesheet()) { if (inherits(nodelist, "xml_node")) { nodelist <- list(nodelist) } nodes <- lapply(nodelist, function(i) { - print_lines(isolate_nodes(i, "list")$doc) + print_lines(isolate_nodes(i, "list")$doc, stylesheet = stylesheet_path) }) trimws(vapply(nodes, paste, character(1), collapse = "\n")) } From 89de85c150ca8a740106a0b60c9d1e6545d57784 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 23 May 2024 10:43:36 -0700 Subject: [PATCH 19/25] add md_vec method to yarn --- R/class-yarn.R | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/R/class-yarn.R b/R/class-yarn.R index 7239313..202f9e2 100644 --- a/R/class-yarn.R +++ b/R/class-yarn.R @@ -127,7 +127,6 @@ yarn <- R6::R6Class("yarn", #' #' @param n the number of elements to show from the top. Negative numbers #' @param stylesheet_path path to the xsl stylesheet to convert XML to markdown. - #' exclude lines from the bottom #' @return a character vector with `n` elements head = function(n = 6L, stylesheet_path = stylesheet()) { show_user(head(private$md_lines(stylesheet = stylesheet_path), n)) @@ -137,13 +136,41 @@ yarn <- R6::R6Class("yarn", #' #' @param n the number of elements to show from the bottom. Negative numbers #' @param stylesheet_path path to the xsl stylesheet to convert XML to markdown. - #' exclude lines from the top #' #' @return a character vector with `n` elements tail = function(n = 6L, stylesheet_path = stylesheet()) { show_user(tail(private$md_lines(stylesheet = stylesheet_path), n)) }, + #' @description query and extract markdown elements + #' + #' @param xpath a valid XPath expression + #' @param stylesheet_path path to the xsl stylesheet to convert XML to markdown. + #' + #' @return a vector of markdown elements generated from the query + #' @seealso [to_md_vec()] for a way to generate the same vector from a + #' nodelist without a yarn object + #' @examples + #' path <- system.file("extdata", "example1.md", package = "tinkr") + #' ex <- tinkr::yarn$new(path) + #' # all headings + #' ex$md_vec(".//md:heading") + #' # all headings greater than level 3 + #' ex$md_vec(".//md:heading[@level>3]") + #' # all links + #' ex$md_vec(".//md:link") + #' # all links that are part of lists + #' ex$md_vec(".//md:list//md:link") + #' # all code + #' ex$md_vec(".//md:code | .//md:code_block") + md_vec = function(xpath = NULL, stylesheet_path = stylesheet()) { + if (is.null(xpath)) { + return(NULL) + } + nodes <- xml2::xml_find_all(self$body, xpath, ns = self$ns) + return(to_md_vec(nodes, stylesheet_path)) + }, + #' @description add an arbitrary Markdown element to the document #' #' @param md a string of markdown formatted text. From 2061d00fbcc0c8bbd5cf6ccc278d9e04a959464d Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 23 May 2024 10:43:48 -0700 Subject: [PATCH 20/25] test md_vec method --- tests/testthat/test-class-yarn.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/testthat/test-class-yarn.R b/tests/testthat/test-class-yarn.R index e35a0be..36aa515 100644 --- a/tests/testthat/test-class-yarn.R +++ b/tests/testthat/test-class-yarn.R @@ -153,3 +153,25 @@ test_that("random markdown can be added", { expect_snapshot_file(scarf3) }) + + +test_that("md_vec() will convert a query to a markdown vector", { + + pathmd <- system.file("extdata", "example1.md", package = "tinkr") + y1 <- yarn$new(pathmd, sourcepos = TRUE, encoding = "utf-8") + + expect_null(y1$md_vec(NULL)) + + headings <- xml2::xml_find_all(y1$body, ".//md:heading", y1$ns) + + expected <- paste(strrep("#", xml2::xml_attr(headings, "level")), + xml2::xml_text(headings) + ) + expect_equal(y1$md_vec(".//md:heading"), expected) + + expect_equal(y1$md_vec(".//md:heading[@level=3]"), expected[1:4]) + expect_equal(y1$md_vec(".//md:heading[@level=4]"), expected[5:7]) + + expect_length(y1$md_vec(".//md:list//md:link"), 5) +}) + From ff81f40bcbf2f6e60379544cbc61818fa0dc74ac Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 23 May 2024 10:45:52 -0700 Subject: [PATCH 21/25] bump news; redocument --- NEWS.md | 6 ++++- man/to_md.Rd | 2 +- man/yarn.Rd | 69 +++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 71 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index ef7b1ca..cae651f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,7 +8,11 @@ content of a node, nodelist, or list of nodes without needing to print the entire document. * `yarn$show()` method now gains the `lines` parameter, which allows you to - subset the output by the lines of text. + subset the output by the lines of text. A warning is produced if a stylesheet + is supplied in place of `lines`. +* `yarn$md_vec()` is a new method that will generate a character vector of + markdown elements from a query. This is a convenience method that uses + `xml2::xml_find_all()` and `to_md_vec()` in the background. ## BUG FIX diff --git a/man/to_md.Rd b/man/to_md.Rd index 8223162..75c68e1 100644 --- a/man/to_md.Rd +++ b/man/to_md.Rd @@ -7,7 +7,7 @@ \usage{ to_md(yaml_xml_list, path = NULL, stylesheet_path = stylesheet()) -to_md_vec(nodelist) +to_md_vec(nodelist, stylesheet_path = stylesheet()) } \arguments{ \item{yaml_xml_list}{result from a call to \code{\link[=to_xml]{to_xml()}} and editing.} diff --git a/man/yarn.Rd b/man/yarn.Rd index e56eefb..b98660f 100644 --- a/man/yarn.Rd +++ b/man/yarn.Rd @@ -65,6 +65,23 @@ ex2$head(5) ex2$tail(5) ex2$show() +## ------------------------------------------------ +## Method `yarn$md_vec` +## ------------------------------------------------ + +path <- system.file("extdata", "example1.md", package = "tinkr") +ex <- tinkr::yarn$new(path) +# all headings +ex$md_vec(".//md:heading") +# all headings greater than level 3 +ex$md_vec(".//md:heading[@level>3]") +# all links +ex$md_vec(".//md:link") +# all links that are part of lists +ex$md_vec(".//md:list//md:link") +# all code +ex$md_vec(".//md:code | .//md:code_block") + ## ------------------------------------------------ ## Method `yarn$add_md` ## ------------------------------------------------ @@ -114,6 +131,10 @@ ex <- tinkr::yarn$new(path, sourcepos = TRUE, unescaped = FALSE) ex$tail() ex$protect_unescaped()$tail() } +\seealso{ +\code{\link[=to_md_vec]{to_md_vec()}} for a way to generate the same vector from a +nodelist without a yarn object +} \section{Public fields}{ \if{html}{\out{
}} \describe{ @@ -137,6 +158,7 @@ commonmark.} \item \href{#method-yarn-show}{\code{yarn$show()}} \item \href{#method-yarn-head}{\code{yarn$head()}} \item \href{#method-yarn-tail}{\code{yarn$tail()}} +\item \href{#method-yarn-md_vec}{\code{yarn$md_vec()}} \item \href{#method-yarn-add_md}{\code{yarn$add_md()}} \item \href{#method-yarn-protect_math}{\code{yarn$protect_math()}} \item \href{#method-yarn-protect_curly}{\code{yarn$protect_curly()}} @@ -294,8 +316,7 @@ show the head of the markdown contents on the screen \describe{ \item{\code{n}}{the number of elements to show from the top. Negative numbers} -\item{\code{stylesheet_path}}{path to the xsl stylesheet to convert XML to markdown. -exclude lines from the bottom} +\item{\code{stylesheet_path}}{path to the xsl stylesheet to convert XML to markdown.} } \if{html}{\out{
}} } @@ -317,14 +338,54 @@ show the tail of the markdown contents on the screen \describe{ \item{\code{n}}{the number of elements to show from the bottom. Negative numbers} -\item{\code{stylesheet_path}}{path to the xsl stylesheet to convert XML to markdown. -exclude lines from the top} +\item{\code{stylesheet_path}}{path to the xsl stylesheet to convert XML to markdown.} } \if{html}{\out{}} } \subsection{Returns}{ a character vector with \code{n} elements } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-yarn-md_vec}{}}} +\subsection{Method \code{md_vec()}}{ +query and extract markdown elements +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{yarn$md_vec(xpath = NULL, stylesheet_path = stylesheet())}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{xpath}}{a valid XPath expression} + +\item{\code{stylesheet_path}}{path to the xsl stylesheet to convert XML to markdown.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +a vector of markdown elements generated from the query +} +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{path <- system.file("extdata", "example1.md", package = "tinkr") +ex <- tinkr::yarn$new(path) +# all headings +ex$md_vec(".//md:heading") +# all headings greater than level 3 +ex$md_vec(".//md:heading[@level>3]") +# all links +ex$md_vec(".//md:link") +# all links that are part of lists +ex$md_vec(".//md:list//md:link") +# all code +ex$md_vec(".//md:code | .//md:code_block") +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} From 571d2f052a543b8f8b64ca9413219e3b6c851938 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Thu, 23 May 2024 10:57:14 -0700 Subject: [PATCH 22/25] avoid utf-8 things --- tests/testthat/test-class-yarn.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-class-yarn.R b/tests/testthat/test-class-yarn.R index 36aa515..24eb010 100644 --- a/tests/testthat/test-class-yarn.R +++ b/tests/testthat/test-class-yarn.R @@ -167,11 +167,12 @@ test_that("md_vec() will convert a query to a markdown vector", { expected <- paste(strrep("#", xml2::xml_attr(headings, "level")), xml2::xml_text(headings) ) - expect_equal(y1$md_vec(".//md:heading"), expected) - expect_equal(y1$md_vec(".//md:heading[@level=3]"), expected[1:4]) + expect_length(y1$md_vec(".//md:list//md:link"), 5) + + skip_on_os("windows") expect_equal(y1$md_vec(".//md:heading[@level=4]"), expected[5:7]) + expect_equal(y1$md_vec(".//md:heading"), expected) - expect_length(y1$md_vec(".//md:list//md:link"), 5) }) From 66d2aa1293c3f894c9c1156c91c7f7a0f101749a Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Fri, 14 Jun 2024 11:30:30 -0700 Subject: [PATCH 23/25] update and further document show funs --- R/show.R | 119 +++++++++++++++++++++++----------- man/isolate_nodes.Rd | 53 +++++++++++++++ man/show.Rd | 11 ++-- tests/testthat/_snaps/show.md | 35 ++++++++++ tests/testthat/test-show.R | 19 ++++++ 5 files changed, 196 insertions(+), 41 deletions(-) create mode 100644 man/isolate_nodes.Rd diff --git a/R/show.R b/R/show.R index e18a15a..c895f3a 100644 --- a/R/show.R +++ b/R/show.R @@ -3,6 +3,7 @@ #' When inspecting the results of an XPath query, displaying the text often #' @param nodelist an object of class `xml_nodelist` OR `xml_node` OR a list of #' either. +#' @inheritParams to_md #' @return a character vector, invisibly. The result of these functions are #' displayed to the screen #' @examples @@ -40,11 +41,12 @@ #' options(tinkr.censor.regex = "[^[:space:][:punct:]]") #' options(tinkr.censor.mark = "o") #' show_censor(links) -#' options(op) +#' options(tinkr.censor.regex = NULL) +#' options(tinkr.censor.mark = NULL) #' @seealso [to_md_vec()] to get a vector of these elements in isolation. #' @rdname show #' @export -show_list <- function(nodelist) { +show_list <- function(nodelist, stylesheet_path = stylesheet()) { res <- isolate_nodes(nodelist, type = "list") return(show_user(print_lines(res$doc))) } @@ -56,7 +58,7 @@ show_list <- function(nodelist) { #' the default state may cause nodes within the same block to appear adjacent #' to each other. #' @export -show_block <- function(nodelist, mark = FALSE) { +show_block <- function(nodelist, mark = FALSE, stylesheet_path = stylesheet()) { res <- isolate_nodes(nodelist, type = "context") if (mark) { res <- add_isolation_context(nodelist, res) @@ -66,20 +68,84 @@ show_block <- function(nodelist, mark = FALSE) { #' @rdname show #' @export -show_censor <- function(nodelist) { +show_censor <- function(nodelist, stylesheet_path = stylesheet()) { res <- isolate_nodes(nodelist, type = "censor") return(show_user(print_lines(res$doc))) } +#' Isolate nodes in a document +#' +#' @inheritParams show_list +#' @param type a string of either "context" (default), "censor", or "list" +#' +#' @return a list of two elements: +#' - doc: a copy of the document with the nodes isolated depending on the +#' context +#' - key: a string used to tag nodes that are isolated via the `tnk-key` +#' attribute +#' +#' @details +#' `isolate_nodes()` and `provision_isolation()` are the workhorses for the +#' `show` family of functions. These functions will create a copy of the +#' document with the nodes present in `nodelist` isolated. +#' +#' - `isolate_nodes()` provides a switch between specific modes: +#' - "context" include the nodes within the block context of the document. +#' For example, if the nodelist contains links in headings, paragraphs, and +#' lists, those links will appear within these blocks. When `mark = TRUE`, +#' ellipses `[...]` will be added to indicate hidden content. +#' - "censor" by default will replace all non-whitespace characters with a +#' censor character. This is controlled by `tinkr.censor.regex` and +#' `tinkr.censor.mark` +#' - "list" creates a new document and copies over the nodes so they appear +#' as a list of paragraphs. +#' - `provision_isolation()` uses [xml2::xml_root()] and [xml2::xml_path()] to +#' make a copy of the root document and then tag the corresponding nodes in +#' the nodelist so that we can filter on nodes that are not connected to +#' those present in the nodelist. +#' +#' @keywords internal isolate_nodes <- function(nodelist, type = "context") { switch(type, + "context" = isolate_nodes_block(nodelist), "censor" = isolate_nodes_censor(nodelist), - "context" = isolate_nodes_in_context(nodelist), - "list" = isolate_nodes_a_la_carte(nodelist), + "list" = isolate_nodes_list(nodelist), + ) +} + +#' @rdname isolate_nodes +provision_isolation <- function(nodelist) { + # create a copy of our document + doc <- if (inherits(nodelist, "xml_node")) nodelist else nodelist[[1]] + doc <- copy_xml(xml2::xml_root(doc)) + # get the path to the currently labelled nodes so we can + # isolate them in the copy. + # This will return one path statement per node + if (inherits(nodelist, c("xml_nodelist", "xml_node"))) { + path <- xml2::xml_path(nodelist) + } else { + path <- purrr::flatten_chr(purrr::map(nodelist, xml2::xml_path)) + } + # label new nodes with unique timestamp + key <- as.character(as.integer(Sys.time())) + purrr::walk(path, label_nodes, doc = doc, label = key) + + # find the unrelated nodes for deletion/censoring + # - not labelled + # - not a descendant of a labelled node + # - not an _ancestor_ of a labelled node + predicate <- sprintf("@tnk-key=%s", key) + ancestor <- sprintf("ancestor::*[%s]", predicate) + descendant <- sprintf("descendant::*[%s]", predicate) + xpth <- sprintf("not(%s) and not(%s) and not(%s)", + ancestor, descendant, predicate ) + unrelated <- xml2::xml_find_all(doc, sprintf("//node()[%s]", xpth)) + return(list(doc = doc, key = key, unrelated = unrelated)) } -isolate_nodes_a_la_carte <- function(nodelist) { + +isolate_nodes_list <- function(nodelist) { doc <- xml2::read_xml(commonmark::markdown_xml("")) # if we get a single node, make sure it's in a list if (inherits(nodelist, "xml_node")) { @@ -100,52 +166,31 @@ isolate_nodes_a_la_carte <- function(nodelist) { } -isolate_nodes_in_context <- function(nodelist) { +isolate_nodes_block <- function(nodelist) { res <- provision_isolation(nodelist) - xml2::xml_remove(res$parents) + xml2::xml_remove(res$unrelated) return(list(doc = res$doc, key = res$key)) } isolate_nodes_censor <- function(nodelist) { res <- provision_isolation(nodelist) - censor_attr(res$parents, "destination") - censor_attr(res$parents, "title") - censor_attr(res$parents, "rel") - txt <- xml2::xml_find_all(res$parents, ".//text()") + censor_attr(res$unrelated, "destination") + censor_attr(res$unrelated, "title") + censor_attr(res$unrelated, "rel") + txt <- xml2::xml_find_all(res$unrelated, ".//text()") xml2::xml_set_text(txt, censor(xml2::xml_text(txt))) return(list(doc = res$doc, key = res$key)) } -provision_isolation <- function(nodelist) { - # create a copy of our document - doc <- copy_xml(xml2::xml_root(nodelist)) - # get the path to the currently labelled nodes so we can isolate them - # in the copy - # This will return one path statement per node - path <- xml2::xml_path(nodelist) - # label new nodes with unique timestamp - tim <- as.character(as.integer(Sys.time())) - purrr::walk(path, label_nodes, doc = doc, label = tim) - - # find the unlabelled nodes - predicate <- sprintf("@label=%s", tim) - xpth <- sprintf("not(descendant::*[%s]) and not(ancestor::*[%s]) and not(%s)", - predicate, predicate, predicate - ) - rents <- xml2::xml_find_all(doc, sprintf("//node()[%s]", xpth)) - return(list(doc = doc, key = tim, parents = rents)) - -} - add_isolation_context <- function(nodelist, isolated) { - sib <- sprintf("sibling::*[1][not(@label=%s)]", isolated$key) + sib <- sprintf("sibling::*[1][not(@tnk-key=%s)]", isolated$key) pretext <- xml2::xml_find_lgl(nodelist, sprintf("boolean(count(preceding-%s)!=0)", sib) ) postext <- xml2::xml_find_lgl(nodelist, sprintf("boolean(count(following-%s)!=0)", sib) ) - xpath <- sprintf(".//node()[@label=%s]", isolated$key) + xpath <- sprintf(".//node()[@tnk-key=%s]", isolated$key) labelled <- xml2::xml_find_all(isolated$doc, xpath) purrr::walk(labelled[pretext], function(node) { xml2::xml_add_sibling(node, .where = "before", @@ -199,6 +244,6 @@ print_lines <- function(xml, path = NULL, stylesheet = NULL) { label_nodes <- function(xpath, doc, label = "save") { xml2::xml_set_attr( xml2::xml_find_all(doc, xpath, ns = md_ns()), - "label", label) + "tnk-key", label) } diff --git a/man/isolate_nodes.Rd b/man/isolate_nodes.Rd new file mode 100644 index 0000000..8e92b4c --- /dev/null +++ b/man/isolate_nodes.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/show.R +\name{isolate_nodes} +\alias{isolate_nodes} +\alias{provision_isolation} +\title{Isolate nodes in a document} +\usage{ +isolate_nodes(nodelist, type = "context") + +provision_isolation(nodelist) +} +\arguments{ +\item{nodelist}{an object of class \code{xml_nodelist} OR \code{xml_node} OR a list of +either.} + +\item{type}{a string of either "context" (default), "censor", or "list"} +} +\value{ +a list of two elements: +\itemize{ +\item doc: a copy of the document with the nodes isolated depending on the +context +\item key: a string used to tag nodes that are isolated via the \code{tnk-key} +attribute +} +} +\description{ +Isolate nodes in a document +} +\details{ +\code{isolate_nodes()} and \code{provision_isolation()} are the workhorses for the +\code{show} family of functions. These functions will create a copy of the +document with the nodes present in \code{nodelist} isolated. +\itemize{ +\item \code{isolate_nodes()} provides a switch between specific modes: +\itemize{ +\item "context" include the nodes within the block context of the document. +For example, if the nodelist contains links in headings, paragraphs, and +lists, those links will appear within these blocks. When \code{mark = TRUE}, +ellipses \verb{[...]} will be added to indicate hidden content. +\item "censor" by default will replace all non-whitespace characters with a +censor character. This is controlled by \code{tinkr.censor.regex} and +\code{tinkr.censor.mark} +\item "list" creates a new document and copies over the nodes so they appear +as a list of paragraphs. +} +\item \code{provision_isolation()} uses \code{\link[xml2:xml_children]{xml2::xml_root()}} and \code{\link[xml2:xml_path]{xml2::xml_path()}} to +make a copy of the root document and then tag the corresponding nodes in +the nodelist so that we can filter on nodes that are not connected to +those present in the nodelist. +} +} +\keyword{internal} diff --git a/man/show.Rd b/man/show.Rd index 1fa185c..27ed745 100644 --- a/man/show.Rd +++ b/man/show.Rd @@ -6,16 +6,18 @@ \alias{show_censor} \title{Display a node or nodelist as markdown} \usage{ -show_list(nodelist) +show_list(nodelist, stylesheet_path = stylesheet()) -show_block(nodelist, mark = FALSE) +show_block(nodelist, mark = FALSE, stylesheet_path = stylesheet()) -show_censor(nodelist) +show_censor(nodelist, stylesheet_path = stylesheet()) } \arguments{ \item{nodelist}{an object of class \code{xml_nodelist} OR \code{xml_node} OR a list of either.} +\item{stylesheet_path}{path to the XSL stylesheet} + \item{mark}{[bool] When \code{TRUE} markers (\verb{[...]}) are added to replace nodes that come before or after the islated nodes. Defaults to \code{FALSE}, which only shows the isolated nodes in their respective blocks. Note that @@ -64,7 +66,8 @@ op <- options() options(tinkr.censor.regex = "[^[:space:][:punct:]]") options(tinkr.censor.mark = "o") show_censor(links) -options(op) +options(tinkr.censor.regex = NULL) +options(tinkr.censor.mark = NULL) } \seealso{ \code{\link[=to_md_vec]{to_md_vec()}} to get a vector of these elements in isolation. diff --git a/tests/testthat/_snaps/show.md b/tests/testthat/_snaps/show.md index b74081c..cdfb045 100644 --- a/tests/testthat/_snaps/show.md +++ b/tests/testthat/_snaps/show.md @@ -75,6 +75,41 @@ +# show_censor() will show a censored list of disparate elements + + Code + show_user(disp, force = TRUE) + Output + + + ## ..... + + ### ........ + + .... ... .... [........ .....](......) ... [...... .....]. + + ### ...... + + ![....... ... ....](...............................)....... ...... ....... .. . ........ + + ## Lists + + - ....... + - ... + - ..... + - .... + - .... + - .... + - ...... ....... + - ...... + - ... + - ....... + + ## .... + + .... .. .. ....... .. ... `utils::strcapture()` function + + # show_censor() will censor elements Code diff --git a/tests/testthat/test-show.R b/tests/testthat/test-show.R index e0e922c..d198d07 100644 --- a/tests/testthat/test-show.R +++ b/tests/testthat/test-show.R @@ -25,6 +25,25 @@ test_that("show_list() will isolate groups of elements", { }) +test_that("show_censor() will show a censored list of disparate elements", { + path <- system.file("extdata", "show-example.md", package = "tinkr") + y <- tinkr::yarn$new(path, sourcepos = TRUE) + pth <- c("*[6]", "*[9]/*[2]", "*[9]/*[3]") + nodes <- purrr::map(pth, function(p) { + xml2::xml_find_all(y$body, p) + }) + expect_length(nodes, 3) + # the censor option can be adjusted + withr::local_options(list( + tinkr.censor.mark = ".", + tinkr.censor.regex = "[^[:space:]]" + ) + ) + disp <- show_censor(nodes)[1:29] + expect_snapshot(show_user(disp, force = TRUE)) +}) + + test_that("show_censor() will censor elements", { path <- system.file("extdata", "show-example.md", package = "tinkr") y <- tinkr::yarn$new(path, sourcepos = TRUE) From 60e79eefaf87a3857270f0cb864467ee0744c2a4 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Fri, 14 Jun 2024 14:22:58 -0700 Subject: [PATCH 24/25] fix class check xml_nodelist -> xml_nodeset --- R/show.R | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/R/show.R b/R/show.R index c895f3a..417a75a 100644 --- a/R/show.R +++ b/R/show.R @@ -1,7 +1,7 @@ #' Display a node or nodelist as markdown #' #' When inspecting the results of an XPath query, displaying the text often -#' @param nodelist an object of class `xml_nodelist` OR `xml_node` OR a list of +#' @param nodelist an object of class `xml_nodeset` OR `xml_node` OR a list of #' either. #' @inheritParams to_md #' @return a character vector, invisibly. The result of these functions are @@ -85,25 +85,18 @@ show_censor <- function(nodelist, stylesheet_path = stylesheet()) { #' attribute #' #' @details -#' `isolate_nodes()` and `provision_isolation()` are the workhorses for the -#' `show` family of functions. These functions will create a copy of the -#' document with the nodes present in `nodelist` isolated. -#' -#' - `isolate_nodes()` provides a switch between specific modes: -#' - "context" include the nodes within the block context of the document. -#' For example, if the nodelist contains links in headings, paragraphs, and -#' lists, those links will appear within these blocks. When `mark = TRUE`, -#' ellipses `[...]` will be added to indicate hidden content. -#' - "censor" by default will replace all non-whitespace characters with a -#' censor character. This is controlled by `tinkr.censor.regex` and -#' `tinkr.censor.mark` -#' - "list" creates a new document and copies over the nodes so they appear -#' as a list of paragraphs. -#' - `provision_isolation()` uses [xml2::xml_root()] and [xml2::xml_path()] to -#' make a copy of the root document and then tag the corresponding nodes in -#' the nodelist so that we can filter on nodes that are not connected to -#' those present in the nodelist. -#' +#' `isolate_nodes()`is the workhorse for the `show` family of functions. These +#' functions will create a copy of the document with the nodes present in +#' `nodelist` isolated. It has the following switches for "type": +#' - "context" include the nodes within the block context of the document. +#' For example, if the nodelist contains links in headings, paragraphs, and +#' lists, those links will appear within these blocks. When `mark = TRUE`, +#' ellipses `[...]` will be added to indicate hidden content. +#' - "censor" by default will replace all non-whitespace characters with a +#' censor character. This is controlled by `tinkr.censor.regex` and +#' `tinkr.censor.mark` +#' - "list" creates a new document and copies over the nodes so they appear +#' as a list of paragraphs. #' @keywords internal isolate_nodes <- function(nodelist, type = "context") { switch(type, @@ -113,7 +106,13 @@ isolate_nodes <- function(nodelist, type = "context") { ) } -#' @rdname isolate_nodes +#' Create a document and list of nodes to isolate +#' +#' This uses [xml2::xml_root()] and [xml2::xml_path()] to make a copy of the +#' root document and then tag the corresponding nodes in the nodelist so that +#' we can filter on nodes that are not connected to those present in the +#' nodelist. +#' provision_isolation <- function(nodelist) { # create a copy of our document doc <- if (inherits(nodelist, "xml_node")) nodelist else nodelist[[1]] @@ -121,7 +120,7 @@ provision_isolation <- function(nodelist) { # get the path to the currently labelled nodes so we can # isolate them in the copy. # This will return one path statement per node - if (inherits(nodelist, c("xml_nodelist", "xml_node"))) { + if (inherits(nodelist, c("xml_nodeset", "xml_node"))) { path <- xml2::xml_path(nodelist) } else { path <- purrr::flatten_chr(purrr::map(nodelist, xml2::xml_path)) From cda6afcb094ce54368234b64a193d5390678a2d1 Mon Sep 17 00:00:00 2001 From: "Zhian N. Kamvar" Date: Fri, 21 Jun 2024 15:01:06 -0700 Subject: [PATCH 25/25] update documentation --- R/class-yarn.R | 9 +++++--- R/show.R | 31 +++++++++++++++++++++++++- man/isolate_nodes.Rd | 33 +++++++++++++++++----------- man/provision_isolation.Rd | 45 ++++++++++++++++++++++++++++++++++++++ man/show.Rd | 2 +- 5 files changed, 102 insertions(+), 18 deletions(-) create mode 100644 man/provision_isolation.Rd diff --git a/R/class-yarn.R b/R/class-yarn.R index d59a473..a352c3a 100644 --- a/R/class-yarn.R +++ b/R/class-yarn.R @@ -115,9 +115,12 @@ yarn <- R6::R6Class("yarn", the_call$stylesheet_path <- the_call$lines the_call$lines <- NULL new_call <- capture.output(print(the_call)) - warning( - "In {tinkr} 0.3.0, the $show() method gained the `lines` argument as the first argument.\n", - sprintf("To remove this warning, use: `%s`", new_call), + rlang::warn( + c( + "!" = "In {tinkr} 0.3.0, the $show() method gains the `lines` argument as the first argument.", + "i" = "To remove this warning, use the following code:", + " " = new_call + ), call. = FALSE) } show_user(private$md_lines(stylesheet = stylesheet_path)[lines]) diff --git a/R/show.R b/R/show.R index 417a75a..ad218ff 100644 --- a/R/show.R +++ b/R/show.R @@ -98,6 +98,16 @@ show_censor <- function(nodelist, stylesheet_path = stylesheet()) { #' - "list" creates a new document and copies over the nodes so they appear #' as a list of paragraphs. #' @keywords internal +#' @family nodeset isolation functions +#' @examplesIf isTRUE(as.logical(Sys.getenv("NOT_CRAN", "false"))) +#' path <- system.file("extdata", "show-example.md", package = "tinkr") +#' y <- tinkr::yarn$new(path, sourcepos = TRUE) +#' y$protect_math()$protect_curly() +#' items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) +#' tnk <- asNamespace("tinkr") +#' tnk$isolate_nodes(items, type = "context") +#' tnk$isolate_nodes(items, type = "censor") +#' tnk$isolate_nodes(items, type = "list") isolate_nodes <- function(nodelist, type = "context") { switch(type, "context" = isolate_nodes_block(nodelist), @@ -111,8 +121,27 @@ isolate_nodes <- function(nodelist, type = "context") { #' This uses [xml2::xml_root()] and [xml2::xml_path()] to make a copy of the #' root document and then tag the corresponding nodes in the nodelist so that #' we can filter on nodes that are not connected to those present in the -#' nodelist. +#' nodelist. This function is required for [isolate_nodes()] to work. #' +#' +#' @inheritParams isolate_nodes +#' +#' @return a list of three elements: +#' - doc: a copy of the document with the nodes isolated depending on the +#' context +#' - key: a string used to tag nodes that are isolated via the `tnk-key` +#' attribute. +#' - unrelated: an `xml_nodeset` containing nodes that have no ancestor, +#' descendant, or self relationship to the nodes in `nodelist`. +#' @keywords internal +#' @family nodeset isolation functions +#' @examplesIf isTRUE(as.logical(Sys.getenv("NOT_CRAN", "false"))) +#' path <- system.file("extdata", "show-example.md", package = "tinkr") +#' y <- tinkr::yarn$new(path, sourcepos = TRUE) +#' y$protect_math()$protect_curly() +#' items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) +#' tnk <- asNamespace("tinkr") +#' tnk$provision_isolation(items) provision_isolation <- function(nodelist) { # create a copy of our document doc <- if (inherits(nodelist, "xml_node")) nodelist else nodelist[[1]] diff --git a/man/isolate_nodes.Rd b/man/isolate_nodes.Rd index 8e92b4c..380e07c 100644 --- a/man/isolate_nodes.Rd +++ b/man/isolate_nodes.Rd @@ -2,15 +2,12 @@ % Please edit documentation in R/show.R \name{isolate_nodes} \alias{isolate_nodes} -\alias{provision_isolation} \title{Isolate nodes in a document} \usage{ isolate_nodes(nodelist, type = "context") - -provision_isolation(nodelist) } \arguments{ -\item{nodelist}{an object of class \code{xml_nodelist} OR \code{xml_node} OR a list of +\item{nodelist}{an object of class \code{xml_nodeset} OR \code{xml_node} OR a list of either.} \item{type}{a string of either "context" (default), "censor", or "list"} @@ -28,11 +25,9 @@ attribute Isolate nodes in a document } \details{ -\code{isolate_nodes()} and \code{provision_isolation()} are the workhorses for the -\code{show} family of functions. These functions will create a copy of the -document with the nodes present in \code{nodelist} isolated. -\itemize{ -\item \code{isolate_nodes()} provides a switch between specific modes: +\code{isolate_nodes()}is the workhorse for the \code{show} family of functions. These +functions will create a copy of the document with the nodes present in +\code{nodelist} isolated. It has the following switches for "type": \itemize{ \item "context" include the nodes within the block context of the document. For example, if the nodelist contains links in headings, paragraphs, and @@ -44,10 +39,22 @@ censor character. This is controlled by \code{tinkr.censor.regex} and \item "list" creates a new document and copies over the nodes so they appear as a list of paragraphs. } -\item \code{provision_isolation()} uses \code{\link[xml2:xml_children]{xml2::xml_root()}} and \code{\link[xml2:xml_path]{xml2::xml_path()}} to -make a copy of the root document and then tag the corresponding nodes in -the nodelist so that we can filter on nodes that are not connected to -those present in the nodelist. } +\examples{ +\dontshow{if (isTRUE(as.logical(Sys.getenv("NOT_CRAN", "false")))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +path <- system.file("extdata", "show-example.md", package = "tinkr") +y <- tinkr::yarn$new(path, sourcepos = TRUE) +y$protect_math()$protect_curly() +items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) +tnk <- asNamespace("tinkr") +tnk$isolate_nodes(items, type = "context") +tnk$isolate_nodes(items, type = "censor") +tnk$isolate_nodes(items, type = "list") +\dontshow{\}) # examplesIf} +} +\seealso{ +Other nodeset isolation functions: +\code{\link{provision_isolation}()} } +\concept{nodeset isolation functions} \keyword{internal} diff --git a/man/provision_isolation.Rd b/man/provision_isolation.Rd new file mode 100644 index 0000000..c1c22b9 --- /dev/null +++ b/man/provision_isolation.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/show.R +\name{provision_isolation} +\alias{provision_isolation} +\title{Create a document and list of nodes to isolate} +\usage{ +provision_isolation(nodelist) +} +\arguments{ +\item{nodelist}{an object of class \code{xml_nodeset} OR \code{xml_node} OR a list of +either.} +} +\value{ +a list of three elements: +\itemize{ +\item doc: a copy of the document with the nodes isolated depending on the +context +\item key: a string used to tag nodes that are isolated via the \code{tnk-key} +attribute. +\item unrelated: an \code{xml_nodeset} containing nodes that have no ancestor, +descendant, or self relationship to the nodes in \code{nodelist}. +} +} +\description{ +This uses \code{\link[xml2:xml_children]{xml2::xml_root()}} and \code{\link[xml2:xml_path]{xml2::xml_path()}} to make a copy of the +root document and then tag the corresponding nodes in the nodelist so that +we can filter on nodes that are not connected to those present in the +nodelist. This function is required for \code{\link[=isolate_nodes]{isolate_nodes()}} to work. +} +\examples{ +\dontshow{if (isTRUE(as.logical(Sys.getenv("NOT_CRAN", "false")))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +path <- system.file("extdata", "show-example.md", package = "tinkr") +y <- tinkr::yarn$new(path, sourcepos = TRUE) +y$protect_math()$protect_curly() +items <- xml2::xml_find_all(y$body, ".//md:item", tinkr::md_ns()) +tnk <- asNamespace("tinkr") +tnk$provision_isolation(items) +\dontshow{\}) # examplesIf} +} +\seealso{ +Other nodeset isolation functions: +\code{\link{isolate_nodes}()} +} +\concept{nodeset isolation functions} +\keyword{internal} diff --git a/man/show.Rd b/man/show.Rd index 27ed745..5934195 100644 --- a/man/show.Rd +++ b/man/show.Rd @@ -13,7 +13,7 @@ show_block(nodelist, mark = FALSE, stylesheet_path = stylesheet()) show_censor(nodelist, stylesheet_path = stylesheet()) } \arguments{ -\item{nodelist}{an object of class \code{xml_nodelist} OR \code{xml_node} OR a list of +\item{nodelist}{an object of class \code{xml_nodeset} OR \code{xml_node} OR a list of either.} \item{stylesheet_path}{path to the XSL stylesheet}