Skip to content

Commit

Permalink
implement formatting function for adorn_ns (#515)
Browse files Browse the repository at this point in the history
* implement format_func from #444

* specify namespace for dplyr::desc
  • Loading branch information
sfirke committed Jan 24, 2023
1 parent d528ec9 commit a52a120
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 13 deletions.
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@

* `get_dupes()` results are now sorted first by descending order of `dupe_count`, then alphabetically by sorting variables. (#493)

* There are several minor breaking changes resulting from enhancements to `adorn_ns()`:
* The addition of the new argument `format_func` means that previous calls relying on `,,,` as shorthand to get to the `...` column selection argument may now require an extra comma
* `adorn_ns()` now defaults to displaying numbers of >3 digits with `big.mark = ","`, as part of the default value of the new `format_func` argument. E.g., `1234` is now `1,234`.
* `adorn_ns()` no longer prints leading whitespace when `position = "front"` - this is not a visible change in the printed result and it would be rare that this affects any code.

## New features

* `row_to_names()` now has a new helper function, `find_header()` to help find the row that contains the names. It can be used by passing `row_number="find_header"`, and see the documentation of `row_to_names()` and `find_header()` for more examples. (fix #429)
Expand All @@ -24,6 +29,8 @@

* A new function `get_one_to_one()` has been added to find columns that map 1:1 to each other, even if the values within the columns differ (fix #291, **@billdenney**)

* `adorn_Ns()` contains a new `format_func` argument so that the user can format the Ns to their liking, e.g., changing the `big.mark` character. (#444)

* `clean_names()` can now be called on database connection in a dbplyr code pipeline (#467)

## Minor features
Expand Down
29 changes: 23 additions & 6 deletions R/adorn_ns.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
#'
#' @param dat a data.frame of class \code{tabyl} that has had \code{adorn_percentages} and/or \code{adorn_pct_formatting} called on it. If given a list of data.frames, this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).
#' @param position should the N go in the front, or in the rear, of the percentage?
#' @param ns the Ns to append. The default is the "core" attribute of the input tabyl \code{dat}, where the original Ns of a two-way \code{tabyl} are stored. However, if you need to modify the numbers, e.g., to format \code{4000} as \code{4,000} or \code{4k}, you can do that separately and supply the formatted result here.
#' @param ns the Ns to append. The default is the "core" attribute of the input tabyl \code{dat}, where the original Ns of a two-way \code{tabyl} are stored. However, if your Ns are stored somewhere else, or you need to customize them beyond what can be done with `format_func`, you can supply them here.
#' @param format_func a formatting function to run on the Ns. Consider defining with \code{base::format()}.
#' @param ... columns to adorn. This takes a tidyselect specification. By default, all columns are adorned except for the first column and columns not of class \code{numeric}, but this allows you to manually specify which columns should be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.
#'
#' @return a data.frame with Ns appended
Expand All @@ -18,6 +19,19 @@
#' adorn_pct_formatting() %>%
#' adorn_ns(position = "front")
#'
#' # Format the Ns with a custom format_func:
#' set.seed(1)
#' bigger_dat <- data.frame(sex = rep(c("m", "f"), 3000),
#' age = round(runif(3000, 1, 102), 0))
#' bigger_dat$age_group = cut(bigger_dat$age, quantile(bigger_dat$age, c(0, 1/3, 2/3, 1)))
#'
#' bigger_dat %>%
#' tabyl(age_group, sex, show_missing_levels = FALSE) %>%
#' adorn_totals(c("row", "col")) %>%
#' adorn_percentages("col") %>%
#' adorn_pct_formatting(digits = 1) %>%
#' adorn_ns(format_func = function(x) format(x, big.mark = ".", decimal.mark = ","))

#' # Control the columns to be adorned with the ... variable selection argument
#' # If using only the ... argument, you can use empty commas as shorthand
#' # to supply the default values to the preceding arguments:
Expand All @@ -31,10 +45,10 @@
#'
#'cases %>%
#' adorn_percentages("col",,recovered:died) %>%
#' adorn_pct_formatting(,,,recovered:died) %>%
#' adorn_ns(,,recovered:died)
#' adorn_pct_formatting(,,,,,recovered:died) %>%
#' adorn_ns(,,,recovered:died)
#'
adorn_ns <- function(dat, position = "rear", ns = attr(dat, "core"), ...) {
adorn_ns <- function(dat, position = "rear", ns = attr(dat, "core"), format_func = function(x) { format(x, big.mark = ",") }, ...) {
# if input is a list, call purrr::map to recursively apply this function to each data.frame
if (is.list(dat) && !is.data.frame(dat)) {
purrr::map(dat, adorn_ns, position) # okay not to pass ns and allow for static Ns, b/c one size fits all for each list entry doesn't make sense for Ns.
Expand Down Expand Up @@ -66,14 +80,17 @@ adorn_ns <- function(dat, position = "rear", ns = attr(dat, "core"), ...) {
if (custom_ns_supplied & !identical(dim(ns), dim(dat))) { # user-supplied Ns must include values for totals row/col if present
stop("if supplying your own data.frame of Ns to append, its dimensions must match those of the data.frame in the \"dat\" argument")
}

# If appending the default Ns from the core, and there are totals rows/cols, append those values to the Ns table
# Custom inputs to ns argument will need to calculate & format their own totals row/cols
if (!custom_ns_supplied) {
if (!is.null(attr(dat, "totals"))) { # add totals row/col to core for pasting, if applicable
ns <- adorn_totals(ns, attr(dat, "totals"))
ns <- ns[order(match(ns[, 1], dat[, 1])), ] # from #407 - in rare event Totals row has been sorted off the bottom, sort to match
}
numeric_cols <- which(vapply(ns, is.numeric, logical(1)))
ns[] <- lapply(ns, format_func)
ns[] <- lapply(ns, stringr::str_trim)
}

if (position == "rear") {
Expand All @@ -92,7 +109,7 @@ adorn_ns <- function(dat, position = "rear", ns = attr(dat, "core"), ...) {
if(custom_ns_supplied & rlang::dots_n(...) == 0){
dont_adorn <- 1L
} else if(rlang::dots_n(...) == 0){
cols_to_adorn <- which(vapply(ns, is.numeric, logical(1))) # numeric cols
cols_to_adorn <- numeric_cols
dont_adorn <- setdiff(1:ncol(dat), cols_to_adorn)
dont_adorn <- unique(c(1, dont_adorn)) # always don't-append first column
} else {
Expand Down
2 changes: 1 addition & 1 deletion R/get_dupes.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ get_dupes <- function(dat, ...) {
dplyr::add_count(!!! nms, name = "dupe_count") %>%
dplyr::filter(dupe_count > 1) %>%
dplyr::select(!!! nms, dupe_count, dplyr::everything()) %>%
dplyr::arrange(desc(dupe_count), !!! nms)
dplyr::arrange(dplyr::desc(dupe_count), !!! nms)

# shorten error message for large data.frames
if (length(var_names) > 10) {
Expand Down
30 changes: 26 additions & 4 deletions man/adorn_ns.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 41 additions & 2 deletions tests/testthat/test-adorn-ns.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ test_that("front parameter works", {
untabyl(),
data.frame(
x = c(letters[1:4], "Total"),
`0` = c("417 (82.7%)", " 2 (0.4%)", " 1 (0.2%)", " 0 (0.0%)", "420 (83.3%)"),
`1` = c("83 (16.5%)", " 0 (0.0%)", " 0 (0.0%)", " 1 (0.2%)", "84 (16.7%)"),
`0` = c("417 (82.7%)", "2 (0.4%)", "1 (0.2%)", "0 (0.0%)", "420 (83.3%)"),
`1` = c("83 (16.5%)", "0 (0.0%)", "0 (0.0%)", "1 (0.2%)", "84 (16.7%)"),
check.names = FALSE,
stringsAsFactors = FALSE
)
Expand Down Expand Up @@ -198,3 +198,42 @@ test_that("adorn_ns works on single column data.frame with custom Ns if tidysele
adorn_ns(ns = dplyr::select(attr(adorned_single, "core"), a = `4`),,,, a)
expect_equal(stringr::str_sub(adorned_single$a, -4, -1), c(" (3)", " (8)"))
})

# This tests the display of the decimal.mark by forcing a decimal into a tabyl
# Can't happen with a natural table, but maybe someone will use adorn_ns on a homespun data.frame
test_that("formatting function works, #444", {
set.seed(1)
bigger_dat <- data.frame(sex = rep(c("m", "f"), 3000),
age = round(runif(3000, 1, 102), 0))
bigger_dat$age_group = cut(bigger_dat$age, quantile(bigger_dat$age, c(0, 1/3, 2/3, 1)))

bigger_tab <- bigger_dat %>%
tabyl(age_group, sex, show_missing_levels = F)

standard_output <- bigger_tab %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns(position = "front")

# test commas in thousands place by default
expect_equal(standard_output$f,
c("1,018 (33.9%)", "990 (33.0%)", "980 (32.7%)", "12 (0.4%)")
)

# Test decimal mark
bigger_tab$f[1] <- 1018.5 # makes no sense in a tabyl but need for testing decimal mark display

bigger_result <- bigger_tab %>%
untabyl() %>% # to get the decimal into the core
as_tabyl() %>%
adorn_totals(c("row", "col")) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns(position = "rear", format_func = function(x) format(x, big.mark = ".", decimal.mark = ","))

expect_equal(
bigger_result$f,
c("33.9% (1.018,5)", "33.0% (990,0)", "32.7% (980,0)", "0.4% (12,0)",
"100.0% (3.000,5)")
)
})

0 comments on commit a52a120

Please sign in to comment.