Merge pull request #30 from juliasilge/master

Update for tidytext::cast_sparse()
juliasilge · Apr 3, 2020 · e5e790c · e5e790c
2 parents 3e800ac + c5966f9
commit e5e790c
Show file tree

Hide file tree

Showing 12 changed files with 73 additions and 56 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -9,12 +9,14 @@ Description: Encapsulates the pattern of untidying data into a wide matrix,
   is useful for several operations such as co-occurrence counts,
   correlations, or clustering that are mathematically convenient on wide matrices.
 License: MIT + file LICENSE
+Encoding: UTF-8
 LazyData: TRUE
 Maintainer: David Robinson <admiral.david@gmail.com>
 URL: http://github.com/dgrtwo/widyr
 BugReports: http://github.com/dgrtwo/widyr/issues
 VignetteBuilder: knitr
 Imports:
+    rlang,
     dplyr,
     tidyr,
     reshape2,
@@ -38,4 +40,4 @@ Suggests:
     ggplot2,
     maps,
     irlba
-RoxygenNote: 6.1.1
+RoxygenNote: 7.1.0
diff --git a/R/pairwise_count.R b/R/pairwise_count.R
@@ -19,12 +19,12 @@
 #' @examples
 #'
 #' library(dplyr)
-#' dat <- data_frame(group = rep(1:5, each = 2),
-#'                   letter = c("a", "b",
-#'                              "a", "c",
-#'                              "a", "c",
-#'                              "b", "e",
-#'                              "b", "f"))
+#' dat <- tibble(group = rep(1:5, each = 2),
+#'               letter = c("a", "b",
+#'                          "a", "c",
+#'                          "a", "c",
+#'                          "b", "e",
+#'                          "b", "f"))
 #'
 #' # count the number of times two letters appear together
 #' pairwise_count(dat, letter, group)

diff --git a/R/pairwise_pmi.R b/R/pairwise_pmi.R
@@ -22,12 +22,12 @@
 #'
 #' library(dplyr)
 #'
-#' dat <- data_frame(group = rep(1:5, each = 2),
-#'                   letter = c("a", "b",
-#'                              "a", "c",
-#'                              "a", "c",
-#'                              "b", "e",
-#'                              "b", "f"))
+#' dat <- tibble(group = rep(1:5, each = 2),
+#'               letter = c("a", "b",
+#'                          "a", "c",
+#'                          "a", "c",
+#'                          "b", "e",
+#'                          "b", "f"))
 #'
 #' # how informative is each letter about each other letter
 #' pairwise_pmi(dat, letter, group)

diff --git a/R/widely.R b/R/widely.R
@@ -88,20 +88,22 @@ widely_ <- function(.f,
     if (!sparse) {
       if (!is.null(maximum_size)) {
         matrix_size <- (length(unique(tbl[[row]])) *
-                        length(unique(tbl[[column]])))
+                          length(unique(tbl[[column]])))
         if (matrix_size > maximum_size) {
-          stop("Size of acast matrix, ", matrix_size,
-               " will be too large. Set maximum_size = NULL to avoid ",
-               "this error (make sure your memory is sufficient), ",
-               "or consider using sparse = TRUE.")
+          rlang::abort(
+            paste0("Size of acast matrix, ", matrix_size,
+                   " will be too large. Set maximum_size = NULL to avoid ",
+                   "this error (make sure your memory is sufficient), ",
+                   "or consider using sparse = TRUE.")
+          )
         }
       }
 
       form <- stats::as.formula(paste(row, column, sep = " ~ "))
 
       input <- reshape2::acast(tbl, form, value.var = value, fill = 0)
     } else {
-      input <- tidytext::cast_sparse_(tbl, row, column, value)
+      input <- tidytext::cast_sparse(tbl, !!row, !!column, !!value)
     }
     output <- purrr::as_mapper(.f)(input, ...)
 
@@ -123,7 +125,7 @@ widely_ <- function(.f,
 #' @noRd
 custom_melt <- function(m) {
   if (inherits(m, "data.frame")) {
-    stop("Output is a data frame: don't know how to fix")
+    rlang::abort("Output is a data frame: don't know how to fix")
   }
   if (inherits(m, "matrix")) {
     ret <- reshape2::melt(m, varnames = c("item1", "item2"), as.is = TRUE)

diff --git a/man/pairwise_cor.Rd b/man/pairwise_cor.Rd
diff --git a/man/pairwise_count.Rd b/man/pairwise_count.Rd
diff --git a/man/pairwise_pmi.Rd b/man/pairwise_pmi.Rd
diff --git a/man/widely_svd.Rd b/man/widely_svd.Rd
diff --git a/tests/testthat/test-pairwise-cor.R b/tests/testthat/test-pairwise-cor.R
@@ -2,9 +2,9 @@ context("pairwise_cor")
 
 suppressPackageStartupMessages(library(dplyr))
 
-d <- data_frame(col = rep(c("a", "b", "c"), each = 3),
-                row = rep(c("d", "e", "f"), 3),
-                value = c(1, 2, 3, 6, 5, 4, 7, 9, 8))
+d <- tibble(col = rep(c("a", "b", "c"), each = 3),
+            row = rep(c("d", "e", "f"), 3),
+            value = c(1, 2, 3, 6, 5, 4, 7, 9, 8))
 
 test_that("pairwise_cor computes pairwise correlations", {
   ret <- d %>%
@@ -29,7 +29,7 @@ test_that("pairwise_cor can compute Spearman correlations", {
 
 test_that("pairwise_cor works on binary matrices", {
   cors <- data.frame(x = c("a", "a", "a", "b", "b", "b", "c", "c", "c"),
-             y = c(1, 2, 3, 1, 2, 3, 1, 3, 4)) %>%
+                     y = c(1, 2, 3, 1, 2, 3, 1, 3, 4)) %>%
     pairwise_cor(x, y, sort = TRUE)
 
   expect_equal(colnames(cors), c("item1", "item2", "correlation"))

diff --git a/tests/testthat/test-pairwise-count.R b/tests/testthat/test-pairwise-count.R
@@ -5,10 +5,10 @@ context("pairwise_count")
 suppressPackageStartupMessages(library(dplyr))
 suppressPackageStartupMessages(library(tidytext))
 
-original <- data_frame(txt = c("I felt a funeral in my brain,",
-                               "And mourners, to and fro,",
-                               "Kept treading, treading, till it seemed",
-                               "That sense was breaking through.")) %>%
+original <- tibble(txt = c("I felt a funeral in my brain,",
+                           "And mourners, to and fro,",
+                           "Kept treading, treading, till it seemed",
+                           "That sense was breaking through.")) %>%
   mutate(line = row_number()) %>%
   unnest_tokens(char, txt, token = "characters")
 
@@ -56,9 +56,9 @@ test_that("pairing and counting works", {
 
 
 test_that("We can count with a weight column", {
-  d <- data_frame(col1 = c("a", "a", "a", "b", "b", "b"),
-                  col2 = c("x", "y", "z", "x", "x", "z"),
-                  weight = c(1, 1, 1, 5, 5, 5))
+  d <- tibble(col1 = c("a", "a", "a", "b", "b", "b"),
+              col2 = c("x", "y", "z", "x", "x", "z"),
+              weight = c(1, 1, 1, 5, 5, 5))
 
   ret1 <- pairwise_count(d, col2, col1)
   expect_equal(ret1$n[ret1$item1 == "z" & ret1$item2 == "y"], 1)
@@ -72,7 +72,7 @@ test_that("We can count with a weight column", {
 
 test_that("Counts co-occurences of words in Pride & Prejudice", {
   if (require("janeaustenr", quietly = TRUE)) {
-    words <- data_frame(text = prideprejudice) %>%
+    words <- tibble(text = prideprejudice) %>%
       mutate(line = row_number()) %>%
       unnest_tokens(word, text)
 

diff --git a/tests/testthat/test-pairwise-similarity.R b/tests/testthat/test-pairwise-similarity.R
@@ -2,9 +2,9 @@ context("pairwise_similarity")
 
 suppressPackageStartupMessages(library(dplyr))
 
-d <- data_frame(col = rep(c("a", "b", "c"), each = 3),
-                row = rep(c("d", "e", "f"), 3),
-                value = c(1, 2, 3, 6, 5, 4, 7, 9, 8))
+d <- tibble(col = rep(c("a", "b", "c"), each = 3),
+            row = rep(c("d", "e", "f"), 3),
+            value = c(1, 2, 3, 6, 5, 4, 7, 9, 8))
 
 cosine_similarity <- function(x, y) {
   sum(x * y) / (sqrt(sum(x^2)) * sqrt(sum(y^2)))

diff --git a/vignettes/united_nations.Rmd b/vignettes/united_nations.Rmd
@@ -8,7 +8,7 @@ vignette: >
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 ---
-
+  
 ```{r setup, echo = FALSE}
 library(knitr)
 
@@ -102,9 +102,9 @@ library(igraph)
 cors_filtered <- cors %>%
   filter(correlation > .6)
 
-continents <- data_frame(country = unique(un_votes$country)) %>%
+continents <- tibble(country = unique(un_votes$country)) %>%
   filter(country %in% cors_filtered$item1 |
-         country %in% cors_filtered$item2) %>%
+           country %in% cors_filtered$item2) %>%
   mutate(continent = countrycode(country, "country.name", "continent"))
 
 set.seed(2017)