Skip to content

Commit

Permalink
Merge pull request #30 from juliasilge/master
Browse files Browse the repository at this point in the history
Update for tidytext::cast_sparse()
  • Loading branch information
dgrtwo committed Apr 3, 2020
2 parents 3e800ac + c5966f9 commit e5e790c
Show file tree
Hide file tree
Showing 12 changed files with 73 additions and 56 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ Description: Encapsulates the pattern of untidying data into a wide matrix,
is useful for several operations such as co-occurrence counts,
correlations, or clustering that are mathematically convenient on wide matrices.
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: TRUE
Maintainer: David Robinson <admiral.david@gmail.com>
URL: http://github.com/dgrtwo/widyr
BugReports: http://github.com/dgrtwo/widyr/issues
VignetteBuilder: knitr
Imports:
rlang,
dplyr,
tidyr,
reshape2,
Expand All @@ -38,4 +40,4 @@ Suggests:
ggplot2,
maps,
irlba
RoxygenNote: 6.1.1
RoxygenNote: 7.1.0
12 changes: 6 additions & 6 deletions R/pairwise_count.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
#' @examples
#'
#' library(dplyr)
#' dat <- data_frame(group = rep(1:5, each = 2),
#' letter = c("a", "b",
#' "a", "c",
#' "a", "c",
#' "b", "e",
#' "b", "f"))
#' dat <- tibble(group = rep(1:5, each = 2),
#' letter = c("a", "b",
#' "a", "c",
#' "a", "c",
#' "b", "e",
#' "b", "f"))
#'
#' # count the number of times two letters appear together
#' pairwise_count(dat, letter, group)
Expand Down
12 changes: 6 additions & 6 deletions R/pairwise_pmi.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@
#'
#' library(dplyr)
#'
#' dat <- data_frame(group = rep(1:5, each = 2),
#' letter = c("a", "b",
#' "a", "c",
#' "a", "c",
#' "b", "e",
#' "b", "f"))
#' dat <- tibble(group = rep(1:5, each = 2),
#' letter = c("a", "b",
#' "a", "c",
#' "a", "c",
#' "b", "e",
#' "b", "f"))
#'
#' # how informative is each letter about each other letter
#' pairwise_pmi(dat, letter, group)
Expand Down
16 changes: 9 additions & 7 deletions R/widely.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,22 @@ widely_ <- function(.f,
if (!sparse) {
if (!is.null(maximum_size)) {
matrix_size <- (length(unique(tbl[[row]])) *
length(unique(tbl[[column]])))
length(unique(tbl[[column]])))
if (matrix_size > maximum_size) {
stop("Size of acast matrix, ", matrix_size,
" will be too large. Set maximum_size = NULL to avoid ",
"this error (make sure your memory is sufficient), ",
"or consider using sparse = TRUE.")
rlang::abort(
paste0("Size of acast matrix, ", matrix_size,
" will be too large. Set maximum_size = NULL to avoid ",
"this error (make sure your memory is sufficient), ",
"or consider using sparse = TRUE.")
)
}
}

form <- stats::as.formula(paste(row, column, sep = " ~ "))

input <- reshape2::acast(tbl, form, value.var = value, fill = 0)
} else {
input <- tidytext::cast_sparse_(tbl, row, column, value)
input <- tidytext::cast_sparse(tbl, !!row, !!column, !!value)
}
output <- purrr::as_mapper(.f)(input, ...)

Expand All @@ -123,7 +125,7 @@ widely_ <- function(.f,
#' @noRd
custom_melt <- function(m) {
if (inherits(m, "data.frame")) {
stop("Output is a data frame: don't know how to fix")
rlang::abort("Output is a data frame: don't know how to fix")
}
if (inherits(m, "matrix")) {
ret <- reshape2::melt(m, varnames = c("item1", "item2"), as.is = TRUE)
Expand Down
22 changes: 18 additions & 4 deletions man/pairwise_cor.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions man/pairwise_count.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions man/pairwise_pmi.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/widely_svd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions tests/testthat/test-pairwise-cor.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ context("pairwise_cor")

suppressPackageStartupMessages(library(dplyr))

d <- data_frame(col = rep(c("a", "b", "c"), each = 3),
row = rep(c("d", "e", "f"), 3),
value = c(1, 2, 3, 6, 5, 4, 7, 9, 8))
d <- tibble(col = rep(c("a", "b", "c"), each = 3),
row = rep(c("d", "e", "f"), 3),
value = c(1, 2, 3, 6, 5, 4, 7, 9, 8))

test_that("pairwise_cor computes pairwise correlations", {
ret <- d %>%
Expand All @@ -29,7 +29,7 @@ test_that("pairwise_cor can compute Spearman correlations", {

test_that("pairwise_cor works on binary matrices", {
cors <- data.frame(x = c("a", "a", "a", "b", "b", "b", "c", "c", "c"),
y = c(1, 2, 3, 1, 2, 3, 1, 3, 4)) %>%
y = c(1, 2, 3, 1, 2, 3, 1, 3, 4)) %>%
pairwise_cor(x, y, sort = TRUE)

expect_equal(colnames(cors), c("item1", "item2", "correlation"))
Expand Down
16 changes: 8 additions & 8 deletions tests/testthat/test-pairwise-count.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ context("pairwise_count")
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidytext))

original <- data_frame(txt = c("I felt a funeral in my brain,",
"And mourners, to and fro,",
"Kept treading, treading, till it seemed",
"That sense was breaking through.")) %>%
original <- tibble(txt = c("I felt a funeral in my brain,",
"And mourners, to and fro,",
"Kept treading, treading, till it seemed",
"That sense was breaking through.")) %>%
mutate(line = row_number()) %>%
unnest_tokens(char, txt, token = "characters")

Expand Down Expand Up @@ -56,9 +56,9 @@ test_that("pairing and counting works", {


test_that("We can count with a weight column", {
d <- data_frame(col1 = c("a", "a", "a", "b", "b", "b"),
col2 = c("x", "y", "z", "x", "x", "z"),
weight = c(1, 1, 1, 5, 5, 5))
d <- tibble(col1 = c("a", "a", "a", "b", "b", "b"),
col2 = c("x", "y", "z", "x", "x", "z"),
weight = c(1, 1, 1, 5, 5, 5))

ret1 <- pairwise_count(d, col2, col1)
expect_equal(ret1$n[ret1$item1 == "z" & ret1$item2 == "y"], 1)
Expand All @@ -72,7 +72,7 @@ test_that("We can count with a weight column", {

test_that("Counts co-occurences of words in Pride & Prejudice", {
if (require("janeaustenr", quietly = TRUE)) {
words <- data_frame(text = prideprejudice) %>%
words <- tibble(text = prideprejudice) %>%
mutate(line = row_number()) %>%
unnest_tokens(word, text)

Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-pairwise-similarity.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ context("pairwise_similarity")

suppressPackageStartupMessages(library(dplyr))

d <- data_frame(col = rep(c("a", "b", "c"), each = 3),
row = rep(c("d", "e", "f"), 3),
value = c(1, 2, 3, 6, 5, 4, 7, 9, 8))
d <- tibble(col = rep(c("a", "b", "c"), each = 3),
row = rep(c("d", "e", "f"), 3),
value = c(1, 2, 3, 6, 5, 4, 7, 9, 8))

cosine_similarity <- function(x, y) {
sum(x * y) / (sqrt(sum(x^2)) * sqrt(sum(y^2)))
Expand Down
6 changes: 3 additions & 3 deletions vignettes/united_nations.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ vignette: >
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r setup, echo = FALSE}
library(knitr)
Expand Down Expand Up @@ -102,9 +102,9 @@ library(igraph)
cors_filtered <- cors %>%
filter(correlation > .6)
continents <- data_frame(country = unique(un_votes$country)) %>%
continents <- tibble(country = unique(un_votes$country)) %>%
filter(country %in% cors_filtered$item1 |
country %in% cors_filtered$item2) %>%
country %in% cors_filtered$item2) %>%
mutate(continent = countrycode(country, "country.name", "continent"))
set.seed(2017)
Expand Down

0 comments on commit e5e790c

Please sign in to comment.