Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rstatix::t_test() doesen't work with unused factor levels while stats::t.test() accommodates them. #133

Open
caparks2 opened this issue Oct 20, 2021 · 2 comments

Comments

@caparks2
Copy link

caparks2 commented Oct 20, 2021

In the following example, base R stats::t.test() works, but rstatix::t_test() doesn't. I think the reason for the error is because of unused factor levels.

# Import libraries.
library(tidyverse)
library(rstatix)
#> 
#> Attaching package: 'rstatix'
#> The following object is masked from 'package:stats':
#> 
#>     filter

# Example data to reproduce the error.
df <- tibble(
  patient_id = c(
    10, 19, 05, 02, 14, 15, 21, 12, 10, 19,
    05, 02, 14, 15, 21, 12, 10, 19, 05, 02,
    14, 15, 21, 12, 10, 19, 05, 02, 14, 15,
    21, 12, 10, 19, 05, 02, 14, 15, 21, 12,
    10, 19, 05, 02, 14, 15, 21, 12, 10, 19,
    05, 02, 14, 15, 21, 12, 10, 19, 05, 02,
    14, 15, 21, 12, 10, 19, 05, 02, 14, 15,
    21, 12, 10, 19, 05, 02, 14, 15, 21, 12,
    10, 19, 05, 02, 14, 15, 21, 12, 10, 19,
    05, 02, 14, 15, 21, 12, 10, 19, 05, 02,
    14, 15, 21, 12, 10, 19, 05, 02, 14, 15,
    21, 12, 10, 19, 05, 02, 14, 15, 21, 12,
    10, 19, 05, 02, 14, 15, 21, 12, 10, 19,
    05, 02, 14, 15, 21, 12, 10, 19, 05, 02,
    14, 15, 21, 12
  ),
  t_cell_subset = c(
    "CD4", "CD4", "CD4", "CD4", "CD4", "CD4",
    "CD4", "CD4", "CD4", "CD4", "CD4", "CD4",
    "CD4", "CD4", "CD4", "CD4", "CD8", "CD8",
    "CD8", "CD8", "CD8", "CD8", "CD8", "CD8",
    "CD8", "CD8", "CD8", "CD8", "CD8", "CD8",
    "CD8", "CD8", "CD4", "CD4", "CD4", "CD4",
    "CD4", "CD4", "CD4", "CD4", "CD4", "CD4",
    "CD4", "CD4", "CD4", "CD4", "CD4", "CD4",
    "CD8", "CD8", "CD8", "CD8", "CD8", "CD8",
    "CD8", "CD8", "CD8", "CD8", "CD8", "CD8",
    "CD8", "CD8", "CD8", "CD8", "CD4", "CD4",
    "CD4", "CD4", "CD4", "CD4", "CD4", "CD4",
    "CD4", "CD4", "CD4", "CD4", "CD4", "CD4",
    "CD4", "CD4", "CD8", "CD8", "CD8", "CD8",
    "CD8", "CD8", "CD8", "CD8", "CD8", "CD8",
    "CD8", "CD8", "CD8", "CD8", "CD8", "CD8",
    "CD4", "CD4", "CD4", "CD4", "CD4", "CD4",
    "CD4", "CD4", "CD4", "CD4", "CD4", "CD4",
    "CD4", "CD4", "CD4", "CD4", "CD4", "CD4",
    "CD4", "CD4", "CD4", "CD4", "CD4", "CD4",
    "CD8", "CD8", "CD8", "CD8", "CD8", "CD8",
    "CD8", "CD8", "CD8", "CD8", "CD8", "CD8",
    "CD8", "CD8", "CD8", "CD8", "CD8", "CD8",
    "CD8", "CD8", "CD8", "CD8", "CD8", "CD8"
  ),
  phenotype = c(
    "SP", "SP", "SP", "SP", "SP", "SP", "SP",
    "SP", "DN", "DN", "DN", "DN", "DN", "DN",
    "DN", "DN", "SP", "SP", "SP", "SP", "SP",
    "SP", "SP", "SP", "DN", "DN", "DN", "DN",
    "DN", "DN", "DN", "DN", "DP", "DP", "DP",
    "DP", "DP", "DP", "DP", "DP", "DN", "DN",
    "DN", "DN", "DN", "DN", "DN", "DN", "DP",
    "DP", "DP", "DP", "DP", "DP", "DP", "DP",
    "DN", "DN", "DN", "DN", "DN", "DN", "DN",
    "DN", "DP", "DP", "DP", "DP", "DP", "DP",
    "DP", "DP", "SP", "SP", "SP", "SP", "SP",
    "SP", "SP", "SP", "DP", "DP", "DP", "DP",
    "DP", "DP", "DP", "DP", "SP", "SP", "SP",
    "SP", "SP", "SP", "SP", "SP", "DP", "DP",
    "DP", "DP", "DP", "DP", "DP", "DP", "SP",
    "SP", "SP", "SP", "SP", "SP", "SP", "SP",
    "DN", "DN", "DN", "DN", "DN", "DN", "DN",
    "DN", "DP", "DP", "DP", "DP", "DP", "DP",
    "DP", "DP", "SP", "SP", "SP", "SP", "SP",
    "SP", "SP", "SP", "DN", "DN", "DN", "DN",
    "DN", "DN", "DN", "DN"
  ),
  seqs_shared_with = c(
    "DP", "DP", "DP", "DP", "DP", "DP",
    "DP", "DP", "DP", "DP", "DP", "DP",
    "DP", "DP", "DP", "DP", "DP", "DP",
    "DP", "DP", "DP", "DP", "DP", "DP",
    "DP", "DP", "DP", "DP", "DP", "DP",
    "DP", "DP", "SP", "SP", "SP", "SP",
    "SP", "SP", "SP", "SP", "SP", "SP",
    "SP", "SP", "SP", "SP", "SP", "SP",
    "SP", "SP", "SP", "SP", "SP", "SP",
    "SP", "SP", "SP", "SP", "SP", "SP",
    "SP", "SP", "SP", "SP", "DN", "DN",
    "DN", "DN", "DN", "DN", "DN", "DN",
    "DN", "DN", "DN", "DN", "DN", "DN",
    "DN", "DN", "DN", "DN", "DN", "DN",
    "DN", "DN", "DN", "DN", "DN", "DN",
    "DN", "DN", "DN", "DN", "DN", "DN",
    "Blood", "Blood", "Blood", "Blood",
    "Blood", "Blood", "Blood", "Blood",
    "Blood", "Blood", "Blood", "Blood",
    "Blood", "Blood", "Blood", "Blood",
    "Blood", "Blood", "Blood", "Blood",
    "Blood", "Blood", "Blood", "Blood",
    "Blood", "Blood", "Blood", "Blood",
    "Blood", "Blood", "Blood", "Blood",
    "Blood", "Blood", "Blood", "Blood",
    "Blood", "Blood", "Blood", "Blood",
    "Blood", "Blood", "Blood", "Blood",
    "Blood", "Blood", "Blood", "Blood"
  ),
  frac_seqs_shared = c(
    0.0039, 0.0220, 0.0014, 0.0130, 0.0000,
    0.0260, 0.0074, 0.0068, 0.0060, 0.0200,
    0.0011, 0.0140, 0.0000, 0.0310, 0.0100,
    0.0064, 0.0520, 0.1700, 0.0350, 0.0940,
    0.0160, 0.1400, 0.1300, 0.2200, 0.0520,
    0.2000, 0.0310, 0.0790, 0.0000, 0.1800,
    0.1300, 0.1400, 0.1800, 0.4000, 0.2700,
    0.2800, 0.0000, 0.4100, 0.2900, 0.2300,
    0.3000, 0.2900, 0.1700, 0.2800, 0.0500,
    0.2600, 0.1900, 0.2600, 0.2400, 0.4000,
    0.3100, 0.2500, 0.1100, 0.2200, 0.2100,
    0.1600, 0.2800, 0.4400, 0.2000, 0.2700,
    0.2300, 0.4800, 0.6000, 0.3100, 0.0800,
    0.3300, 0.1800, 0.0930, 0.0000, 0.1400,
    0.0490, 0.1200, 0.0880, 0.2700, 0.1600,
    0.0820, 0.0070, 0.0740, 0.0230, 0.1500,
    0.1200, 0.1100, 0.1800, 0.1200, 0.0000,
    0.1000, 0.0350, 0.0580, 0.1400, 0.1000,
    0.1200, 0.1600, 0.0240, 0.1700, 0.0970,
    0.1800, 0.5300, 0.2800, 0.3800, 0.2600,
    0.0000, 0.3600, 0.2600, 0.5000, 0.4500,
    0.2400, 0.2200, 0.2800, 0.2500, 0.2900,
    0.2100, 0.5100, 0.5600, 0.3100, 0.2700,
    0.3900, 0.2800, 0.4100, 0.2800, 0.7000,
    0.4000, 0.1600, 0.2700, 0.2500, 0.2200,
    0.3500, 0.1000, 0.1900, 0.3000, 0.1700,
    0.2100, 0.2800, 0.6000, 0.4900, 0.2500,
    0.4400, 0.4000, 0.3400, 0.2200, 0.3200,
    0.4600, 0.7500, 0.6500, 0.5200
  )
)

# Mutate char columns to factors and filter for relevant data.
# Unused factor levels exist after filtering.
df <- df %>%
  mutate(across(where(is.character), as.factor)) %>%
  filter(t_cell_subset == "CD4", phenotype == "DP")

# Base R stats::t.test() handles unused factor levels.
pairwise.t.test(
  x = df$frac_seqs_shared,
  g = df$seqs_shared_with,
  p.adjust.method = "bonferroni",
  paired = TRUE,
  var.equal = FALSE
)
#> 
#>  Pairwise comparisons using paired t tests 
#> 
#> data:  df$frac_seqs_shared and df$seqs_shared_with 
#> 
#>    Blood DN   
#> DN 0.039 -    
#> SP 0.941 0.013
#> 
#> P value adjustment method: bonferroni

# rstatix::t_test() results in an error.
df %>% pairwise_t_test(frac_seqs_shared ~ seqs_shared_with,
  p.adjust.method = "bonferroni",
  paired = TRUE,
  var.equal = FALSE
)
#> Error in t.test.default(x = c(0.53, 0.28, 0.38, 0.26, 0, 0.36, 0.26, 0.5: not enough 'x' observations

If I manually fix the unused factor levels, rstatix::t_test() now works.

# manually fix unused factor levels and perform t tests
df %>%
  mutate(across(where(is.factor), as.character)) %>%
  mutate(across(where(is.character), as.factor)) %>%
  pairwise_t_test(frac_seqs_shared ~ seqs_shared_with,
    p.adjust.method = "bonferroni",
    paired = TRUE,
    var.equal = FALSE
  )
#> # A tibble: 3 × 10
#>   .y.              group1 group2    n1    n2 statistic    df     p p.adj p.adj.signif
#> * <chr>            <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <dbl> <chr>       
#> 1 frac_seqs_shared Blood  DN         8     8      3.31     7 0.013 0.039 *           
#> 2 frac_seqs_shared Blood  SP         8     8      1.09     7 0.314 0.942 ns          
#> 3 frac_seqs_shared DN     SP         8     8     -4.13     7 0.004 0.013 *
@robchallen
Copy link

I can confirm this one. Just got me, slightly simpler example

packageVersion("rstatix")
[1] ‘0.7.0# Doesn;t work (and throws mysterious error)
iris %>% filter(Species != "setosa") %>% rstatix::t_test(Sepal.Length ~ Species)

# Works:
t.test(Sepal.Length ~ Species, iris %>% filter(Species != "setosa"))

# Work around:
iris %>% filter(Species != "setosa") %>% 
   mutate(across(where(is.factor), forcats::fct_drop)) %>% 
   rstatix::t_test(Sepal.Length ~ Species)

@micwij
Copy link

micwij commented Sep 4, 2023

Also noticed this today. I think it would be good if the function would still work with unused factor levels. Maybe the function could automatically drop unused factor levels. Potentially with a warning. With the current error message, I found it quite tricky to figure out, what was going wrong.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants