mtcars data
vignettes/mtcars_examples.Rmd
mtcars_examples.Rmd
Note: The `type` argument in `generate()` is automatically filled based on the entries for `specify()` and `hypothesize()`. It can be removed throughout the examples that follow. It is left in to reiterate the type of generation process being performed.
library(infer)
library(dplyr)
# Recode the discrete-valued columns as factors so they can be used as
# categorical variables in the examples below.
mtcars <- mtcars %>%
  mutate(
    cyl = factor(cyl),
    vs = factor(vs),
    am = factor(am),
    gear = factor(gear),
    carb = factor(carb)
  )

# Fix the random seed so the resampling results are reproducible.
set.seed(2018)
One numerical variable (mean)
# 100 bootstrap replicates of the mean of mpg under the point null mu = 25.
# Formula alternative to `response = mpg`: specify(mpg ~ NULL)
mtcars %>%
  specify(response = mpg) %>%
  hypothesize(null = "point", mu = 25) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 24.7
## 2 2 23.1
## 3 3 26.9
## 4 4 24.8
## 5 5 25.6
## 6 6 23.2
## 7 7 24.2
## 8 8 24.9
## 9 9 23.3
## 10 10 26.5
## # … with 90 more rows
One numerical variable (median)
# 100 bootstrap replicates of the median of mpg under the point null med = 26.
# Formula alternative to `response = mpg`: specify(mpg ~ NULL)
mtcars %>%
  specify(response = mpg) %>%
  hypothesize(null = "point", med = 26) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 26.5
## 2 2 26.5
## 3 3 24.6
## 4 4 25.0
## 5 5 26
## 6 6 26
## 7 7 25.0
## 8 8 27.2
## 9 9 25.2
## 10 10 28.2
## # … with 90 more rows
One categorical (2 level) variable
# 100 simulated replicates of the proportion of am == "1" under the point
# null p = .25.
# Formula alternative to `response = am`: specify(am ~ NULL, success = "1")
mtcars %>%
  specify(response = am, success = "1") %>%
  hypothesize(null = "point", p = .25) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "prop")
## # A tibble: 100 x 2
## replicate stat
## <fct> <dbl>
## 1 1 0.375
## 2 2 0.0625
## 3 3 0.125
## 4 4 0.25
## 5 5 0.188
## 6 6 0.406
## 7 7 0.219
## 8 8 0.375
## 9 9 0.344
## 10 10 0.188
## # … with 90 more rows
Two categorical (2 level) variables
# 100 permutation replicates of the difference in proportions of am == "1"
# between the levels of vs, under the independence null. `order` lists the
# vs levels in the order used for the difference.
# Non-formula alternative: specify(response = am, explanatory = vs, success = "1")
mtcars %>%
  specify(am ~ vs, success = "1") %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in props", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0397
## 2 2 0.0873
## 3 3 0.214
## 4 4 -0.167
## 5 5 -0.167
## 6 6 -0.0397
## 7 7 0.0873
## 8 8 -0.0397
## 9 9 -0.0397
## 10 10 -0.294
## # … with 90 more rows
One categorical (>2 level) - GoF
# 100 simulated replicates of the Chisq statistic for cyl under the point
# null with proportions 4 = .5, 6 = .25, 8 = .25.
# Non-formula alternative: specify(response = cyl)
mtcars %>%
  specify(cyl ~ NULL) %>%
  hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "Chisq")
## # A tibble: 100 x 2
## replicate stat
## <fct> <dbl>
## 1 1 0.688
## 2 2 1.69
## 3 3 1.69
## 4 4 1.69
## 5 5 10.2
## 6 6 4.5
## 7 7 3
## 8 8 2.69
## 9 9 0.5
## 10 10 1.5
## # … with 90 more rows
Two categorical (>2 level) variables
# 100 permutation replicates of the Chisq statistic for cyl versus am under
# the independence null.
# Non-formula alternative: specify(response = cyl, explanatory = am)
mtcars %>%
  specify(cyl ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "Chisq")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 3.90
## 2 2 3.68
## 3 3 1.01
## 4 4 0.557
## 5 5 1.34
## 6 6 2.93
## 7 7 1.45
## 8 8 0.557
## 9 9 0.557
## 10 10 1.01
## # … with 90 more rows
One numerical variable one categorical (2 levels) (diff in means)
# 100 permutation replicates of the difference in mean mpg between the
# levels of am, under the independence null. `order` lists the am levels in
# the order used for the difference.
# Non-formula alternative: specify(response = mpg, explanatory = am)
mtcars %>%
  specify(mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in means", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 3.12
## 2 2 -1.01
## 3 3 0.813
## 4 4 1.46
## 5 5 0.0101
## 6 6 1.94
## 7 7 -0.00283
## 8 8 -1.84
## 9 9 -2.24
## 10 10 -3.59
## # … with 90 more rows
One numerical variable one categorical (2 levels) (diff in medians)
# 100 permutation replicates of the difference in median mpg between the
# levels of am, under the independence null. `order` lists the am levels in
# the order used for the difference.
# Non-formula alternative: specify(response = mpg, explanatory = am)
mtcars %>%
  specify(mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in medians", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -5
## 2 2 -2.3
## 3 3 4.10
## 4 4 0
## 5 5 0
## 6 6 1
## 7 7 1.90
## 8 8 -0.5
## 9 9 2.90
## 10 10 1.90
## # … with 90 more rows
One numerical one categorical (>2 levels) - ANOVA
# 100 permutation replicates of the F statistic for mpg across the levels of
# cyl, under the independence null.
# Non-formula alternative: specify(response = mpg, explanatory = cyl)
mtcars %>%
  specify(mpg ~ cyl) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "F")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.842
## 2 2 0.800
## 3 3 0.232
## 4 4 0.0158
## 5 5 0.0488
## 6 6 0.466
## 7 7 1.26
## 8 8 5.13
## 9 9 1.67
## 10 10 0.469
## # … with 90 more rows
Two numerical vars - SLR
# 100 permutation replicates of the slope from regressing mpg on hp, under
# the independence null.
# Non-formula alternative: specify(response = mpg, explanatory = hp)
# (the explanatory variable here is hp, matching the formula mpg ~ hp).
mtcars %>%
  specify(mpg ~ hp) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "slope")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0158
## 2 2 -0.0104
## 3 3 0.00876
## 4 4 0.0291
## 5 5 -0.0000981
## 6 6 -0.0206
## 7 7 -0.00727
## 8 8 0.0167
## 9 9 0.00682
## 10 10 0.0116
## # … with 90 more rows
One numerical variable (standard deviation)
Not currently implemented
# Point-null pipeline for the standard deviation of mpg (sigma = 5), with
# 100 bootstrap replicates. This document flags it as not currently
# implemented.
# Formula alternative to `response = mpg`: specify(mpg ~ NULL)
mtcars %>%
  specify(response = mpg) %>%
  hypothesize(null = "point", sigma = 5) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
One numerical (one mean)
# Bootstrap distribution (100 replicates, no null hypothesis) of the mean
# of mpg.
mtcars %>%
  specify(response = mpg) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 19.2
## 2 2 18.9
## 3 3 18.7
## 4 4 19.3
## 5 5 20.9
## 6 6 18.5
## 7 7 20.5
## 8 8 18.8
## 9 9 23.1
## 10 10 18.6
## # … with 90 more rows
One numerical (one median)
# Bootstrap distribution (100 replicates, no null hypothesis) of the median
# of mpg.
mtcars %>%
  specify(response = mpg) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 17.0
## 2 2 19.2
## 3 3 19.4
## 4 4 19.4
## 5 5 17.1
## 6 6 18.2
## 7 7 20.4
## 8 8 22.8
## 9 9 19.0
## 10 10 21
## # … with 90 more rows
One numerical (standard deviation)
# Bootstrap distribution (100 replicates, no null hypothesis) of the
# standard deviation of mpg.
mtcars %>%
  specify(response = mpg) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 6.11
## 2 2 5.27
## 3 3 4.82
## 4 4 4.35
## 5 5 5.55
## 6 6 7.83
## 7 7 6.28
## 8 8 5.68
## 9 9 7.19
## 10 10 5.67
## # … with 90 more rows
One categorical (one proportion)
# Bootstrap distribution (100 replicates, no null hypothesis) of the
# proportion of am == "1".
mtcars %>%
  specify(response = am, success = "1") %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "prop")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.25
## 2 2 0.5
## 3 3 0.344
## 4 4 0.531
## 5 5 0.438
## 6 6 0.5
## 7 7 0.312
## 8 8 0.438
## 9 9 0.656
## 10 10 0.406
## # … with 90 more rows
One numerical variable one categorical (2 levels) (diff in means)
# Bootstrap distribution (100 replicates, no null hypothesis) of the
# difference in mean mpg between the levels of am. `order` lists the am
# levels in the order used for the difference.
mtcars %>%
  specify(mpg ~ am) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "diff in means", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -4.36
## 2 2 -5.64
## 3 3 -8.54
## 4 4 -9.26
## 5 5 -5.24
## 6 6 -5.55
## 7 7 -7.71
## 8 8 -7.68
## 9 9 -9.21
## 10 10 -7.17
## # … with 90 more rows
Two categorical variables (diff in proportions)
# Bootstrap distribution (100 replicates, no null hypothesis) of the
# difference in proportions of am == "1" between the levels of vs. `order`
# lists the vs levels in the order used for the difference.
mtcars %>%
  specify(am ~ vs, success = "1") %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "diff in props", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0648
## 2 2 -0.189
## 3 3 -0.208
## 4 4 -0.0952
## 5 5 -0.317
## 6 6 0.0317
## 7 7 0.143
## 8 8 -0.453
## 9 9 -0.212
## 10 10 -0.312
## # … with 90 more rows
Two numerical vars - SLR
# Bootstrap distribution (100 replicates, no null hypothesis) of the slope
# from regressing mpg on hp.
mtcars %>%
  specify(mpg ~ hp) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "slope")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0878
## 2 2 -0.0691
## 3 3 -0.0866
## 4 4 -0.0518
## 5 5 -0.0593
## 6 6 -0.0711
## 7 7 -0.0588
## 8 8 -0.0776
## 9 9 -0.0615
## 10 10 -0.0464
## # … with 90 more rows
Two numerical vars - correlation
# Bootstrap distribution (100 replicates, no null hypothesis) of the
# correlation between mpg and hp.
mtcars %>%
  specify(mpg ~ hp) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "correlation")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.765
## 2 2 -0.846
## 3 3 -0.789
## 4 4 -0.718
## 5 5 -0.748
## 6 6 -0.800
## 7 7 -0.744
## 8 8 -0.832
## 9 9 -0.752
## 10 10 -0.824
## # … with 90 more rows