t-distribution vs Normal distribution
Comparing t-distribution with normal distribution
This short set of notes shows some differences between t-distributions with varying degrees of freedom and the standard normal distribution.
# Degrees of freedom of the t-distributions to compare with the normal.
df <- c(2, 10, 15, 20, 30, 50, 100, 1000)

# One data frame per degrees-of-freedom value, stacked row-wise. Each holds
# the t density evaluated on the grid -5, -4.99, ..., 5.
t_dist <- do.call(
  rbind,
  lapply(df, function(nu) {
    x <- seq(-5, 5, .01)
    data.frame(value = x, density = dt(x, df = nu), df = nu, normal = FALSE)
  })
)

# Standard normal density on the same grid. `df = 0` is only a marker value
# that identifies the normal rows; it is not a real degrees-of-freedom value.
z_dist <- local({
  x <- seq(-5, 5, .01)
  data.frame(value = x, density = dnorm(x), normal = TRUE, df = 0)
})

# rbind() matches data frame columns by name, so the differing column order
# of `z_dist` is harmless; the result follows the column order of `t_dist`.
dist <- rbind(t_dist, z_dist)
head(dist)
## value density df normal
## 1 -5.00 0.007127781 2 FALSE
## 2 -4.99 0.007167524 2 FALSE
## 3 -4.98 0.007207557 2 FALSE
## 4 -4.97 0.007247883 2 FALSE
## 5 -4.96 0.007288503 2 FALSE
## 6 -4.95 0.007329422 2 FALSE
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggformula)
## Loading required package: ggstance
##
## Attaching package: 'ggstance'
##
## The following objects are masked from 'package:ggplot2':
##
## geom_errorbarh, GeomErrorbarh
##
## Loading required package: scales
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
##
## Loading required package: ggridges
##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")
library(patchwork)
# Use the black-and-white ggplot2 theme with an 18 pt base font from here on.
theme_set(theme_bw(base_size = 18))

# Build one panel per selected t-distribution (df = 2, 10, 100). Each panel
# also keeps the rows with df == 0, i.e. the normal curve, and colours points
# by the `normal` flag; the legend is hidden. Folding the list with patchwork's
# `/` operator composes the panels left to right: (p1 / p2) / p3.
Reduce(
  `/`,
  lapply(c(2, 10, 100), function(nu) {
    dist %>%
      filter(df %in% c(0, nu)) %>%
      gf_point(density ~ value, color = ~ normal, size = 1) %>%
      gf_refine(theme(legend.position = "none"))
  })
)