# Least Squares Simulation

## Example to show least squares minimization

This little example is meant as a way to show the least square really minimizes the criterion, $\sum \left( Y - \hat{Y} \right)^2$.

In this example, we will generate some data so that we know what the truth is. Then, upon data generation, we will compute a bunch of different values for the linear slope and y-intercept. For each combination of the y-intercept and slope, I will compute the sum of squares error depicted above.

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1
## ✔ readr   2.1.2      ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(simglm)

theme_set(theme_bw(base_size = 18))

set.seed(2021)

sim_arguments <- list(
formula = y ~ x,
fixed = list(x = list(var_type = 'continuous', mean = 100, sd = 20)),
error = list(variance = 100),
sample_size = 1000,
reg_weights = c(5, .5)
)

sim_data <- simulate_fixed(data = NULL, sim_arguments) %>%
simulate_error(sim_arguments) %>%
generate_response(sim_arguments)


##   X.Intercept.         x level1_id       error fixed_outcome random_effects
## 1            1  97.55080         1   8.0959540      53.77540              0
## 2            1 111.04913         2   0.9941633      60.52457              0
## 3            1 106.97299         3 -23.4302131      58.48650              0
## 4            1 107.19264         4   6.7652598      58.59632              0
## 5            1 117.96107         5 -36.1147374      63.98054              0
## 6            1  61.54861         6  -1.6416799      35.77430              0
##          y
## 1 61.87135
## 2 61.51873
## 3 35.05628
## 4 65.36158
## 5 27.86580
## 6 34.13262

library(ggformula)

## Loading required package: ggstance

##
## Attaching package: 'ggstance'

## The following objects are masked from 'package:ggplot2':
##
##     geom_errorbarh, GeomErrorbarh

## Loading required package: scales

##
## Attaching package: 'scales'

## The following object is masked from 'package:purrr':
##

## The following object is masked from 'package:readr':
##
##     col_factor

## Loading required package: ggridges

##
## New to ggformula?  Try the tutorials:
## 	learnr::run_tutorial("introduction", package = "ggformula")
## 	learnr::run_tutorial("refining", package = "ggformula")

gf_point(y ~ x, data = sim_data, size = 4) %>%
gf_smooth(method = 'lm')


sim_lm <- lm (y ~ x, data = sim_data)
coef(sim_lm)

## (Intercept)           x
##   5.9653484   0.4941165

y_intercept <- seq(0, 15, by = .25)
slope <- seq(0, 1.5, by = .01)

conditions <- rbind(expand.grid(y_intercept = y_intercept,
slope = slope),
coef(sim_lm))

tail(conditions)

##      y_intercept     slope
## 9207   14.000000 1.5000000
## 9208   14.250000 1.5000000
## 9209   14.500000 1.5000000
## 9210   14.750000 1.5000000
## 9211   15.000000 1.5000000
## 9212    5.965348 0.4941165

dim(conditions)

## [1] 9212    2

gf_point(y ~ x, data = sim_data, size = 4) %>%
gf_smooth(method = 'lm') %>%
gf_abline(slope = ~slope, intercept = ~y_intercept, data = slice(conditions, 1), linetype = 2, size = 2) %>%
gf_abline(slope = ~slope, intercept = ~y_intercept, data = slice(conditions, 855), linetype = 2, color = 'lightgreen', size = 2) %>%
gf_refine(coord_cartesian(xlim = c(0, 160), ylim = c(0, 120)))


sum_square_error <- function(conditions, sim_data) {
fitted <- conditions[['y_intercept']] + conditions[['slope']] * sim_data[['x']]

deviation <- sim_data[['y']] - fitted

sqrt((sum(deviation^2) / (nrow(sim_data) - 2)))
}

sum_square_error(conditions[1892, ], sim_data)

## [1] 26.74862

summary(sim_lm)$sigma  ## [1] 10.18153  library(future) plan(multisession) conditions$sse <- unlist(lapply(1:nrow(conditions), function(xx) sum_square_error(conditions[xx, ], sim_data)))


##   y_intercept slope      sse

plan(sequential)