Moving Beyond Single-Level Models & Dealing With Clustering in Data
Understanding Grouping Structures in Data
avocados |>
filter(region %in% c("Houston", "Seattle", "Syracuse")) |>
ggplot(aes(average_price, log(total_volume), colour = region)) +
geom_point(alpha = 0.5) +
geom_smooth(method = lm, se = FALSE) +
facet_wrap(~ type) +
labs(x = "Average Price", y = "Total Volume") +
scwplot::scale_colour_qualitative(palette = "scw")
Issues with Using Single-Level Models for Multilevel Problems
Multilevel Solutions for Multilevel Problems!
lmer(total_volume ~ 1 + (1 | region), data = avocados) |>
performance::icc() |>
janitor::clean_names(replace = c("ICC" = ""), "title") |>
tidyr::pivot_longer(everything(), names_to = "ICC", values_to = "Value") |>
gt() |>
tab_header("Intraclass Correlation Coefficient (ICC)") |>
fmt_number(columns = is.numeric, decimals = 2) |>
tab_options(table.width = pct(100))
Intraclass Correlation Coefficient (ICC) | |
---|---|
ICC | Value |
Adjusted | 0.38 |
Conditional | 0.38 |
Unadjusted | 0.38 |
varying_intercepts <-
lmer(
log(total_volume) ~ average_price + organic + (1 | region),
data = avocados
)
modelsummary::msummary(
list("Total Volume" = varying_intercepts),
statistic = 'conf.int', coef_map = cm, gof_omit = "AIC|BIC|R2",
fmt = 2, exponentiate = TRUE, output = "gt",
title = "Multilevel Regression of Avocado Sales"
) |>
tab_row_group(md("**Group Effects**"), rows = 7:8) |>
tab_row_group(md("**Population Effects**"), rows = 1:6) |>
tab_style(
style = cell_text(size = "x-small"),
locations = cells_body(columns = 2, rows = c(2, 4, 6))
) |>
tab_options(table.width = pct(100), table.font.size = 12)
Total Volume | |
---|---|
Population Effects |
|
(Intercept) | 732844.18 |
[564203.10, 951892.30] | |
Average Price | 0.52 |
[0.51, 0.53] | |
Organic | 0.04 |
[0.04, 0.04] | |
Group Effects |
|
Region Intercept Variance | 2.46 |
Residual Variance | 1.57 |
Num.Obs. | 15545 |
ICC | 0.8 |
RMSE | 0.45 |
varying_intercepts |>
ggeffects::predict_response(c("average_price", "organic")) |>
tibble() |>
mutate(type = if_else(group == 0, "Conventional", "Organic")) |>
ggplot(aes(x, predicted, group = type, colour = type)) +
geom_point(size = 1.5) +
geom_line(linewidth = 1) +
geom_line(aes(y = conf.low), linetype = 2) +
geom_line(aes(y = conf.high), linetype = 2) +
scale_x_continuous(labels = label_currency()) +
scale_y_continuous(
labels = label_number(scale_cut = cut_short_scale())
) +
labs(x = "Average Price", y = "Total Volume") +
scwplot::scale_colour_qualitative("scw")
varying_intercepts |>
ggeffects::predict_response(
c("average_price", "organic", "region"), type = "random"
) |>
tibble() |>
filter(facet %in% c("Houston", "Seattle", "Syracuse")) |>
mutate(type = if_else(group == 0, "Conventional", "Organic")) |>
ggplot(aes(x, predicted, group = type, colour = type)) +
geom_point(size = 1.5) +
geom_line(linewidth = 1) +
geom_line(aes(y = conf.low), linetype = 2) +
geom_line(aes(y = conf.high), linetype = 2) +
facet_wrap(facets = vars(facet), nrow = 3) +
scale_x_continuous(labels = label_currency()) +
scale_y_continuous(
labels = label_number(scale_cut = cut_short_scale())
) +
labs(x = "Average Price", y = "Total Volume") +
scwplot::scale_colour_qualitative("scw")
Contact:
Code & Slides:
Paul Johnson // Introduction to Multilevel Regression // Aug 1, 2024