library(tidyverse)
library(DBI)
library(dbplyr)
<- dbConnect(
tidy_finance ::duckdb(),
duckdb"data/tidy_finance.duckdb",
read_only = FALSE)
<- tbl(tidy_finance, "crsp_monthly")
crsp_monthly <- tbl(tidy_finance, "crsp_daily")
crsp_daily
<- tbl(tidy_finance, "factors_ff_monthly")
factors_ff_monthly <- tbl(tidy_finance, "factors_ff_daily")
factors_ff_daily
<- "OVER (PARTITION BY permno ORDER BY month
window RANGE BETWEEN INTERVAL 59 MONTHS PRECEDING
AND INTERVAL 0 MONTHS FOLLOWING)"
<- sql(paste("regr_slope(ret_excess, mkt_excess)", window))
beta_sql <- sql(paste("count(*)", window))
n_sql <- sql(paste("max(month)", window))
max_sql <- sql(paste("min(month)", window))
min_sql
<-
beta_monthly |>
crsp_monthly left_join(factors_ff_monthly, by = "month") |>
mutate(beta_monthly = beta_sql,
n = n_sql,
max_month = max_sql,
min_month = min_sql) |>
ungroup() |>
# Check that months are consecutive
mutate(n_months = date_diff('month', min_month, max_month) + 1) |>
filter(n >= 48, n_months == n) |>
select(permno, month, beta_monthly) |>
filter(!is.na(beta_monthly)) %>%
compute()
<- "OVER (PARTITION BY permno ORDER BY month
window RANGE BETWEEN INTERVAL 2 MONTHS PRECEDING
AND INTERVAL 0 MONTHS FOLLOWING)"
<- sql(paste("regr_slope(ret_excess, mkt_excess)", window))
beta_sql <- sql(paste("count(*)", window))
n_sql
<-
beta_daily |>
crsp_daily left_join(factors_ff_daily, by = "date") |>
select(permno, month, ret_excess, mkt_excess) |>
filter(!is.na(ret_excess)) |>
mutate(beta_daily = beta_sql, n = n_sql) |>
filter(n >= 50) |>
select(permno, month, beta_daily) |>
distinct() |>
compute()
dbExecute(tidy_finance, "DROP TABLE IF EXISTS beta_alt")
<-
beta_alt |>
beta_monthly full_join(beta_daily, by = c("permno", "month")) |>
arrange(permno, month) %>%
compute(name = "beta_alt", temporary = FALSE)
dbDisconnect(tidy_finance, shutdown = TRUE)
5 Beta Estimation
5.1 Code to create beta
Instead of the code in the book, I used the following code to create beta
(here renamed beta_alt
). The code from the book (see here) takes 12 minutes and maxes out at about 37GB of RAM. The code below takes 25 seconds and peaks at about 7GB.
The code below is perhaps uglier than it needs to be, but is designed to get as close as possible to the results of the code in the book. Note that I get some differences with beta_monthly
, but the same values for beta_daily
.
5.2 Analysis of betas
Having created the table using the code below, I now include the analyses presented in the book, but using the database as the primary engine. Rendering this document takes less than six seconds.1
library(tidyverse)
library(DBI)
library(scales)
library(dbplyr)
<- dbConnect(
tidy_finance ::duckdb(),
duckdb"data/tidy_finance.duckdb",
read_only = TRUE)
<- tbl(tidy_finance, "beta_alt")
beta <- tbl(tidy_finance, "crsp_monthly") crsp_monthly
<- tribble(
examples ~permno, ~company,
14593, "Apple",
10107, "Microsoft",
93436, "Tesla",
17778, "Berkshire Hathaway"
|>
) copy_inline(tidy_finance, df = _)
<-
beta_examples |>
beta inner_join(examples, by = "permno") |>
select(permno, company, month, beta_monthly)
|>
beta_examples filter(!is.na(beta_monthly)) |>
ggplot(aes(
x = month,
y = beta_monthly,
color = company,
linetype = company)) +
geom_line() +
labs(
x = NULL, y = NULL, color = NULL, linetype = NULL,
title = "Monthly beta estimates for example stocks using 5 years of data"
)
|>
crsp_monthly left_join(beta, join_by(permno, month)) |>
filter(!is.na(beta_monthly)) |>
group_by(industry, permno) |>
summarize(beta = mean(beta_monthly, na.rm = TRUE),
.groups = "drop") |>
ggplot(aes(x = reorder(industry, beta, FUN = median), y = beta)) +
geom_boxplot() +
coord_flip() +
labs(
x = NULL, y = NULL,
title = "Firm-specific beta distributions by industry"
)
|>
beta filter(!is.na(beta_monthly)) |>
group_by(month) |>
mutate(quantile = ntile(-beta_monthly, 10) * 10) |>
group_by(month, quantile) |>
summarize(x = min(beta_monthly, na.rm = TRUE), .groups = "drop") |>
filter(quantile != 100) |>
ggplot(aes(
x = month,
y = x,
color = as_factor(quantile),
linetype = as_factor(quantile)
+
)) geom_line() +
labs(
x = NULL, y = NULL, color = NULL, linetype = NULL,
title = "Monthly deciles of estimated betas",
)
|>
beta inner_join(examples, by = "permno") |>
pivot_longer(cols = c(beta_monthly, beta_daily)) |>
filter(!is.na(value)) |>
ggplot(aes(
x = month,
y = value,
color = name,
linetype = name
+
)) geom_line() +
facet_wrap(~ company, ncol = 1) +
labs(
x = NULL, y = NULL, color = NULL, linetype = NULL,
title = "Comparison of beta estimates using monthly and daily data"
)
<-
beta_long |>
crsp_monthly left_join(beta, by = c("permno", "month")) |>
select(month, beta_monthly, beta_daily) |>
pivot_longer(cols = c(beta_monthly, beta_daily))
|>
beta_long group_by(month, name) |>
summarize(share = sum(as.double(!is.na(value)), na.rm = TRUE) / n(),
.groups = "drop") |>
ggplot(aes(
x = month,
y = share,
color = name,
linetype = name
+
)) geom_line() +
scale_y_continuous(labels = percent) +
labs(
x = NULL, y = NULL, color = NULL, linetype = NULL,
title = "End-of-month share of securities with beta estimates"
+
) coord_cartesian(ylim = c(0, 1))
|>
beta_long select(name, value) |>
filter(!is.na(value)) |>
group_by(name) |>
summarize(
mean = mean(value),
sd = sd(value),
min = min(value),
q05 = quantile(value, 0.05),
q50 = quantile(value, 0.50),
q95 = quantile(value, 0.95),
max = max(value),
n = n()
|>
) collect()
name | mean | sd | min | q05 | q50 | q95 | max | n |
---|---|---|---|---|---|---|---|---|
beta_monthly | 1.102093 | 0.7114593 | -13.02218 | 0.1294213 | 1.0373320 | 2.323526 | 10.34573 | 2163711 |
beta_daily | 0.748910 | 0.9264405 | -43.66630 | -0.4468799 | 0.6855455 | 2.226624 | 56.55841 | 3233745 |
dbDisconnect(tidy_finance, shutdown = TRUE)
Note that code above that creates
beta_alt
is not run with this document.↩︎