5  Beta Estimation

5.1 Code to create beta

Instead of the code in the book, I used the following code to create beta (here renamed beta_alt). The code from the book (see here) takes 12 minutes and maxes out at about 37GB of RAM. The code below takes 25 seconds and peaks at about 7GB.

The code below is perhaps uglier than it needs to be, but is designed to get as close as possible to the results of the code in the book. Note that I get some differences with beta_monthly, but the same values for beta_daily.

library(tidyverse)
library(DBI)
library(dbplyr)

tidy_finance <- dbConnect(
  duckdb::duckdb(),
  "data/tidy_finance.duckdb",
  read_only = FALSE)

crsp_monthly <- tbl(tidy_finance, "crsp_monthly") 
crsp_daily <- tbl(tidy_finance, "crsp_daily") 

factors_ff_monthly <- tbl(tidy_finance, "factors_ff_monthly")
factors_ff_daily <- tbl(tidy_finance, "factors_ff_daily")

window <- "OVER (PARTITION BY permno ORDER BY month
           RANGE BETWEEN INTERVAL 59 MONTHS PRECEDING
              AND INTERVAL 0 MONTHS FOLLOWING)"

beta_sql <- sql(paste("regr_slope(ret_excess, mkt_excess)", window))
n_sql <- sql(paste("count(*)", window))
max_sql <- sql(paste("max(month)", window))
min_sql <- sql(paste("min(month)", window))

beta_monthly <- 
  crsp_monthly |>
  left_join(factors_ff_monthly, by = "month") |>
  mutate(beta_monthly = beta_sql,
         n = n_sql,
         max_month = max_sql,
         min_month = min_sql) |>
  ungroup() |>
  # Check that months are consecutive
  mutate(n_months = date_diff('month', min_month, max_month) + 1) |>
  filter(n >= 48, n_months == n) |>
  select(permno, month, beta_monthly) |>
  filter(!is.na(beta_monthly)) %>%
  compute()

window <- "OVER (PARTITION BY permno ORDER BY month
           RANGE BETWEEN INTERVAL 2 MONTHS PRECEDING
              AND INTERVAL 0 MONTHS FOLLOWING)"

beta_sql <- sql(paste("regr_slope(ret_excess, mkt_excess)", window))
n_sql <- sql(paste("count(*)", window))

beta_daily <- 
  crsp_daily |>
  left_join(factors_ff_daily, by = "date") |>
  select(permno, month, ret_excess, mkt_excess) |>
  filter(!is.na(ret_excess)) |>
  mutate(beta_daily = beta_sql, n = n_sql) |>
  filter(n >= 50) |>
  select(permno, month, beta_daily) |>
  distinct() |>
  compute()

dbExecute(tidy_finance, "DROP TABLE IF EXISTS beta_alt")

beta_alt <- 
  beta_monthly |>
  full_join(beta_daily, by = c("permno", "month")) |>
  arrange(permno, month) %>%
  compute(name = "beta_alt", temporary = FALSE)

dbDisconnect(tidy_finance, shutdown = TRUE)

5.2 Analysis of betas

Having created the table using the code below, I now include the analyses presented in the book, but using the database as the primary engine. Rendering this document takes less than six seconds.1

library(tidyverse)
library(DBI)
library(scales)
library(dbplyr)
tidy_finance <- dbConnect(
  duckdb::duckdb(),
  "data/tidy_finance.duckdb",
  read_only = TRUE)
beta <- tbl(tidy_finance, "beta_alt")
crsp_monthly <- tbl(tidy_finance, "crsp_monthly")
examples <- tribble(
  ~permno, ~company,
  14593, "Apple",
  10107, "Microsoft",
  93436, "Tesla",
  17778, "Berkshire Hathaway"
) |>
  copy_inline(tidy_finance, df = _)
beta_examples <- 
  beta |>
  inner_join(examples, by = "permno") |>
  select(permno, company, month, beta_monthly) 

beta_examples |>
  filter(!is.na(beta_monthly)) |>
  ggplot(aes(
    x = month, 
    y = beta_monthly, 
    color = company,
    linetype = company)) +
  geom_line() +
  labs(
    x = NULL, y = NULL, color = NULL, linetype = NULL,
    title = "Monthly beta estimates for example stocks using 5 years of data"
  )

crsp_monthly |>
  left_join(beta, join_by(permno, month)) |>
  filter(!is.na(beta_monthly)) |>
  group_by(industry, permno) |>
  summarize(beta = mean(beta_monthly, na.rm = TRUE), 
            .groups = "drop") |>
  ggplot(aes(x = reorder(industry, beta, FUN = median), y = beta)) +
  geom_boxplot() +
  coord_flip() +
  labs(
    x = NULL, y = NULL,
    title = "Firm-specific beta distributions by industry"
  )

beta |>
  filter(!is.na(beta_monthly)) |>
  group_by(month) |>
  mutate(quantile = ntile(-beta_monthly, 10) * 10) |>
  group_by(month, quantile) |>
  summarize(x = min(beta_monthly, na.rm = TRUE), .groups = "drop") |>
  filter(quantile != 100) |>
  ggplot(aes(
    x = month, 
    y = x, 
    color = as_factor(quantile),
    linetype = as_factor(quantile)
    )) +
  geom_line() +
  labs(
    x = NULL, y = NULL, color = NULL, linetype = NULL,
    title = "Monthly deciles of estimated betas",
  )

beta |>
  inner_join(examples, by = "permno") |>
  pivot_longer(cols = c(beta_monthly, beta_daily)) |>
  filter(!is.na(value)) |>
  ggplot(aes(
    x = month, 
    y = value, 
    color = name, 
    linetype = name
    )) +
  geom_line() +
  facet_wrap(~ company, ncol = 1) +
  labs(
    x = NULL, y = NULL, color = NULL, linetype = NULL, 
    title = "Comparison of beta estimates using monthly and daily data"
  )

beta_long <- 
  crsp_monthly |>
  left_join(beta, by = c("permno", "month")) |>
  select(month, beta_monthly, beta_daily) |>
  pivot_longer(cols = c(beta_monthly, beta_daily))

beta_long |>
  group_by(month, name) |>
  summarize(share = sum(as.double(!is.na(value)), na.rm = TRUE) / n(),
            .groups = "drop") |>
  ggplot(aes(
    x = month, 
    y = share, 
    color = name,
    linetype = name
    )) +
  geom_line() +
  scale_y_continuous(labels = percent) +
  labs(
    x = NULL, y = NULL, color = NULL, linetype = NULL,
    title = "End-of-month share of securities with beta estimates"
  ) +
  coord_cartesian(ylim = c(0, 1))

beta_long |>
  select(name, value) |>
  filter(!is.na(value)) |>
  group_by(name) |>
  summarize(
    mean = mean(value),
    sd = sd(value),
    min = min(value),
    q05 = quantile(value, 0.05),
    q50 = quantile(value, 0.50),
    q95 = quantile(value, 0.95),
    max = max(value),
    n = n()
  ) |>
  collect()
name mean sd min q05 q50 q95 max n
beta_monthly 1.102093 0.7114593 -13.02218 0.1294213 1.0373320 2.323526 10.34573 2163711
beta_daily 0.748910 0.9264405 -43.66630 -0.4468799 0.6855455 2.226624 56.55841 3233745
dbDisconnect(tidy_finance, shutdown = TRUE)

  1. Note that code above that creates beta_alt is not run with this document.↩︎