8  Value and Bivariate Sorts: Database version

library(tidyverse)
library(DBI)
library(scales)
library(lmtest)
library(sandwich)
tidy_finance <- dbConnect(
  duckdb::duckdb(),
  "data/tidy_finance.duckdb",
  read_only = TRUE)

drop_na <- function(df) {
  df |>
    filter(if_all(everything(), ~ !is.na(.)))
}

# crsp_monthly <- 
crsp_monthly <- 
  tbl(tidy_finance, "crsp_monthly") |>
  select(permno, gvkey, month, ret_excess,
         mktcap, mktcap_lag, exchange) |>
  drop_na()
compustat <- tbl(tidy_finance, "compustat") 

be <- 
  compustat |>
  select(gvkey, datadate, be) |>
  drop_na() |>
  mutate(month = floor_date(datadate, "month"))

8.1 Book-to-Market Ratio

me <- 
  crsp_monthly |>
  mutate(sorting_date = month + months(1)) |>
  select(permno, sorting_date, me = mktcap)

bm <- 
  be |>
  inner_join(crsp_monthly |>
    select(month, permno, gvkey, mktcap), by = c("gvkey", "month")) |>
  mutate(
    bm = be / mktcap,
    sorting_date = month + months(6),
    comp_date = sorting_date
  ) |>
  select(permno, gvkey, sorting_date, comp_date, bm)

data_for_sorts <- 
  crsp_monthly |>
  left_join(bm, by = c("permno",
    "gvkey",
    "month" = "sorting_date"
  )) |>
  left_join(me, by = c("permno", "month" = "sorting_date")) |>
  select(
    permno, gvkey, month, ret_excess,
    mktcap_lag, me, bm, exchange, comp_date
  ) |>
  group_by(permno, gvkey) |>
  dbplyr::window_order(permno, gvkey, month) |>
  fill(bm, comp_date) |>
  filter(comp_date > month - months(12)) |>
  select(-comp_date) |>
  ungroup() |> 
  drop_na() |>
  compute()
get_breaks <- function(data, var, n_portfolios, exchanges) {
  
  ports <- 0:n_portfolios
  ports_sql <- sql(paste0("[", paste(ports, collapse = ", "), "]"))
  breaks <- seq(0, 1, 1/n_portfolios)
  breaks_sql <- sql(paste0("[", paste(breaks, collapse = ", "), "]"))
  
  data |>
    filter(exchange %in% exchanges) |>
    summarize(portfolio = ports_sql,
              breaks = quantile_cont({{ var }}, breaks_sql),
              .groups = "keep") %>%
    mutate(portfolio = unnest(portfolio),
           breaks = unnest(breaks)) |>
    dbplyr::window_order(portfolio) |>
    mutate(port_max = if_else(portfolio == n_portfolios, Inf, breaks),
           port_min = if_else(portfolio == 1, -Inf, lag(breaks))) |>
    filter(portfolio > 0) |>
    select(group_cols(), portfolio, port_max, port_min)
}

assign_portfolio <- function(data, var, n_portfolios, exchanges) {
  breaks <- get_breaks(data, {{ var }}, n_portfolios, exchanges)
  
  group_vars <- group_vars(data) 
  
  data |>
    inner_join(breaks, 
               join_by(!!!group_vars, 
                       {{ var }} >= port_min,
                       {{ var }} < port_max)) |>
    select(-port_min, -port_max) |>
    compute()
}

8.2 Independent Sorts

value_portfolios <-
  data_for_sorts |>
  group_by(month) |>
  assign_portfolio(
    var = bm,
    n_portfolios = 5,
    exchanges = c("NYSE")) |>
  rename(portfolio_bm = portfolio) |>
  assign_portfolio(
    var = me,
    n_portfolios = 5,
    exchanges = c("NYSE")) |>
  rename(portfolio_me = portfolio) |>
  group_by(month, portfolio_me, portfolio_bm) |>
  summarize(
    ret = sum(ret_excess * mktcap_lag) / sum(mktcap_lag),
    .groups = "drop")
value_premium <- 
  value_portfolios |>
  group_by(month, portfolio_bm) |>
  summarize(ret = mean(ret), .groups = "drop_last") |>
  mutate(portfolio_bm =
           case_when(portfolio_bm == min(portfolio_bm) ~ "min",
                     portfolio_bm == max(portfolio_bm) ~ "max",
                     TRUE ~ "other")) |>
  filter(portfolio_bm %in% c("min", "max")) |>
  pivot_wider(names_from = portfolio_bm, values_from = ret) |>
  mutate(value_premium = max - min) |>
  summarize(value_premium = mean(value_premium))
value_premium |>
  summarize(mean(value_premium, na.rm = TRUE)) |>
  pull() * 100
[1] 0.3839036

8.3 Dependent Sorts

value_portfolios <-
  data_for_sorts |>
  group_by(month) |>
  assign_portfolio(
    var = me,
    n_portfolios = 5,
    exchanges = c("NYSE")) |>
  rename(portfolio_me = portfolio) |>
  group_by(month, portfolio_me) |>
  assign_portfolio(
    var = bm,
    n_portfolios = 5,
    exchanges = c("NYSE")) |>
  rename(portfolio_bm = portfolio) |>
  group_by(month, portfolio_me, portfolio_bm) |>
  summarize(
    ret = sum(ret_excess * mktcap_lag) / sum(mktcap_lag),
    .groups = "drop")
value_premium <- 
  value_portfolios |>
  group_by(month, portfolio_bm) |>
  summarize(ret = mean(ret), .groups = "drop_last") |>
  mutate(portfolio_bm =
           case_when(portfolio_bm == min(portfolio_bm) ~ "min",
                     portfolio_bm == max(portfolio_bm) ~ "max",
                     TRUE ~ "other")) |>
  filter(portfolio_bm %in% c("min", "max")) |>
  pivot_wider(names_from = portfolio_bm, values_from = ret) |>
  mutate(value_premium = max - min) |>
  summarize(value_premium = mean(value_premium))
value_premium |>
  summarize(mean(value_premium, na.rm = TRUE)) |>
  pull() * 100
[1] 0.3278762