library(tidyverse)
library(DBI)
library(scales)
library(lmtest)
library(sandwich)8 Value and Bivariate Sorts: Database version
tidy_finance <- dbConnect(
duckdb::duckdb(),
"data/tidy_finance.duckdb",
read_only = TRUE)
drop_na <- function(df) {
df |>
filter(if_all(everything(), ~ !is.na(.)))
}
# crsp_monthly <-
crsp_monthly <-
tbl(tidy_finance, "crsp_monthly") |>
select(permno, gvkey, month, ret_excess,
mktcap, mktcap_lag, exchange) |>
drop_na()compustat <- tbl(tidy_finance, "compustat")
be <-
compustat |>
select(gvkey, datadate, be) |>
drop_na() |>
mutate(month = floor_date(datadate, "month"))8.1 Book-to-Market Ratio
me <-
crsp_monthly |>
mutate(sorting_date = month + months(1)) |>
select(permno, sorting_date, me = mktcap)
bm <-
be |>
inner_join(crsp_monthly |>
select(month, permno, gvkey, mktcap), by = c("gvkey", "month")) |>
mutate(
bm = be / mktcap,
sorting_date = month + months(6),
comp_date = sorting_date
) |>
select(permno, gvkey, sorting_date, comp_date, bm)
data_for_sorts <-
crsp_monthly |>
left_join(bm, by = c("permno",
"gvkey",
"month" = "sorting_date"
)) |>
left_join(me, by = c("permno", "month" = "sorting_date")) |>
select(
permno, gvkey, month, ret_excess,
mktcap_lag, me, bm, exchange, comp_date
) |>
group_by(permno, gvkey) |>
dbplyr::window_order(permno, gvkey, month) |>
fill(bm, comp_date) |>
filter(comp_date > month - months(12)) |>
select(-comp_date) |>
ungroup() |>
drop_na() |>
compute()get_breaks <- function(data, var, n_portfolios, exchanges) {
ports <- 0:n_portfolios
ports_sql <- sql(paste0("[", paste(ports, collapse = ", "), "]"))
breaks <- seq(0, 1, 1/n_portfolios)
breaks_sql <- sql(paste0("[", paste(breaks, collapse = ", "), "]"))
data |>
filter(exchange %in% exchanges) |>
summarize(portfolio = ports_sql,
breaks = quantile_cont({{ var }}, breaks_sql),
.groups = "keep") %>%
mutate(portfolio = unnest(portfolio),
breaks = unnest(breaks)) |>
dbplyr::window_order(portfolio) |>
mutate(port_max = if_else(portfolio == n_portfolios, Inf, breaks),
port_min = if_else(portfolio == 1, -Inf, lag(breaks))) |>
filter(portfolio > 0) |>
select(group_cols(), portfolio, port_max, port_min)
}
assign_portfolio <- function(data, var, n_portfolios, exchanges) {
breaks <- get_breaks(data, {{ var }}, n_portfolios, exchanges)
group_vars <- group_vars(data)
data |>
inner_join(breaks,
join_by(!!!group_vars,
{{ var }} >= port_min,
{{ var }} < port_max)) |>
select(-port_min, -port_max) |>
compute()
}8.2 Independent Sorts
value_portfolios <-
data_for_sorts |>
group_by(month) |>
assign_portfolio(
var = bm,
n_portfolios = 5,
exchanges = c("NYSE")) |>
rename(portfolio_bm = portfolio) |>
assign_portfolio(
var = me,
n_portfolios = 5,
exchanges = c("NYSE")) |>
rename(portfolio_me = portfolio) |>
group_by(month, portfolio_me, portfolio_bm) |>
summarize(
ret = sum(ret_excess * mktcap_lag) / sum(mktcap_lag),
.groups = "drop")value_premium <-
value_portfolios |>
group_by(month, portfolio_bm) |>
summarize(ret = mean(ret), .groups = "drop_last") |>
mutate(portfolio_bm =
case_when(portfolio_bm == min(portfolio_bm) ~ "min",
portfolio_bm == max(portfolio_bm) ~ "max",
TRUE ~ "other")) |>
filter(portfolio_bm %in% c("min", "max")) |>
pivot_wider(names_from = portfolio_bm, values_from = ret) |>
mutate(value_premium = max - min) |>
summarize(value_premium = mean(value_premium))value_premium |>
summarize(mean(value_premium, na.rm = TRUE)) |>
pull() * 100[1] 0.3839036
8.3 Dependent Sorts
value_portfolios <-
data_for_sorts |>
group_by(month) |>
assign_portfolio(
var = me,
n_portfolios = 5,
exchanges = c("NYSE")) |>
rename(portfolio_me = portfolio) |>
group_by(month, portfolio_me) |>
assign_portfolio(
var = bm,
n_portfolios = 5,
exchanges = c("NYSE")) |>
rename(portfolio_bm = portfolio) |>
group_by(month, portfolio_me, portfolio_bm) |>
summarize(
ret = sum(ret_excess * mktcap_lag) / sum(mktcap_lag),
.groups = "drop")value_premium <-
value_portfolios |>
group_by(month, portfolio_bm) |>
summarize(ret = mean(ret), .groups = "drop_last") |>
mutate(portfolio_bm =
case_when(portfolio_bm == min(portfolio_bm) ~ "min",
portfolio_bm == max(portfolio_bm) ~ "max",
TRUE ~ "other")) |>
filter(portfolio_bm %in% c("min", "max")) |>
pivot_wider(names_from = portfolio_bm, values_from = ret) |>
mutate(value_premium = max - min) |>
summarize(value_premium = mean(value_premium))value_premium |>
summarize(mean(value_premium, na.rm = TRUE)) |>
pull() * 100[1] 0.3278762