library(tidyverse)
library(DBI)
library(scales)
library(lmtest)
library(sandwich)
8 Value and Bivariate Sorts: Database version
<- dbConnect(
tidy_finance ::duckdb(),
duckdb"data/tidy_finance.duckdb",
read_only = TRUE)
<- function(df) {
drop_na |>
df filter(if_all(everything(), ~ !is.na(.)))
}
# crsp_monthly <-
<-
crsp_monthly tbl(tidy_finance, "crsp_monthly") |>
select(permno, gvkey, month, ret_excess,
|>
mktcap, mktcap_lag, exchange) drop_na()
<- tbl(tidy_finance, "compustat")
compustat
<-
be |>
compustat select(gvkey, datadate, be) |>
drop_na() |>
mutate(month = floor_date(datadate, "month"))
8.1 Book-to-Market Ratio
<-
me |>
crsp_monthly mutate(sorting_date = month + months(1)) |>
select(permno, sorting_date, me = mktcap)
<-
bm |>
be inner_join(crsp_monthly |>
select(month, permno, gvkey, mktcap), by = c("gvkey", "month")) |>
mutate(
bm = be / mktcap,
sorting_date = month + months(6),
comp_date = sorting_date
|>
) select(permno, gvkey, sorting_date, comp_date, bm)
<-
data_for_sorts |>
crsp_monthly left_join(bm, by = c("permno",
"gvkey",
"month" = "sorting_date"
|>
)) left_join(me, by = c("permno", "month" = "sorting_date")) |>
select(
permno, gvkey, month, ret_excess,
mktcap_lag, me, bm, exchange, comp_date|>
) group_by(permno, gvkey) |>
::window_order(permno, gvkey, month) |>
dbplyrfill(bm, comp_date) |>
filter(comp_date > month - months(12)) |>
select(-comp_date) |>
ungroup() |>
drop_na() |>
compute()
<- function(data, var, n_portfolios, exchanges) {
get_breaks
<- 0:n_portfolios
ports <- sql(paste0("[", paste(ports, collapse = ", "), "]"))
ports_sql <- seq(0, 1, 1/n_portfolios)
breaks <- sql(paste0("[", paste(breaks, collapse = ", "), "]"))
breaks_sql
|>
data filter(exchange %in% exchanges) |>
summarize(portfolio = ports_sql,
breaks = quantile_cont({{ var }}, breaks_sql),
.groups = "keep") %>%
mutate(portfolio = unnest(portfolio),
breaks = unnest(breaks)) |>
::window_order(portfolio) |>
dbplyrmutate(port_max = if_else(portfolio == n_portfolios, Inf, breaks),
port_min = if_else(portfolio == 1, -Inf, lag(breaks))) |>
filter(portfolio > 0) |>
select(group_cols(), portfolio, port_max, port_min)
}
<- function(data, var, n_portfolios, exchanges) {
assign_portfolio <- get_breaks(data, {{ var }}, n_portfolios, exchanges)
breaks
<- group_vars(data)
group_vars
|>
data inner_join(breaks,
join_by(!!!group_vars,
>= port_min,
{{ var }} < port_max)) |>
{{ var }} select(-port_min, -port_max) |>
compute()
}
8.2 Independent Sorts
<-
value_portfolios |>
data_for_sorts group_by(month) |>
assign_portfolio(
var = bm,
n_portfolios = 5,
exchanges = c("NYSE")) |>
rename(portfolio_bm = portfolio) |>
assign_portfolio(
var = me,
n_portfolios = 5,
exchanges = c("NYSE")) |>
rename(portfolio_me = portfolio) |>
group_by(month, portfolio_me, portfolio_bm) |>
summarize(
ret = sum(ret_excess * mktcap_lag) / sum(mktcap_lag),
.groups = "drop")
<-
value_premium |>
value_portfolios group_by(month, portfolio_bm) |>
summarize(ret = mean(ret), .groups = "drop_last") |>
mutate(portfolio_bm =
case_when(portfolio_bm == min(portfolio_bm) ~ "min",
== max(portfolio_bm) ~ "max",
portfolio_bm TRUE ~ "other")) |>
filter(portfolio_bm %in% c("min", "max")) |>
pivot_wider(names_from = portfolio_bm, values_from = ret) |>
mutate(value_premium = max - min) |>
summarize(value_premium = mean(value_premium))
|>
value_premium summarize(mean(value_premium, na.rm = TRUE)) |>
pull() * 100
[1] 0.3839036
8.3 Dependent Sorts
<-
value_portfolios |>
data_for_sorts group_by(month) |>
assign_portfolio(
var = me,
n_portfolios = 5,
exchanges = c("NYSE")) |>
rename(portfolio_me = portfolio) |>
group_by(month, portfolio_me) |>
assign_portfolio(
var = bm,
n_portfolios = 5,
exchanges = c("NYSE")) |>
rename(portfolio_bm = portfolio) |>
group_by(month, portfolio_me, portfolio_bm) |>
summarize(
ret = sum(ret_excess * mktcap_lag) / sum(mktcap_lag),
.groups = "drop")
<-
value_premium |>
value_portfolios group_by(month, portfolio_bm) |>
summarize(ret = mean(ret), .groups = "drop_last") |>
mutate(portfolio_bm =
case_when(portfolio_bm == min(portfolio_bm) ~ "min",
== max(portfolio_bm) ~ "max",
portfolio_bm TRUE ~ "other")) |>
filter(portfolio_bm %in% c("min", "max")) |>
pivot_wider(names_from = portfolio_bm, values_from = ret) |>
mutate(value_premium = max - min) |>
summarize(value_premium = mean(value_premium))
|>
value_premium summarize(mean(value_premium, na.rm = TRUE)) |>
pull() * 100
[1] 0.3278762