To read the created .tex
file, the following \(\LaTeX\) packages are required:
Following the checklist for tables by DIME Analytics, the constant (intercept)
coefficient is omitted from the table and the mean of the outcome variable is reported instead.
# Install and load packages ---------------
packages <- c(
"tidyverse",
"haven",
"lfe",
"stargazer",
"broom",
"kableExtra",
"XML",
"magrittr",
"pdftools"
)
pacman::p_load(packages, character.only = TRUE, install = TRUE)
# Load an example dataset ---------------
data <- read_dta("data/ReplicationDataGhanaJDE.dta")

# Dependent variable used in every regression.
outcome <- "realfinalprofit"

# Regressors of interest: pooled treatment indicators (set 1) and the
# treatment-by-gender versions (set 2).
rhs_var_1 <- c("atreatcash", "atreatequip")
rhs_var_2 <- c(
  "atreatcashfemale", "atreatequipfemale",
  "atreatcashmale", "atreatequipmale"
)

# Control variables, picked from the column names of `data` by pattern:
#   set 1: "wave" followed by one digit
#   set 2: as set 1, optionally with a "_female" suffix
#   set 3: "wave6", optionally with a "_female" suffix
control_var_1 <- grep("^wave\\d$", colnames(data), value = TRUE)
control_var_2 <- grep("^wave\\d(_female)?$", colnames(data), value = TRUE)
control_var_3 <- grep("^wave6(_female)?$", colnames(data), value = TRUE)

# Remaining parts of the felm formula: fixed effects (two alternatives),
# instrument specification ("0" is passed through as-is) and cluster variable.
fe_1 <- "groupnum"
fe_2 <- "sheno"
iv <- "0"
cluster <- "sheno"
# Build the four-part formula that is passed to lfe::felm():
#   outcome ~ regressors | fixed effects | IV | cluster
#
# Arguments (all character):
#   outcome      name of the dependent variable
#   rhs_var      names of the regressors of interest
#   control_var  names of additional control variables
#   fe           fixed-effect variable name(s)
#   iv           instrument specification, inserted verbatim
#                (this script passes "0")
#   cluster      cluster variable name(s)
#
# Returns a single formula object.
create_formula <- function(outcome, rhs_var, control_var, fe, iv, cluster) {
  regressors <- paste(c(rhs_var, control_var), collapse = " + ")
  # Collapse fe and cluster to one string each. Without this, a vector of
  # several fixed effects or cluster variables makes paste() below return
  # several strings instead of one formula.
  fe_part <- paste(fe, collapse = " + ")
  cluster_part <- paste(cluster, collapse = " + ")
  as.formula(
    paste(
      paste(outcome, regressors, sep = " ~ "),
      fe_part, iv, cluster_part,
      sep = " | "
    )
  )
}
# Table of model formulas: one row per regression, with columns `model_no`
# (1 to 10) and `formula`. Five specifications are defined and each one is
# repeated twice in a row, because consecutive pairs of regressions share
# the same formula and differ only in the subsample.
formulas <- list(
  create_formula(outcome, rhs_var_1, control_var_1, fe_1, iv, cluster),
  create_formula(outcome, rhs_var_1, control_var_1, fe_2, iv, cluster),
  create_formula(outcome, rhs_var_2, control_var_2, fe_1, iv, cluster),
  create_formula(outcome, rhs_var_2, control_var_2, fe_2, iv, cluster),
  create_formula(outcome, rhs_var_2, control_var_3, fe_1, iv, cluster)
) %>%
  rep(each = 2) %>%
  enframe("model_no", "formula")
# List of ten functions, one per regression, that take a data frame and
# return the subsample used in that regression:
#   regressions 1, 3, 5, 7 : filter() itself, called without conditions
#   regressions 2, 4, 6, 8 : rows where trimgroup is NA
#   regression 9           : rows with wave >= 5
#   regression 10          : rows where trimgroup is NA and wave >= 5
data_filter_list <- c(
  rep(
    list(
      filter,
      function(x) filter(x, is.na(trimgroup))
    ),
    times = 4
  ),
  list(
    function(x) filter(x, wave >= 5),
    function(x) filter(x, is.na(trimgroup), wave >= 5)
  )
)
# list of functions to conduct F-test
#
# test_fun_list[[k]][[i]] is a function that takes the fitted model of
# regression i and returns the cell shown in test row k of the table:
#   k = 1: atreatcash = atreatequip                  (regressions 1-4)
#   k = 2: atreatcashfemale = atreatequipfemale      (regressions 5-10)
#   k = 3: atreatcashmale = atreatequipmale          (regressions 5-10)
#   k = 4: atreatcashmale = atreatcashfemale         (regressions 5-10)
#   k = 5: atreatequipmale = atreatequipfemale       (regressions 5-10)
# Where a test does not apply to a regression, the function returns "".
test_fun_list <- local({
  # Format a p-value with three decimals. `digits` has to be named: the
  # second positional argument of formatC() is `width`, so formatC(p, 3, ...)
  # would leave `digits` at its default and print four decimals.
  format_p <- function(p) formatC(p, digits = 3, format = "f")

  # Cell content for a test that does not apply to the regression.
  blank <- function(x) ""

  # Each restriction formula is written literally inside its own function.
  cash_vs_inkind <- function(x) {
    format_p(waldtest(x, ~ atreatcash - atreatequip)["p.F"])
  }
  cash_vs_inkind_female <- function(x) {
    format_p(waldtest(x, ~ atreatcashfemale - atreatequipfemale)["p.F"])
  }
  cash_vs_inkind_male <- function(x) {
    format_p(waldtest(x, ~ atreatcashmale - atreatequipmale)["p.F"])
  }
  cash_male_vs_female <- function(x) {
    format_p(waldtest(x, ~ atreatcashmale - atreatcashfemale)["p.F"])
  }
  inkind_male_vs_female <- function(x) {
    format_p(waldtest(x, ~ atreatequipmale - atreatequipfemale)["p.F"])
  }

  # Regressions 1-4 use the pooled regressors, 5-10 the gender-specific ones.
  pooled_only <- function(f) c(rep(list(f), 4), rep(list(blank), 6))
  by_gender_only <- function(f) c(rep(list(blank), 4), rep(list(f), 6))

  list(
    pooled_only(cash_vs_inkind),
    by_gender_only(cash_vs_inkind_female),
    by_gender_only(cash_vs_inkind_male),
    by_gender_only(cash_male_vs_female),
    by_gender_only(inkind_male_vs_female)
  )
})
# regression results and p-values from F-test
#
# Adds one column per derived quantity to the table of formulas:
#   model         felm fit of each formula on its own subsample; formulas and
#                 data_filter_list are matched by position
#   test_1..5     formatted F-test p-values (or "") from test_fun_list
#   n_firm        number of distinct sheno values in the model frame
#   mean_outcome  mean of the outcome in the model frame, two decimals
reg_res <- formulas %>%
  mutate(
    model = map2(formula, data_filter_list, function(x, y) felm(x, y(data))),
    test_1 = map2_chr(model, test_fun_list[[1]], function(x, y) y(x)),
    test_2 = map2_chr(model, test_fun_list[[2]], function(x, y) y(x)),
    test_3 = map2_chr(model, test_fun_list[[3]], function(x, y) y(x)),
    test_4 = map2_chr(model, test_fun_list[[4]], function(x, y) y(x)),
    test_5 = map2_chr(model, test_fun_list[[5]], function(x, y) y(x)),
    n_firm = map_int(model, function(x) n_distinct(model.frame(x)$sheno)),
    mean_outcome = map_chr(
      model,
      function(x) {
        # `digits` has to be named: the second positional argument of
        # formatC() is `width`, which would leave the number of decimals
        # at its default of four.
        formatC(
          colMeans(model.frame(x)[outcome]),
          digits = 2, format = "f"
        )
      }
    )
  )
# Write the regression table to a .tex file with stargazer.
# Only the coefficients matched by `keep` are displayed; the rows in
# `add.lines` report the number of firms, the sample definition, the
# F-test p-values and the mean of the outcome for each of the ten columns.
reg_res %>%
  pull(model) %>%
  stargazer(
    # output target and overall layout
    type = "latex",
    out = "tex/FMQW2014_table3_replicate.tex",
    title = "",
    table.layout = "=#c-t-sa-n",
    digits = 2,
    omit.stat = c("adj.rsq", "ser"),
    # column headers
    dep.var.labels.include = FALSE,
    column.labels = rep(c("OLS", "FE", "OLS", "FE", "OLS"), each = 2),
    # displayed coefficients and their labels
    keep = c(rhs_var_1, rhs_var_2),
    covariate.labels = c(
      "Cash treatment",
      "In-kind treatment",
      "Cash treatment * female",
      "In-kind treatment * female",
      "Cash treatment * male",
      "In-kind treatment * male"
    ),
    # additional rows, one value per regression
    add.lines = list(
      c("Number of firms", reg_res$n_firm),
      c("Waves", rep(c("All", "5 and 6"), times = c(8, 2))),
      c("Baseline trimming", rep(c("No", "Yes"), times = 5)),
      c("$p$-values for testing:", character(10)),
      c("\\ Cash = In-kind", reg_res$test_1),
      c("\\ Cash = In-kind for females", reg_res$test_2),
      c("\\ Cash = In-kind for males", reg_res$test_3),
      c("\\ Cash male = cash Female", reg_res$test_4),
      c("\\ In-kind male = in-kind female", reg_res$test_5),
      c("Mean of outcomes", reg_res$mean_outcome)
    )
  )