# R code for data visualization in economics, created and maintained by DIME Analytics.
# Install and load packages ---------------
packages <- c(
"tidyverse",
"haven",
"rdd",
"splines"
)
# Change to install = TRUE to install the required packages
pacman::p_load(packages, character.only = TRUE, install = FALSE)
# Load an example dataset ---------------
# Read the example RDD data and flag observations whose score is at or above
# the cutoff.
data <- read_dta("https://github.com/worldbank/r-econ-visual-library/raw/master/Library/Data/RDD_data.dta") %>%
  mutate(treatment = (pmt_score >= cutoff))

# Width of each bin of the running variable
bin_width <- 1.0

# Bin breaks are anchored at the (mean) cutoff so that no bin straddles it:
# one sequence walks down past the smallest score, the other walks up past
# the largest score. The intermediate names live only inside with().
bin_breaks <- with(data, {
  centre <- mean(cutoff)
  below <- seq(centre, min(pmt_score) - bin_width, -bin_width)
  above <- seq(centre + bin_width, max(pmt_score) + bin_width, bin_width)
  c(below, above)
})
# Functions to find endpoints of intervals
# Largest break that is less than or equal to each value of `x`, i.e. the
# left end of the left-closed interval [a, b) containing that value.
#
# x:      numeric value(s) to locate.
# breaks: numeric vector of interval breaks, in any order. Defaults to the
#         script-level `bin_breaks`, looked up when the function is called.
#
# Returns an unnamed numeric vector with one element per element of `x`.
# A value smaller than every break yields -Inf (with max()'s usual warning);
# an NA value yields NA.
left_endpoint <- function(x, breaks = bin_breaks) {
  # Element-wise on purpose: `breaks <= x` with a vector `x` would recycle
  # silently and return a single meaningless number.
  vapply(
    x,
    function(value) max(breaks[breaks <= value]),
    numeric(1),
    USE.NAMES = FALSE
  )
}
# Smallest break that is strictly greater than each value of `x`, i.e. the
# right end of the left-closed interval [a, b) containing that value.
#
# x:      numeric value(s) to locate.
# breaks: numeric vector of interval breaks, in any order. Defaults to the
#         script-level `bin_breaks`, looked up when the function is called.
#
# Returns an unnamed numeric vector with one element per element of `x`.
# A value at or above every break yields Inf (with min()'s usual warning);
# an NA value yields NA.
right_endpoint <- function(x, breaks = bin_breaks) {
  # Element-wise on purpose: `breaks > x` with a vector `x` would recycle
  # silently and return a single meaningless number.
  vapply(
    x,
    function(value) min(breaks[breaks > value]),
    numeric(1),
    USE.NAMES = FALSE
  )
}
# Bin the running variable and attach, to every row, the statistics of its
# treatment-by-bin cell: the number of observations (n), the mean of
# tmt_status and the mid-point of the bin.
#
# Bins are left-closed, [a, b), so that they agree with the treatment rule
# (pmt_score >= cutoff) and with left_endpoint() / right_endpoint(). With
# cut()'s default right-closed bins a score lying exactly on a break was
# grouped with the bin below it but plotted at the mid-point of the bin above.
fig_data <- data %>%
  mutate(pmt_score_bin = cut(pmt_score, sort(bin_breaks), right = FALSE)) %>%
  group_by(treatment, pmt_score_bin) %>%
  add_count(treatment) %>%
  mutate(
    mean_tmt_status = mean(tmt_status),
    # vapply() guarantees exactly one double per row, whatever the group size
    mid_point = (
      vapply(pmt_score, left_endpoint, numeric(1)) +
        vapply(pmt_score, right_endpoint, numeric(1))
    ) / 2
  ) %>%
  ungroup()
# RDD plot: binned means of tmt_status (points sized by the number of
# observations behind them) overlaid with a cubic B-spline fit and its
# confidence band. Mapping color to treatment makes ggplot2 fit the smooth
# separately for each treatment group.
ggplot(fig_data, aes(x = pmt_score, y = tmt_status, color = treatment)) +
  geom_point(
    aes(x = mid_point, y = mean_tmt_status, size = n),
    show.legend = FALSE
  ) +
  # Fitted curve only; the band is drawn by the ribbon layer below
  geom_smooth(method = lm, formula = y ~ bs(x, 3), size = 1.0, se = FALSE) +
  # Same model drawn as an unfilled ribbon, so only the dashed outline of the
  # confidence band is visible
  geom_ribbon(
    stat = "smooth", method = "lm",
    formula = y ~ bs(x, 3), fill = NA, linetype = "dashed", size = 0.3
  ) +
  # One reference line per distinct cutoff value; mapping the column through
  # aes() would draw an identical line for every row of the data
  geom_vline(xintercept = unique(fig_data$cutoff), linetype = "longdash") +
  xlab("Proxy means test score") +
  ylab("Receiving treatment (95% CI)") +
  scale_color_brewer(palette = "Set2") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.position = "none"
  )