# R Econ Visual Library
#
# R code for data visualization in economics, created and maintained by DIME Analytics.

# Install and load packages ---------------
packages <- c(
  "tidyverse",
  "haven",
  "rdd",
  "splines"
)

# Change to install = TRUE to install the required packages
pacman::p_load(packages, character.only = TRUE, install = FALSE)

# Load an example dataset ---------------
# Read the Stata file straight from the repository and flag observations whose
# score is at or above the cutoff as `treatment`.
data <-
  "https://github.com/worldbank/r-econ-visual-library/raw/master/Library/Data/RDD_data.dta" %>%
  read_dta() %>%
  mutate(treatment = pmt_score >= cutoff)

# Width of each score bin, in the units of `pmt_score`
bin_width <- 1.0

# Bin boundaries anchored at the (mean) cutoff: step downwards until the
# minimum score is passed, then upwards until the maximum score is passed.
# The helper names below live only inside `with()` and do not leak out.
bin_breaks <- with(data, {
  anchor <- mean(cutoff)
  downward <- seq(anchor, min(pmt_score) - bin_width, -bin_width)
  upward <- seq(anchor + bin_width, max(pmt_score) + bin_width, bin_width)
  c(downward, upward)
})

# Functions to find endpoints of intervals

#' Left endpoint of the bin containing a score
#'
#' @param x A single numeric value.
#' @param breaks Numeric vector of bin boundaries, in any order. Defaults to
#'   the script-level `bin_breaks`, looked up when the function is called.
#' @return The largest break that is less than or equal to `x`. If no break
#'   qualifies, `max()` of an empty vector gives `-Inf` with a warning.
left_endpoint <- function(x, breaks = bin_breaks) {
  max(breaks[breaks <= x])
}

#' Right endpoint of the bin containing a score
#'
#' @param x A single numeric value.
#' @param breaks Numeric vector of bin boundaries, in any order. Defaults to
#'   the script-level `bin_breaks`, looked up when the function is called.
#' @return The smallest break that is strictly greater than `x`. If no break
#'   qualifies, `min()` of an empty vector gives `Inf` with a warning.
right_endpoint <- function(x, breaks = bin_breaks) {
  min(breaks[breaks > x])
}

# Plotting data: every observation, annotated with the statistics of its
# treatment-by-score-bin group (group size, mean outcome, bin mid-point).
fig_data <- data %>%
  # Left-closed bins [a, b): a score sitting exactly on a break (notably the
  # cutoff) is then binned on the same side as `treatment`
  # (pmt_score >= cutoff) and as the [left_endpoint, right_endpoint) interval
  # used for `mid_point`. The default right-closed bins would put it one bin
  # lower than its mid-point.
  mutate(pmt_score_bin = cut(pmt_score, sort(bin_breaks), right = FALSE)) %>%
  group_by(treatment, pmt_score_bin) %>%
  # Adds `n`, the number of observations in each treatment-by-bin group
  add_count(treatment) %>%
  mutate(
    mean_tmt_status = mean(tmt_status),
    # vapply() always returns a numeric vector, whatever the group size
    mid_point = (
      vapply(pmt_score, left_endpoint, numeric(1)) +
        vapply(pmt_score, right_endpoint, numeric(1))
    ) / 2
  ) %>%
  ungroup()

# Binned regression-discontinuity plot: bin means as points (sized by group
# count), a `bs(x, 3)` spline fit per treatment group, and dashed outlines of
# the fit's confidence band.
ggplot(fig_data, aes(x = pmt_score, y = tmt_status, color = treatment)) +
  geom_point(
    aes(x = mid_point, y = mean_tmt_status, size = n),
    show.legend = FALSE
  ) +
  # Fitted curve only; the band is drawn by the ribbon layer below.
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` to `size` for lines;
  # `size` is kept so older ggplot2 versions still work -- confirm the target.
  geom_smooth(method = "lm", formula = y ~ bs(x, 3), size = 1.0, se = FALSE) +
  # Same model as above; with no fill, only the band's boundaries are visible
  geom_ribbon(
    stat = "smooth", method = "lm",
    formula = y ~ bs(x, 3), fill = NA, linetype = "dashed", size = 0.3
  ) +
  # One reference line per distinct cutoff value, not one per observation
  geom_vline(
    data = distinct(fig_data, cutoff),
    aes(xintercept = cutoff),
    linetype = "longdash"
  ) +
  xlab("Proxy means test score") +
  ylab("Receiving treatment (95% CI)") +
  scale_color_brewer(palette = "Set2") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.position = "none"
  )