R Econ Visual Library

R code for data visualization in economics, created and maintained by DIME Analytics.

# Install and load packages ---------------
# Every package this script attaches.
packages <- c("tidyverse", "haven", "splines")

# Switch to install = TRUE to let pacman install whatever is missing.
pacman::p_load(char = packages, install = FALSE)

# Load an example dataset ---------------
data <- read_dta("https://github.com/worldbank/r-econ-visual-library/raw/master/Library/Data/RDD_data.dta")

# Strip the Stata value labels right after import so every column is a plain
# vector. Otherwise ggplot2 is handed haven_labelled columns and reports that
# it cannot pick a scale for them ("Defaulting to continuous").
# `treatment` flags observations whose score is at or above the cutoff.
data <- data %>%
  zap_labels() %>%
  mutate(treatment = pmt_score >= cutoff)

# Width of the score bins, in pmt_score units.
bin_width <- 1.0

# Bin edges sit on a grid anchored at the (mean) cutoff and spaced by
# bin_width. The grid is extended one step past the observed minimum and
# maximum so that every score falls inside a bin; stopping the grid short of
# the extremes would leave the tail observations with an NA bin.
cutoff_value <- as.numeric(mean(data$cutoff, na.rm = TRUE))
lowest_step <- floor(
  (as.numeric(min(data$pmt_score, na.rm = TRUE)) - cutoff_value) / bin_width
)
highest_step <- floor(
  (as.numeric(max(data$pmt_score, na.rm = TRUE)) - cutoff_value) / bin_width
) + 1
bin_breaks <- cutoff_value + bin_width * seq(lowest_step, highest_step)

fig_data <- data %>%
  mutate(
    # Left-closed bins [a, b): a score exactly at the cutoff lands in the first
    # bin at or above it, matching treatment = (pmt_score >= cutoff).
    pmt_score_bin = cut(pmt_score, breaks = bin_breaks, right = FALSE)
  ) %>%
  group_by(treatment, pmt_score_bin) %>%
  # n = number of observations in each treatment-by-bin cell
  add_count(treatment) %>%
  mutate(
    # Midpoint of the scores observed in the cell (not the arithmetic mean)
    mean_pmt_score = (min(pmt_score) + max(pmt_score)) / 2,
    # Average of tmt_status within the cell
    mean_tmt_status = mean(tmt_status)
  ) %>%
  ungroup()

# Plot tmt_status against pmt_score, fitted separately for each value of
# `treatment` (i.e. on each side of the cutoff).
ggplot(fig_data, aes(x = pmt_score, y = tmt_status, color = treatment)) +
  # Linear fit, drawn as a solid line without its own confidence band
  geom_smooth(method = lm, formula = y ~ x, size = 0.5, se = FALSE) +
  # Confidence band of the same linear fit, drawn as a borderless ribbon
  geom_ribbon(
    aes(fill = treatment), stat = "smooth", method = "lm",
    formula = y ~ x, linetype = "blank", alpha = 0.2
  ) +
  # Flexible fit on a B-spline basis (bs() with 3 degrees of freedom), dashed
  geom_smooth(
    method = lm, formula = y ~ bs(x, 3), size = 0.5, se = FALSE,
    linetype = "longdash"
  ) +
  # One reference line per distinct cutoff value. Mapping the full data would
  # draw an identical line for every row of fig_data.
  geom_vline(
    data = distinct(fig_data, cutoff),
    aes(xintercept = cutoff), linetype = "longdash"
  ) +
  xlab("Proxy means test score") +
  ylab("Receiving treatment (95% CI for linear fit)") +
  scale_color_brewer(palette = "Set2") +
  scale_fill_brewer(palette = "Set2") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.position = "none"
  )

## Don't know how to automatically pick scale for object of type haven_labelled. Defaulting to continuous.