R Econ Visual Library

R code for data visualization in economics, created and maintained by DIME Analytics.

# Install and load packages ---------------
packages <- c(
  "tidyverse",
  "haven",
  "rdd",
  "splines"
)

# Change to install = TRUE to install the required packages
pacman::p_load(packages, character.only = TRUE, install = FALSE)

# Load an example dataset ---------------
# https://openknowledge.worldbank.org/handle/10986/25030
data <- read_dta("https://github.com/worldbank/r-econ-visual-library/raw/master/Library/Data/evaluation.dta")

cutoff <- 58

# Functions to find endpoints of intervals
left_endpoint <- function(x){
  return(max(hor_bin_breaks[hor_bin_breaks <= x]))
}

right_endpoint <- function(x){
  return(min(hor_bin_breaks[hor_bin_breaks > x]))
}

bottom_endpoint <- function(x){
  return(max(var_bin_breaks[var_bin_breaks <= x]))
}

top_endpoint <- function(x){
  return(min(var_bin_breaks[var_bin_breaks > x]))
}

# Bins for histogram
hor_bin_width = 2.5
hor_bin_breaks = with(
  data, c(
    seq(
      cutoff, 
      min(poverty_index) - hor_bin_width, 
      -hor_bin_width
      ), 
    seq(cutoff + hor_bin_width, 
        max(poverty_index) + hor_bin_width, 
        hor_bin_width
        )
    )
  )

var_bin_width = 2.5
var_bin_breaks = with(data, seq(0, max(health_expenditures) + var_bin_width, var_bin_width))

fig_data <- data %>%
  filter(treatment_locality == 1) %>%
  mutate(
    treatment = (poverty_index <= cutoff),
    poverty_index_bin = cut(poverty_index, sort(hor_bin_breaks)),
    health_exp_bin = cut(health_expenditures, sort(var_bin_breaks))
    ) %>%
  group_by(treatment, poverty_index_bin, health_exp_bin) %>%
  mutate(n_bin = n()) %>%
  mutate(
    xmin = sapply(poverty_index, left_endpoint),
    xmax = sapply(poverty_index, right_endpoint),
    ymin = sapply(health_expenditures, bottom_endpoint),
    ymax = sapply(health_expenditures, top_endpoint)
    ) %>%
  ungroup()

ggplot(fig_data, aes(x = poverty_index, y = health_expenditures, color = treatment)) +
  geom_rect(
    aes(xmin = xmin, xmax = xmax, 
        ymin = ymin, ymax = ymax,
        fill = n_bin),
    color = NA, alpha = 0.05) +
  geom_smooth(method = lm, formula = y ~ bs(x, 3), size = 1.0, se = FALSE) +
  geom_vline(xintercept = cutoff, linetype = "longdash") +
  ylim(c(0, 45)) +
  xlab("Baseline Poverty Index") +
  ylab("Health Expenditures ($)") +
#  scale_color_brewer(name = "Eligibility",
#                     palette = "Set2", 
#                     labels = c("Not eligible", "Eligible")) +
#  scale_color_manual(name = "Eligibility",
#                     values = c("#56B4E9", "#E69F00"),
#                     labels = c("Not eligible", "Eligible")) +
#  scale_fill_gradient(name = "Counts",
#                      low = "gray50", high = "gray30") +
  scale_color_manual(
    name = "Eligibility",
    values = c("#F0E442", "#CC79A7"),
    labels = c("Not eligible", "Eligible")
    ) +
  scale_fill_gradient(
    name = "Counts",
    low = "#56B4E9", high = "#0072B2"
    ) +
  guides(color = guide_legend(override.aes = list(fill = NA))) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 13),
    legend.text = element_text(size = 11),
    legend.background = element_rect(color = NA)
  )