# R code for data visualization in economics, created and maintained by DIME Analytics.
# Install and load packages ---------------
packages <- c(
"tidyverse",
"tidymodels",
"haven",
"ri2"
)
# Change to install = TRUE to install the required packages
pacman::p_load(packages, character.only = TRUE, install = FALSE)
# Load an example dataset ---------------
# Source: https://openknowledge.worldbank.org/handle/10986/25030
data <- read_dta(
  "https://github.com/worldbank/r-econ-visual-library/raw/master/Library/Data/evaluation.dta"
)

# Keep only the rows with eligible == 1 and round == 1
analysis_data <- data %>%
  filter(eligible == 1, round == 1)

# Number of distinct localities in the analysis sample
num_cluster <- analysis_data %>%
  pull(locality_identifier) %>%
  unique() %>%
  length()

# Number of distinct localities among rows with treatment_locality == 1
num_treated_cluster <- analysis_data %>%
  filter(treatment_locality == 1) %>%
  pull(locality_identifier) %>%
  unique() %>%
  length()
# Simulate placebo treatment assignments ---------------
# Fix the RNG seed so the simulated assignments are reproducible
set.seed(42949)
num_sim <- 1000

# The set of localities is the same for every draw, so compute it once
locality_ids <- unique(analysis_data$locality_identifier)

# Preallocate one slot per simulated estimate
sim_est <- numeric(num_sim)
for (i in seq_len(num_sim)) {
  # Draw localities by position rather than with sample(x, size): sample()
  # treats a length-one numeric x as 1:x, which would return ids that are not
  # in the data. For two or more ids this consumes the same random numbers.
  treated_locality_sim <- locality_ids[
    sample.int(length(locality_ids), num_treated_cluster)
  ]

  # Flag rows whose locality was drawn (1) versus not drawn (0)
  sim_data <- analysis_data %>%
    mutate(
      treat_sim = as.numeric(locality_identifier %in% treated_locality_sim)
    )

  # Regress the outcome on the simulated assignment and keep its coefficient
  res <- lm(health_expenditures ~ treat_sim, data = sim_data)
  sim_est[i] <- coef(res)[["treat_sim"]]
}
# Fit the same regression using the treatment assignment recorded in the data
res <- lm(health_expenditures ~ treatment_locality, data = analysis_data)
# Coefficient on treatment_locality, compared against the simulated estimates
point_est <- coef(res)["treatment_locality"]
# Histogram of the simulated estimates; the red vertical line marks point_est
ggplot() +
  geom_histogram(mapping = aes(x = sim_est), binwidth = 0.25) +
  geom_vline(xintercept = point_est, color = "red", alpha = 0.7) +
  labs(x = "Simulated point estimates") +
  theme_classic() +
  theme(
    # Remove every y-axis element (title, text, ticks, line)
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    # Set the font sizes of the x-axis title and tick labels
    axis.title.x = element_text(size = 14),
    axis.text.x = element_text(size = 12)
  )