R Econ Visual Library

R code for data visualization in economics, created and maintained by DIME Analytics.

# Install and load packages ---------------
packages <- c(
  "tidyverse",
  "haven"
)

# Change to install = TRUE to install the required packages
pacman::p_load(packages, character.only = TRUE, install = FALSE)

# Load an example dataset ---------------
data_url <- "https://github.com/worldbank/r-econ-visual-library/raw/master/Library/Data/DensityWithData.dta"

# Keep rows where both theta_mle and roster_6a8 are present, shift the score
# so that its minimum is zero, flag roster_6a8 codes above 4 as `bach`, and
# store the size of each `bach` group in `tot_num`.
data <- read_dta(data_url) %>%
  filter(!is.na(theta_mle), !is.na(roster_6a8)) %>%
  mutate(
    score = theta_mle - min(theta_mle),
    bach = as.numeric(roster_6a8) > 4
  ) %>%
  add_count(bach, name = "tot_num")

# Histogram bin width; also used to rescale the density curves in the plot.
bw <- 0.1

# Create the column before filling it group by group. Assigning into a subset
# of a column that does not exist yet makes tibble emit
# "Unknown or uninitialised column: `bin_score`".
data$bin_score <- NA_real_

# For each group, give every observation the count of the histogram bin that
# contains its score, so points can be drawn at the height of their bin.
for (i in c(0, 1)) {
  sub_data <- data %>% filter(bach == i)

  # Let ggplot2 compute the bins so they match what geom_histogram would draw.
  hist_graph <- ggplot(sub_data, aes(x = score)) +
    geom_histogram(binwidth = bw)
  hist_data <- ggplot_build(hist_graph)$data[[1]]

  # Bin edges: every left edge plus the right edge of the last bin.
  hist_breaks <- c(hist_data$xmin, tail(hist_data$xmax, n = 1))

  # geom_histogram bins are right-closed by default (closed = "right") and
  # include the lowest edge; mirror that here so a score lying exactly on the
  # last break maps to the last bin instead of indexing past `count` (NA).
  bin_index <- findInterval(
    sub_data$score,
    hist_breaks,
    left.open = TRUE,
    rightmost.closed = TRUE
  )

  data$bin_score[data$bach == i] <- hist_data$count[bin_index]
}
# Preview the first rows, including the newly added columns
data %>% head()
## # A tibble: 6 x 11
##   location_type FACILITY_ID DOCTOR_ID facility facilitycode roster_6a8 theta_mle score bach  tot_num
##       <dbl+lbl>       <dbl>     <dbl>    <dbl>        <dbl>  <dbl+lbl>     <dbl> <dbl> <lgl>   <int>
## 1   1 [Commune]       10101   1010101       NA            1 4 [Bachel~     1.75   6.75 FALSE     575
## 2   1 [Commune]       10202   1020204       NA            2 4 [Bachel~     0.161  5.16 FALSE     575
## 3   1 [Commune]       10303   1030301       NA            3 4 [Bachel~    -3.50   1.50 FALSE     575
## 4   1 [Commune]       10404   1040401       NA            4 4 [Bachel~    -0.867  4.13 FALSE     575
## 5   1 [Commune]       10505   1050501       NA            5 4 [Bachel~    -3.67   1.33 FALSE     575
## 6   1 [Commune]       10506   1050601       NA            6 6 [Specia~    -0.489  4.51 TRUE      428
## # ... with 1 more variable: bin_score <dbl>
# Density plot with data: one curve per `bach` group with each observation
# overlaid as a point at the count of its histogram bin.
# Alternative scaling kept from the original for reference:
#   geom_density(aes(y = ..density.. * tot_num * bw))
# NOTE(review): the second geom_density() layer draws the default (unscaled)
# density on the same y axis as the counts -- confirm it is intended.
density_layers <- list(
  # Density rescaled to counts per bin (count * bin width)
  geom_density(aes(y = ..count.. * bw)),
  geom_density(),
  geom_point(aes(y = bin_score), size = 0.5)
)

ggplot(data, aes(x = score, colour = bach)) + density_layers