R Econ Visual Library

R code for data visualization in economics, created and maintained by DIME Analytics.

# Packages used by this script ---------------
packages <- c("tidyverse", "haven", "labelled", "forcats", "scales")

# The package names are passed as a character vector. With install = FALSE
# nothing is installed; switch to install = TRUE to install the required
# packages.
pacman::p_load(char = packages, install = FALSE)

# Read the example dataset and pull out its labels ---------------
data <- read_dta("https://github.com/worldbank/r-econ-visual-library/raw/master/Library/Data/ScatterPlotsStratified.dta")

# Variable labels, flattened into a vector named by variable
data_varlabel <- data %>%
  var_label() %>%
  unlist()

# Value labels attached to sp_case: the label texts and the codes they
# belong to, kept as two parallel vectors
sp_case_label <- data$sp_case %>%
  val_labels() %>%
  names()
sp_case_val <- data$sp_case %>%
  val_labels() %>%
  as.vector()

# Variables to summarise and plot
var_list <- c(
  "essential", "correct", "cxr", "sputum", "dstgx",
  "s5_referral", "sp_drugs_tb", "sp_drugs_antibio", "sp_drugs_quin"
)

# Collapse data
# One row per sp_case, holding the mean of every variable in var_list
# (missing values are ignored). across() replaces the superseded
# summarise_at(), and TRUE is spelled out because T can be reassigned.
collapsed_data <- data %>%
  group_by(sp_case) %>%
  summarise(across(all_of(var_list), ~ mean(.x, na.rm = TRUE))) %>%
  ungroup()

# Reshape data
# Long format: one row per (sp_case, variable) pair, with the variable name
# in `key` and its summarised value in `value`.
reshaped_data <- collapsed_data %>%
  pivot_longer(all_of(var_list), names_to = "key", values_to = "value") %>%
  mutate(
    # Swap each variable name for its variable label. Levels follow the order
    # of var_list and are then reversed -- presumably so the first variable
    # ends up at the top once the axes are flipped in the plot (confirm).
    # `key` is referenced directly instead of through the magrittr `.`
    # pronoun, and the factor is built in a single step.
    key = fct_rev(factor(data_varlabel[key], levels = data_varlabel[var_list])),
    # sp_case becomes a factor with its level order reversed
    sp_case = fct_rev(as.factor(sp_case))
  )

# Dot plot of `value` by `key`, with dots dodged and filled by sp_case.
# `key` is mapped to x and the coordinates are flipped, so the keys are drawn
# along the vertical axis and the values (limited to 0-1, shown as percent)
# along the horizontal axis.
p <- ggplot(reshaped_data, aes(x = key, y = value, fill = sp_case)) +
  geom_dotplot(
    binaxis = "y", position = "dodge",
    stackdir = "center", dotsize = 0.5, binwidth = 1 / 30
  ) +
  coord_flip(ylim = c(0, 1)) +
  # Legend entries use the sp_case value labels rather than the raw codes
  scale_fill_discrete(breaks = sp_case_val, labels = sp_case_label) +
  scale_y_continuous(labels = percent) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 10),
    axis.line = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(size = 12)
  )

# Build the plot once and reuse the computed data of the dot layer; the
# midpoint of xmin and xmax gives the position of each dodged dot on the
# (pre-flip) x axis.
dots_layer <- ggplot_build(p)$data[[1]]
dots_xaxis <- (dots_layer["xmin"] + dots_layer["xmax"]) / 2

# Thin dashed guide line through every dot. geom_vline() draws at x positions,
# which appear as horizontal lines after coord_flip(). `linewidth` is used
# because `size` is deprecated for lines since ggplot2 3.4.0.
p + geom_vline(
  xintercept = as.numeric(dots_xaxis[, 1]),
  linewidth = 0.1, alpha = 0.5, linetype = 2
)