# R code for data visualization in economics, created and maintained by DIME Analytics.
# Install and load packages ---------------
packages <- c(
"tidyverse",
"haven",
"labelled",
"forcats",
"scales"
)
# Change to install = TRUE to install the required packages
pacman::p_load(packages, character.only = TRUE, install = FALSE)
# Load an example dataset ---------------
# Stata (.dta) file read directly from the library's GitHub repository
data <- read_dta(
  "https://github.com/worldbank/r-econ-visual-library/raw/master/Library/Data/ScatterPlotsStratified.dta"
)

# Variable labels flattened into a named vector (looked up by column name below)
data_varlabel <- data %>%
  var_label() %>%
  unlist()

# Value labels of sp_case: label text and the matching underlying values
sp_case_label <- data$sp_case %>%
  val_labels() %>%
  names()
sp_case_val <- data$sp_case %>%
  val_labels() %>%
  as.vector()

# Columns that are averaged by sp_case and shown in the plot
var_list <- c(
  "essential",
  "correct",
  "cxr",
  "sputum",
  "dstgx",
  "s5_referral",
  "sp_drugs_tb",
  "sp_drugs_antibio",
  "sp_drugs_quin"
)
# Collapse data
# Mean of every variable in var_list within each sp_case group (NAs ignored).
# across() replaces the superseded summarise_at(); column names are unchanged.
collapsed_data <- data %>%
  group_by(sp_case) %>%
  summarise(across(all_of(var_list), ~ mean(.x, na.rm = TRUE))) %>%
  ungroup()

# Reshape data
# Long format: one row per (sp_case, variable) pair, with the variable name in
# `key` and its group mean in `value`.
reshaped_data <- collapsed_data %>%
  pivot_longer(all_of(var_list), names_to = "key", values_to = "value") %>%
  mutate(
    # Swap the variable name for its label; levels follow var_list order and
    # are then reversed so the first entry of var_list becomes the last level.
    key = fct_rev(factor(data_varlabel[key], levels = data_varlabel[var_list])),
    # NOTE(review): levels here are presumably the underlying sp_case codes,
    # which the plot matches against sp_case_val -- confirm.
    sp_case = fct_rev(as.factor(sp_case))
  )
# Dodged dot plot: one dot per (variable label, sp_case) mean, binned along the
# value axis; coord_flip() puts the variable labels on the vertical axis.
p <- ggplot(reshaped_data, aes(x = key, y = value, fill = sp_case)) +
  geom_dotplot(
    binaxis = "y", position = "dodge",
    stackdir = "center", dotsize = 0.5, binwidth = 1/30
  ) +
  coord_flip(ylim = c(0, 1)) +
  # Legend entries: map sp_case values to their value-label text
  scale_fill_discrete(breaks = sp_case_val, labels = sp_case_label) +
  scale_y_continuous(labels = percent) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 10),
    axis.line = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title = element_blank(),
    legend.title = element_blank(),
    legend.text = element_text(size = 12)
  )

# Build the plot once and reuse the computed data of the first (dot) layer.
dot_layer <- ggplot_build(p)$data[[1]]

# Midpoint of xmin/xmax for every dot; kept as a one-column data frame.
dots_xaxis <- (dot_layer["xmin"] + dot_layer["xmax"]) / 2

# Thin dashed guide lines at those positions (drawn as vlines, so coord_flip()
# renders them horizontally).
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept so the script still runs on older ggplot2 versions.
p + geom_vline(
  xintercept = as.numeric(dots_xaxis[, 1]),
  size = 0.1, alpha = 0.5, linetype = 2
)