Themes, axes, annotations

Lecture 8

Dr. Greg Chism

University of Arizona
INFO 526 - Summer 2024


# load packages

# set theme for ggplot2
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# set width of code output
options(width = 65)

# set figure parameters for knitr
# These options are arguments to knitr::opts_chunk$set(); as bare
# `name = value` lines they neither parse nor take effect.
knitr::opts_chunk$set(
  fig.width = 7, # 7" width
  fig.asp = 0.618, # the golden ratio
  fig.retina = 3, # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300 # higher dpi, sharper image
)


Complete themes

# Base plot reused by the theme examples that follow.
# A trailing `+` must be followed by a layer; without one the assignment
# swallows the next expression (which itself refers to `p`).
# NOTE(review): a point layer is assumed as the missing layer -- confirm
# against the original slides.
p <- ggplot(penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point()

# The same plot under three complete themes, each titled with its name.
p + theme_gray() + labs(title = "Gray")
p + theme_void() + labs(title = "Void")
p + theme_dark() + labs(title = "Dark")

Themes from ggthemes


# Base plot `p` under three further complete themes, one plot per theme.
# (The theme_* functions presumably come from ggthemes, per the slide title.)
p +
  theme_fivethirtyeight() +
  labs(title = "FiveThirtyEight")

p +
  theme_economist() +
  labs(title = "Economist")

p +
  theme_wsj() +
  labs(title = "Wall Street Journal")

Themes and color scales from ggthemes

# Map colour to species, then pair the WSJ colour scale with the WSJ theme.
p + aes(color = species) +
  scale_color_wsj() + theme_wsj() +
  labs(title = "Wall Street Journal")

UArizona theme!


This package is a work in progress. Feedback and issues welcome!

Modifying theme elements

# Override individual theme elements on top of the base plot `p`.
# The element settings are arguments to theme(); the wrapping call was
# missing, leaving bare `name = value` lines that do not parse.
p +
  labs(title = "Palmer penguins") +
  theme(
    plot.title = element_text(
      color = "red", face = "bold", family = "Comic Sans MS"
    ),
    plot.background = element_rect(color = "red", fill = "mistyrose")
  )


Axis breaks

How can the following figure be improved with custom breaks in axes, if at all?

Context matters

# Place an x-axis break every second year from 2000 through 2022.
pac_plot + scale_x_continuous(
  breaks = seq(2000, 2022, by = 2)
)

Conciseness matters

# Sparser x-axis: one break every four years from 2000 through 2022.
pac_plot + scale_x_continuous(
  breaks = seq(from = 2000, to = 2022, by = 4)
)

Precision matters

# Four-year breaks again, with the x-axis explicitly labelled.
pac_plot +
  scale_x_continuous(
    breaks = seq(from = 2000, to = 2022, by = 4)
  ) +
  labs(x = "Election year")


Why annotate?

Video recap

Can be useful when individual observations are identifiable, but can also get overwhelming…

# Population (2010) against homeownership, one mark per row of state_stats.
# The first plot ended on a dangling `+`, which chained it onto the next
# ggplot() call instead of producing a plot of its own.
# NOTE(review): a point layer is assumed as the missing layer -- confirm.
ggplot(state_stats, aes(x = homeownership, y = pop2010)) +
  geom_point()

# Same axes, with each observation drawn as its `abbr` text label.
ggplot(state_stats, aes(x = homeownership, y = pop2010)) +
  geom_text(aes(label = abbr))

Revisit Tucson AQI

All of the data doesn’t tell a story

Highlighting in ggplot2

We have (at least) two options:

  1. Native ggplot2 – use layers

  2. gghighlight:

Data: SF AQI

# List every file under data/san-francisco (path resolved with here::here()).
sf_files <- fs::dir_ls(here::here("data/san-francisco"))

# Read the files, treating "." and "" as missing, then in one pipeline:
# clean the column names, convert `date` with mdy(), sort by date, and keep
# only the date and AQI columns.
sf <- read_csv(sf_files, na = c(".", "")) |>
  janitor::clean_names() |>
  mutate(date = mdy(date)) |>
  arrange(date) |>
  select(date, aqi_value)

# A tibble: 2,557 × 2
   date       aqi_value
   <date>         <dbl>
 1 2016-01-01        32
 2 2016-01-02        37
 3 2016-01-03        45
 4 2016-01-04        33
 5 2016-01-05        27
 6 2016-01-06        39
 7 2016-01-07        39
 8 2016-01-08        31
 9 2016-01-09        20
10 2016-01-10        20
# ℹ 2,547 more rows

Data prep

# Add the calendar year and the day-of-year of each observation.
# The two `name = value` lines are arguments to mutate(); the wrapping call
# was missing, so the pipe had no right-hand side and the code did not parse.
sf <- sf |>
  mutate(
    year = year(date),
    day_of_year = yday(date)
  )

# check
sf |>
  filter(day_of_year < 3)
# A tibble: 14 × 4
   date       aqi_value  year day_of_year
   <date>         <dbl> <dbl>       <dbl>
 1 2016-01-01        32  2016           1
 2 2016-01-02        37  2016           2
 3 2017-01-01        55  2017           1
 4 2017-01-02        36  2017           2
 5 2018-01-01        87  2018           1
 6 2018-01-02        95  2018           2
 7 2019-01-01        33  2019           1
 8 2019-01-02        50  2019           2
 9 2020-01-01        53  2020           1
10 2020-01-02        43  2020           2
11 2021-01-01        79  2021           1
12 2021-01-02        57  2021           2
13 2022-01-01        53  2022           1
14 2022-01-02        55  2022           2

Plot AQI over years

# One line per year (via `group = year`), all in the default colour.
# The dangling `+` needs a layer after it.
# NOTE(review): geom_line() is assumed, matching the later "Highlight"
# plots -- confirm.
ggplot(sf, aes(x = day_of_year, y = aqi_value, group = year)) +
  geom_line()

Plot AQI over years

# One line per year, coloured by `year` mapped as-is (not as a factor).
# The dangling `+` needs a layer after it.
# NOTE(review): geom_line() is assumed, matching the later "Highlight"
# plots -- confirm.
ggplot(sf, aes(x = day_of_year, y = aqi_value, group = year, color = year)) +
  geom_line()

Plot AQI over years

# One line per year, coloured by `year` converted to a factor.
# The dangling `+` needs a layer after it.
# NOTE(review): geom_line() is assumed, matching the later "Highlight"
# plots -- confirm.
ggplot(
  sf,
  aes(x = day_of_year, y = aqi_value, group = year, color = factor(year))
) +
  geom_line()

Highlight 2016

# Draw every year in gray, then overdraw 2016 in red on top.
# The title/subtitle/axis settings are arguments to labs(); the wrapping call
# was missing, so the trailing `+` was followed by bare `name = value` lines.
ggplot(sf, aes(x = day_of_year, y = aqi_value, group = year)) +
  geom_line(color = "gray") +
  geom_line(data = sf |> filter(year == 2016), color = "red") +
  labs(
    title = "AQI levels in SF in 2016",
    subtitle = "Versus all years 2016 - 2022",
    x = "Day of year", y = "AQI value"
  )

Highlight 2017

# Draw every year in gray, then overdraw 2017 in red on top.
# The title/subtitle/axis settings are arguments to labs(); the wrapping call
# was missing, so the trailing `+` was followed by bare `name = value` lines.
ggplot(sf, aes(x = day_of_year, y = aqi_value, group = year)) +
  geom_line(color = "gray") +
  geom_line(data = sf |> filter(year == 2017), color = "red") +
  labs(
    title = "AQI levels in SF in 2017",
    subtitle = "Versus all years 2016 - 2022",
    x = "Day of year", y = "AQI value"
  )

Highlight 2018

# Draw every year in gray, then overdraw 2018 in red on top.
# The title/subtitle/axis settings are arguments to labs(); the wrapping call
# was missing, so the trailing `+` was followed by bare `name = value` lines.
ggplot(sf, aes(x = day_of_year, y = aqi_value, group = year)) +
  geom_line(color = "gray") +
  geom_line(data = sf |> filter(year == 2018), color = "red") +
  labs(
    title = "AQI levels in SF in 2018",
    subtitle = "Versus all years 2016 - 2022",
    x = "Day of year", y = "AQI value"
  )

Highlight any year

# Year to emphasise; used by both the filter and the plot title below.
year_to_highlight <- 2018

# Draw every year in gray, then overdraw the chosen year in red on top.
# The title/subtitle/axis settings are arguments to labs(); the wrapping call
# was missing, so the trailing `+` was followed by bare `name = value` lines.
ggplot(sf, aes(x = day_of_year, y = aqi_value, group = year)) +
  geom_line(color = "gray") +
  geom_line(data = sf |> filter(year == year_to_highlight), color = "red") +
  labs(
    title = glue("AQI levels in SF in {year_to_highlight}"),
    subtitle = "Versus all years 2016 - 2022",
    x = "Day of year", y = "AQI value"
  )

Highlight with gghighlight

# Year to emphasise; used by both the highlight predicate and the plot title.
year_to_highlight <- 2018

# Same highlight plot, using gghighlight() instead of a second geom_line().
# Fixes relative to the extracted snippet:
#   * restored the labs( wrapper (its closing `) +` had survived),
#   * the predicate now uses `year_to_highlight` rather than a hard-coded
#     2018, so the highlighted line always matches the title,
#   * ggplot() is indented as a continuation of the pipe.
sf |>
  ungroup() |>
  ggplot(aes(x = day_of_year, y = aqi_value, group = year)) +
  geom_line(color = "red") +
  gghighlight(year == year_to_highlight, use_direct_label = FALSE) +
  labs(
    title = glue("AQI levels in SF in {year_to_highlight}"),
    subtitle = "Versus all years 2016 - 2022",
    x = "Day of year", y = "AQI value"
  ) +
  theme(legend.position = "none")  # Hide the legend