Superman Data: Actors, Box Office, and Reviews

This post documents the process of collecting and combining data about Superman actors, movie reviews, and box office performance from multiple sources.

library(dplyr)
library(haven)
library(labelled)
library(tinytable)
library(readr)
library(here)
library(stringr)
library(lubridate)
library(readxl)
library(ggplot2)
library(scales)

Part 1: Superman Actor Data

First, we compile data about the actors who have played Superman and Lois Lane across different films, serials, and TV shows.

Load and Process Actor Data

# Read the hand-compiled actor sheet from the Excel workbook.
superman_df <- read_excel("superman_raw.xlsx", sheet = "superman")

# Derive each actor's age at release, then drop the raw date columns so
# only the analysis variables remain.
superman_actors <- superman_df |>
  mutate(
    clark_birth = ymd(clark_birth),
    lois_birth = ymd(lois_birth),
    release_date = ymd(release_date),
    # Age is the (fractional) number of years between birth and release.
    clark_age = time_length(interval(clark_birth, release_date), "years"),
    lois_age = time_length(interval(lois_birth, release_date), "years")
  ) |>
  select(-release_date, -clark_birth, -lois_birth)

# Positions of the numeric columns, used to limit number formatting below.
# vapply() always returns a logical vector, whereas sapply() returns list()
# for a zero-column input and would make which() fail.
numeric_cols <- which(vapply(superman_actors, is.numeric, logical(1)))

superman_actors |>
  tt(caption = "Superman and Lois Lane Actors") |>
  format_tt(j = numeric_cols, digits = 2) |>
  style_tt(bootstrap_class = "table table-striped table-hover")

Superman and Lois Lane Actors
type	title	year	clark_actor	clark_height	lois_actor	lois_height	clark_age	lois_age
Film	Superman	2025	David Corenswet	1.9	Rachel Brosnahan	1.6	32	35
Film	Superman: The Movie	1978	Christopher Reeve	1.9	Margot Kidder	1.7	26	30
TV Show	Smallville	2001	Tom Welling	1.9	Erica Durance	1.7	24	23
Film	Superman Returns	2006	Brandon Routh	1.9	Kate Bosworth	1.6	27	23
Film	Superman & the Mole Men	1951	George Reeves	1.9	Phyllis Coates	1.6	38	25
Film	Man of Steel	2013	Henry Cavill	1.9	Amy Adams	1.6	30	39
Serial	Superman	1948	Kirk Alyn	1.9	Noel Neill	1.6	37	27
TV Show	Superman & Lois	2021	Tyler Hoechlin	1.8	Elizabeth Tulloch	1.7	33	40
TV Show	Lois & Clark: The New Adventures of Superman	1993	Dean Cain	1.8	Teri Hatcher	1.7	27	29
TV Show	The Adventures of Superboy	1988	John Haymes Newton	1.8	NA	NA	23	NA
TV Show	The Adventures of Superboy	1989	Gerard Christopher	1.8	NA	NA	31	NA

Create SPSS Version with Labels

For use in statistics classes, we create a properly labeled SPSS file:

# Replace missing values with the -99 sentinel used throughout the SPSS
# exports (numeric -99, character "-99").
superman_data <- superman_actors |>
  mutate(
    across(where(is.numeric), ~ ifelse(is.na(.), -99, .)),
    across(where(is.character), ~ ifelse(is.na(.), "-99", .))
  )

# Create value labels for categorical variables.
# Codes are assigned 1..k in order of first appearance.
type_values <- unique(superman_data$type)
type_labels <- setNames(seq_along(type_values), type_values)

title_values <- unique(superman_data$title)
title_labels <- setNames(seq_along(title_values), title_values)

# Missing actors were already recoded to "-99" above, so an is.na() filter
# would never remove them. Drop the sentinel explicitly; otherwise "missing"
# becomes an ordinary category with its own positive code.
actor_values <- setdiff(unique(superman_data$clark_actor), "-99")
actor_labels <- setNames(seq_along(actor_values), actor_values)

lois_actor_values <- setdiff(unique(superman_data$lois_actor), "-99")
lois_actor_labels <- setNames(seq_along(lois_actor_values), lois_actor_values)

var_labels <- c(
  type = "Media Type",
  title = "Title of Superman Media",
  year = "Year of first superman media appearance",
  clark_actor = "Name of actor playing Superman/Clark Kent",
  clark_height = "Height of Clark Kent/Superman actor (meters)",
  lois_actor = "Name of actress playing Lois Lane",
  lois_height = "Height of Lois Lane actress (meters)",
  clark_age = "Age of Clark Kent/Superman actor at Release Date",
  lois_age = "Age of Lois Lane actress at Release Date"
)

superman_labelled <- superman_data |>
  mutate(
    type = as.numeric(factor(type, levels = names(type_labels))),
    title = as.numeric(factor(title, levels = names(title_labels))),
    # Values outside the label set (i.e. the "-99" sentinel) become NA in the
    # factor; map them back to the numeric missing code.
    clark_actor = coalesce(
      as.numeric(factor(clark_actor, levels = names(actor_labels))),
      -99
    ),
    lois_actor = coalesce(
      as.numeric(factor(lois_actor, levels = names(lois_actor_labels))),
      -99
    )
  ) |>
  set_variable_labels(!!!var_labels) |>
  set_value_labels(
    type = type_labels,
    title = title_labels,
    clark_actor = actor_labels,
    lois_actor = lois_actor_labels
  ) |>
  select(
    year, title, type,
    clark_actor, clark_height, clark_age,
    lois_actor, lois_height, lois_age
  )

# Set SPSS attributes
# haven takes a column's SPSS display format from the "format.spss"
# attribute when writing; an attribute called "spss_format" is not consulted,
# so the formats would otherwise never reach the .sav file.
# NOTE(review): haven does not appear to read "spss_measure" either; it is
# kept as plain metadata so existing consumers of the object still see it.
for (col in names(superman_labelled)) {
  if (col %in% c("type", "title", "clark_actor", "lois_actor")) {
    attr(superman_labelled[[col]], "spss_measure") <- "nominal"
    attr(superman_labelled[[col]], "format.spss") <- "F8.0"
  } else if (col %in% c("year")) {
    attr(superman_labelled[[col]], "spss_measure") <- "scale"
    attr(superman_labelled[[col]], "format.spss") <- "F4.0"
  } else if (col %in% c("clark_height", "lois_height", "clark_age", "lois_age")) {
    attr(superman_labelled[[col]], "spss_measure") <- "scale"
    # Width 6 leaves room for the -99 missing-value sentinel ("-99.00").
    attr(superman_labelled[[col]], "format.spss") <- "F6.2"
  }
}

# Dataset-level label.
attr(superman_labelled, "label") <- "Superman Data"

# The .sav file carries the coded/labelled version; the .rds keeps the
# unlabelled analysis table with real NAs.
write_sav(superman_labelled, "superman.sav")
saveRDS(superman_actors, "superman.rds")

Part 2: Rotten Tomatoes Data

We scrape critic and audience scores from Rotten Tomatoes for Superman movies and TV shows.

Scraping Function

# Scrape the headline review figures from one Rotten Tomatoes page.
#
# Arguments:
#   x    Page URL (character scalar). A URL containing "/tv/" is reported
#        as type "TV", anything else as "Movie".
#   ...  Currently unused; kept so existing calls that pass extra
#        arguments continue to work.
#
# Returns a one-row tibble with columns title, critics_score,
# critics_status, critics_count, audience_score, audience_count, synopsis,
# poster_url, url and type. Figures that cannot be found are NA.
scrape_movie <- function(x, ...) {
  browser_agent <- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

  # read_html() has no `user_agent` argument; passing one is silently
  # swallowed by `...`. Base R's HTTP connections take their user agent from
  # the HTTPUserAgent option, so set it for the duration of this call only.
  # NOTE(review): the curl package presumably honours the same option when
  # xml2 uses it as transport - confirm.
  old_opts <- options(HTTPUserAgent = browser_agent)
  on.exit(options(old_opts), add = TRUE)

  movie_page <- rvest::read_html(x)

  is_tv <- stringr::str_detect(x, "/tv/")

  # Pull the first number out of the first match of `pattern`, tolerating
  # thousands separators ("1,234" -> 1234). Yields NA when nothing matches.
  first_number <- function(text, pattern) {
    text |>
      stringr::str_extract(pattern) |>
      stringr::str_extract("\\d[\\d,]*") |>
      stringr::str_replace_all(",", "") |>
      as.numeric()
  }

  title <- movie_page |>
    rvest::html_element("title") |>
    rvest::html_text() |>
    stringr::str_replace(" \\| Rotten Tomatoes$", "") |>
    stringr::str_trim()

  # All scores are located by pattern-matching the page's plain text.
  page_text <- rvest::html_text(movie_page)

  critics_score <- first_number(
    page_text, "(\\d+)%\\s*(Avg\\.\\s*)?Tomatometer"
  )

  # Accept "1,234 Reviews" as well as "234 Reviews"; a digits-only pattern
  # would keep just the part after the last comma.
  critics_count <- first_number(page_text, "(\\d[\\d,]*)\\s*Reviews")

  # Status is derived, not scraped: "Certified Fresh" if that phrase occurs
  # anywhere on the page, otherwise Fresh/Rotten by the 60% threshold.
  critics_status <- NA_character_
  if (!is.na(critics_score)) {
    certified <- stringr::str_detect(
      page_text,
      stringr::regex("Certified Fresh", ignore_case = TRUE)
    )
    if (certified) {
      critics_status <- "Certified Fresh"
    } else if (critics_score >= 60) {
      critics_status <- "Fresh"
    } else {
      critics_status <- "Rotten"
    }
  }

  audience_score <- first_number(
    page_text, "(\\d+)%\\s*(Avg\\.\\s*)?Popcornmeter"
  )

  audience_count <- first_number(
    page_text, "([\\d,]+)\\+?\\s*(Verified\\s*)?Ratings"
  )

  synopsis <- movie_page |>
    rvest::html_element('meta[name="description"]') |>
    rvest::html_attr("content")

  poster_url <- movie_page |>
    rvest::html_element('meta[property="og:image"]') |>
    rvest::html_attr("content")

  # Every value above is already NA (never NULL) when absent, so no
  # null-coalescing is needed here.
  dplyr::tibble(
    title = title,
    critics_score = critics_score,
    critics_status = critics_status,
    critics_count = critics_count,
    audience_score = audience_score,
    audience_count = audience_count,
    synopsis = synopsis,
    poster_url = poster_url,
    url = x,
    type = if (is_tv) "TV" else "Movie"
  )
}

Scrape Multiple Movies

# Scrape all Superman movies and shows
# Each scrape_movie() call fetches one Rotten Tomatoes page and returns a
# one-row tibble.
mm_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_and_the_mole_men")
# Rewrite "and" as "&" so the title reads "Superman & the Mole Men", the
# spelling used in the labelled title list for the SPSS export.
mm_rt$title <- str_replace(str_trim(mm_rt$title), "Superman and the Mole Men", "Superman & the Mole Men")

stm_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_the_movie")
s2_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_ii")
s3_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_iii")
s4_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_iv_the_quest_for_peace")
sr_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_returns")
mos_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_man_of_steel")
sm_25 <- scrape_movie("https://www.rottentomatoes.com/m/superman_2025")
lc_rt <- scrape_movie("https://www.rottentomatoes.com/tv/lois_clark_the_new_adventures_of_superman")
sb_rt <- scrape_movie("https://www.rottentomatoes.com/tv/the_adventures_of_superboy")
sl_rt <- scrape_movie("https://www.rottentomatoes.com/tv/superman_and_lois")
sm_rt <- scrape_movie("https://www.rottentomatoes.com/tv/smallville")

# Stack the one-row results into a single table.
# NOTE(review): s2_rt, s3_rt and s4_rt are scraped above but not included
# here - confirm the sequels are left out on purpose.
rt <- bind_rows(mm_rt, sm_25, stm_rt, lc_rt, sr_rt, sm_rt, mos_rt, sl_rt, sb_rt)

# Create display table
# Turn the scraped values into display strings: posters become <img> tags,
# scores get a "%" suffix, and anything missing is shown as "--".
rt_display <- rt |>
  mutate(
    poster = case_when(
      is.na(poster_url) | poster_url == "Poster URL not available" ~ "--",
      TRUE ~ paste0('<img src="', poster_url, '" height="70">')
    ),
    critics_score = if_else(is.na(critics_score), "--", paste0(critics_score, "%")),
    audience_score = if_else(is.na(audience_score), "--", paste0(audience_score, "%")),
    # format(scientific = FALSE) instead of as.character(): the latter prints
    # round counts such as 100000 as "1e+05".
    critics_count = if_else(
      is.na(critics_count),
      "--",
      format(critics_count, scientific = FALSE, trim = TRUE)
    ),
    audience_count = if_else(
      is.na(audience_count),
      "--",
      format(audience_count, scientific = FALSE, trim = TRUE)
    ),
    critics_status = if_else(is.na(critics_status), "--", critics_status)
  ) |>
  select(poster, title, critics_score, critics_status, critics_count,
         audience_score, audience_count)

# Render with reader-friendly headers; escape = FALSE lets the poster <img>
# tags through as HTML.
rt_display |>
  rename(
    Poster = poster,
    Title = title,
    `Critics Score` = critics_score,
    `Critics Status` = critics_status,
    `# Critics Reviews` = critics_count,
    `Audience Score` = audience_score,
    `# Audience Ratings` = audience_count
  ) |>
  tt() |>
  format_tt(escape = FALSE)

Create SPSS Version of RT Data

# Numeric code for each title in the Rotten Tomatoes export.
title_labels <- c(
  "Superman (2025)" = 1,
  "Superman & the Mole Men" = 5,
  "Superman: The Movie" = 2,
  "Lois & Clark: The New Adventures of Superman" = 8,
  "Superman Returns" = 4,
  "Smallville" = 3,
  "Man of Steel" = 6,
  "Superman & Lois" = 7,
  "The Adventures of Superboy" = 9
)

# Swap NAs for the export's missing-value sentinel (-99 / "-99").
rt_data <- mutate(
  rt,
  across(where(is.numeric), ~ replace(.x, is.na(.x), -99)),
  across(where(is.character), ~ replace(.x, is.na(.x), "-99"))
)

# Variable (column) labels.
var_labels <- list(
  title = "Title of Superman movie/TV show",
  critics_score = "Percentage of positive critic reviews (0-100)",
  critics_status = "Critic consensus: Fresh/Rotten/Certified Fresh",
  critics_count = "Number of critic reviews collected",
  audience_score = "Percentage of positive audience ratings (0-100)",
  audience_count = "Number of audience ratings collected",
  synopsis = "Brief description of the movie/TV show",
  poster_url = "URL of the poster image",
  url = "Rotten Tomatoes page URL"
)

# Value labels for the two coded columns.
value_labels <- list(
  title = title_labels,
  critics_status = c("Fresh" = 1, "Rotten" = 2, "Certified Fresh" = 3)
)

# Recode the two text columns by name lookup; a name that is not in the
# lookup yields NA, which is then mapped to -99.
rt_coded <- mutate(
  rt_data,
  title = coalesce(unname(title_labels[title]), -99),
  critics_status = coalesce(
    unname(value_labels$critics_status[critics_status]),
    -99
  )
)

rt_labelled <- rt_coded |>
  labelled::set_variable_labels(.labels = var_labels) |>
  labelled::set_value_labels(.labels = value_labels) |>
  select(
    title,
    critics_score, critics_status, critics_count,
    audience_score, audience_count,
    synopsis, poster_url, url
  )

write_sav(rt_labelled, "rtomatoes.sav")

Part 3: Box Office Data

We scrape box office performance data from Box Office Mojo.

Find Movie IDs

# Search Box Office Mojo for a title and list the matching results.
#
# Arguments:
#   movie_title  Search text (character scalar).
#
# Returns a data frame with one row per search hit and columns `title`
# (link text), `link` (href) and `movie_id` (the "tt" + digits identifier
# found in the link, NA when the link has none).
find_movie_id <- function(movie_title) {
  # Percent-encode the whole query. Swapping only spaces for "+" leaves
  # characters such as "&" unescaped, which cuts the query short
  # (e.g. "Superman & Lois").
  search_term <- utils::URLencode(movie_title, reserved = TRUE)
  search_url <- paste0("https://www.boxofficemojo.com/search/?q=", search_term)

  search_page <- rvest::read_html(search_url)

  # Result links are identified by this combination of CSS classes.
  search_results <- rvest::html_elements(
    search_page,
    "a.a-size-medium.a-link-normal.a-text-bold"
  )

  result_links <- rvest::html_attr(search_results, "href")

  data.frame(
    title = rvest::html_text(search_results),
    link = result_links,
    movie_id = stringr::str_extract(result_links, "tt[0-9]+"),
    stringsAsFactors = FALSE
  )
}

superman_list <- find_movie_id("Superman")
superman_list |> tt()

Box Office Mojo Scraping Function

# Scrape one Box Office Mojo title page into a one-row data frame.
#
# Arguments:
#   movie_id  Title identifier used in the page URL (e.g. "tt0078346").
#
# Returns a data frame that always has `movie_id` and, when the matching
# page elements exist, `title`, `year` (character), `description`,
# `poster_url`, `poster_url_hires`, plus for each money row of the summary
# table `<category>_gross` (text), `<category>_gross_numeric`, and, when a
# percentage is shown, `<category>_percent` / `<category>_percent_numeric`.
#
# NOTE(review): the cleaning step further down this file reads `budget`,
# `budget_numeric`, `domestic_opening_numeric`, `earliest_release_date` and
# `mpaa`. Nothing in this function creates columns with those names -
# confirm whether a section that parsed them was dropped.
extract_complete_movie_data <- function(movie_id) {
  # A failed element lookup returns an object of class "xml_missing";
  # test for that class directly rather than calling is.na() on a node.
  node_found <- function(node) !inherits(node, "xml_missing")

  url <- paste0("https://www.boxofficemojo.com/title/", movie_id, "/")
  page <- rvest::read_html(url)

  movie_data <- data.frame(movie_id = movie_id, stringsAsFactors = FALSE)

  # Extract movie summary info box
  summary_box <- rvest::html_element(page, ".a-section.mojo-summary")

  if (node_found(summary_box)) {
    title_element <- rvest::html_element(summary_box, "h1.a-size-extra-large")

    if (node_found(title_element)) {
      # The heading reads "Title (YYYY)"; split it into its two parts.
      full_title <- stringr::str_trim(rvest::html_text(title_element))

      movie_data$title <- stringr::str_replace(
        full_title, "\\s*\\(\\d{4}\\)$", ""
      )
      movie_data$year <- full_title |>
        stringr::str_extract("\\(\\d{4}\\)") |>
        stringr::str_replace_all("[\\(\\)]", "")
    }

    description <- summary_box |>
      rvest::html_element("span.a-size-medium") |>
      rvest::html_text() |>
      stringr::str_trim()

    if (!is.na(description)) {
      movie_data$description <- description
    }

    img_element <- rvest::html_element(summary_box, "img")

    if (node_found(img_element)) {
      movie_data$poster_url <- rvest::html_attr(img_element, "src")
      img_hires <- rvest::html_attr(img_element, "data-a-hires")
      if (!is.na(img_hires)) {
        movie_data$poster_url_hires <- img_hires
      }
    }
  }

  # Extract box office summary table data
  summary_section <- rvest::html_element(
    page, ".a-section.a-spacing-none.mojo-summary-table"
  )

  if (node_found(summary_section)) {
    data_sections <- rvest::html_elements(
      summary_section, ".a-section.a-spacing-none"
    )

    for (section in data_sections) {
      # Skip sections that carry no small-print category label.
      label_node <- xml2::xml_find_first(
        section, ".//span[@class='a-size-small']"
      )
      if (!node_found(label_node)) {
        next
      }

      # Category name with any parenthesised qualifier removed.
      category <- section |>
        rvest::html_element(".a-size-small") |>
        rvest::html_text() |>
        stringr::str_trim() |>
        stringr::str_replace_all("\\s*\\([^)]*\\)\\s*", "") |>
        stringr::str_trim()

      money_node <- rvest::html_element(section, "span.money")
      money_value <- if (node_found(money_node)) {
        rvest::html_text(money_node)
      } else {
        NA
      }

      percent_node <- rvest::html_element(section, "span.percent")
      percent <- if (node_found(percent_node)) {
        rvest::html_text(percent_node)
      } else {
        NA
      }

      # Only rows that show a money amount produce columns.
      if (!is.na(money_value)) {
        numeric_money <- as.numeric(gsub("[$,]", "", money_value))

        # Build a snake_case column prefix from the category text.
        category_clean <- tolower(
          stringr::str_replace_all(category, "[^[:alnum:]]", "_")
        )
        category_clean <- stringr::str_replace_all(category_clean, "_+", "_")
        category_clean <- stringr::str_remove(category_clean, "_$")

        movie_data[[paste0(category_clean, "_gross")]] <- money_value
        movie_data[[paste0(category_clean, "_gross_numeric")]] <- numeric_money

        if (!is.na(percent)) {
          movie_data[[paste0(category_clean, "_percent")]] <- percent
          movie_data[[paste0(category_clean, "_percent_numeric")]] <-
            as.numeric(gsub("[%]", "", percent))
        }
      }
    }
  }

  movie_data
}

Extract Multiple Movies

# Scrape several Box Office Mojo title pages and stack the results.
#
# Arguments:
#   movie_ids  Character vector of title identifiers.
#
# Returns one data frame with a row per successfully scraped title, or
# NULL when no title could be scraped. A title that raises an error is
# reported on the console and skipped. Columns that only some titles have
# are filled with NA for the others.
extract_multiple_movies <- function(movie_ids) {
  # Collect per-title results in a preallocated list and bind once at the
  # end, rather than re-binding a growing data frame on every iteration.
  results <- vector("list", length(movie_ids))

  for (i in seq_along(movie_ids)) {
    id <- movie_ids[[i]]
    cat("Processing movie ID:", id, "\n")

    movie_data <- tryCatch(
      extract_complete_movie_data(id),
      error = function(e) {
        cat("Error processing movie ID:", id, "- Error:", e$message, "\n")
        NULL
      }
    )

    # Assigning NULL with [[<- would delete the slot and shift the indices,
    # so only store real results.
    if (!is.null(movie_data)) {
      results[[i]] <- movie_data
    }

    Sys.sleep(2)  # Be polite to the server
  }

  results <- Filter(Negate(is.null), results)
  if (length(results) == 0) {
    return(NULL)
  }

  # bind_rows() matches columns by name and fills absent ones with NA, so
  # no manual column alignment is needed.
  bind_rows(results)
}

# Title identifiers (IMDB / Box Office Mojo) of the Superman films to scrape,
# in the order they will be requested.
movie_ids <- c(
  "tt5950044", "tt0078346", "tt0770828", "tt0348150",
  "tt0081573", "tt0086393", "tt0094074", "tt2975590"
)

# One row per title that could be scraped.
all_movies <- extract_multiple_movies(movie_ids = movie_ids)

# Show poster, title, year, worldwide gross and budget for each film.
# Rows without a poster URL get an empty cell instead of an <img> tag.
movies_with_poster <- mutate(
  all_movies,
  poster = if_else(
    is.na(poster_url),
    "",
    paste0('<img src="', poster_url, '" height="80">')
  )
)

movies_with_poster |>
  select(poster, title, year, worldwide_gross, budget) |>
  tt() |>
  # escape = FALSE so the <img> tags are rendered as HTML.
  format_tt(escape = FALSE) |>
  style_tt(bootstrap_class = "table table-striped")

Clean Box Office Data

# Tidy the scraped box office table: coerce types, parse the release date,
# and derive era flags plus the Superman actor implied by the release year.
clean_boxoffice_df <- all_movies |>
  mutate(
    year = as.numeric(year),
    box_office_numeric = worldwide_gross_numeric,
    budget_numeric = as.numeric(budget_numeric),
    opening = as.numeric(domestic_opening_numeric),
    domestic = as.numeric(domestic_gross_numeric),
    percent = as.numeric(domestic_percent_numeric),

    title = str_trim(title),

    # Keep only the "Month D, YYYY" part of the release-date text.
    release_date = str_extract(earliest_release_date, "\\w+ \\d+, \\d{4}"),
    release_date = as.Date(release_date, format = "%B %d, %Y"),

    decade = paste0(floor(year / 10) * 10, "s"),

    mpaa_rating = str_trim(mpaa),
    mpaa_rating = if_else(is.na(mpaa_rating), "Unrated", mpaa_rating),

    # Era flags are defined purely by release year.
    is_original_series = year >= 1978 & year <= 1987,
    is_modern_era = year == 2006,
    is_dceu = year >= 2010 & year <= 2024,
    is_dcu = year >= 2025,

    # Years outside every era leave clark_actor as NA.
    clark_actor = case_when(
      is_original_series ~ "Christopher Reeve",
      is_modern_era ~ "Brandon Routh",
      is_dceu ~ "Henry Cavill",
      is_dcu ~ "David Corenswet"
    )
  ) |>
  select(
    movie_id, title, year, release_date, decade, mpaa_rating,
    budget_numeric, box_office_numeric, opening, domestic, percent,
    is_original_series, is_modern_era, is_dceu, is_dcu, clark_actor,
    poster_url_hires
  )

# `boxoffice_labelled` was written to disk without ever being created.
# Build it here by attaching variable labels to the cleaned table.
boxoffice_labelled <- clean_boxoffice_df |>
  labelled::set_variable_labels(
    .labels = list(
      movie_id = "Box Office Mojo / IMDB title identifier",
      title = "Movie title",
      year = "Release year",
      release_date = "Earliest release date",
      decade = "Decade of release",
      mpaa_rating = "MPAA rating (Unrated if not listed)",
      budget_numeric = "Budget",
      box_office_numeric = "Worldwide gross",
      opening = "Domestic opening",
      domestic = "Domestic gross",
      percent = "Domestic percent",
      is_original_series = "Released 1978-1987",
      is_modern_era = "Released 2006",
      is_dceu = "Released 2010-2024",
      is_dcu = "Released 2025 or later",
      clark_actor = "Superman actor implied by release year",
      poster_url_hires = "URL of the high-resolution poster image"
    )
  )

write.csv(clean_boxoffice_df, "boxoffice_raw.csv", row.names = FALSE)
write_sav(boxoffice_labelled, "boxoffice.sav")

Part 4: Letterboxd Reviews

We also scrape user reviews from Letterboxd for sentiment analysis.

Letterboxd Scraping Function

# Source the letterboxd scraping functions
source("letterbox.R")

# Error-tolerant wrapper around scrape_movie_reviews().
#
# Arguments are forwarded unchanged (always with file = FALSE). Returns the
# scraped reviews, or NULL - after emitting a message - when the scrape
# fails or yields no rows.
safe_scrape <- function(movie_slug, num_pages = 2, random_pages = TRUE, max_page = 5) {
  tryCatch(
    {
      reviews <- scrape_movie_reviews(
        movie_slug,
        num_pages = num_pages,
        random_pages = random_pages,
        max_page = max_page,
        file = FALSE
      )

      has_rows <- !is.null(reviews) && nrow(reviews) != 0
      if (has_rows) {
        reviews
      } else {
        message("No data returned for: ", movie_slug)
        NULL
      }
    },
    error = function(e) {
      message("Error scraping ", movie_slug, ": ", e$message)
      NULL
    }
  )
}

# Fetch reviews film by film. The call order is kept as-is; each result is
# NULL when its scrape failed or came back empty.
superman_1948 <- safe_scrape(movie_slug = "superman-1948", max_page = 5)
atomman <- safe_scrape(movie_slug = "atom-man-vs-superman", max_page = 10)
moleman <- safe_scrape(movie_slug = "superman-and-the-mole-men", max_page = 10)
superman2025 <- safe_scrape(movie_slug = "superman-2025", max_page = 50)
superman1 <- safe_scrape(movie_slug = "superman", max_page = 50)
superman2 <- safe_scrape(movie_slug = "superman-ii", max_page = 50)
superman3 <- safe_scrape(movie_slug = "superman-iii", max_page = 50)
superman4 <- safe_scrape(movie_slug = "superman-iv-the-quest-for-peace", max_page = 50)
superman_returns <- safe_scrape(movie_slug = "superman-returns", max_page = 50)
man_of_steel <- safe_scrape(movie_slug = "man-of-steel", max_page = 50)

# Results that go into the combined review table.
# NOTE(review): superman2, superman3 and superman4 are scraped above but
# not listed here - confirm that is intended.
all_scrapes <- list(
  superman_1948, atomman, moleman, superman1,
  superman_returns, man_of_steel, superman2025
)

# Keep only the scrapes that returned something.
valid_scrapes <- all_scrapes[!vapply(all_scrapes, is.null, logical(1))]

# `letterboxd` is only created when at least one scrape succeeded.
if (length(valid_scrapes) > 0) {
  letterboxd <- rename(bind_rows(valid_scrapes), title = movie_title)

  message("Successfully scraped ", nrow(letterboxd), " reviews")
}

Part 5: Combined Dataset

Finally, we combine all data sources into a single comprehensive dataset.

Combine All Sources

# Reload the three intermediate files written by the earlier sections.
superman_spss <- read_sav("superman.sav")
rt_labelled <- read_sav("rtomatoes.sav")
boxoffice_df <- read_csv("boxoffice_raw.csv")

# Join actor data with RT data
# Both files store `title` as a numeric code, so the join is on that code.
dat <- superman_spss |>
  full_join(rt_labelled, by = "title")

# Rows present in only one source have NAs after the full join; recode them
# to the -99 / "-99" sentinel used in the SPSS exports.
superman_rt <- dat |>
  mutate(
    across(where(is.numeric), ~if_else(is.na(.), -99, .)),
    across(where(is.character), ~if_else(is.na(.), "-99", .))
  )

# Join with box office data
# An actor can match several box office rows (one per film). Prefer the
# box office title and year (`.y`) so each film keeps its own identity and
# stays consistent with its release_date and decade; fall back to the actor
# table (`.x`) for rows that have no box office match.
superman_boxoffice <- superman_actors |>
  full_join(boxoffice_df, by = "clark_actor") |>
  mutate(
    title = coalesce(as.character(title.y), as.character(title.x)),
    year = coalesce(as.numeric(year.y), as.numeric(year.x))
  ) |>
  select(-ends_with(".x"), -ends_with(".y"))

# Save combined files
write_sav(superman_rt, "superman_rt.sav")
saveRDS(superman_rt, "superman_rt.rds")
write_sav(superman_boxoffice, "superman_complete.sav")

Part 6: Visualizations

Superman Actor Heights Over Time

# Height of each Superman actor against the year of the production, with the
# actor's name printed next to the point.
ggplot(superman_actors |> filter(!is.na(clark_height)),
       aes(x = year, y = clark_height)) +
  geom_point(aes(color = type), size = 5, alpha = 0.8) +
  geom_text(
    aes(label = clark_actor, color = type),
    hjust = -0.1,
    vjust = 0.5,
    size = 3.5,
    fontface = "bold",
    show.legend = FALSE
  ) +
  # The names must match the values of `type` exactly ("Film", "Serial",
  # "TV Show"); unmatched values get no colour. The names double as legend
  # labels, so no separate `labels` vector is needed.
  scale_color_manual(
    values = c(
      "Film" = "#0073CF",
      "Serial" = "#F5B800",
      "TV Show" = "#E21A22"
    )
  ) +
  labs(
    title = "Superman Actor Heights Over Time",
    subtitle = "Height in meters by year of first appearance",
    x = "Year",
    y = "Height (m)",
    color = "Media Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 18),
    panel.grid.minor = element_blank()
  ) +
  scale_x_continuous(breaks = seq(1950, 2030, by = 10)) +
  scale_y_continuous(limits = c(1.75, 2.05)) +
  # Let name labels run past the panel edge instead of being clipped.
  coord_cartesian(clip = "off")

Height Comparison: Superman vs Lois Lane

# Lois Lane actress height against Superman actor height, one point per
# production that has both values. Points below the dashed line mean the
# Superman actor is taller.
superman_actors |>
  filter(!is.na(lois_height) & !is.na(clark_height)) |>
  ggplot(aes(x = clark_height, y = lois_height)) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", alpha = 0.3) +
  geom_point(aes(color = type), size = 6, alpha = 0.7) +
  geom_text(
    aes(label = title),
    hjust = -0.1,
    vjust = 1.5,
    size = 3
  ) +
  # The names must match the values of `type` exactly ("Film", "Serial",
  # "TV Show"); unmatched values get no colour. The names double as legend
  # labels, so no separate `labels` vector is needed.
  scale_color_manual(
    values = c(
      "Film" = "#0073CF",
      "Serial" = "#F5B800",
      "TV Show" = "#E21A22"
    )
  ) +
  labs(
    title = "Superman vs Lois Lane: Actor Height Comparison",
    subtitle = "Dashed line represents equal height",
    x = "Superman Actor Height (m)",
    y = "Lois Lane Actress Height (m)",
    color = "Media Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 18),
    panel.grid.minor = element_blank()
  )

Age Comparison: Superman vs Lois Lane

# Lois Lane actress age against Superman actor age at release, one point per
# production that has both values. Points above the dashed line mean the
# Lois Lane actress was older.
superman_actors |>
  filter(!is.na(lois_age) & !is.na(clark_age)) |>
  ggplot(aes(x = clark_age, y = lois_age)) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", alpha = 0.3) +
  geom_point(aes(color = type), size = 6, alpha = 0.7) +
  geom_text(
    aes(label = clark_actor),
    hjust = -0.1,
    vjust = 1.5,
    size = 3
  ) +
  # The names must match the values of `type` exactly ("Film", "Serial",
  # "TV Show"); unmatched values get no colour. The names double as legend
  # labels, so no separate `labels` vector is needed.
  scale_color_manual(
    values = c(
      "Film" = "#0073CF",
      "Serial" = "#F5B800",
      "TV Show" = "#E21A22"
    )
  ) +
  labs(
    title = "Superman vs Lois Lane: Actor Age Comparison",
    subtitle = "Age at time of release; dashed line represents equal age",
    x = "Superman Actor Age (years)",
    y = "Lois Lane Actress Age (years)",
    color = "Media Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 18),
    panel.grid.minor = element_blank()
  ) +
  xlim(20, 45) +
  ylim(20, 45)

Reuse

CC BY 4.0