library(dplyr)
library(haven)
library(labelled)
library(tinytable)
library(readr)
library(here)
library(stringr)
library(lubridate)
library(readxl)
library(ggplot2)
library(scales)

This post documents the process of collecting and combining data about Superman actors, movie reviews, and box office performance from multiple sources.
Part 1: Superman Actor Data
First, we compile data about the actors who have played Superman and Lois Lane across different films and TV shows.
Load and Process Actor Data
# Load the raw actor sheet and derive each actor's age at the release date.
superman_df <- read_excel("superman_raw.xlsx", sheet = "superman")

superman_actors <- superman_df |>
  mutate(
    clark_birth = ymd(clark_birth),
    lois_birth = ymd(lois_birth),
    release_date = ymd(release_date),
    # Age in (fractional) years between birth date and release date.
    clark_age = time_length(interval(clark_birth, release_date), "years"),
    lois_age = time_length(interval(lois_birth, release_date), "years")
  ) |>
  # The raw dates were only needed to compute the ages.
  select(-release_date, -clark_birth, -lois_birth)

# Positions of the numeric columns, used to format the display table.
# vapply() always returns a logical vector, even for a zero-column input,
# whereas sapply() would return an empty list that which() rejects.
numeric_cols <- which(vapply(superman_actors, is.numeric, logical(1)))
superman_actors |>
tt(caption = "Superman and Lois Lane Actors") |>
format_tt(j = numeric_cols, digits = 2) |>
style_tt(bootstrap_class = "table table-striped table-hover")

| type | title | year | clark_actor | clark_height | lois_actor | lois_height | clark_age | lois_age |
|---|---|---|---|---|---|---|---|---|
| Film | Superman | 2025 | David Corenswet | 1.9 | Rachel Brosnahan | 1.6 | 32 | 35 |
| Film | Superman: The Movie | 1978 | Christopher Reeve | 1.9 | Margot Kidder | 1.7 | 26 | 30 |
| TV Show | Smallville | 2001 | Tom Welling | 1.9 | Erica Durance | 1.7 | 24 | 23 |
| Film | Superman Returns | 2006 | Brandon Routh | 1.9 | Kate Bosworth | 1.6 | 27 | 23 |
| Film | Superman & the Mole Men | 1951 | George Reeves | 1.9 | Phyllis Coates | 1.6 | 38 | 25 |
| Film | Man of Steel | 2013 | Henry Cavill | 1.9 | Amy Adams | 1.6 | 30 | 39 |
| Serial | Superman | 1948 | Kirk Alyn | 1.9 | Noel Neill | 1.6 | 37 | 27 |
| TV Show | Superman & Lois | 2021 | Tyler Hoechlin | 1.8 | Elizabeth Tulloch | 1.7 | 33 | 40 |
| TV Show | Lois & Clark: The New Adventures of Superman | 1993 | Dean Cain | 1.8 | Teri Hatcher | 1.7 | 27 | 29 |
| TV Show | The Adventures of Superboy | 1988 | John Haymes Newton | 1.8 | NA | NA | 23 | NA |
| TV Show | The Adventures of Superboy | 1989 | Gerard Christopher | 1.8 | NA | NA | 31 | NA |
Create SPSS Version with Labels
For use in statistics classes, we create a properly labeled SPSS file:
# Recode missing values to the sentinel -99 (numeric) / "-99" (character).
superman_data <- superman_actors |>
  mutate(
    across(where(is.numeric), ~ ifelse(is.na(.), -99, .)),
    across(where(is.character), ~ ifelse(is.na(.), "-99", .))
  )

# Create value labels for categorical variables.
# Codes are assigned in order of first appearance. The "-99" sentinel is
# left out of every label set so that a missing value is stored as -99
# instead of becoming an ordinary category labelled "-99".
type_values <- setdiff(unique(superman_data$type), "-99")
type_labels <- setNames(seq_along(type_values), type_values)

title_values <- setdiff(unique(superman_data$title), "-99")
title_labels <- setNames(seq_along(title_values), title_values)

actor_values <- setdiff(unique(superman_data$clark_actor), "-99")
actor_labels <- setNames(seq_along(actor_values), actor_values)

lois_actor_values <- setdiff(unique(superman_data$lois_actor), "-99")
lois_actor_labels <- setNames(seq_along(lois_actor_values), lois_actor_values)

var_labels <- c(
  type = "Media Type",
  title = "Title of Superman Media",
  year = "Year of first superman media appearance",
  clark_actor = "Name of actor playing Superman/Clark Kent",
  clark_height = "Height of Clark Kent/Superman actor (meters)",
  lois_actor = "Name of actress playing Lois Lane",
  lois_height = "Height of Lois Lane actress (meters)",
  clark_age = "Age of Clark Kent/Superman actor at Release Date",
  lois_age = "Age of Lois Lane actress at Release Date"
)

# Translate a character column into its numeric codes. A value's code is
# its position in `labels`; values without a label (the "-99" sentinel)
# are stored as -99.
encode_category <- function(x, labels) {
  codes <- as.numeric(match(x, names(labels)))
  ifelse(is.na(codes), -99, codes)
}

superman_labelled <- superman_data |>
  mutate(
    type = encode_category(type, type_labels),
    title = encode_category(title, title_labels),
    clark_actor = encode_category(clark_actor, actor_labels),
    lois_actor = encode_category(lois_actor, lois_actor_labels)
  ) |>
  set_variable_labels(!!!var_labels) |>
  set_value_labels(
    type = type_labels,
    title = title_labels,
    clark_actor = actor_labels,
    lois_actor = lois_actor_labels
  ) |>
  select(
    year, title, type,
    clark_actor, clark_height, clark_age,
    lois_actor, lois_height, lois_age
  )

# Set SPSS attributes
# haven reads the display format from the "format.spss" attribute. The
# height and age columns use width 6 so the -99 sentinel still fits with
# two decimals ("-99.00").
nominal_cols <- c("type", "title", "clark_actor", "lois_actor")
spss_formats <- c(
  type = "F8.0",
  title = "F8.0",
  clark_actor = "F8.0",
  lois_actor = "F8.0",
  year = "F4.0",
  clark_height = "F6.2",
  lois_height = "F6.2",
  clark_age = "F6.2",
  lois_age = "F6.2"
)
for (col in names(spss_formats)) {
  # NOTE(review): haven does not appear to write a measurement level, so
  # "spss_measure" is kept only as metadata on the R object -- confirm.
  attr(superman_labelled[[col]], "spss_measure") <-
    if (col %in% nominal_cols) "nominal" else "scale"
  attr(superman_labelled[[col]], "format.spss") <- spss_formats[[col]]
}
attr(superman_labelled, "label") <- "Superman Data"

write_sav(superman_labelled, "superman.sav")
saveRDS(superman_actors, "superman.rds")

Part 2: Rotten Tomatoes Data
We scrape critic and audience scores from Rotten Tomatoes for Superman movies and TV shows.
Scraping Function
# Scrape scores and metadata from one Rotten Tomatoes movie or TV page.
#
# Arguments:
#   x    URL of the page. URLs containing "/tv/" are reported as type "TV",
#        all others as "Movie".
#   ...  Currently unused.
#
# Returns a one-row tibble with the columns title, critics_score,
# critics_status, critics_count, audience_score, audience_count, synopsis,
# poster_url, url and type. Anything that cannot be found on the page is NA.
scrape_movie <- function(x, ...) {
  library(rvest)
  library(dplyr)
  library(stringr)

  # NOTE(review): user_agent is passed through read_html()'s `...`; confirm
  # that it is actually applied to the HTTP request.
  movie_page <- read_html(
    x,
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
  )
  is_tv <- str_detect(x, "/tv/")

  title <- movie_page |>
    html_element("title") |>
    html_text() |>
    str_replace(" \\| Rotten Tomatoes$", "") |>
    str_trim()

  # All scores and counts are pulled out of the flattened page text.
  page_text <- movie_page |> html_text()

  critics_score <- page_text |>
    str_extract("(\\d+)%\\s*(Avg\\.\\s*)?Tomatometer") |>
    str_extract("\\d+") |>
    as.numeric()

  # Accept thousands separators ("1,234 Reviews"), as the audience count
  # below already does; a bare \\d+ would keep only the digits after the
  # last comma.
  critics_count <- page_text |>
    str_extract("\\d[\\d,]*\\s*Reviews") |>
    str_extract("\\d[\\d,]*") |>
    str_replace_all(",", "") |>
    as.numeric()

  # "Certified Fresh" is taken from the page text; otherwise the status is
  # derived from the score with a cut-off of 60.
  critics_status <- NA_character_
  if (!is.na(critics_score)) {
    if (str_detect(page_text, regex("Certified Fresh", ignore_case = TRUE))) {
      critics_status <- "Certified Fresh"
    } else if (critics_score >= 60) {
      critics_status <- "Fresh"
    } else {
      critics_status <- "Rotten"
    }
  }

  audience_score <- page_text |>
    str_extract("(\\d+)%\\s*(Avg\\.\\s*)?Popcornmeter") |>
    str_extract("\\d+") |>
    as.numeric()

  # NA propagates through every step, so a missing match yields NA_real_.
  audience_count <- page_text |>
    str_extract("([\\d,]+)\\+?\\s*(Verified\\s*)?Ratings") |>
    str_extract("[\\d,]+") |>
    str_replace_all(",", "") |>
    as.numeric()

  synopsis <- movie_page |>
    html_element('meta[name="description"]') |>
    html_attr("content")

  poster_url <- movie_page |>
    html_element('meta[property="og:image"]') |>
    html_attr("content")

  # Every value above is a length-one vector that is NA when missing (never
  # NULL), so no NULL fallback is required here.
  tibble(
    title = title,
    critics_score = critics_score,
    critics_status = critics_status,
    critics_count = critics_count,
    audience_score = audience_score,
    audience_count = audience_count,
    synopsis = synopsis,
    poster_url = poster_url,
    url = x,
    type = if (is_tv) "TV" else "Movie"
  )
}

Scrape Multiple Movies
# Fetch the Rotten Tomatoes page of every Superman film and TV show.
# Films
mm_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_and_the_mole_men")
# Spell the title with "&" instead of "and".
mm_rt$title <- mm_rt$title |>
  str_trim() |>
  str_replace("Superman and the Mole Men", "Superman & the Mole Men")
stm_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_the_movie")
s2_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_ii")
s3_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_iii")
s4_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_iv_the_quest_for_peace")
sr_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_returns")
mos_rt <- scrape_movie("https://www.rottentomatoes.com/m/superman_man_of_steel")
sm_25 <- scrape_movie("https://www.rottentomatoes.com/m/superman_2025")
# TV shows
lc_rt <- scrape_movie("https://www.rottentomatoes.com/tv/lois_clark_the_new_adventures_of_superman")
sb_rt <- scrape_movie("https://www.rottentomatoes.com/tv/the_adventures_of_superboy")
sl_rt <- scrape_movie("https://www.rottentomatoes.com/tv/superman_and_lois")
sm_rt <- scrape_movie("https://www.rottentomatoes.com/tv/smallville")

# NOTE(review): s2_rt, s3_rt and s4_rt are scraped but not part of `rt`;
# confirm that leaving the sequels out is intended.
rt <- bind_rows(
  mm_rt, sm_25, stm_rt, lc_rt, sr_rt, sm_rt, mos_rt, sl_rt, sb_rt
)

# Display version of the table: every column becomes text and missing
# values are shown as "--".
rt_display <- rt |>
  mutate(
    poster = if_else(
      is.na(poster_url) | poster_url == "Poster URL not available",
      "--",
      paste0('<img src="', poster_url, '" height="70">')
    ),
    across(
      c(critics_score, audience_score),
      \(score) if_else(is.na(score), "--", paste0(score, "%"))
    ),
    across(
      c(critics_count, audience_count),
      \(count) if_else(is.na(count), "--", as.character(count))
    ),
    critics_status = coalesce(critics_status, "--")
  ) |>
  select(
    poster, title, critics_score, critics_status, critics_count,
    audience_score, audience_count
  )

# Rename to presentation headers and render; escape = FALSE keeps the
# <img> tags as HTML.
rt_display |>
  select(
    Poster = poster,
    Title = title,
    `Critics Score` = critics_score,
    `Critics Status` = critics_status,
    `# Critics Reviews` = critics_count,
    `Audience Score` = audience_score,
    `# Audience Ratings` = audience_count
  ) |>
  tt() |>
  format_tt(escape = FALSE)

Create SPSS Version of RT Data
# Numeric codes for the titles in the Rotten Tomatoes data.
title_labels <- c(
  "Superman (2025)" = 1,
  "Superman & the Mole Men" = 5,
  "Superman: The Movie" = 2,
  "Lois & Clark: The New Adventures of Superman" = 8,
  "Superman Returns" = 4,
  "Smallville" = 3,
  "Man of Steel" = 6,
  "Superman & Lois" = 7,
  "The Adventures of Superboy" = 9
)

# Missing values become the sentinel -99 (numeric) / "-99" (character).
rt_data <- rt |>
  mutate(
    across(where(is.numeric), ~ ifelse(is.na(.), -99, .)),
    across(where(is.character), ~ ifelse(is.na(.), "-99", .))
  )

var_labels <- list(
  title = "Title of Superman movie/TV show",
  critics_score = "Percentage of positive critic reviews (0-100)",
  critics_status = "Critic consensus: Fresh/Rotten/Certified Fresh",
  critics_count = "Number of critic reviews collected",
  audience_score = "Percentage of positive audience ratings (0-100)",
  audience_count = "Number of audience ratings collected",
  synopsis = "Brief description of the movie/TV show",
  poster_url = "URL of the poster image",
  url = "Rotten Tomatoes page URL"
)

value_labels <- list(
  title = title_labels,
  critics_status = c("Fresh" = 1, "Rotten" = 2, "Certified Fresh" = 3)
)

rt_labelled <- rt_data |>
  mutate(
    # Look each title up by name; titles without a code (including the
    # "-99" sentinel) come back as NA and are stored as -99.
    title = coalesce(as.numeric(title_labels[title]), -99),
    critics_status = case_when(
      critics_status == "Fresh" ~ 1,
      critics_status == "Rotten" ~ 2,
      critics_status == "Certified Fresh" ~ 3,
      TRUE ~ -99
    )
  ) |>
  # Keep only the documented columns, then attach the labels.
  select(
    title, critics_score, critics_status, critics_count,
    audience_score, audience_count, synopsis, poster_url, url
  ) |>
  labelled::set_variable_labels(.labels = var_labels) |>
  labelled::set_value_labels(.labels = value_labels)

write_sav(rt_labelled, "rtomatoes.sav")

Part 3: Box Office Data
We scrape box office performance data from Box Office Mojo.
Find Movie IDs
# Search Box Office Mojo for a title and list the matching results.
#
# Arguments:
#   movie_title  Search text, e.g. "Superman".
#
# Returns a data frame with one row per search result and the columns
# title (link text), link (href) and movie_id (the "tt..." identifier found
# in the link, NA when the link has none).
find_movie_id <- function(movie_title) {
  # Percent-encode the whole search text so that spaces and reserved
  # characters such as "&" cannot break or truncate the query string.
  search_term <- utils::URLencode(movie_title, reserved = TRUE)
  search_url <- paste0(
    "https://www.boxofficemojo.com/search/?q=",
    search_term
  )

  search_page <- rvest::read_html(search_url)
  search_results <- search_page |>
    rvest::html_nodes("a.a-size-medium.a-link-normal.a-text-bold")

  results_df <- data.frame(
    title = rvest::html_text(search_results),
    link = rvest::html_attr(search_results, "href"),
    stringsAsFactors = FALSE
  )
  results_df$movie_id <- stringr::str_extract(results_df$link, "tt[0-9]+")
  results_df
}
superman_list <- find_movie_id("Superman")
superman_list |> tt()

Box Office Mojo Scraping Function
# Scrape the Box Office Mojo title page of one movie.
#
# Arguments:
#   movie_id  Title identifier used in the page URL, e.g. "tt0078346".
#
# Returns a one-row data frame. movie_id is always present; title, year,
# description, poster_url, poster_url_hires and the per-category
# <category>_gross, <category>_gross_numeric, <category>_percent and
# <category>_percent_numeric columns are added only when the matching
# element exists on the page.
extract_complete_movie_data <- function(movie_id) {
  library(rvest)
  library(xml2)

  page <- read_html(
    paste0("https://www.boxofficemojo.com/title/", movie_id, "/")
  )
  movie_data <- data.frame(movie_id = movie_id, stringsAsFactors = FALSE)

  # --- Header box: title, year, description and poster ---
  header_box <- html_node(page, ".a-section.mojo-summary")
  if (!is.na(header_box)) {
    heading <- html_node(header_box, "h1.a-size-extra-large")
    if (!is.na(heading)) {
      # The heading reads "Title (YYYY)"; split it into its two parts.
      full_title <- str_trim(html_text(heading))
      movie_data$title <- str_replace(full_title, "\\s*\\(\\d{4}\\)$", "")
      movie_data$year <- str_replace_all(
        str_extract(full_title, "\\(\\d{4}\\)"),
        "[\\(\\)]",
        ""
      )
    }

    blurb <- str_trim(html_text(html_node(header_box, "span.a-size-medium")))
    if (!is.na(blurb)) {
      movie_data$description <- blurb
    }

    poster <- html_node(header_box, "img")
    if (!is.na(poster)) {
      movie_data$poster_url <- html_attr(poster, "src")
      hires_url <- html_attr(poster, "data-a-hires")
      if (!is.na(hires_url)) {
        movie_data$poster_url_hires <- hires_url
      }
    }
  }

  # --- Summary table: one money value (and optional percent) per row ---
  gross_table <- html_node(
    page,
    ".a-section.a-spacing-none.mojo-summary-table"
  )
  if (!is.na(gross_table)) {
    for (row in html_nodes(gross_table, ".a-section.a-spacing-none")) {
      # Rows without a small-print category span carry no data.
      if (length(xml_find_first(row, ".//span[@class='a-size-small']")) == 0) {
        next
      }

      money_node <- html_node(row, "span.money")
      money_value <- if (!is.na(money_node)) html_text(money_node) else NA
      if (is.na(money_value)) {
        next
      }

      # Category name without any parenthesised part, turned into a
      # lower-case column prefix with non-alphanumerics as "_".
      prefix <- row |>
        html_node(".a-size-small") |>
        html_text() |>
        str_trim() |>
        str_replace_all("\\s*\\([^)]*\\)\\s*", "") |>
        str_trim()
      prefix <- tolower(str_replace_all(prefix, "[^[:alnum:]]", "_"))
      prefix <- str_replace_all(prefix, "_+", "_")
      prefix <- str_remove(prefix, "_$")

      # Keep both the displayed text and its numeric value.
      movie_data[[paste0(prefix, "_gross")]] <- money_value
      movie_data[[paste0(prefix, "_gross_numeric")]] <-
        as.numeric(gsub("[$,]", "", money_value))

      percent_node <- html_node(row, "span.percent")
      percent <- if (!is.na(percent_node)) html_text(percent_node) else NA
      if (!is.na(percent)) {
        movie_data[[paste0(prefix, "_percent")]] <- percent
        movie_data[[paste0(prefix, "_percent_numeric")]] <-
          as.numeric(gsub("[%]", "", percent))
      }
    }
  }

  return(movie_data)
}

Extract Multiple Movies
# Scrape several Box Office Mojo title pages and stack the results.
#
# Arguments:
#   movie_ids  Vector of title identifiers, e.g. c("tt0078346", ...).
#
# Returns one data frame with a row per successfully scraped movie; columns
# that a movie lacks are filled with NA. Returns NULL when `movie_ids` is
# empty or every movie failed. A failing movie is reported on the console
# and skipped.
extract_multiple_movies <- function(movie_ids) {
  # Collect one result per ID and bind once at the end instead of growing a
  # data frame inside the loop; bind_rows() already aligns differing
  # columns and fills the gaps with NA.
  results <- vector("list", length(movie_ids))

  for (i in seq_along(movie_ids)) {
    id <- movie_ids[[i]]
    cat("Processing movie ID:", id, "\n")

    # list(...) keeps a NULL result in its slot instead of deleting it.
    results[i] <- list(tryCatch(
      extract_complete_movie_data(id),
      error = function(e) {
        cat(
          "Error processing movie ID:", id,
          "- Error:", conditionMessage(e), "\n"
        )
        NULL
      }
    ))

    Sys.sleep(2) # Be polite to the server
  }

  results <- Filter(Negate(is.null), results)
  if (length(results) == 0) {
    return(NULL)
  }
  bind_rows(results)
}
# Title identifiers (IMDB / Box Office Mojo) of the Superman movies.
movie_ids <- c(
  "tt5950044", "tt0078346", "tt0770828", "tt0348150",
  "tt0081573", "tt0086393", "tt0094074", "tt2975590"
)

all_movies <- extract_multiple_movies(movie_ids)

# Show a compact overview; the poster column holds raw <img> tags, so the
# table is rendered without escaping.
all_movies |>
  mutate(
    poster = if_else(
      is.na(poster_url),
      "",
      paste0('<img src="', poster_url, '" height="80">')
    )
  ) |>
  select(all_of(c("poster", "title", "year", "worldwide_gross", "budget"))) |>
  tt() |>
  format_tt(escape = FALSE) |>
  style_tt(bootstrap_class = "table table-striped")

Clean Box Office Data
# Tidy the scraped box office data: numeric columns, parsed release date,
# era flags and the matching Superman actor.
# NOTE(review): budget_numeric, domestic_opening_numeric,
# earliest_release_date and mpaa are expected in `all_movies`, but the
# scraping function shown in this file does not create them -- confirm
# where they come from.
clean_boxoffice_df <- all_movies |>
  mutate(
    year = as.numeric(year),
    box_office_numeric = worldwide_gross_numeric,
    budget_numeric = as.numeric(budget_numeric),
    opening = as.numeric(domestic_opening_numeric),
    domestic = as.numeric(domestic_gross_numeric),
    percent = as.numeric(domestic_percent_numeric),
    title = str_trim(title),
    # Pull the "Month D, YYYY" part out of the text and parse it.
    release_date = str_extract(earliest_release_date, "\\w+ \\d+, \\d{4}"),
    release_date = as.Date(release_date, format = "%B %d, %Y"),
    decade = paste0(floor(year / 10) * 10, "s"),
    mpaa_rating = str_trim(mpaa),
    mpaa_rating = if_else(is.na(mpaa_rating), "Unrated", mpaa_rating),
    # Era flags are derived from the release year only.
    is_original_series = year >= 1978 & year <= 1987,
    is_modern_era = year == 2006,
    is_dceu = year >= 2010 & year <= 2024,
    is_dcu = year >= 2025,
    clark_actor = case_when(
      is_original_series ~ "Christopher Reeve",
      is_modern_era ~ "Brandon Routh",
      is_dceu ~ "Henry Cavill",
      is_dcu ~ "David Corenswet"
    )
  ) |>
  select(
    movie_id, title, year, release_date, decade, mpaa_rating,
    budget_numeric, box_office_numeric, opening, domestic, percent,
    is_original_series, is_modern_era, is_dceu, is_dcu, clark_actor,
    poster_url_hires
  )

write.csv(clean_boxoffice_df, "boxoffice_raw.csv", row.names = FALSE)

# Labelled copy for the SPSS export. It is built here because
# `boxoffice_labelled` is not created anywhere else in this file.
boxoffice_labelled <- clean_boxoffice_df |>
  set_variable_labels(
    .labels = list(
      movie_id = "Box Office Mojo title ID",
      title = "Movie title",
      year = "Release year",
      release_date = "Earliest release date",
      decade = "Decade of release",
      mpaa_rating = "MPAA rating",
      budget_numeric = "Budget",
      box_office_numeric = "Worldwide gross",
      opening = "Domestic opening",
      domestic = "Domestic gross",
      percent = "Domestic percent",
      is_original_series = "Released 1978-1987",
      is_modern_era = "Released 2006",
      is_dceu = "Released 2010-2024",
      is_dcu = "Released 2025 or later",
      clark_actor = "Name of actor playing Superman/Clark Kent",
      poster_url_hires = "URL of the high-resolution poster image"
    )
  )
write_sav(boxoffice_labelled, "boxoffice.sav")

Part 4: Letterboxd Reviews
We also scrape user reviews from Letterboxd for sentiment analysis.
Letterboxd Scraping Function
# Source the letterboxd scraping functions
source("letterbox.R")
# Fault-tolerant wrapper around scrape_movie_reviews().
#
# Arguments:
#   movie_slug    Letterboxd slug of the movie, e.g. "superman-returns".
#   num_pages     Forwarded to scrape_movie_reviews().
#   random_pages  Forwarded to scrape_movie_reviews().
#   max_page      Forwarded to scrape_movie_reviews().
#
# Returns the scraped reviews, or NULL (with a message) when the scrape
# fails, returns NULL, or returns zero rows. Nothing is written to a file
# (file = FALSE).
safe_scrape <- function(movie_slug, num_pages = 2, random_pages = TRUE, max_page = 5) {
  tryCatch(
    {
      reviews <- scrape_movie_reviews(
        movie_slug,
        num_pages = num_pages,
        random_pages = random_pages,
        max_page = max_page,
        file = FALSE
      )

      if (!is.null(reviews) && nrow(reviews) != 0) {
        reviews
      } else {
        message("No data returned for: ", movie_slug)
        NULL
      }
    },
    error = function(err) {
      message("Error scraping ", movie_slug, ": ", err$message)
      NULL
    }
  )
}
# Scrape Letterboxd reviews film by film; a failed scrape yields NULL.
# Older titles are given a lower page limit.
superman_1948 <- safe_scrape("superman-1948", max_page = 5)
atomman <- safe_scrape("atom-man-vs-superman", max_page = 10)
moleman <- safe_scrape("superman-and-the-mole-men", max_page = 10)
superman2025 <- safe_scrape("superman-2025", max_page = 50)
superman1 <- safe_scrape("superman", max_page = 50)
superman2 <- safe_scrape("superman-ii", max_page = 50)
superman3 <- safe_scrape("superman-iii", max_page = 50)
superman4 <- safe_scrape("superman-iv-the-quest-for-peace", max_page = 50)
superman_returns <- safe_scrape("superman-returns", max_page = 50)
man_of_steel <- safe_scrape("man-of-steel", max_page = 50)

# Stack the scrapes that returned data.
# NOTE(review): superman2, superman3 and superman4 are scraped but not
# combined below; confirm that this is intended.
all_scrapes <- list(
  superman_1948,
  atomman,
  moleman,
  superman1,
  superman_returns,
  man_of_steel,
  superman2025
)
valid_scrapes <- all_scrapes[!vapply(all_scrapes, is.null, logical(1))]

# `letterboxd` is only created when at least one scrape succeeded.
if (length(valid_scrapes) > 0) {
  letterboxd <- rename(bind_rows(valid_scrapes), title = movie_title)
  message("Successfully scraped ", nrow(letterboxd), " reviews")
}

Part 5: Combined Dataset
Finally, we combine all data sources into a single comprehensive dataset.
Combine All Sources
# Reload the files written in the earlier parts.
superman_spss <- read_sav("superman.sav")
rt_labelled <- read_sav("rtomatoes.sav")
boxoffice_df <- read_csv("boxoffice_raw.csv")

# Join actor data with RT data
dat <- superman_spss |>
  full_join(rt_labelled, by = "title")

# Rows without a match on one side get the -99 / "-99" sentinel.
superman_rt <- dat |>
  mutate(
    across(where(is.numeric), ~ if_else(is.na(.), -99, .)),
    across(where(is.character), ~ if_else(is.na(.), "-99", .))
  )

# Join with box office data
# One actor can match several films. The film-level title and year from
# the box office data (.y) take precedence, so sequels keep their own
# title and year; the actor-table values (.x) are only the fallback for
# rows without box office data.
superman_boxoffice <- superman_actors |>
  full_join(boxoffice_df, by = "clark_actor") |>
  mutate(
    title = coalesce(as.character(title.y), as.character(title.x)),
    year = coalesce(as.numeric(year.y), as.numeric(year.x))
  ) |>
  select(-ends_with(".x"), -ends_with(".y"))

# Save combined files
write_sav(superman_rt, "superman_rt.sav")
saveRDS(superman_rt, "superman_rt.rds")
write_sav(superman_boxoffice, "superman_complete.sav")

Part 6: Visualizations
Superman Actor Heights Over Time
# Scatter plot of Superman actor height against year, labelled by actor.
ggplot(
  superman_actors |> filter(!is.na(clark_height)),
  aes(x = year, y = clark_height)
) +
  geom_point(aes(color = type), size = 5, alpha = 0.8) +
  geom_text(
    aes(label = clark_actor, color = type),
    hjust = -0.1,
    vjust = 0.5,
    size = 3.5,
    fontface = "bold",
    show.legend = FALSE
  ) +
  # A named `values` vector is matched to the data by name, so the names
  # must be the values that occur in `type` ("Film", "TV Show", "Serial").
  # The names also serve as the legend labels.
  scale_color_manual(
    values = c(
      "Film" = "#0073CF",
      "TV Show" = "#E21A22",
      "Serial" = "#FFC72C"
    )
  ) +
  labs(
    title = "Superman Actor Heights Over Time",
    subtitle = "Height in meters by year of first appearance",
    x = "Year",
    y = "Height (m)",
    color = "Media Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 18),
    panel.grid.minor = element_blank()
  ) +
  scale_x_continuous(breaks = seq(1950, 2030, by = 10)) +
  scale_y_continuous(limits = c(1.75, 2.05)) +
  # Let the actor labels run past the right edge of the panel.
  coord_cartesian(clip = "off")
Height Comparison: Superman vs Lois Lane
# Scatter plot of Lois Lane actress height against Superman actor height,
# labelled by title; points on the dashed line are pairs of equal height.
superman_actors |>
  filter(!is.na(lois_height) & !is.na(clark_height)) |>
  ggplot(aes(x = clark_height, y = lois_height)) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", alpha = 0.3) +
  geom_point(aes(color = type), size = 6, alpha = 0.7) +
  geom_text(
    aes(label = title),
    hjust = -0.1,
    vjust = 1.5,
    size = 3
  ) +
  # A named `values` vector is matched to the data by name, so the names
  # must be the values that occur in `type` ("Film", "TV Show", "Serial").
  # The names also serve as the legend labels.
  scale_color_manual(
    values = c(
      "Film" = "#0073CF",
      "TV Show" = "#E21A22",
      "Serial" = "#FFC72C"
    )
  ) +
  labs(
    title = "Superman vs Lois Lane: Actor Height Comparison",
    subtitle = "Dashed line represents equal height",
    x = "Superman Actor Height (m)",
    y = "Lois Lane Actress Height (m)",
    color = "Media Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 18),
    panel.grid.minor = element_blank()
  )
Age Comparison: Superman vs Lois Lane
# Scatter plot of Lois Lane actress age against Superman actor age at
# release, labelled by actor; points on the dashed line are equal ages.
superman_actors |>
  filter(!is.na(lois_age) & !is.na(clark_age)) |>
  ggplot(aes(x = clark_age, y = lois_age)) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", alpha = 0.3) +
  geom_point(aes(color = type), size = 6, alpha = 0.7) +
  geom_text(
    aes(label = clark_actor),
    hjust = -0.1,
    vjust = 1.5,
    size = 3
  ) +
  # A named `values` vector is matched to the data by name, so the names
  # must be the values that occur in `type` ("Film", "TV Show", "Serial").
  # The names also serve as the legend labels.
  scale_color_manual(
    values = c(
      "Film" = "#0073CF",
      "TV Show" = "#E21A22",
      "Serial" = "#FFC72C"
    )
  ) +
  labs(
    title = "Superman vs Lois Lane: Actor Age Comparison",
    subtitle = "Age at time of release; dashed line represents equal age",
    x = "Superman Actor Age (years)",
    y = "Lois Lane Actress Age (years)",
    color = "Media Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 18),
    panel.grid.minor = element_blank()
  ) +
  xlim(20, 45) +
  ylim(20, 45)