Data
# Load the player table from 'players.csv', derive the per-game ice time
# column, and keep only named players at standard positions.
data <- read.csv('players.csv') %>%
  mutate(
    # average time on ice: total TOI divided by games played, 3 decimals
    ATOI = round(TOI / GP, 3),
    # position is categorical, so store it as a factor
    Pos = as.factor(Pos)
  ) %>%
  filter(
    # drop empty rows (no player name)
    Player != '',
    # drop the non-standard position codes
    !(Pos %in% c('F', 'W', 'G'))
  )

# preview the first rows of the cleaned table
head(data)
## Player Age Team Pos GP G A PTS PlusMin PS S TOI BLK HIT ATOI
## 1 Nicholas Abruzzese 22 TOR C 9 1 0 1 -1 0.0 8 92 3 7 10.222
## 2 Noel Acciari 30 FLA C 20 3 5 8 2 0.6 32 240 16 48 12.000
## 3 Calen Addison 21 MIN D 15 2 2 4 -4 0.3 17 207 6 12 13.800
## 4 Andrew Agozzino 31 OTT LW 1 0 0 0 0 0.0 1 7 0 4 7.000
## 5 Jack Ahcan 24 BOS D 6 1 0 1 -3 0.1 5 96 5 8 16.000
## 6 Sebastian Aho 25 NYI D 36 2 10 12 -6 1.7 34 592 42 32 16.444
Player Ages Between Teams
# Scatter plot of season points against player age, one panel per team,
# with points coloured by plus/minus and a loess trend line in each panel.
plt1 <- data %>%
  # remove players without an identified team
  filter(Team != 'TOT') %>%
  # keep only players with more than 10 games played
  filter(GP > 10) %>%
  # create aesthetic mapping
  ggplot(mapping = aes(x = Age, y = PTS, color = PlusMin)) +
  # add scatter plot
  geom_point(alpha = .3, size = 1) +
  # add trend lines
  geom_smooth(method = 'loess', alpha = .05, size = .2, col = 'red4') +
  # plot separately by team
  facet_wrap(. ~ Team, shrink = FALSE) +
  # format axes; the upper y limit is the maximum PTS over all of `data`,
  # not only the rows that survive the filters above
  scale_x_continuous(breaks = seq(20, 40, 10)) +
  scale_y_continuous(breaks = seq(0, 100, 50),
                     limits = c(-3, max(data$PTS))) +
  # colour palette and legend breaks in ONE scale: ggplot2 keeps only the
  # last scale added for an aesthetic, so a separate scale_color_continuous()
  # followed by scale_color_viridis() would silently drop the breaks
  scale_color_viridis(breaks = seq(-50, 50, 25)) +
  # change legend title
  guides(color = guide_colorbar(title = '+/-')) +
  # change plot theme
  theme_tufte(base_size = 10, base_family = 'sans') +
  # add plot and axis titles
  labs(title = 'Where Do Hockey Teams\' Points Come From?',
       subtitle = 'NHL 2021-22 season statistics (>10 games)',
       x = 'Player Age [Years]',
       y = 'Season Points (PTS) = Goals + Assists')

# display the plot
plt1

# export plot as .pdf file (8 x 4 inches)
pdf('plot1.pdf', height = 4, width = 8)
# Close the device even if rendering fails; otherwise the pdf device stays
# open and later plots would be drawn into the unfinished file.
invisible(tryCatch(print(plt1), finally = dev.off()))