# Attach the packages this analysis relies on. library() is used instead of
# require() so that a missing package stops the script immediately with a
# clear error, rather than returning FALSE and failing later on.
library(readr)
library(tidyverse)
library(ggthemes)
library(dplyr)
library(lubridate)
library(ggrepel)
library(viridis)
Citation for Data used:
North America Land Data Assimilation System (NLDAS) Daily Sunlight (insolation) for years 1979-2011 on CDC WONDER Online Database, released 2013. Accessed at http://wonder.cdc.gov/NASA-INSOLAR.html
The final column of the data frame was edited in the text file, as it contained an exponent that proved to be problematic.
I used “Import Dataset > From Text (readr)” to import my dataset,
removing duplicate columns and formatting the `Month, Year Code` column as a date.
# Read the tab-delimited sunlight export into a data frame.
# The Notes, Region Code and "Month, Year" columns are skipped at read time,
# and "Month, Year Code" is parsed as a Date using the "%Y/%m" format.
regional_sunlight <- read_delim(
  file = "NLDAS Daily Sunlight By Region.txt",
  delim = "\t",
  col_types = cols(
    `Month, Year Code` = col_date(format = "%Y/%m"),
    Notes = col_skip(),
    `Region Code` = col_skip(),
    `Month, Year` = col_skip()
  ),
  escape_double = FALSE,
  trim_ws = TRUE
)
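Make changes to the data frame to make it easier to work with, including dropping rows with missing values, renaming the date column to `YearMo`, and shortening the region names.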
# Tidy the imported table:
#   * give the date column the shorter name YearMo,
#   * discard every row that contains a missing value,
#   * replace the long "Census Region N: <name>" labels with the bare name.
regional_sunlight <- regional_sunlight %>%
  rename(YearMo = `Month, Year Code`) %>%
  drop_na() %>%
  mutate(
    Region = recode(
      Region,
      "Census Region 1: Northeast" = "Northeast",
      "Census Region 2: Midwest" = "Midwest",
      "Census Region 3: South" = "South",
      "Census Region 4: West" = "West"
    )
  )
Create a new data frame containing only the data from 2011 (the last year available), leaving the original data frame unchanged.
# Subset of the observations whose YearMo falls in 2011; the source data
# frame is not modified.
twenty_eleven <- filter(regional_sunlight, year(YearMo) == 2011)
Start with mapping the data, grouping it by Region
# Base plot: one line per region, with the labelled month on the x axis and
# the average daily sunlight on the y axis. Lines are coloured by region.
p_sun <- ggplot(
  data = twenty_eleven,
  mapping = aes(
    x = month(YearMo, label = TRUE),
    y = `Avg Daily Sunlight (KJ/m^2)`,
    group = Region
  )
) +
  geom_line(mapping = aes(colour = Region))
p_sun
Adjust the theme
# Switch to the black-and-white theme with a 14 pt sans base font, and hide
# the legend.
p_sun <- p_sun +
  theme_bw(base_size = 14, base_family = "sans") +
  theme(legend.position = "none")
p_sun
Change axes, add descriptive labels & title
# Add the title and axis labels, format the y-axis tick labels with comma
# separators, and centre the title horizontally.
p_sun <- p_sun +
  ggtitle("Average Daily Sunlight in 2011 by Region") +
  xlab("Month") +
  ylab("Average Daily Sunlight \n (by heat density in KJ/m^2)") +
  scale_y_continuous(labels = scales::label_comma()) +
  theme(plot.title = element_text(hjust = 0.5))
p_sun
Set up direct labeling, following the steps shown in class
# Find the end of each line: take the data computed for the first layer
# (the geom_line() layer) and keep, for every line colour, the row with the
# largest x position. The grouping is dropped afterwards so later code works
# on a plain data frame.
line_ends <- ggplot_build(p_sun)$data[[1]] %>%
  group_by(colour) %>%
  filter(x == max(x)) %>%
  ungroup()
# Add the region label. Each row's `group` id is looked up in the region
# levels of the plotted data rather than pasting a sorted vector in by
# position, so a label cannot end up on the wrong line and a differing number
# of regions does not cause a length-mismatch error.
# NOTE(review): relies on ggplot2 numbering groups in sorted/level order of
# Region, the same ordering assumption the positional version made.
line_ends$Region <- levels(factor(twenty_eleven$Region))[line_ends$group]
Finally, add the direct labels to the graph and change the color scale
# Add a text label at the end of each line and apply the Tableau colour scale.
# The aesthetics refer to the columns of `line_ends` by bare name; using
# `line_ends$x` inside aes() bypasses the layer's data and triggers a ggplot2
# warning.
p_sun <- p_sun +
  ggrepel::geom_label_repel(
    data = line_ends,
    mapping = aes(x = x, y = y, label = Region, colour = Region),
    nudge_x = 1,              # shift the labels to the right of the line ends
    label.size = NA,          # no border around the label box
    fill = alpha("white", 0)  # fully transparent label background
  ) +
  scale_color_tableau()
p_sun