library(plotly)
library(htmlwidgets)
library(dplyr)
library(htmltools)
library(htmlwidgets)
library(viridis)
library(viridisLite)
library(tidyr)
library(xts)
library(dygraphs)
Interactive graphs - Ramzi Farhat
Graph 1:
Professor Salaries by rank, 2008-2009
This plot shows the salaries of faculty at a select U.S. college from 2009 (R package ‘carData’).
The data is grouped by professorial rank : Assistant, Associate, and full professors, and displayed by years since earning degree.
Steps taken:
data was transformed to highlight format
Moved title away from zoom and pan tab
tooltip shows rank
highlighting also shows subset of data by rank
Findings:
With the interactivity, we can see that
Assistant Professor salaries are the lowest
Most Associate Professors make more than Assistant Professors. Some (outliers) have graduated more than 10 years ago, but their salaries have not increased. These are those who probably chose not to seek promotion to Full Professor.
Most Full Professors make more than Assistants and Associates, but the variance is very large. Those in applied fields make more than those in theoretical fields.
The mean salary is around 113,000 USD
Issues: depending on its mood, the legend appears duplicated. not sure why!
#read data
data(Salaries, package="carData")
$yrs.service <- as.numeric(Salaries$yrs.service)
Salarieslevels(Salaries$discipline) <- list(Theoretical = "A", Applied = "B")
levels(Salaries$rank) <- list(Assistant_Professor = "AsstProf", Associate_Professor = "AssocProf", Professor = "Prof")
head(Salaries)
rank discipline yrs.since.phd yrs.service sex salary
1 Professor Applied 19 18 Male 139750
2 Professor Applied 20 16 Male 173200
3 Assistant_Professor Applied 4 3 Male 79750
4 Professor Applied 45 39 Male 115000
5 Professor Applied 40 41 Male 141500
6 Associate_Professor Applied 6 6 Male 97000
#transform data
<- highlight_key(Salaries, ~rank) Salaries_highlight
#ggplot graph
<- ggplot(data = Salaries_highlight, mapping = aes(x = yrs.since.phd,
p_ps y = salary,
color=discipline)) +
geom_point(aes(group = rank),
alpha=0.6) +
geom_smooth(method = "loess", alpha=0.2) +
theme_classic(base_size=12) +
scale_color_viridis(discrete = TRUE) +
ylab("Salary (USD)") +
xlab("Time elapsed since PhD (years)") +
scale_x_continuous(breaks = seq(10, 50, by = 10)) +
scale_y_continuous(labels = scales::dollar_format())
#plotly graph
<- ggplotly(p_ps, tooltip = "rank")%>%
p_ps_int highlight(on = "plotly_hover",
off = "plotly_relayout")%>%
layout(legend = list(x=10, y=0, xanchor='right', yanchor='bottom', title=list(text='<b> Field </b>')))%>%
layout(title = list(text='Professor Salaries', y = 0.95, x = 0.5, xanchor = 'center', yanchor = 'top'))
p_ps_int
mean(Salaries$salary)
[1] 113706.5
Graph 2:
Comparing U.S. and Professor Salaries, 1991-2019
Form the previous graph, the mean salary was around 113K USD in 2009. How does that compare to U.S. salaries then, and to salaries of professors nationwide?
The data for U.S. salaries was collected from:
https://www.ssa.gov/oact/cola/central.html
The data for professorial salaries was collected from:
https://nces.ed.gov/programs/digest/d19/tables/dt19_316.10.asp
Steps taken:
proper date formatting
data transformed to xts format
3 dyseries plotted
range selector added
hover options changed
annotation added at date college data was collected (2009) for comparison
vertical line added at 2008 - great recession - for reference
Findings:
In general, faculty salaries are higher than both the mean and median of U.S. salaries
In 2009, nationwide professors salaries were around 75K, while the mean U.S. salary was 39K, and the median was 26K (rounded up to the nearest thousand). Predictably, faculty make significantly more money than the average worker
After the great recession, salaries of university faculty and other workers started to diverge more
At 113K, the college in 2009 was paying salaries the were much higher than the national average. Although we don’t have more information about it, it seems to be an elite college
Issues: I tried resizing the graph to get rid of the scroll bars, but was only able to do it for the horizontal one!
#read data
<- read.csv("wagedataxts.csv")
wagexts $Year <- as.Date(wagexts$Year)
wagexts$Year wagexts
[1] "1991-01-01" "1992-01-01" "1993-01-01" "1994-01-01" "1995-01-01"
[6] "1996-01-01" "1997-01-01" "1998-01-01" "1999-01-01" "2000-01-01"
[11] "2001-01-01" "2002-01-01" "2003-01-01" "2004-01-01" "2005-01-01"
[16] "2006-01-01" "2007-01-01" "2008-01-01" "2009-01-01" "2010-01-01"
[21] "2011-01-01" "2012-01-01" "2013-01-01" "2014-01-01" "2015-01-01"
[26] "2016-01-01" "2017-01-01" "2018-01-01" "2019-01-01" "2020-01-01"
<- xts::xts(x = wagexts %>% select(Mean_Wage, Median_Wage, Professor_Wage), # data (y axis)
xts_wage order.by = wagexts %>% pull(Year)) # date (x axis)
xts_wage
Mean_Wage Median_Wage Professor_Wage
1991-01-01 20924 15076 42165
1992-01-01 22002 15610 43851
1993-01-01 22191 15691 44714
1994-01-01 22787 16118 46364
1995-01-01 23700 16650 47811
1996-01-01 24859 17403 49309
1997-01-01 26310 18277 50829
1998-01-01 27687 19157 52335
1999-01-01 29230 20102 54097
2000-01-01 30846 20957 55888
2001-01-01 31582 21767 57373
2002-01-01 31899 22153 59742
2003-01-01 32678 22577 61330
2004-01-01 34198 23356 62579
2005-01-01 35449 23962 64234
2006-01-01 37078 24892 66172
2007-01-01 38761 25737 68479
2008-01-01 39653 26514 71101
2009-01-01 39055 26261 73587
2010-01-01 39959 26364 74620
2011-01-01 41211 26965 75481
2012-01-01 42498 27519 76567
2013-01-01 43041 28031 77278
2014-01-01 44569 28851 78733
2015-01-01 46120 29930 80157
2016-01-01 46641 30533 82224
2017-01-01 48252 31561 84737
2018-01-01 50000 32838 86870
2019-01-01 51916 34248 88703
2020-01-01 53383 34612 NA
#basic plot
<- dygraph(xts_wage, height = "350", width="85%", main = "U.S. Mean and Median vs. Professor Salaries") %>%
p_wage_dy dyAxis("x", drawGrid = FALSE) %>%
dySeries("Mean_Wage", label = "Mean") %>%
dySeries("Median_Wage", label = "Median") %>%
dySeries("Professor_Wage", label = "Professor", color = "red", strokeWidth = 2) %>%
dyRangeSelector(height = 20, strokeColor = "")%>%
dyAnnotation("2009-01-01",
text = "2009",
width=34,
height=20,
tooltip = "Median salary from college survey in 2009 was $113,000")
#with labels
<- p_wage_dy %>%
p_wage_dy2 dyAxis(
"y",
label = "Salaries (USD)") %>%
dyAxis(
"x",
label = "Year", valueRange = c(1991, 2018)) %>%
dyOptions(axisLineWidth = 1.5)%>%
dyEvent("2008-01-01", "Great Recession", labelLoc = "bottom")
p_wage_dy2