::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE) knitr
Assignment 4 - Tables
Data Source
The UFO data set contains over 80,000 reports of UFO sightings over the last century. This data includes entries where the location of the sighting was not found or blank (0.8146%) or have an erroneous or blank time (8.0237%). (Kaggle Link)
What I want to convey
Just like the mapping one, I want to convey that just because a state has many sightings it doesn’t mean that they have a higher duration (in seconds) of sightings. Also that despite previous beliefs, states on the coast have more sightings than the midwest states like Nebraska or Oklahoma.
Functionality in the table
I added trend lines for the overall sightings and average seconds duration over the years of the data set. Minutes was not included in the trend lines because the same line is created as the one for seconds, just on a smaller scale.
Formatting in the table
The table does not include a year date since every number shown is from 2014, and the table includes a minutes column because we are displaying average number of sections a sighting had and having a unit of minutes makes the data easier to understand and digest.
The font size is 18 to allow everything to fit in one screen without having to scroll, and a header was added to note that the data is from 2014.
Code
# load data set and packages
library(dplyr)
library(mapproj)
library(tidyverse)
library(usdata)
library(ggplot2)
library(DT)
library(sparkline)
library(kableExtra)
setwd("~/Desktop/STAA566/Assignment4")
sparkline(0)
<- read.csv("complete.csv", header = T)
ufo # grab only data points for the US
<- ufo[ufo$country == "us",]
ufo # remove data points without a state & duration
<- ufo[!(is.na(ufo$state) | ufo$state ==""),]
ufo <- ufo[!(is.na(ufo$duration..seconds.) | ufo$duration..seconds. ==""),]
ufo # convert state to upper case - for matching on US Map data later
$state <- toupper(ufo$state)
ufo# get data summarizations
<- ufo %>%
ufoSummary group_by(state, year) %>%
summarise(Total = n(), AvgDurSeconds = mean(duration..seconds.),
AvgDurMin = mean(duration..seconds. / 60))
<- ufoSummary[ufoSummary$year == 2014,]
ufoSummary2014
# double check the lift join worked as expected
# us_states_ufo %>% head(n=14)
Create and display table
for(s in ufoSummary$state){
# vector of data for figure for sightings
<- ufoSummary %>%
state_sightings filter(state==s) %>%
arrange(year) %>%
pull(Total)
# add figure to data.frame
which(ufoSummary2014$state == s),"plt_sightings"] <- spk_chr(state_sightings)
ufoSummary2014[
# vector of data for figure for avg seconds
<- ufoSummary %>%
state_avgseconds filter(state==s) %>%
arrange(year) %>%
pull(AvgDurSeconds)
# add figure to data.frame
which(ufoSummary2014$state == s),"plt_avgseconds"] <- spk_chr(state_avgseconds)
ufoSummary2014[
}
<- ufoSummary2014 %>%
ktable select(state, Total, plt_sightings, AvgDurSeconds,
%>%
plt_avgseconds, AvgDurMin) kbl(escape = FALSE,
col.names = c("State", "Total Sightings",
"Sightings Trend",
"Avg Sighting Duration In Seconds",
"Second Duration Trend",
"Avg Sighting Duration In Minutes"),
align = c("l","r","r","r","r","r","r"),
digits = 1,
caption = "UFO Data by US State for 2014") %>%
kable_styling(font_size=18, fixed_thead = TRUE)%>%
gsub("font-size: initial !important;",
"font-size: 20pt !important;",.)
ktable
State | Total Sightings | Sightings Trend | Avg Sighting Duration In Seconds | Second Duration Trend | Avg Sighting Duration In Minutes |
---|---|---|---|---|---|
AK | 23 | 1246.1 | 20.8 | ||
AL | 47 | 502.9 | 8.4 | ||
AR | 9 | 272.0 | 4.5 | ||
AZ | 102 | 707.5 | 11.8 | ||
CA | 291 | 543.1 | 9.1 | ||
CO | 42 | 779.1 | 13.0 | ||
CT | 22 | 619.9 | 10.3 | ||
DE | 5 | 271.0 | 4.5 | ||
FL | 246 | 624.8 | 10.4 | ||
GA | 66 | 724.7 | 12.1 | ||
HI | 16 | 930.0 | 15.5 | ||
IA | 14 | 113.7 | 1.9 | ||
ID | 25 | 222.5 | 3.7 | ||
IL | 29 | 344.8 | 5.7 | ||
IN | 32 | 517.4 | 8.6 | ||
KS | 12 | 1177.2 | 19.6 | ||
KY | 28 | 669.2 | 11.2 | ||
LA | 25 | 387.9 | 6.5 | ||
MA | 31 | 799.2 | 13.3 | ||
MD | 32 | 303.2 | 5.1 | ||
ME | 14 | 1313.6 | 21.9 | ||
MI | 32 | 254.8 | 4.2 | ||
MN | 24 | 331.8 | 5.5 | ||
MO | 29 | 937.0 | 15.6 | ||
MS | 11 | 192.7 | 3.2 | ||
MT | 20 | 934.3 | 15.6 | ||
NC | 69 | 330.2 | 5.5 | ||
ND | 1 | 600.0 | 10.0 | ||
NH | 21 | 505.2 | 8.4 | ||
NJ | 38 | 401.9 | 6.7 | ||
NM | 40 | 421.3 | 7.0 | ||
NV | 30 | 1116.2 | 18.6 | ||
NY | 58 | 290.7 | 4.8 | ||
OH | 61 | 473.2 | 7.9 | ||
OK | 18 | 1483.5 | 24.7 | ||
OR | 58 | 763.6 | 12.7 | ||
PA | 73 | 576.7 | 9.6 | ||
RI | 4 | 240.0 | 4.0 | ||
SC | 53 | 581.1 | 9.7 | ||
SD | 3 | 440.0 | 7.3 | ||
TN | 45 | 834.0 | 13.9 | ||
TX | 73 | 805.1 | 13.4 | ||
UT | 30 | 1192.5 | 19.9 | ||
VA | 56 | 599.1 | 10.0 | ||
VT | 7 | 660.3 | 11.0 | ||
WA | 99 | 2425.0 | 40.4 | ||
WI | 30 | 1614.3 | 26.9 | ||
WV | 22 | 1727.1 | 28.8 | ||
WY | 3 | 2000.0 | 33.3 |