Pre-Load useful R packages.
This plot draws data from the US government website for the Mine Safety and Health Administrations.
The data for fatalities can be found at: https://arlweb.msha.gov/stats/centurystats/coalstats.asp https://arlweb.msha.gov/stats/centurystats/mnmstats.asp
Legislative events related to miner safety were taken from: https://www.msha.gov/about/history
Data for disasters with more than 5 people killed was taken from: https://wwwn.cdc.gov/NIOSH-Mining/MMWC/MineDisasters/Table
Read in the data for Miner fatalities from 1900 to present for both Coal mines and Metal/NonMetal Mines. Calculate the fatality rate per 1000 miners employed that year to account for decrease in the number of workers in this industry.
mines <- read.csv("miners.csv")
mines %<>% mutate(MiningSector=as.factor(MiningSector))
mines <- mines %>% mutate(Miners=str_replace(Miners,",",""),Fatalities=str_replace(Fatalities,",",""),Miners=str_replace(Miners,",",""),Fatalities=str_replace(Fatalities,",","")) %>% mutate(Miners=as.integer(Miners),Fatalities=as.integer(Fatalities))
mines %<>% mutate(perworkerfatality=1000.0*Fatalities/Miners)
#missing data for Metal and Mon Metal mines
# mines %>% filter(is.na(Miners))
Read in a list of major actions by the federal government related to mine safety.
minehistory <- read.csv("MiningSafetyHistory.csv")
Create a plot that shows the rate of fatalities for coal mines versus non-coal mines (ie. metal, stone) and labels important historic points. The 1969 legislation is highlighted because it is considered an important step because it created criminal and monitary punishments for safety violators.
p_fatalities <- ggplot()+
geom_line(data=mines,mapping=aes(x=Year,y=perworkerfatality,
color=MiningSector),size=1.25)+
theme_classic()+
geom_textvline(data=minehistory %>% filter(Year!=1969),
mapping=aes(xintercept=Year,label=Event),
color="darkgrey",size=2.62,linetype="dotted",hjust=1)+
geom_textvline(data=minehistory %>% filter(Year==1969),
mapping=aes(xintercept=Year,label=Event),
color="red",size=2.62,linetype="dotted",hjust=1)
eventyear <- c(minehistory$Year,1900,2021)
p_fatalies_final <- p_fatalities+
theme(legend.position=c(0.2,.08),
legend.title = element_blank(),
legend.background = element_blank(),
axis.text.x = element_text(angle = 90,vjust=0.5))+
ylab("Fatalities per 1000 Workers")+
scale_x_continuous(breaks=eventyear)+
coord_cartesian(ylim=c(0.15,5.5),xlim=c(1905,2018))+
scale_color_discrete(labels=c("Coal Mines","Metallic/NonMetallic Mines"))+
ggtitle("A History of Mine Fatality Rates in the USA since 1900")
p_fatalies_final
## Warning: Removed 31 row(s) containing missing values (geom_path).
ggsave("mine_fatalities.png",plot=p_fatalies_final)
## Saving 7 x 5 in image
## Warning: Removed 31 row(s) containing missing values (geom_path).
Let’s also look at the types of mining accidents and whether their distribution among categories has changed since the sixties. The data is filtered from 1900 to present, giving about 70 years of data before and after the legislation.
disasters <- read.csv("MiningDisasters.csv")
disasters %<>% mutate(Mining.Sector=str_to_lower(Mining.Sector))
disasters %<>% mutate(Date=parse_date_time(Date,"mdy")) %>% mutate(Year=year(Date))
disasters %<>% mutate(across(c(Mine.Type,State,City,Mining.Sector,Mine.Type),as.factor))
disasters %>% head()
## Date Mine.Name City State Killed Product
## 1 1839-03-17 Black Heath Richmond VA 53 Coal
## 2 1844-06-14 Black Heath Richmond VA 11 Coal
## 3 1846-01-11 No. 1 Carbondale PA 14 Coal
## 4 1847-02-18 Spencer Pottsville PA 7 Coal
## 5 1850-11-29 Cox's Pit, Clover Hill Winterpock VA 7 Coal
## 6 1854-05-14 Chesterfield New Richmond VA 20 Coal
## Accident.Type Mine.Type Mining.Sector Year
## 1 Explosion UG coal 1839
## 2 Explosion UG coal 1844
## 3 Cave-in UG coal 1846
## 4 Explosion UG coal 1847
## 5 Explosion UG coal 1850
## 6 Explosion UG coal 1854
Using String Searches, let’s try to lump Accident Types into general categories based on those named in https://wwwn.cdc.gov/NIOSH-Mining/MMWC/MineDisasters/AccidentType#
disasters %<>% mutate(AccidentCategory=NA)
disasters %<>% mutate(Accident.Type=str_to_lower(Accident.Type))
# classify with categories like https://wwwn.cdc.gov/NIOSH-Mining/MMWC/MineDisasters/AccidentType#
#can get a list of all accident types
# x <- unique(disasters$Accident.Type)
#can test searching for different string patterns in this list
# x[grepl("roof",x)]
disasters %<>% mutate(AccidentCategory=ifelse(grepl('explosion|detonat|blast|explosives|exposive',disasters$Accident.Type),"Explosion",AccidentCategory))
disasters %<>% mutate(AccidentCategory=ifelse(grepl('fire',disasters$Accident.Type),"Fires",AccidentCategory))
disasters %<>% mutate(AccidentCategory=ifelse(grepl('flood|water|inund|dam fail|snow',disasters$Accident.Type),"Water",AccidentCategory))
disasters %<>% mutate(AccidentCategory=ifelse(grepl('suff|smoke|fumes|sulfide gas|inrush hot gases',disasters$Accident.Type),"Suffocation",AccidentCategory))
disasters %<>% mutate(AccidentCategory=ifelse(grepl('haul|hoist|drawbar|cage|man-car|crosshead',disasters$Accident.Type),"Machinery/Equipment fall",AccidentCategory))
disasters %<>% mutate(AccidentCategory=ifelse(grepl('cave-in|roof|fall of|fall and slide|slide of bank|snow|collapse load',disasters$Accident.Type),"Ground Fall",AccidentCategory))
# disasters %<>% mutate(AccidentCategory=ifelse(grepl('bump',disasters$Accident.Type),"Bump/Collapse with Bump",AccidentCategory))
disasters %<>% mutate(AccidentCategory=as.factor(AccidentCategory))
disasters %>% select(Accident.Type,AccidentCategory) %>% head()
## Accident.Type AccidentCategory
## 1 explosion Explosion
## 2 explosion Explosion
## 3 cave-in Ground Fall
## 4 explosion Explosion
## 5 explosion Explosion
## 6 explosion Explosion
Keep the type of mine consistent with our earlier plot.
disasters %<>% mutate(CoalNonCoal=ifelse(disasters$Mining.Sector=='coal',"Coal Mines","Other Mines"))
Divide the data to times before and after the major safety legistation of the 60s.
disasters %<>% mutate(beforeCoalAct=cut(year(Date),breaks=c(1800.,1969.,2022.)))
disasters %<>% mutate(beforeMetalAct=cut(year(Date),breaks=c(1800.,1966.,2022.)))
Plot the distributions of accident types
disasters %>% head()
## Date Mine.Name City State Killed Product
## 1 1839-03-17 Black Heath Richmond VA 53 Coal
## 2 1844-06-14 Black Heath Richmond VA 11 Coal
## 3 1846-01-11 No. 1 Carbondale PA 14 Coal
## 4 1847-02-18 Spencer Pottsville PA 7 Coal
## 5 1850-11-29 Cox's Pit, Clover Hill Winterpock VA 7 Coal
## 6 1854-05-14 Chesterfield New Richmond VA 20 Coal
## Accident.Type Mine.Type Mining.Sector Year AccidentCategory CoalNonCoal
## 1 explosion UG coal 1839 Explosion Coal Mines
## 2 explosion UG coal 1844 Explosion Coal Mines
## 3 cave-in UG coal 1846 Ground Fall Coal Mines
## 4 explosion UG coal 1847 Explosion Coal Mines
## 5 explosion UG coal 1850 Explosion Coal Mines
## 6 explosion UG coal 1854 Explosion Coal Mines
## beforeCoalAct beforeMetalAct
## 1 (1.8e+03,1.97e+03] (1.8e+03,1.97e+03]
## 2 (1.8e+03,1.97e+03] (1.8e+03,1.97e+03]
## 3 (1.8e+03,1.97e+03] (1.8e+03,1.97e+03]
## 4 (1.8e+03,1.97e+03] (1.8e+03,1.97e+03]
## 5 (1.8e+03,1.97e+03] (1.8e+03,1.97e+03]
## 6 (1.8e+03,1.97e+03] (1.8e+03,1.97e+03]
disasters %>% filter(year(Date)>=1900) %>% group_by(CoalNonCoal) %>% summarise(z=sum(Killed))
## # A tibble: 2 × 2
## CoalNonCoal z
## <chr> <int>
## 1 Coal Mines 11703
## 2 Other Mines 1149
noncoaldisaster <- disasters %>% filter(year(Date)>=1900,CoalNonCoal=="Other Mines")
noncoaldisaster<- noncoaldisaster%>% group_by(beforeMetalAct,AccidentCategory) %>% summarise(Deaths=sum(Killed)) %>% ungroup() %>% complete(beforeMetalAct,AccidentCategory,fill=list(N=0,Deaths=0))
## `summarise()` has grouped output by 'beforeMetalAct'. You can override using
## the `.groups` argument.
coaldisaster <- disasters %>% filter(year(Date)>=1900,CoalNonCoal=="Coal Mines")
coaldisaster<- coaldisaster %>% group_by(beforeCoalAct,AccidentCategory) %>% summarise(Deaths=sum(Killed)) %>% ungroup() %>% complete(beforeCoalAct,AccidentCategory,fill=list(N=0,Deaths=0))
## `summarise()` has grouped output by 'beforeCoalAct'. You can override using the
## `.groups` argument.
p_coal_accidents <- ggplot(data=coaldisaster %>% ungroup() %>% complete()
)+geom_col(mapping=aes(x=AccidentCategory,y=Deaths,fill=beforeCoalAct),position="dodge")+
ggtitle("Coal Mine Disasters from 1900 to 2021")+
theme_classic()+
theme(legend.position=c(0.65,.90),
legend.title = element_blank(),
legend.background = element_blank(),
axis.text.x = element_text(angle = -10,hjust=0.5,vjust=0.0))+
ylab("Total Fatalities")+xlab("Accident Type")+
scale_fill_discrete(labels=c("Before 1969 Coal Mine Health and Safety Act","After Act"))
p_metal_accidents <- ggplot(data=noncoaldisaster %>% ungroup() %>% complete()
)+geom_col(mapping=aes(x=AccidentCategory,y=Deaths,fill=beforeMetalAct),position="dodge")+
ggtitle("Metallic/NonMetallic Mine Disasters from 1900 to 2021")+
theme_classic()+
theme(legend.position=c(0.65,.90),
legend.title = element_blank(),
legend.background = element_blank(),
axis.text.x = element_text(angle = -10,hjust=0.5,vjust=0.0))+
ylab("Total Fatalities")+xlab("Accident Type")+
scale_fill_discrete(labels=c("Before 1966 Metal and Nonmetallic Mine Safety Act","After Act"))
p_metal_accidents
p_coal_accidents
ggsave("coal_mine_disasters.png",plot=p_coal_accidents)
## Saving 7 x 5 in image
ggsave("non_coal_mine_disasters.png",plot=p_metal_accidents)
## Saving 7 x 5 in image
blankPlot <- ggplot()+geom_blank(aes(1,1))+
theme(
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank()
)
library(ggpubr)
combinedplot <- grid.arrange(p_fatalies_final,p_coal_accidents+rremove("xlab"),p_metal_accidents+rremove("xlab"),nrow=3,heights=c(2,1,1))
## Warning: Removed 31 row(s) containing missing values (geom_path).
ggsave("Homework1Figure.png",combinedplot,width=6,height=10)
finalimage