Looking at the Boston dataset from the MASS package in R—examining crime rate as a function of various predictors:
zn—proportion of residential land zoned for lots < 25k sqft indus—proportion of non-retail business acres per town rm—avg num rooms per dwelling age—proportion of owner-occupied units built before 1940 dis—weighted mean of distances to 5 Boston job centers rad—index of accessibility to radial highways ptratio—pupil-teacher ratio lstat—lower socioeconomic status medv—median value of owner-occupied homes in thousands of dollars
Getting a correlation plot
library(tidyverse)
library(corrplot)
library(plotly)
library(htmlwidgets)
library(heatmaply)
library(MASS)
# help(Boston)
boston <- Boston[-c(4,5,10,12)]
boston.htmplycor <- heatmaply_cor(
cor(boston),
symm = TRUE,
dendrogram = "none"
)
boston.htmplycor
Linear regression with crime rate as the outcome variable
lm.boston <- lm(crim ~ ., data = boston)
summary(lm.boston)
##
## Call:
## lm(formula = crim ~ ., data = boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.294 -2.150 -0.312 1.078 74.255
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.07465 5.67146 0.895 0.37134
## zn 0.04298 0.01827 2.352 0.01905 *
## indus -0.13573 0.07113 -1.908 0.05695 .
## rm 0.65406 0.60784 1.076 0.28243
## age -0.01108 0.01731 -0.640 0.52233
## dis -0.83741 0.26782 -3.127 0.00187 **
## rad 0.52558 0.04606 11.409 < 2e-16 ***
## ptratio -0.16639 0.17290 -0.962 0.33633
## lstat 0.14765 0.07558 1.954 0.05130 .
## medv -0.19646 0.05718 -3.436 0.00064 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.472 on 496 degrees of freedom
## Multiple R-squared: 0.444, Adjusted R-squared: 0.4339
## F-statistic: 44.01 on 9 and 496 DF, p-value: < 2.2e-16
# confint(lm.boston)
# coef(lm.boston)
results <- data.frame(confint(lm.boston),coef(lm.boston))
colnames(results) = c("Lower CI", "Upper CI", "Estimate")
results2 <- results[-1,]
results2$name <- substr(rownames(results2), 1,9)
results2$sig <- (results2$`Lower CI` > 0 | results2$`Upper CI` < 0)
results2
## Lower CI Upper CI Estimate name sig
## zn 0.0070807145 0.078872844 0.04297678 zn TRUE
## indus -0.2754862079 0.004028157 -0.13572903 indus FALSE
## rm -0.5401950242 1.848307236 0.65405611 rm FALSE
## age -0.0450919960 0.022927942 -0.01108203 age FALSE
## dis -1.3636145492 -0.311203538 -0.83740904 dis TRUE
## rad 0.4350687032 0.616081455 0.52557508 rad TRUE
## ptratio -0.5060962772 0.173309741 -0.16639327 ptratio FALSE
## lstat -0.0008383331 0.296141452 0.14765156 lstat FALSE
## medv -0.3088016641 -0.084115698 -0.19645868 medv TRUE
Making the ggplot
boston.ggp <- ggplot(results2,
aes(x = Estimate,
y = name,
xmin = `Lower CI`,
xmax = `Upper CI`,
color = sig)) +
geom_vline(xintercept = 0,
color = "black",
linetype = 2) +
geom_errorbarh(height = 0.25) +
geom_point() +
theme_minimal() +
theme(axis.title.y = element_blank(),
legend.position = "none") +
#scale_x_continuous(breaks = seq(-21,2,0.1)) +
scale_color_manual(values = c("black","red"))
# boston.ggp
Converting it to plotly
boston.ggp.plotly <- ggplotly(boston.ggp, tooltip = c("Lower CI", "Estimate", "Upper CI"))
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
boston.ggp.plotly
Combining the 2 plots, side by side, into 1 plot
boston.htmplycor2 <- boston.htmplycor %>%
layout(
yaxis = list(showticklabels = FALSE)
)
boston.combo <- subplot(list(boston.ggp.plotly,boston.htmplycor2),
margin = 0.03,
nrows = 1,
widths = c(0.5,0.5))
boston.combo
Saving the plot
saveWidget(as_widget(boston.combo), "boston.combo.html")
save(boston.combo, file="boston.combo.rda")