I wish to explore secondary student responses to the questions: - Overall, how do you feel about your life? - How happy have you been feeling this week?

And compare difference across race/ethnicity and gender Responses were on a 5 point likert scale, which consisted of 5 emojis ranging from very sad to very happy for the first question. Responses for the second question rangrd from never happy (1) to happy all the time (5).

Data is provided by and property of Youth Truth Student Survey, a national nonprofit, and may only be shared in aggregate for the confidentiality of students and clients.

Our sample consisted of 161,340 secondary students (Grades 6-12) in the 2021-22 school year across 19 states, and 442 schools.(Note: not all students reponded to every question, so the sample is closer to 130,000).

For the confidentiallity of clients and students, I can’t share the original data.

Schools that choose to work with Youth Truth, and opt in to the Emotional and Mental health additional topic administered the questions to students.

Loading various libraries (as I was testing I lost track of which I used and chose not to use, so I kept them all in!)

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(htmlwidgets)
library(stringr)

Loading and prepping data

HS<-read.csv("/Users/valerier/Dropbox (CEP)/YouthTruth/Data and Research/EMH Back to School 2022/R Script and Results/HS/HS_dataclean_2022.csv")
MS<-read.csv("/Users/valerier/Dropbox (CEP)/YouthTruth/Data and Research/EMH Back to School 2022/R Script and Results/MS/MS_dataclean_2022.csv")
HS_subset<- HS[ ,c("em_life","gender", "racen", "em_feel_happy")]

MS_subset<-MS[ ,c("m_em_life","m_gender", "m_racen", "m_em_feel_happy")]

colnames(MS_subset)<-c("em_life","gender", "racen", "em_feel_happy")

Secondary<-rbind(HS_subset,MS_subset)

Secondary<-na.omit(Secondary)

summary(Secondary)
##     em_life          gender          racen        em_feel_happy  
##  Min.   :1.000   Min.   : 1.00   Min.   : 1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.: 1.00   1st Qu.: 1.000   1st Qu.:3.000  
##  Median :4.000   Median : 2.00   Median : 2.000   Median :4.000  
##  Mean   :3.608   Mean   :12.29   Mean   : 7.059   Mean   :3.446  
##  3rd Qu.:4.000   3rd Qu.: 2.00   3rd Qu.: 5.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :99.00   Max.   :99.000   Max.   :5.000
table(Secondary$gender)
## 
##     1     2     3    77    99 
## 70848 64819  5446  6030 13009
table(Secondary$race)
## 
##     1     2     3     4     5     6     7     8     9    10    77    99 
## 44233 60714  8426  2977 11343  1630  8498  4719  9262  1353  1166  5831
#removing n/as, skips, prefer not to say. etc. 

Secondary<-Secondary %>% filter(gender != 77 & gender !=99)
Secondary<-Secondary %>% filter(racen != 77 & racen !=99 & racen !=9)
table(Secondary$gender)
## 
##     1     2     3 
## 65802 61187  4975
table(Secondary$racen)
## 
##     1     2     3     4     5     6     7     8    10 
## 41515 55840  7844  2734 10216  1511  6943  4154  1207
dim(Secondary)
## [1] 131964      4
#replacing gender codes with names so it is easier to highlight

Secondary<-Secondary %>%
            mutate(gender=replace(gender,gender==1, "Boy/Man"))
Secondary<-Secondary %>%
            mutate(gender=replace(gender,gender==2, "Girl/Woman"))
Secondary<-Secondary %>%
            mutate(gender=replace(gender,gender==3, "Non-Binary"))

Creating a scatter bubble plot. I wish to explore the linear relationship between these two questions, as well as the difference in response distribution by gender. Note that x and y data are discrete likert scales, but I’ve added dodge/jitter so the points do not overlap and are easier to see. The size of the bubbles represents the proportion by gender that selected that cross-section of responses.

#Manually add our YT colors for our pallette
YTPalette<-c("#245971", "#f99c25","#b0c5cc")

set.seed(42)
EMH <-ggplot(Secondary, aes(x=em_life, y=em_feel_happy, group= gender, color=factor(gender)))+
  geom_count(alpha=.7, 
             position=position_jitterdodge(jitter.width = 0 , 
                                           jitter.height = .2, 
                                           dodge.width = .2), 
             aes(size=after_stat(prop), group = factor(gender)))+
  scale_size(range=c(2,12))+
  xlab(str_wrap("Overall, how do you feel about your life?")) + 
  ylab(str_wrap("How happy have you been feeling this week?")) +
  labs(title="Secondary Student Mental Health", subtitle= "proportion of each gender that selected each likert response combination", colour= "Gender", shape= "Proportion of students by gender", caption = "Source:YouthTruth Student Survey")+
  coord_equal()+
  scale_fill_manual(values= YTPalette,labels = c("Boy/Man", "Girl/Woman","Non-Binary"),aesthetics = "colour")+
  guides(colour = guide_legend(override.aes = list(size=5)), size = guide_legend("Prop", override.aes=list(col="#0e3051")))+
  theme_bw()+
  theme(plot.title = element_text(family = "Helvetica", colour = "#0e3051", face = "bold", size = (15)),
        plot.subtitle = element_text(family = "Helvetica", colour = "#0e3051", face = "italic", size = (8)),
        legend.title = element_text(colour = "#245971", face = "bold", family = "Helvetica", , size = (10)),
        legend.text = element_text(face = "italic", colour = "#0e3051", family = "Helvetica"),
        axis.title = element_text (family = "Helvetica", size = (12), colour = "#245971"),
        axis.text = element_text(family = "Courier", colour = "#245971", size = (12)))




EMH

#adjusting height and width to keep the graph a square
#adding hover for only gender and proportion (I spent hours trying to make them neater, but alas, the way I calculated prop does not make it straight foward)
#removing legend as points are labeled
#removing zoom as we want to keep it a square and zooming in and out are not helpful
#adding info from caption and subtitle back into plot

EMHplotly<-ggplotly(EMH, width = 450, height = 450, tooltip= c("group", "prop"))%>% 
  layout(showlegend = FALSE, title = list(text = paste0('Secondary Student Mental Health',
                                    '<br>',
                                    '<sup>',
                                    'Likert response combination by gender; Source:YouthTruth',
                                    '</sup>')))%>%
  config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d"))%>%
  highlight(on = "plotly_hover")

  
 
 EMHplotly
saveWidget(as_widget(EMHplotly), "EMHplotly.html")

save(EMHplotly,file="EMHplotly.rda")