I am interested in looking at how different groups of secondary students experienced bullying in the last 3 years.

Data are from the 2019-20, 2020-21, and 2021-22 school years and come from 30 states, 782 schools, 203 districts, and over 350K students.

I want to summarize their responses to the question “Have you been bullied this school year” and to probe further into how that bullying occurred (physically, verbally, digitally, etc.). These were asked through a series of yes/no questions. As such, I will use a chi-square test to compare their results.

Load libraries

library(data.table)
library(fastDummies)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.6     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between()   masks data.table::between()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::first()     masks data.table::first()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::last()      masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(DT)
library("scales")
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Load the two input tables. Both CSVs sit in the same folder, so the folder
# is named once and each file path is built from it with file.path().
bullying_data_dir <- "/Users/valerier/Dropbox (CEP)/YouthTruth/Data and Research/Bullying Data"
results_dem <- read.csv(file.path(bullying_data_dir, "results_demographics.csv"))
dem_props <- read.csv(file.path(bullying_data_dir, "dem_props.csv"))
# Inspect the column names to see which ones will need friendlier labels
colnames(dem_props)
##  [1] "X"                   "bully_yes..pos"      "bully_no..pos"      
##  [4] "bully_idk..pos"      "bully_phy..pos"      "bully_verb..pos"    
##  [7] "bully_soc..pos"      "bully_elec..pos"     "hardbest_bully..pos"
## [10] "total"
# Inspect the demographic codes stored in column X
dem_props[["X"]]
##  [1] "lgbtqia_0" "lgbtqia_1" "gender_1"  "gender_2"  "gender_3"  "gender_77"
##  [7] "orient_1"  "orient_2"  "orient_3"  "orient_77" "orient_88" "trans_0"  
## [13] "trans_1"   "poc_0"     "poc_1"     "racen_1"   "racen_2"   "racen_3"  
## [19] "racen_4"   "racen_5"   "racen_6"   "racen_7"   "racen_8"   "racen_10" 
## [25] "year_5"    "year_6"    "year_7"    "year_8"    "year_9"    "year_10"  
## [31] "year_11"   "year_12"   "year_13"   "year_77"
# Human-readable labels for the demographic codes held in dem_props$X.
# The vector is keyed by code so that each label follows its code rather than
# its row position; reordering or filtering dem_props cannot mislabel a row.
demographic_labels <- c(
  lgbtqia_0 = "Does not identify as LGBTQ+",
  lgbtqia_1 = "Identifies as LGBTQ+",
  gender_1  = "Boy/Man",
  gender_2  = "Girl/Woman",
  gender_3  = "Non-binary or gender non-conforming",
  gender_77 = "Prefer to self-describe/identify gender",
  orient_1  = "Gay or Lesbian",
  orient_2  = "Heterosexual or straight",
  orient_3  = "Bisexual",
  orient_77 = "Identifies another way",
  orient_88 = "I am not sure",
  trans_0   = "Not transgender",
  trans_1   = "Transgender",
  poc_0     = "Does not identify as POC",
  poc_1     = "Identifies as POC",
  racen_1   = "Hispanic or Latina/o/x",
  racen_2   = "White",
  racen_3   = "Black or African American",
  racen_4   = "American Indian, Alaska Native, or Indigenous",
  racen_5   = "Asian or Asian American",
  racen_6   = "Native Hawaiian or Pacific Islander",
  racen_7   = "Race or ethnicity not included above (optional, please describe)",
  racen_8   = "Multiracial and/or Multi-ethnic",
  racen_10  = "Middle Eastern or North African",
  year_5    = "5th Grade",
  year_6    = "6th Grade",
  year_7    = "7th Grade",
  year_8    = "8th Grade",
  year_9    = "9th Grade",
  year_10   = "10th Grade",
  year_11   = "11th Grade",
  year_12   = "12th Grade",
  year_13   = "13th Grade",
  year_77   = "Other Grade"
)

# as.character() guards against X having been read in as a factor, in which
# case indexing would use the integer level codes instead of the code names.
demographic_codes <- as.character(dem_props$X)

# Stop on a code with no label instead of silently producing NA labels.
unmapped_codes <- setdiff(demographic_codes, names(demographic_labels))
if (length(unmapped_codes) > 0) {
  stop(
    "No label defined for demographic code(s): ",
    paste(unmapped_codes, collapse = ", "),
    call. = FALSE
  )
}

# X is the label vector in dem_props row order; it replaces the raw codes.
X <- unname(demographic_labels[demographic_codes])
dem_props$X <- X
# Take the overall n and proportion (columns 2-3) for rows 4-37 of the results
# data frame and append them as two extra columns of dem_props
overall_rows <- 4:37
overall_cols <- 2:3
overall <- results_dem[overall_rows, overall_cols]
dem_props <- cbind(dem_props, overall)

#dem_props[,c(2:9,12)]<-percent(as.numeric(unlist(dem_props[,c(2:9,12)])))

Converting to percent

# Format the eight bullying-response proportions (columns 2-9) and the overall
# proportion (column 12) as percent strings. A fixed accuracy of 0.01 gives
# every cell two decimal places; without it percent() picks a precision per
# column and the table mixes values such as "10.1482%" and "42.277%".
for (i in c(2:9, 12)) {
  dem_props[[i]] <- percent(as.numeric(dem_props[[i]]), accuracy = 0.01)
}
# Static (kableExtra) version of the proportions table.
# Column labels, in the order of dem_props once "total" has been dropped.
kbl_col_labels <- c(
  "Demographic",
  "Yes",
  "No",
  "IDK",
  "Physically",
  "Verbally",
  "Socially",
  "Cyber",
  "Bullying is an Obstacle to Learning",
  "Total",
  "Overall Proportion"
)

# Grouped header row: label = number of columns spanned (a bare label spans 1).
kbl_header_spans <- c(
  " " = 1,
  "Have you been bullied in school?" = 3,
  "If so, how?" = 4,
  " ",
  "Sample Size" = 2
)

kbl_props <- kbl(
  select(dem_props, -"total"),
  col.names = kbl_col_labels,
  row.names = FALSE,
  digits = 2,
  caption = "Source:Youth Truth Student Survey"
)
kbl_props <- add_header_above(kbl_props, kbl_header_spans)
kbl_props <- kable_styling(kbl_props, "striped", font_size = 12, full_width = TRUE)

First iteration. It is not easy to read.

# Render the static table with the "paper" theme at its natural width
kable_paper(kbl_props, full_width = FALSE, html_font = "helvetica")
Source:Youth Truth Student Survey
Have you been bullied in school?
If so, how?
Sample Size
Demographic Yes No IDK Physically Verbally Socially Cyber Bullying is an Obstacle to Learning Total Overall Proportion
Does not identify as LGBTQ+ 10.1482% 81.5813% 7.9081% 42.277% 86.109% 71.982% 38.6587% 8.328% 87661 24.9133%
Identifies as LGBTQ+ 23.6186% 63.5635% 13.0654% 40.167% 92.607% 78.261% 46.1632% 15.806% 24341 6.9177%
Boy/Man 12.0624% 79.1541% 8.2982% 51.689% 87.381% 65.093% 30.7162% 10.300% 127056 36.1094%
Girl/Woman 15.2318% 74.8695% 9.9381% 36.502% 90.187% 81.616% 48.9324% 11.756% 120340 34.2007%
Non-binary or gender non-confirming 32.3846% 53.1362% 15.3446% 42.484% 94.173% 79.791% 45.4089% 23.681% 7238 2.0570%
Prefer to self-describe/identify gender 27.6602% 56.9519% 15.3782% 51.596% 92.328% 78.470% 47.9074% 22.852% 9501 2.7002%
Gay or Lesbian 25.0104% 61.7989% 13.2075% 39.516% 93.101% 73.757% 42.3260% 16.558% 4814 1.3681%
Heterosexual or straight 10.2411% 82.0049% 7.3997% 40.563% 87.036% 72.824% 39.0536% 7.430% 85840 24.3958%
Bisexual 22.6567% 66.1493% 11.2916% 37.669% 92.308% 80.617% 47.9152% 13.451% 14883 4.2298%
Identifies another way 19.8087% 67.3513% 12.7790% 47.731% 91.116% 76.598% 44.9954% 15.232% 12757 3.6255%
I am not sure 13.5658% 70.7154% 15.8830% 44.030% 86.746% 71.180% 40.2500% 13.655% 10497 2.9833%
Not transgender 11.8188% 79.1235% 8.8560% 40.686% 88.013% 75.043% 41.3618% 9.128% 90432 25.7008%
Transgender 32.8041% 51.0485% 15.7355% 45.345% 92.151% 76.107% 47.0213% 23.394% 3338 0.9487%
Does not identify as POC 15.8138% 74.3543% 9.6508% 40.130% 90.388% 75.957% 41.0903% 12.126% 113243 32.1837%
Identifies as POC 13.5536% 76.7444% 9.1973% 47.698% 87.359% 73.217% 43.3728% 11.877% 70985 20.1740%
Hispanic or Latina/o/x 10.2454% 81.1847% 8.3231% 45.068% 87.274% 72.316% 40.4451% 8.582% 74482 21.1678%
White 17.0122% 73.1412% 9.7299% 39.700% 91.445% 77.180% 41.4482% 12.228% 102268 29.0646%
Black or African American 13.4229% 77.4399% 8.4705% 48.160% 85.893% 71.044% 39.6853% 12.462% 19273 5.4774%
American Indian, Alaska Native, or Indigenous 20.6668% 67.3431% 11.3014% 54.772% 85.686% 73.191% 46.5675% 20.475% 4829 1.3724%
Asian or Asian American 11.5228% 78.5679% 9.9692% 40.987% 88.524% 68.225% 37.4242% 11.296% 20082 5.7073%
Native Hawaiian or Pacific Islander 14.7556% 73.2000% 11.0251% 48.428% 86.337% 73.684% 46.7949% 13.329% 2250 0.6395%
Race or ethnicity not included above (optional, please describe) 19.6409% 68.8833% 11.4036% 53.947% 90.537% 76.540% 42.6780% 18.273% 17820 5.0645%
Multiracial and/or Multi-ethnic 17.4176% 73.0157% 9.3815% 43.359% 90.557% 76.917% 43.0825% 9.797% 16202 4.6046%
Middle Eastern or North African 17.9402% 69.4906% 11.4744% 49.363% 83.728% 70.336% 44.5946% 15.561% 1806 0.5133%
5th Grade 19.6020% 67.2637% 14.2264% 60.969% 82.888% 65.964% 32.1875% 31.230% 2010 0.5712%
6th Grade 20.6797% 66.8419% 13.0294% 58.867% 89.190% 73.105% 35.9539% 24.607% 29483 8.3791%
7th Grade 18.5960% 68.9209% 12.8541% 53.010% 89.864% 73.705% 37.7432% 18.692% 45101 12.8177%
8th Grade 16.7901% 71.7275% 11.5168% 47.231% 90.043% 73.294% 39.5434% 13.573% 45783 13.0116%
9th Grade 13.8786% 76.5809% 9.1649% 40.667% 90.365% 75.226% 41.4493% 9.281% 46287 13.1548%
10th Grade 12.2889% 79.4117% 7.8987% 34.341% 89.314% 76.064% 44.3513% 7.704% 43039 12.2317%
11th Grade 10.6019% 81.8886% 7.0915% 30.479% 89.050% 78.211% 47.4004% 6.817% 39257 11.1569%
12th Grade 9.7759% 83.2454% 6.5980% 27.632% 87.891% 77.851% 47.3025% 6.471% 34677 9.8552%
13th Grade 25.0000% 58.3333% 15.5172% 40.000% 78.571% 66.667% 45.4545% 21.569% 60 0.0171%
Other Grade 15.9438% 70.8613% 15.9434% 64.751% 85.929% 74.751% 53.0612% 23.034% 3274 0.9305%
# Interactive (DT) version of the proportions table, without the "total"
# column and without row names.
tbl_props <- dem_props %>%
  select(-"total") %>%
  datatable(
    colnames = c(
      "Demographic",
      "Yes - Bullied",
      "No - Bullied",
      "IDK - Bullied",
      "Physically Bullied",
      "Verbally Bullied",
      "Socially Bullied",
      "Cyber Bullied",
      "Bullying is an Obstacle to Learning",
      "Total",
      "Overall Proportion"
    ),
    rownames = FALSE
  ) %>%
  # Columns 2-9 and 11 hold percent strings built earlier in the script.
  # Show them with two decimals and keep the "%" suffix, which a plain
  # formatRound() would drop.
  formatCurrency(
    columns = c(2:9, 11),
    currency = "%",
    before = FALSE,
    digits = 2
  ) %>%
  # Column 10 ("Total") is a count of respondents, so show whole numbers.
  formatRound(columns = 10, digits = 0)

This one is a bit more user-friendly.

# Display the interactive table (top-level auto-print)
tbl_props