I am interested in looking at how different groups of secondary students experienced bullying over the last 3 years.
The data are from the 2019-20, 2020-21, and 2021-22 school years and come from 30 states, 782 schools, 203 districts, and over 350K students.
I want to summarize their responses to the question “Have you been bullied this school year?” and to probe further into how that bullying occurred (physically, verbally, digitally, etc.). These were asked through a series of yes/no questions. As such, I will use a chi-square test to compare their results.
Load libraries
library(data.table)
library(fastDummies)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.6 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(DT)
library("scales")
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Load the two input CSV files used throughout the report.
results_dem <- read.csv(
  file = "/Users/valerier/Dropbox (CEP)/YouthTruth/Data and Research/Bullying Data/results_demographics.csv"
)
dem_props <- read.csv(
  file = "/Users/valerier/Dropbox (CEP)/YouthTruth/Data and Research/Bullying Data/dem_props.csv"
)
# Inspect the column names to see what will need to be renamed.
colnames(dem_props)
## [1] "X" "bully_yes..pos" "bully_no..pos"
## [4] "bully_idk..pos" "bully_phy..pos" "bully_verb..pos"
## [7] "bully_soc..pos" "bully_elec..pos" "hardbest_bully..pos"
## [10] "total"
# Show the current demographic codes, in row order.
dem_props[["X"]]
## [1] "lgbtqia_0" "lgbtqia_1" "gender_1" "gender_2" "gender_3" "gender_77"
## [7] "orient_1" "orient_2" "orient_3" "orient_77" "orient_88" "trans_0"
## [13] "trans_1" "poc_0" "poc_1" "racen_1" "racen_2" "racen_3"
## [19] "racen_4" "racen_5" "racen_6" "racen_7" "racen_8" "racen_10"
## [25] "year_5" "year_6" "year_7" "year_8" "year_9" "year_10"
## [31] "year_11" "year_12" "year_13" "year_77"
# Human-readable label for each demographic code stored in dem_props$X.
# Labels are looked up by code rather than assigned by position, so a change
# in row order (or an unexpected code) cannot silently mislabel a row.
demographic_labels <- c(
  lgbtqia_0 = "Does not identify as LGBTQ+",
  lgbtqia_1 = "Identifies as LGBTQ+",
  gender_1 = "Boy/Man",
  gender_2 = "Girl/Woman",
  gender_3 = "Non-binary or gender non-conforming",
  gender_77 = "Prefer to self-describe/identify gender",
  orient_1 = "Gay or Lesbian",
  orient_2 = "Heterosexual or straight",
  orient_3 = "Bisexual",
  orient_77 = "Identifies another way",
  orient_88 = "I am not sure",
  trans_0 = "Not transgender",
  trans_1 = "Transgender",
  poc_0 = "Does not identify as POC",
  poc_1 = "Identifies as POC",
  racen_1 = "Hispanic or Latina/o/x",
  racen_2 = "White",
  racen_3 = "Black or African American",
  racen_4 = "American Indian, Alaska Native, or Indigenous",
  racen_5 = "Asian or Asian American",
  racen_6 = "Native Hawaiian or Pacific Islander",
  racen_7 = "Race or ethnicity not included above (optional, please describe)",
  racen_8 = "Multiracial and/or Multi-ethnic",
  racen_10 = "Middle Eastern or North African",
  year_5 = "5th Grade",
  year_6 = "6th Grade",
  year_7 = "7th Grade",
  year_8 = "8th Grade",
  year_9 = "9th Grade",
  year_10 = "10th Grade",
  year_11 = "11th Grade",
  year_12 = "12th Grade",
  year_13 = "13th Grade",
  year_77 = "Other Grade"
)

# Fail loudly if the data contains a code that has no label defined.
demographic_codes <- as.character(dem_props$X)
unknown_codes <- setdiff(demographic_codes, names(demographic_labels))
if (length(unknown_codes) > 0) {
  stop(
    "No label defined for demographic code(s): ",
    paste(unknown_codes, collapse = ", "),
    call. = FALSE
  )
}

# X is kept as a standalone vector of labels (one per row of dem_props).
X <- unname(demographic_labels[demographic_codes])
dem_props$X <- X
# Get the overall n and proportion for each demographic from results_dem.
# Rows 4-37 and columns 2-3 are selected by position, so check the shapes
# first: indexing past the end of results_dem would yield all-NA rows, and
# cbind() can recycle a shorter data frame instead of failing.
# NOTE(review): rows 4:37 are assumed to be in the same demographic order as
# the rows of dem_props -- confirm against results_demographics.csv.
overall_rows <- 4:37
stopifnot(
  nrow(results_dem) >= max(overall_rows),
  ncol(results_dem) >= 3,
  nrow(dem_props) == length(overall_rows)
)
overall <- results_dem[overall_rows, 2:3]
dem_props <- cbind(dem_props, overall)
#dem_props[,c(2:9,12)]<-percent(as.numeric(unlist(dem_props[,c(2:9,12)])))
Converting to percent
# Format the proportion columns as percent strings: columns 2-9 hold the
# response shares and column 12 holds the overall share of the sample.
# A fixed accuracy is passed because percent() otherwise chooses the number
# of decimals separately for each column, which gives the table mixed
# precision. The columns are character after this step.
pct_cols <- c(2:9, 12)
dem_props[pct_cols] <- lapply(
  dem_props[pct_cols],
  function(col) percent(as.numeric(col), accuracy = 0.01)
)
# Build the static summary table step by step: drop the "total" column,
# relabel the remaining columns, add grouped header rows, then style it.
kbl_props <- select(dem_props, -total)
kbl_props <- kbl(
  kbl_props,
  col.names = c(
    "Demographic",
    "Yes",
    "No",
    "IDK",
    "Physically",
    "Verbally",
    "Socially",
    "Cyber",
    "Bullying is an Obstacle to Learning",
    "Total",
    "Overall Proportion"
  ),
  row.names = FALSE,
  digits = 2,
  caption = "Source:Youth Truth Student Survey"
)
# Spanning headers: 1 label column, 3 yes/no/IDK columns, 4 "how" columns,
# 1 unlabeled column, and 2 sample-size columns.
kbl_props <- add_header_above(
  kbl_props,
  c(
    " " = 1,
    "Have you been bullied in school?" = 3,
    "If so, how?" = 4,
    " ",
    "Sample Size" = 2
  )
)
kbl_props <- kable_styling(
  kbl_props,
  "striped",
  font_size = 12,
  full_width = TRUE
)
First iteration: this table is not easy to read.
# Render the static table with the "paper" theme at its natural width.
kable_paper(kbl_props, full_width = FALSE, html_font = "helvetica")
Demographic | Yes | No | IDK | Physically | Verbally | Socially | Cyber | Bullying is an Obstacle to Learning | Total | Overall Proportion |
---|---|---|---|---|---|---|---|---|---|---|
Does not identify as LGBTQ+ | 10.1482% | 81.5813% | 7.9081% | 42.277% | 86.109% | 71.982% | 38.6587% | 8.328% | 87661 | 24.9133% |
Identifies as LGBTQ+ | 23.6186% | 63.5635% | 13.0654% | 40.167% | 92.607% | 78.261% | 46.1632% | 15.806% | 24341 | 6.9177% |
Boy/Man | 12.0624% | 79.1541% | 8.2982% | 51.689% | 87.381% | 65.093% | 30.7162% | 10.300% | 127056 | 36.1094% |
Girl/Woman | 15.2318% | 74.8695% | 9.9381% | 36.502% | 90.187% | 81.616% | 48.9324% | 11.756% | 120340 | 34.2007% |
Non-binary or gender non-confirming | 32.3846% | 53.1362% | 15.3446% | 42.484% | 94.173% | 79.791% | 45.4089% | 23.681% | 7238 | 2.0570% |
Prefer to self-describe/identify gender | 27.6602% | 56.9519% | 15.3782% | 51.596% | 92.328% | 78.470% | 47.9074% | 22.852% | 9501 | 2.7002% |
Gay or Lesbian | 25.0104% | 61.7989% | 13.2075% | 39.516% | 93.101% | 73.757% | 42.3260% | 16.558% | 4814 | 1.3681% |
Heterosexual or straight | 10.2411% | 82.0049% | 7.3997% | 40.563% | 87.036% | 72.824% | 39.0536% | 7.430% | 85840 | 24.3958% |
Bisexual | 22.6567% | 66.1493% | 11.2916% | 37.669% | 92.308% | 80.617% | 47.9152% | 13.451% | 14883 | 4.2298% |
Identifies another way | 19.8087% | 67.3513% | 12.7790% | 47.731% | 91.116% | 76.598% | 44.9954% | 15.232% | 12757 | 3.6255% |
I am not sure | 13.5658% | 70.7154% | 15.8830% | 44.030% | 86.746% | 71.180% | 40.2500% | 13.655% | 10497 | 2.9833% |
Not transgender | 11.8188% | 79.1235% | 8.8560% | 40.686% | 88.013% | 75.043% | 41.3618% | 9.128% | 90432 | 25.7008% |
Transgender | 32.8041% | 51.0485% | 15.7355% | 45.345% | 92.151% | 76.107% | 47.0213% | 23.394% | 3338 | 0.9487% |
Does not identify as POC | 15.8138% | 74.3543% | 9.6508% | 40.130% | 90.388% | 75.957% | 41.0903% | 12.126% | 113243 | 32.1837% |
Identifies as POC | 13.5536% | 76.7444% | 9.1973% | 47.698% | 87.359% | 73.217% | 43.3728% | 11.877% | 70985 | 20.1740% |
Hispanic or Latina/o/x | 10.2454% | 81.1847% | 8.3231% | 45.068% | 87.274% | 72.316% | 40.4451% | 8.582% | 74482 | 21.1678% |
White | 17.0122% | 73.1412% | 9.7299% | 39.700% | 91.445% | 77.180% | 41.4482% | 12.228% | 102268 | 29.0646% |
Black or African American | 13.4229% | 77.4399% | 8.4705% | 48.160% | 85.893% | 71.044% | 39.6853% | 12.462% | 19273 | 5.4774% |
American Indian, Alaska Native, or Indigenous | 20.6668% | 67.3431% | 11.3014% | 54.772% | 85.686% | 73.191% | 46.5675% | 20.475% | 4829 | 1.3724% |
Asian or Asian American | 11.5228% | 78.5679% | 9.9692% | 40.987% | 88.524% | 68.225% | 37.4242% | 11.296% | 20082 | 5.7073% |
Native Hawaiian or Pacific Islander | 14.7556% | 73.2000% | 11.0251% | 48.428% | 86.337% | 73.684% | 46.7949% | 13.329% | 2250 | 0.6395% |
Race or ethnicity not included above (optional, please describe) | 19.6409% | 68.8833% | 11.4036% | 53.947% | 90.537% | 76.540% | 42.6780% | 18.273% | 17820 | 5.0645% |
Multiracial and/or Multi-ethnic | 17.4176% | 73.0157% | 9.3815% | 43.359% | 90.557% | 76.917% | 43.0825% | 9.797% | 16202 | 4.6046% |
Middle Eastern or North African | 17.9402% | 69.4906% | 11.4744% | 49.363% | 83.728% | 70.336% | 44.5946% | 15.561% | 1806 | 0.5133% |
5th Grade | 19.6020% | 67.2637% | 14.2264% | 60.969% | 82.888% | 65.964% | 32.1875% | 31.230% | 2010 | 0.5712% |
6th Grade | 20.6797% | 66.8419% | 13.0294% | 58.867% | 89.190% | 73.105% | 35.9539% | 24.607% | 29483 | 8.3791% |
7th Grade | 18.5960% | 68.9209% | 12.8541% | 53.010% | 89.864% | 73.705% | 37.7432% | 18.692% | 45101 | 12.8177% |
8th Grade | 16.7901% | 71.7275% | 11.5168% | 47.231% | 90.043% | 73.294% | 39.5434% | 13.573% | 45783 | 13.0116% |
9th Grade | 13.8786% | 76.5809% | 9.1649% | 40.667% | 90.365% | 75.226% | 41.4493% | 9.281% | 46287 | 13.1548% |
10th Grade | 12.2889% | 79.4117% | 7.8987% | 34.341% | 89.314% | 76.064% | 44.3513% | 7.704% | 43039 | 12.2317% |
11th Grade | 10.6019% | 81.8886% | 7.0915% | 30.479% | 89.050% | 78.211% | 47.4004% | 6.817% | 39257 | 11.1569% |
12th Grade | 9.7759% | 83.2454% | 6.5980% | 27.632% | 87.891% | 77.851% | 47.3025% | 6.471% | 34677 | 9.8552% |
13th Grade | 25.0000% | 58.3333% | 15.5172% | 40.000% | 78.571% | 66.667% | 45.4545% | 21.569% | 60 | 0.0171% |
Other Grade | 15.9438% | 70.8613% | 15.9434% | 64.751% | 85.929% | 74.751% | 53.0612% | 23.034% | 3274 | 0.9305% |
# Interactive version of the summary table.
# By this point the share columns hold percent strings (e.g. "10.1482%"),
# and column 10 (after dropping "total") is the sample-size count. Rounding
# every column to two decimals is therefore not what is wanted: counts would
# gain ".00" and the shares are not numeric. Instead, turn the shares back
# into numeric proportions and let DT format them as percentages, and show
# the count as a whole number.
share_cols <- c(2:9, 11)
count_col <- 10

# Convert a percent string such as "12.34%" to a proportion (0.1234);
# values that are already numeric are returned unchanged.
percent_to_prop <- function(x) {
  if (is.numeric(x)) {
    return(x)
  }
  as.numeric(sub("%", "", x, fixed = TRUE)) / 100
}

tbl_props <- dem_props %>%
  select(-"total") %>%
  mutate(across(all_of(share_cols), percent_to_prop)) %>%
  datatable(
    colnames = c(
      "Demographic",
      "Yes - Bullied",
      "No - Bullied",
      "IDK - Bullied",
      "Physically Bullied",
      "Verbally Bullied",
      "Socially Bullied",
      "Cyber Bullied",
      "Bullying is an Obstacle to Learning",
      "Total",
      "Overall Proportion"
    ),
    rownames = FALSE
  ) %>%
  formatPercentage(columns = share_cols, digits = 2) %>%
  formatRound(columns = count_col, digits = 0)
This one is a bit more user-friendly.
# Display the interactive table.
tbl_props