-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path02_demographic-preprocess.Rmd
110 lines (88 loc) · 3.45 KB
/
02_demographic-preprocess.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
---
title: "Demographic Pre-processing"
author: "Meg Hutch"
date: "2022-11-09"
output: html_document
---
Pre-process the demographic table for use in the downstream demographic figure (Figure 2).
```{r message=FALSE, warning=FALSE}
library(tidyverse)
source("R/demo_tables.R")
source("R/utils.R")
```
## **Import Data from each Healthcare System**
```{r message=FALSE, warning=FALSE}
# Read in every .rda file from the results folder.
# This folder contains all of the local healthcare system level analyses.
rdas <- list.files(
  path = "results",
  # `pattern` is a regular expression: escape the dot and anchor at the end of
  # the name so that only files ending in ".rda" are matched
  pattern = "\\.rda$",
  full.names = TRUE
)

# load() restores the objects saved in each file into the current environment
for (rda in rdas) {
  load(rda)
}

# remove the file list and the loop variable now that the files are loaded
rm(rdas, rda)
# create a list of participating healthcare systems from our study tracking
# spreadsheet
site_google_url <- "https://docs.google.com/spreadsheets/d/1epcYNd_0jCUMktOHf8mz5v651zy1JALD6PgzobrGWDY/edit?usp=sharing"

# load site parameters (sheet 1) and site availability (sheet 2)
site_params <- googlesheets4::read_sheet(site_google_url, sheet = 1)
site_avails <- googlesheets4::read_sheet(site_google_url, sheet = 2)

# filter the list of sites who ran the analysis (non-missing
# `date_v4_received`) and build the "<siteid>_results" names
sorted_sites <- site_avails %>%
  filter(!is.na(date_v4_received)) %>%
  pull(siteid) %>%
  paste("results", sep = "_")

# list sites without race
sites_wo_race <- site_params %>%
  filter(!include_race) %>%
  pull(siteid)

# combine all objects with 'results' in their name into one named list.
# `results` itself is excluded: when this chunk is re-run in the same session,
# the previous list would otherwise be nested inside the new one.
results <- mget(setdiff(ls(pattern = "results"), "results"))
```
### **Abstract adult & pediatric tables from each healthcare system**
```{r}
# patient count table by site, generated in an earlier step
pt_counts <- read.csv("tables/site_pt_counts.csv")

# sites belonging to the adult and to the pediatric analyses;
# which() drops rows whose population is NA
adult_sites <- with(pt_counts, site[which(population == "Adult")])
pediatric_sites <- with(pt_counts, site[which(population == "Pediatric")])

# build one demographics table per population
demo_table_adult <- create_demo_tableone(
  sorted_sites = adult_sites,
  is_pediatric = FALSE
)
demo_table_pediatric <- create_demo_tableone(
  sorted_sites = pediatric_sites,
  is_pediatric = TRUE
)
```
### **Combine adult and pediatric tables**
```{r}
# Stack the adult and pediatric tables, reshape the `n_var` count columns to
# long format, and attach two totals to every row:
#   total_variable - counts summed within site / variable / Demo_var / name
#   total_demo_var - counts summed within site / Demo_var / name
demo_table_combine <- rbind(demo_table_adult, demo_table_pediatric) %>%
  select(site, variable, Demo_var, contains("n_var")) %>%
  pivot_longer(cols = contains("n_var")) %>%
  group_by(site, variable, Demo_var, name) %>%
  mutate(total_variable = sum(value, na.rm = TRUE)) %>%
  group_by(site, Demo_var, name) %>%
  mutate(total_demo_var = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  select(-c(Demo_var, value)) %>%
  distinct()

# One row per site and variable: counts, percentages of the Demo_var total,
# and a formatted "n (p%)" label for the None (NNC), Central (CNS) and
# Peripheral (PNS) columns.
demo_table_site <- demo_table_combine %>%
  pivot_wider(
    id_cols = c("site", "variable"),
    names_from = "name",
    values_from = c("total_variable", "total_demo_var")
  ) %>%
  mutate(
    NNC_perc = round(
      total_variable_n_var_None / total_demo_var_n_var_None * 100, 1
    ),
    CNS_perc = round(
      total_variable_n_var_Central / total_demo_var_n_var_Central * 100, 1
    ),
    PNS_perc = round(
      total_variable_n_var_Peripheral / total_demo_var_n_var_Peripheral * 100, 1
    )
  ) %>%
  # keep only the reported columns, renaming the count columns on the way
  select(
    site, variable,
    NNC_n = total_variable_n_var_None,
    CNS_n = total_variable_n_var_Central,
    PNS_n = total_variable_n_var_Peripheral,
    NNC_perc, CNS_perc, PNS_perc
  ) %>%
  mutate(
    NNC = paste0(NNC_n, " (", NNC_perc, "%)"),
    CNS = paste0(CNS_n, " (", CNS_perc, "%)"),
    PNS = paste0(PNS_n, " (", PNS_perc, "%)")
  )
```
### **Save patient demographic data**
```{r}
# write the site-level demographic table to disk for the demographic figure
save(
  list = "demo_table_site",
  file = "demographic-figure/demographic-data/demo_table_site.rda"
)
```