# ConfoundReducer.R | v2024.05.01 (HAPPY MAY DAY; SUPPORT PALESTINE)
# Very much a work in progress.
# Simply cuts out excess covariates that fMRIPrep generates
# Note: confounds_list lets the user enter the names of desired columns manually, while confounds_search uses regex patterns to identify any column whose name matches. confounds_search should be set to NA if not in use.
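# For example, with the defaults below, confounds_search would keep fMRIPrep columns such as cosine00, t_comp_cor_00 through t_comp_cor_04, and a_comp_cor_00 through a_comp_cor_04 (illustrative fMRIPrep column names; only columns actually present in your confound files will be matched).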
ConfoundReducer <- function(PIDs,
dir = "/data/Uncertainty/data/deriv/pipeline_1/fmriprep",
runs = 2,
Components = c("Test", "Control"),
motion_censor = TRUE,
motion_censor_thresh = 0.9,
check_VIF = TRUE,
confounds_list = c("trans_x", "trans_y", "trans_z", "rot_x", "rot_y", "rot_z", # Head motion and it's derivatives
'trans_x_derivative1', 'trans_y_derivative1', 'trans_z_derivative1', 'rot_x_derivative1', 'rot_y_derivative1', 'rot_z_derivative1',
"framewise_displacement"),
confounds_search = c("^cosine*", # Adjusting for scanner and physiological related drifts (especially needed in long video studies)
"^t_comp_cor_0[0-4]$", # Time-related components to capture temporal physioogical confounds
"^a_comp_cor_0[0-4]$")) # Anatomical components to capture spatial physiological confounds
{
# Check whether this file path is valid.
if (!dir.exists(dir)){
  stop(paste0("The filepath you provided for dir (", dir, ") does not appear to exist. This directory should contain the confound files generated by fMRIPrep for each participant (i.e., the level just above the individual participant directories). Please respecify your filepath and try again."))
}
# Check whether runs is entered validly.
if (!is.numeric(runs) || length(runs) != 1 || is.na(runs) || runs <= 0){
  stop("runs should contain the number of runs your task contains. It must be a single numeric value greater than 0. Please respecify the number of runs and try again.")
}
# Check whether Components was entered validly.
if (is.null(Components) || any(is.na(Components)) || !all(Components %in% c("Test", "Control"))){
  stop("Components should be 'Control' and/or 'Test'. It cannot be left undefined or misspelled. Please enter something and try again.")
}
# I still need to come back and add QA checks for the motion censor and confound arguments
# If we want to check the VIF to apply a PCA and avoid collinearity
if (check_VIF == TRUE){
  # Creating a subfunction to check collinearity
  VIF_Checker <- function(data){
    # Dependencies (install car if it is not already installed)
    if (!requireNamespace("car", quietly = TRUE)){
      install.packages("car")
    }
    # Create a simulated outcome variable
    data$outcome <- rnorm(nrow(data))
    # Creating a dummy model to test VIF
    model <- lm(outcome ~ ., data = data)
    # Capturing vif values
    vif_values <- car::vif(model)
    # Remove the dummy outcome variable so it is never written to the output
    data <- data[,-which(names(data) == "outcome")]
    # If any predictor shows problematic collinearity (VIF > 5)
    if (any(vif_values > 5)){
      # Scale all the data
      data <- scale(data)
      # Perform PCA and keep the component scores as a dataframe
      data <- as.data.frame(prcomp(data, center = TRUE, scale. = TRUE)$x)
    }
    return(data)
  }
}
# Creating a subfunction to make this all smoother
Reducer <- function(file = filename, confound_list = confounds_list, confound_search = confounds_search)
{
# Check whether this file exists, and if it doesn't, print an error and give up on this file.
if (!file.exists(file)){
  print(paste0("Error: ", PID, "'s confound file could not be located: ", file))
  return(NULL)
}
# If it does exist . . .
if (file.exists(file)){
  # Check if the file is empty, and if it is, give up on this file as well
  if (file.info(file)$size <= 0){
    print(paste0("Error: ", PID, "'s confound file does not contain data: ", file))
    return(NULL)
  }
  # And if the file isn't empty . . .
  if (file.info(file)$size > 0){
# Read in the file as a dataframe
df <- read.table(file = file,
sep = '\t',
header = T,
na.strings = c("","NA","n/a"))
# If a confound search was entered
# NOTE that this is a quick and dirty solution, which should be switched out later.
if (!is.na(confound_search[1])){
# Create an empty array
search_results <- NULL
# Iterate through all of the items in the search list
for (ITEM in confound_search){
# Save all column names matching the pattern into the array
search_results <- c(search_results,
names(df)[grep(x = names(df), pattern = ITEM)])
}
# and combine them with the list
confounds <- c(confound_list, search_results)
}
# If a confound search was not entered
if (is.na(confound_search[1])){
# define the confound list as the only confounds to keep
confounds <- confound_list
}
# Subset the desired columns
df <- subset(df, select = confounds)
# Check if censoring should occur
if (motion_censor == TRUE){
# Tracking how many variables are present
cols <- names(df)
# Iterate through each observation in the dataframe
for (OBS in 2:nrow(df)){
# If that observation isn't an NA
if (!is.na(df$framewise_displacement[OBS])){
# If that observation has a FWD value greater than the threshold ...
if (df$framewise_displacement[OBS] > motion_censor_thresh){
# Create a new column of zeroes
df[,ncol(df) + 1] <- 0
# Add a one for this specific observation in that column
df[OBS, ncol(df)] <- 1
}
}
}
# If any of the observations were greater than our threshold
if (length(which(df$framewise_displacement > motion_censor_thresh)) > 0){
# If we have more than 9 motion outliers
if (length(which(df$framewise_displacement > motion_censor_thresh)) > 9){
# Assigning variable headers
names(df) <- c(cols,
paste0("motion_outlier0", 1:9),
paste0("motion_outlier", 10:length(which(df$framewise_displacement > motion_censor_thresh))))
}
# If we have less than 10 motion outliers
if (length(which(df$framewise_displacement > motion_censor_thresh)) < 10){
# Assigning variable headers
names(df) <- c(cols,
paste0("motion_outlier0", 1:length(which(df$framewise_displacement > motion_censor_thresh))))
}
}
}
# If we want to check the VIF to apply a PCA and avoid collinearity
if (check_VIF == TRUE){
# Check for multicollinearity and perform PCA if necessary
df <- VIF_Checker(data = df)
}
}
}
return(df)
}
# Dependencies
library(assertthat)
library(dplyr)
library(stringr)
# Iterating through each of the participants
for (PID in PIDs){
# Iterate through the components
for (COMPONENT in Components){
# If we're looking at a control component
if (COMPONENT == "Control"){
  # Start with no control file identified
  filename <- NA
  control <- NA
  # If the luma file exists
  if (file.exists(paste0(dir, "/sub-", PID, "/func/sub-", PID, "_task-luma_desc-confounds_timeseries.tsv"))){
    # Create a variable to capture the file name for this run
    filename <- paste0(dir, "/sub-", PID, "/func/sub-", PID, "_task-luma_desc-confounds_timeseries.tsv")
    # Note what kind of control we have here
    control <- "luma"
  }
  # If the fish file exists
  if (file.exists(paste0(dir, "/sub-", PID, "/func/sub-", PID, "_task-fish_desc-confounds_timeseries.tsv"))){
    # Create a variable to capture the file name for this run
    filename <- paste0(dir, "/sub-", PID, "/func/sub-", PID, "_task-fish_desc-confounds_timeseries.tsv")
    # Note what kind of control we have here
    control <- "fish"
  }
  # If neither control file was found, note it and move on
  if (is.na(filename)){
    print(paste("Error:", PID, "has no luma or fish confound file in", paste0(dir, "/sub-", PID, "/func")))
  }
  # Otherwise, run the confound reducer subfunction
  if (!is.na(filename)){
    df <- Reducer(file = filename)
    # And then save that dataframe as a new tab-delimited file alongside the original confounds file
    if (!is.null(df)){
      write.table(df,
                  file = paste0(dir, "/sub-", PID, "/func/sub-", PID, "_task-", control, "_desc-confounds_timeseries_reduced.tsv"),
                  sep = "\t",
                  row.names = FALSE,
                  col.names = TRUE)
    }
  }
}
# If we're looking at a test component
if (COMPONENT == "Test"){
  # Iterate through the runs in the study
  for (Run in paste0("run-", 1:runs)){
    # Create a variable to capture the file name for this run
    filename <- paste0(dir, "/sub-", PID, "/func/sub-", PID, "_task-uncertainty_", Run, "_desc-confounds_timeseries.tsv")
    # Run the confound reducer subfunction
    df <- Reducer(file = filename)
    # And then save that dataframe as a new tab-delimited file alongside the original confounds file
    if (!is.null(df)){
      write.table(df,
                  file = paste0(dir, "/sub-", PID, "/func/sub-", PID, "_task-uncertainty_", Run, "_desc-confounds_timeseries_reduced.tsv"),
                  sep = "\t",
                  row.names = FALSE,
                  col.names = TRUE)
    }
  }
}
}
}
}
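# Example usage (a minimal, commented-out sketch; the participant IDs below are hypothetical and
# should be replaced with the IDs from your own study, as should dir and runs if they differ from
# the defaults above):
# ConfoundReducer(PIDs = c("001", "002"),
#                 dir = "/data/Uncertainty/data/deriv/pipeline_1/fmriprep",
#                 runs = 2,
#                 Components = c("Test", "Control"),
#                 motion_censor = TRUE,
#                 motion_censor_thresh = 0.9,
#                 check_VIF = TRUE)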