-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path22_qualitative_immune_infiltration_analysis_with_deconvolution.R
330 lines (276 loc) · 20.9 KB
/
22_qualitative_immune_infiltration_analysis_with_deconvolution.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
# Qualitative immune infiltration analysis with deconvolution
# Packages
library(tidyverse)
library(ggpubr)
library(corrplot)
library(gdata)
# Data and masterfiles
load("/Users/ahamypet/RT2Lab/BC_BILAT_NEO/NGS/RNASeq/data/processed/results_CIBERSORT_BC_BILAT_NEO_LM22_df_3.RData")
head(results_CIBERSORT_BC_BILAT_NEO_LM22_df_3)
load("/Users/ahamypet/RT2Lab/BC_BILAT_NEO/article_new/Nature_med/rebuttal_nature/submission_R1_Nature/R2_nature/panels_tils_total.RData")
load("/Users/ahamypet/RT2Lab/BC_BILAT_NEO/article_new/Nature_med/rebuttal_nature/submission_R1_Nature/R2_nature/panels_macrophages_total.RData")
load("/Users/ahamypet/RT2Lab/BC_BILAT_NEO/immunofluo/data/processed/panels_macrophages.RData")
head(panels_tils_total)
# Functions and colors
colsubtype_cvd_pCR3 <- c( "PT" = "#49AAE3", "RD" = "#57EBC3")
# We applied on the 20 samples of the cohort the CIBERSORT algorithm using the “absolute” mode to deconvolute RNAseq expression profiles
# into 22 subsets of immune subpopulations.
# Cf how to preprocess it
load("/Users/ahamypet/RT2Lab/BC_BILAT_NEO/NGS/RNASeq/data/processed/CIBERSORT_BC_BILAT_NEO_LM22_3.RData")
CIBERSORT_BC_BILAT_NEO_LM22_3$all <- "all"
CIBERSORT_BC_BILAT_NEO_LM22_3$value <- CIBERSORT_BC_BILAT_NEO_LM22_3$value*100
head(CIBERSORT_BC_BILAT_NEO_LM22_3)
nrow(CIBERSORT_BC_BILAT_NEO_LM22_3) # 440
p_boxplot_global_LM22_wp_abs <- CIBERSORT_BC_BILAT_NEO_LM22_3 %>%
ggplot( aes(x=reorder(celltype,value), y=value, fill = celltype)) +
geom_boxplot() +
# geom_jitter() +
xlab('') + ylab('Absolute abundance score') +
coord_flip(ylim = c(0,110))+
theme_bw() +
theme(axis.text.x = element_text( hjust=1),
legend.position = "bottom",
legend.text=element_text(size=12),legend.title=element_text(size=12))
p_boxplot_global_LM22_wp_abs
p_boxplot_global_LM22_rel_PT_RD_abs <- CIBERSORT_BC_BILAT_NEO_LM22_3 %>% ggplot( aes(x=reorder(celltype,value), y=value, fill = tumortype)) +
geom_boxplot() +
geom_jitter(size=0.5) +
xlab('') + ylab('Absolute abundance score') +
coord_flip(ylim = c(0,110))+
theme_bw() +
stat_compare_means( label = "p.format",label.y = 100) +
scale_fill_manual(name = "", values = colsubtype_cvd_pCR3, drop = TRUE) +
theme(axis.text.x = element_text(hjust=1),
legend.position = "bottom",
legend.text=element_text(size=12),legend.title=element_text(size=12))
p_boxplot_global_LM22_rel_PT_RD_abs
p_boxplot_global_LM22_wp_PT_RD_abs <- plot_grid(p_boxplot_global_LM22_wp_abs + theme(legend.position = "none") ,
p_boxplot_global_LM22_rel_PT_RD_abs +
theme(legend.position = "right", axis.ticks.y = element_blank(),
axis.text.y = element_blank()),
labels = "AUTO", rel_widths = c(6,5))
p_boxplot_global_LM22_wp_PT_RD_abs
save_plot(file= "/Users/ahamypet/RT2Lab/bc_bilat_neo_git/figures/FigS8_p_boxplot_global_LM22_wp_PT_RD_abs.pdf",
p_boxplot_global_LM22_wp_PT_RD_abs, nrow=1, ncol=2, base_height=6.5, base_width=6)
# The top 3 most abundant immune subpopulations were M2 macrophages,
# CD4 memory resting T cells and M1 macrophages (FigS8A).
# CD4 memory T cells and M2 macrophages were increased in RD compared with PT (FigS8B).
# To further characterize the microenvironment of these samples using an orthogonal experimental approach, we performed immunofluorescence stainings
# using 2 antibodies panels (Panel1: CD8 CD45RO CD20 CD4 and FoxP3; Panel 1: CD4 – CD8 – CD45RO – FOXP3 – CD20 – pan-cytokeratin; FigS8A-F);
# Panel 2: CD68 – CD163 – CD138 – CKIT – pan-cytokeratin; FigS8G-H) to assess the concordance between the immune subpopulations inferred by gene expression
# and the number of cells stained on pathologic slides.
# Cytotoxic CD8+
p_Somme_de_CD8pos_Cytotox_T_cells <- ggscatter(panels_tils_total, x = "Somme_de_CD8pos", y = "Cytotox_T_cells",
add = "reg.line", shape=21, size = 2.5, fill = "lightgray",
add.params = list(color = "#BE9A32", fill = "#BE9A32"),
conf.int = TRUE) + stat_cor(method = "pearson") + theme_bw() +
xlab("Number of CD8+ cells") + ylab("Cytotoxic T cells \n Absolute abundance score")
# B cells : CD 20+
p_Somme_de_CD20pos_B_cells_naive <- ggscatter(panels_tils_total, x = "Somme_de_CD20pos", y = "B_cells_naive",
add = "reg.line",shape=21, size = 2.5,fill = "lightgray",
add.params = list(color = "#F9766D", fill = "#F9766D"), conf.int = TRUE) +
stat_cor(method = "pearson") + theme_bw() +
xlab("Number of CD20+ cells") + ylab("B cells naive \n Absolute abundance score")
# T reg CD4+ / FOXP3+
p_Somme_de_CD4pos_FOXP3pos_T_reg <- ggscatter(panels_tils_total, x = "Somme_de_CD4pos_FOXP3pos", y = "T_reg",
add = "reg.line", shape=21, size = 2.5,fill = "lightgray",
add.params = list(color = "#53B867", fill = "#53B867"), conf.int = TRUE) +
stat_cor(method = "pearson") + theme_bw() +
theme_bw() + xlab("Number of CD4+/FOXP3+ cells") + ylab("T Regs \n Absolute abundance score")
# Effector memory CD45RO+
p_Somme_de_CD45R0pos_T_cells_CD4_memory_activated <- ggscatter(panels_tils_total, x = "Somme_de_CD45R0pos", y = "T_cells_CD4_memory_activated",
add = "reg.line",shape=21, size = 2.5,fill = "lightgray",
add.params = list(color = "#76AE33", fill = "#76AE33"),conf.int = TRUE) +
stat_cor(method = "pearson") + theme_bw() +
xlab("Number of CD45RO+ cells") + ylab("T Cells memory activated \n Absolute abundance score")
# CD68+/CD163+ and Macrophages M2
p_Somme_de_CD68pos_CD163pos_MacroM2 <- ggscatter(panels_macrophages_total, x = 'Somme_de_cd68pos/cd163pos', y = "Macrophages_M2",
add = "reg.line", shape=21, size = 2.5,fill = "lightgray",
add.params = list(color = "#7B95FF", fill = "#7B95FF"), conf.int = TRUE) +
stat_cor(method = "pearson") + theme_bw() +
xlab("Number of CD68+/CD163+ cells") + ylab("Macrophages M2 \n Absolute abundance score")
# CD138+ and PLasma cells M2
p_Somme_de_CD138pos_plasma_cells <- ggscatter(panels_macrophages_total, x = 'Somme_de_cd138pos', y = "Plasma_cells",
add = "reg.line", shape=21, size = 2.5,fill = "lightgray",
add.params = list(color = "#DB8E01", fill = "#DB8E01"),conf.int = TRUE) +
stat_cor(method = "pearson") + theme_bw() +
xlab("Number of CD138+ cells") + ylab("Plasma cells \n Absolute abundance score")
# CD68+/CD163+ and Macrophages M2
p_Somme_de_CD68pos_MacroM0_M1_M2 <- ggscatter(panels_macrophages_total, x = 'Somme_de_cd68pos', y = "Macrophages_M0_M1_M2",
add = "reg.line", shape=21, size = 2.5,fill = "lightgray",
add.params = list(color = "#BE9A32", fill = "#BE9A32"), conf.int = TRUE) +
stat_cor(method = "pearson") + theme_bw() +
xlab("Number of CD68+ cells") + ylab("Macrophages M0, M1 and M2 \n Absolute abundance score")
# CKIT+/CK- cells ( Only on CK- for mast cells)
p_Somme_de_ckitpos_mast_cells <- ggscatter(panels_macrophages %>% filter(Tissue_Category == "CK-"),
x = 'Somme_de_ck-/ckitpos', y = "Mast_cells_resting",
add = "reg.line", shape=21, size = 2.5, fill = "lightgray",
add.params = list(color = "#EF67EB", fill = "#EF67EB"), conf.int = TRUE) +
stat_cor(method = "pearson") + theme_bw() +
xlab("Number of CKIT+/CK- cells") + ylab("Mast cells resting \n Absolute abundance score")
p_Somme_de_ckitpos_mast_cells
p_correl_cibersort_immunoflu_compil_8panels <- plot_grid( p_Somme_de_CD8pos_Cytotox_T_cells ,
p_Somme_de_CD4pos_FOXP3pos_T_reg,
p_Somme_de_CD45R0pos_T_cells_CD4_memory_activated,
p_Somme_de_CD20pos_B_cells_naive,
p_Somme_de_CD68pos_CD163pos_MacroM2 ,
p_Somme_de_CD68pos_MacroM0_M1_M2,
p_Somme_de_CD138pos_plasma_cells,
p_Somme_de_ckitpos_mast_cells + coord_cartesian(xlim = c(0,410)),
labels = c("I","J","K","L","M","N","O","P") ,
ncol = 2)
p_correl_cibersort_immunoflu_compil_8panels
save_plot(p_correl_cibersort_immunoflu_compil_8panels,
file = "/Users/ahamypet/RT2Lab/bc_bilat_neo_git/figures/ExtendedFig7_p_correl_cibersort_immunoflu_compil_8panels.pdf",
base_height = 13, base_width = 9)
# The correlation coefficient between both metrics was statistically significant
# regarding cytotoxic T cells (CD8+ cells), T regs (CD4+/FOXP3+ cells), Mast cells resting (CKIT+/CK- cells) (ExtendedFig7I-J-Q),
# was nearly significant for T cells memory activated (CD45RO+ cells) and B cells naïve (CD20+) (ExtendedFig7K-L);
# and was not significant in macrophages M2 (CD68+/CD163+ cells) or macrophages of any type (CD68+ cells) and plasma cells (CD138+ cells)(ExtendedFig7M-N-O).
# We average Patient6 for each
mat_patient_6_L_R <- CIBERSORT_BC_BILAT_NEO_LM22_3 %>%
filter(tumortype == "PT" ) %>%
select(samplename,patient,patient_sideAB,tumortype,celltype,side,value) %>% filter(patient == "Patient6") %>% group_by(patient,celltype,side) %>% summarise(value = mean(value))
head(mat_patient_6_L_R)
mat_patient_no6_L_R <- CIBERSORT_BC_BILAT_NEO_LM22_3 %>%
filter(tumortype == "PT" ) %>%
select(samplename,patient,patient_sideAB,tumortype,celltype,side,value) %>% filter(patient != "Patient6") %>% select(patient, celltype, side, value)
head(mat_patient_no6_L_R)
mat_all_patients_cibersort <- rbind(mat_patient_6_L_R,mat_patient_no6_L_R)
p_L_R_lines_cibersort <- ggplot(mat_all_patients_cibersort ,
aes (x=celltype,y=value, color =celltype, shape = side)) +
theme_bw()+
theme(axis.ticks.x = element_blank(),
legend.position = "bottom")+
facet_wrap( . ~ patient) + geom_point(size = 2) +
coord_flip()+
geom_line(aes (x=celltype,y=value,group=celltype, colour =celltype),
size = 1,
position = position_dodge(width=0.1)) +
scale_shape_manual(values = c(8,17), name = " ", labels = c("Left","Right")) +
guides(colour = FALSE) +
ylab("Absolute abundance score")+ xlab("") +
ggtitle("Difference between left and right tumors regarding immune subsets levels")
p_L_R_lines_cibersort
# Create the plot with arrows for PT and RD
collevelside <- c("R"="red3","L"="limegreen")
coltumortype <- c("PT"="#dd9d03", "RD"="#f6ce48")
coltumortype_shape <- c(coltumortype,collevelside)
coltumortype_shape["PT"] <- c(15)
coltumortype_shape["RD"] <- c(4)
coltumortype_shape["R"] <- c(8)
coltumortype_shape["L"] <- c(11)
p_PT_RD_lines_horiz_cibersort <- ggplot(CIBERSORT_BC_BILAT_NEO_LM22_3 %>%
filter(tumortype2 %in% c("PT_with_RD","RD")) %>%
select(samplename,patient,patient_sideAB,tumortype,celltype,side,value) ,
aes (x=celltype,y=value, color =celltype, shape = tumortype)) +
theme_bw()+
theme(axis.ticks.x = element_blank(),
legend.position = "bottom")+
facet_wrap( . ~ patient_sideAB) + geom_point(size = 2) +
coord_flip()+
geom_line(aes (x=celltype,y=value,group=celltype, colour =celltype),
size = 1,
arrow = arrow(length=unit(0.2,"cm") ,type = "closed" , angle = 35),
position = position_dodge(width=0.1)) +
scale_shape_manual(values = c(1,5), name = " " , labels = c("Primary tumor (PT)","Residual disease (RD)")) +
guides(colour = FALSE) +
ylab("Absolute abundance score") + xlab("") +
ggtitle("Difference between PT and RD regarding immune subsets levels")
p_PT_RD_lines_horiz_cibersort
p_lines_horiz_compil_L_R_PT_RD <- plot_grid(p_L_R_lines_cibersort ,
p_PT_RD_lines_horiz_cibersort + xlab(""),
labels = c("AUTO"),
ncol = 2, nrow = 1,
align="hv")
save_plot(file="/Users/ahamypet/RT2Lab/bc_bilat_neo_git/figures/ExtendedFig8.pdf",
p_lines_horiz_compil_L_R_PT_RD,
base_height=9, base_width=17)
# At the patient level, the immune composition of the paired contralateral tumors were different regarding several immune subsets
# (Macrophages M0, M1, M2, T cells CD4 memory activated and resting) (Extended Fig8A), while the variation of the immune composition between PT and RD
# mostly concerned increasing levels of macrophages M2, M0 and T cells CD4 memory resting (Extended Fig8B).
# See previous code
# preprocess_correlation_matrix_cibersort_LM22.r
mat_wide <- results_CIBERSORT_BC_BILAT_NEO_LM22_df_3 %>% rename( sample = rowname) %>%
select(-Correlation, - RMSE, -sig.score, - Pvalue)
levelsample_corr <- c( "PT1_R","PT1_L","PT2_R","PT2_L","PT3_R","PT3_L","PT4_R","PT4_L","PT5_R","PT5_L",
"PT6A_R","PT6B_R","PT6A_L","PT6B_L",
"RD1_L","RD3_R","RD3_L","RD4_R","RD5_R","RD6B_R")
mat_wide$sample <- factor(mat_wide$sample, levels = levelsample_corr)
mat_wide_tmp <- mat_wide[match(mat_wide$sample,levelsample_corr),]
mat_wide <- mat_wide_tmp
rep.row<-function(x,n){
matrix(rep(x,each=n),nrow=n)
}
levelsample <- mat_wide$sample
mat_sum_squared <- matrix(NA, ncol = 20, nrow = 20) %>% as.data.frame()
colnames(mat_sum_squared) <- levelsample
rownames(mat_sum_squared) <- levelsample
for (i in 1 : length(levelsample)){
# i=1
print(i)
tmp_pat <- levelsample[i]; print(tmp_pat)
tmp_mat_pat <- mat_wide %>% filter(sample == tmp_pat) %>% select(-sample) %>% as.matrix()
tmp_mat_pat_unnamed <- as.vector(unname(tmp_mat_pat))
tmp_mat_tmp_pat <- matrix(rep(tmp_mat_pat_unnamed,20), nrow = 20,byrow = TRUE) %>% as.data.frame()
M1 <- mat_wide %>% select(-sample)
M2 <- tmp_mat_tmp_pat
tmp_mat_pat_soustract <- M1 - M2
tmp_mat_pat_soustract_squared <- tmp_mat_pat_soustract*tmp_mat_pat_soustract
sum_all_difference_squared_tmp_sample <- rowSums(tmp_mat_pat_soustract_squared)
mat_sum_squared[i,] <- sum_all_difference_squared_tmp_sample
}
mat_sum_squared <- mat_sum_squared %>% as.matrix()
head(mat_sum_squared)
# mat_sum_squared <- mat_sum_squared*100
# Create correlation matrix
colxx <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
pdf("/Users/ahamypet/RT2Lab/bc_bilat_neo_git/figures/ExtendedFig9_correlogram_cibersort.pdf", height = 10, width = 15)
corrplot(mat_sum_squared,
type="upper",
# method="color",
is.corr = FALSE,addCoef.col = TRUE,
# title = "toto",
cl.lim=c(0,1.5),diag = F,
# col=colxx(200),
# col=colorRampPalette(c("white","lightpink", "red","brown"))(100),
cl.pos = "n", number.cex=0.75,tl.col="black") # ,) #
dev.off()
# We compared the predicted immune contexture patterns between samples of the cohort using a dissimilarity index (Idissimilarity).
# The higher the dissimilarity index, the more the composition of the immune infiltration differs.
# Overall matrix
all <- mat_sum_squared %>% upperTriangle()
all %>% mean() # 0.337
# Pairs left and right (individual)
pairs_L_R <- c( mat_sum_squared["PT1_L","PT1_R"],
mat_sum_squared["PT2_L","PT2_R"],
mat_sum_squared["PT3_L","PT3_R"],
mat_sum_squared["PT4_L","PT4_R"],
mat_sum_squared["PT5_L","PT5_R"])
pairs_L_R %>% mean() # 0.2228
# Pairs PT and RD (individual)
pairs_PT_RD <- c( mat_sum_squared["PT1_L","RD1_L"],
mat_sum_squared["PT3_R","RD3_R"],
mat_sum_squared["PT3_L","RD3_L"],
mat_sum_squared["PT4_R","RD4_R"],
mat_sum_squared["PT5_R","RD5_R"],
mat_sum_squared["PT6B_R","RD6B_R"])
pairs_PT_RD %>% mean() # 0.29
# Neither the mean dissimilarity indices
# (Extended Fig9) of paired left and right tumor (green bordered squares, mean Idissimilarity= 0.22) nor paired PT and RD
# (yellow bordered squared, mean Idissimilarity= 0.29) were statistically different from the rest of the samples.
# Matrix PT versus PT
PT_vs_PT <- mat_sum_squared[grep("PT", rownames(mat_sum_squared)),grep("PT",colnames(mat_sum_squared))] %>% upperTriangle()
PT_vs_PT %>% mean() # 0.24
# Matrix RD versus RD
RD_vs_RD <- mat_sum_squared[grep("RD", rownames(mat_sum_squared)),grep("RD",colnames(mat_sum_squared))] %>% upperTriangle()
RD_vs_RD %>% mean() # 0.37
wilcox.test(PT_vs_PT,RD_vs_RD) # <0.01
# Matrix PT versus RD
PT_vs_RD <- mat_sum_squared[grep("PT", rownames(mat_sum_squared)),grep("RD",colnames(mat_sum_squared))] %>% upperTriangle()
PT_vs_RD %>% mean() # 0.49
# At the cohort level, the dissimilarity was lower among the PT samples (blue area) than among the RD samples (orange area)
# (mean Idissimilarity: 0.24 versus 0.37, p=0.009), while the greatest difference was seen between PTs samples compared with RDs samples
# (yellow area, mean Idissimilarity = 0.49).
# These results suggest that the composition of the immune microenvironment is strongly associated with the pretreated or non-pretreated character of the sample
# (PT or RD), in line with the changes in the immune contexture induced by NAC treatment.