TCGA学习笔记11-生存曲线

前面,我们在差异表达结果中按照padj筛选出了TOP10基因,现在,我们对TOP10基因绘制生存曲线,看它们的表达是否与病例的生存时间相关。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
> library(survival)
> kidney_clinic$newID <- gsub("\\_diagnosis","\\-01",kidney_clinic$id) # 将id列中的"diagnosis"替换为01,以便保持样本名称一致
> dim(kidney_clinic)
[1] 530 14
> head(kidney_clinic)
id classification_of_tumor tumor_stage gender
1 TCGA-CZ-5986_diagnosis not reported stage i male
2 TCGA-CZ-4858_diagnosis not reported stage ii male
3 TCGA-B8-5551_diagnosis not reported stage i female
4 TCGA-B0-4817_diagnosis not reported stage iii male
5 TCGA-BP-4325_diagnosis not reported stage i female
6 TCGA-B0-4698_diagnosis not reported stage iv male
year_of_birth year_of_death year_of_diagnosis days_to_death age
1 1945 not reported 2006 not reported 61
2 1966 not reported 2005 2105 39
3 1945 not reported 2010 not reported 65
4 1921 2004 2002 1019 81
5 1937 not reported 2001 not reported 64
6 1928 2003 2003 42 75
deadORlive race alcohol years_smoked
1 Alive white Not Reported not reported
2 Dead white Not Reported not reported
3 Alive black or african american Not Reported not reported
4 Dead white Not Reported not reported
5 Alive white Not Reported not reported
6 Dead white Not Reported not reported
newID
1 TCGA-CZ-5986-01
2 TCGA-CZ-4858-01
3 TCGA-B8-5551-01
4 TCGA-B0-4817-01
5 TCGA-BP-4325-01
6 TCGA-B0-4698-01
> km_plot <- apply(padj_top10,1,function(x){ifelse(x>median(x),"up","down")}) # 这里的1表示对行操作,即对每个基因判断表达高,注意,输入的是data.frame,返回的是matrix
> dim(km_plot)
[1] 611 10
> head(km_plot)
ENSG00000174514 ENSG00000249859 ENSG00000224490
TCGA-CZ-5465-01 "up" "up" "up"
TCGA-BP-4355-01 "down" "up" "down"
TCGA-CZ-5451-01 "down" "down" "up"
TCGA-B0-5081-01 "down" "up" "down"
TCGA-CZ-5454-11 "up" "down" "down"
TCGA-B0-5697-01 "up" "down" "up"
ENSG00000061656 ENSG00000213963 ENSG00000185633
TCGA-CZ-5465-01 "up" "up" "up"
TCGA-BP-4355-01 "down" "up" "up"
TCGA-CZ-5451-01 "up" "down" "up"
TCGA-B0-5081-01 "down" "up" "up"
TCGA-CZ-5454-11 "down" "up" "down"
TCGA-B0-5697-01 "down" "up" "up"
ENSG00000014257 ENSG00000187730 ENSG00000129521
TCGA-CZ-5465-01 "down" "up" "up"
TCGA-BP-4355-01 "down" "up" "up"
TCGA-CZ-5451-01 "down" "up" "down"
TCGA-B0-5081-01 "up" "up" "down"
TCGA-CZ-5454-11 "up" "down" "down"
TCGA-B0-5697-01 "up" "down" "down"
ENSG00000135245
TCGA-CZ-5465-01 "up"
TCGA-BP-4355-01 "up"
TCGA-CZ-5451-01 "down"
TCGA-B0-5081-01 "down"
TCGA-CZ-5454-11 "down"
TCGA-B0-5697-01 "down"
> km_plot_cancer <- km_plot[grepl("TCGA\\S{9}01",rownames(km_plot)),] # km_plot来源于padj_top10,后者包含611个样本,有对照有肿瘤,而在做生存分析时只需要肿瘤样本,因此匹配01是为了筛选所有肿瘤样本
> dim(km_plot_cancer)
[1] 538 10 # 611个样本中,538个是肿瘤,然而,CASE只有530个,可见有来源于同一个病例的多个样本
> class(km_plot_cancer)
[1] "matrix" # matrix允许重复的行
> head(km_plot_cancer)
ENSG00000174514 ENSG00000249859 ENSG00000224490
TCGA-CZ-5465-01 "up" "up" "up"
TCGA-BP-4355-01 "down" "up" "down"
TCGA-CZ-5451-01 "down" "down" "up"
TCGA-B0-5081-01 "down" "up" "down"
TCGA-B0-5697-01 "up" "down" "up"
TCGA-A3-A8OV-01 "up" "down" "down"
ENSG00000061656 ENSG00000213963 ENSG00000185633
TCGA-CZ-5465-01 "up" "up" "up"
TCGA-BP-4355-01 "down" "up" "up"
TCGA-CZ-5451-01 "up" "down" "up"
TCGA-B0-5081-01 "down" "up" "up"
TCGA-B0-5697-01 "down" "up" "up"
TCGA-A3-A8OV-01 "up" "down" "up"
ENSG00000014257 ENSG00000187730 ENSG00000129521
TCGA-CZ-5465-01 "down" "up" "up"
TCGA-BP-4355-01 "down" "up" "up"
TCGA-CZ-5451-01 "down" "up" "down"
TCGA-B0-5081-01 "up" "up" "down"
TCGA-B0-5697-01 "up" "down" "down"
TCGA-A3-A8OV-01 "down" "down" "up"
ENSG00000135245
TCGA-CZ-5465-01 "up"
TCGA-BP-4355-01 "up"
TCGA-CZ-5451-01 "down"
TCGA-B0-5081-01 "down"
TCGA-B0-5697-01 "down"
TCGA-A3-A8OV-01 "up"
> kidney_clinic_km <- kidney_clinic[match(rownames(km_plot_cancer),kidney_clinic$newID),]
> dim(kidney_clinic_km)
[1] 538 14 # 还是538行,有8行重复
> class(kidney_clinic_km)
[1] "data.frame" # data.frame行名不能重复,但内容可以重复
> head(kidney_clinic_km)
id classification_of_tumor tumor_stage gender
274 TCGA-CZ-5465_diagnosis not reported stage iii female
358 TCGA-BP-4355_diagnosis not reported stage iii female
517 TCGA-CZ-5451_diagnosis not reported stage ii male
119 TCGA-B0-5081_diagnosis not reported stage iii female
201 TCGA-B0-5697_diagnosis not reported stage i male
484 TCGA-A3-A8OV_diagnosis not reported stage i male
year_of_birth year_of_death year_of_diagnosis days_to_death age
274 1931 not reported 2007 2564 76
358 1949 2010 2008 953 59
517 1932 not reported 2006 not reported 74
119 1926 2005 2005 362 79
201 1957 not reported 2007 not reported 50
484 1936 not reported 2011 not reported 75
deadORlive race alcohol years_smoked
274 Dead not reported Not Reported not reported
358 Dead white Not Reported not reported
517 Alive white Not Reported not reported
119 Dead white Not Reported not reported
201 Alive white Not Reported not reported
484 Alive black or african american Not Reported 65
newID
274 TCGA-CZ-5465-01
358 TCGA-BP-4355-01
517 TCGA-CZ-5451-01
119 TCGA-B0-5081-01
201 TCGA-B0-5697-01
484 TCGA-A3-A8OV-01

> daysToDeath <- as.numeric(as.character(kidney_clinic_km$days_to_death)) # 临床数据需要用到的是days_to_death
Warning message:
NAs introduced by coercion
> daysToDeath # 有许多是NA
[1] 2564 953 NA 362 NA NA 62 1493 51 NA NA NA 1588
[14] NA 885 2227 137 NA NA 1432 NA NA NA NA NA NA
[27] 1657 NA 18 NA 1964 NA 109 NA 1661 NA NA NA NA
[40] NA NA NA NA 1315 1404 562 445 NA NA NA NA 1238
[53] 182 1912 NA NA NA NA 2454 344 NA 1913 1111 NA 1980
[66] NA 600 1003 NA NA NA NA 2105 1371 334 NA 1337 NA
[79] NA NA NA NA NA NA 727 NA NA 2256 NA NA NA
[92] 782 NA NA NA NA NA NA NA 1625 NA NA NA NA
[105] NA NA 768 NA NA NA NA NA 563 NA 2145 NA NA
[118] NA 561 NA NA NA NA NA NA NA NA 1200 NA NA
[131] NA NA NA NA NA NA 1317 NA NA NA NA NA NA
[144] 329 NA NA NA NA 242 NA NA 646 NA NA NA NA
[157] 683 587 952 0 NA 320 1092 NA NA 68 510 NA NA
[170] 245 2 NA 793 NA NA NA 2601 NA NA NA 1714 NA
[183] NA NA 1230 1170 2386 NA 571 883 1912 NA NA NA 1986
[196] NA 1091 NA NA NA NA NA 1417 NA NA 65 NA NA
[209] NA NA NA NA 1378 NA NA NA NA NA NA NA NA
[222] NA NA 679 866 NA NA NA NA NA 330 NA NA 885
[235] NA 946 NA NA 375 NA NA NA 1133 NA NA 561 NA
[248] 431 NA 2764 NA NA NA NA 1696 NA 42 NA 59 0
[261] NA NA NA NA NA 1034 1270 1200 2241 333 NA NA NA
[274] NA 342 NA 722 NA NA 1075 NA NA NA NA 709 NA
[287] NA NA NA NA NA NA NA NA NA NA NA NA 164
[300] NA NA NA NA NA 701 69 NA NA 574 106 NA NA
[313] NA NA 475 1019 NA NA NA 162 845 NA NA 552 139
[326] NA NA NA NA 1589 NA 1045 735 NA NA NA NA NA
[339] NA NA 1972 459 NA 2299 NA NA NA 819 1463 NA NA
[352] 1724 NA NA 313 NA NA NA 183 NA NA NA NA NA
[365] NA NA 1626 822 NA 206 932 1097 NA NA NA NA NA
[378] NA NA NA 166 478 NA 1584 NA NA NA NA 480 NA
[391] NA 311 NA NA NA NA 202 NA 927 1343 1590 454 NA
[404] 834 NA NA NA 2090 1598 NA 1639 878 992 NA NA NA
[417] NA 43 NA 2419 NA NA NA NA 1121 NA NA NA NA
[430] 2343 485 110 NA NA 313 204 NA NA NA 238 NA NA
[443] NA NA NA NA 480 NA 168 NA 73 NA NA 101 NA
[456] NA NA NA 446 3615 NA 645 NA 1567 NA 3554 NA NA
[469] NA NA NA 139 NA NA NA 841 NA NA 336 NA NA
[482] NA 99 NA NA NA NA 637 NA NA NA NA 1111 NA
[495] NA NA 77 770 NA NA NA NA 222 NA NA NA NA
[508] 41 NA NA NA NA NA NA NA NA 1610 224 NA 1191
[521] NA 93 NA NA 307 NA 211 NA NA 1625 NA 578 NA
[534] NA NA NA NA NA

> daysToDeathN <- ifelse(is.na(daysToDeath),max(na.omit(daysToDeath)),daysToDeath) # 没有报道死亡天数的,认为还活着,使用最大天数
> daysToDeathN
[1] 2564 953 3615 362 3615 3615 62 1493 51 3615 3615 3615 1588
[14] 3615 885 2227 137 3615 3615 1432 3615 3615 3615 3615 3615 3615
[27] 1657 3615 18 3615 1964 3615 109 3615 1661 3615 3615 3615 3615
[40] 3615 3615 3615 3615 1315 1404 562 445 3615 3615 3615 3615 1238
[53] 182 1912 3615 3615 3615 3615 2454 344 3615 1913 1111 3615 1980
[66] 3615 600 1003 3615 3615 3615 3615 2105 1371 334 3615 1337 3615
[79] 3615 3615 3615 3615 3615 3615 727 3615 3615 2256 3615 3615 3615
[92] 782 3615 3615 3615 3615 3615 3615 3615 1625 3615 3615 3615 3615
[105] 3615 3615 768 3615 3615 3615 3615 3615 563 3615 2145 3615 3615
[118] 3615 561 3615 3615 3615 3615 3615 3615 3615 3615 1200 3615 3615
[131] 3615 3615 3615 3615 3615 3615 1317 3615 3615 3615 3615 3615 3615
[144] 329 3615 3615 3615 3615 242 3615 3615 646 3615 3615 3615 3615
[157] 683 587 952 0 3615 320 1092 3615 3615 68 510 3615 3615
[170] 245 2 3615 793 3615 3615 3615 2601 3615 3615 3615 1714 3615
[183] 3615 3615 1230 1170 2386 3615 571 883 1912 3615 3615 3615 1986
[196] 3615 1091 3615 3615 3615 3615 3615 1417 3615 3615 65 3615 3615
[209] 3615 3615 3615 3615 1378 3615 3615 3615 3615 3615 3615 3615 3615
[222] 3615 3615 679 866 3615 3615 3615 3615 3615 330 3615 3615 885
[235] 3615 946 3615 3615 375 3615 3615 3615 1133 3615 3615 561 3615
[248] 431 3615 2764 3615 3615 3615 3615 1696 3615 42 3615 59 0
[261] 3615 3615 3615 3615 3615 1034 1270 1200 2241 333 3615 3615 3615
[274] 3615 342 3615 722 3615 3615 1075 3615 3615 3615 3615 709 3615
[287] 3615 3615 3615 3615 3615 3615 3615 3615 3615 3615 3615 3615 164
[300] 3615 3615 3615 3615 3615 701 69 3615 3615 574 106 3615 3615
[313] 3615 3615 475 1019 3615 3615 3615 162 845 3615 3615 552 139
[326] 3615 3615 3615 3615 1589 3615 1045 735 3615 3615 3615 3615 3615
[339] 3615 3615 1972 459 3615 2299 3615 3615 3615 819 1463 3615 3615
[352] 1724 3615 3615 313 3615 3615 3615 183 3615 3615 3615 3615 3615
[365] 3615 3615 1626 822 3615 206 932 1097 3615 3615 3615 3615 3615
[378] 3615 3615 3615 166 478 3615 1584 3615 3615 3615 3615 480 3615
[391] 3615 311 3615 3615 3615 3615 202 3615 927 1343 1590 454 3615
[404] 834 3615 3615 3615 2090 1598 3615 1639 878 992 3615 3615 3615
[417] 3615 43 3615 2419 3615 3615 3615 3615 1121 3615 3615 3615 3615
[430] 2343 485 110 3615 3615 313 204 3615 3615 3615 238 3615 3615
[443] 3615 3615 3615 3615 480 3615 168 3615 73 3615 3615 101 3615
[456] 3615 3615 3615 446 3615 3615 645 3615 1567 3615 3554 3615 3615
[469] 3615 3615 3615 139 3615 3615 3615 841 3615 3615 336 3615 3615
[482] 3615 99 3615 3615 3615 3615 637 3615 3615 3615 3615 1111 3615
[495] 3615 3615 77 770 3615 3615 3615 3615 222 3615 3615 3615 3615
[508] 41 3615 3615 3615 3615 3615 3615 3615 3615 1610 224 3615 1191
[521] 3615 93 3615 3615 307 3615 211 3615 3615 1625 3615 578 3615
[534] 3615 3615 3615 3615 3615

> fit <- Surv(daysToDeathN,ifelse(kidney_clinic_km$deadORlive=="Dead",1,0)==1)
> fit
[1] 2564 953 3615+ 362 3615+ 3615+ 62 1493 51 3615+
[11] 3615+ 3615+ 1588 3615+ 885 2227 137 3615+ 3615+ 1432
[21] 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 1657 3615+ 18 3615+
[31] 1964 3615+ 109 3615+ 1661 3615+ 3615+ 3615+ 3615+ 3615+
[41] 3615+ 3615+ 3615+ 1315 1404 562 445 3615+ 3615+ 3615+
[51] 3615+ 1238 182 1912 3615+ 3615+ 3615+ 3615+ 2454 344
[61] 3615+ 1913 1111 3615+ 1980 3615+ 600 1003 3615+ 3615+
[71] 3615+ 3615+ 2105 1371 334 3615+ 1337 3615+ 3615+ 3615+
[81] 3615+ 3615+ 3615+ 3615+ 727 3615+ 3615+ 2256 3615+ 3615+
[91] 3615+ 782 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 1625
[101] 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 768 3615+ 3615+ 3615+
[111] 3615+ 3615+ 563 3615+ 2145 3615+ 3615+ 3615+ 561 3615+
[121] 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 1200 3615+ 3615+
[131] 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 1317 3615+ 3615+ 3615+
[141] 3615+ 3615+ 3615+ 329 3615+ 3615+ 3615+ 3615+ 242 3615+
[151] 3615+ 646 3615+ 3615+ 3615+ 3615+ 683 587 952 0
[161] 3615+ 320 1092 3615+ 3615+ 68 510 3615+ 3615+ 245
[171] 2 3615+ 793 3615+ 3615+ 3615+ 2601 3615+ 3615+ 3615+
[181] 1714 3615+ 3615+ 3615+ 1230 1170 2386 3615+ 571 883
[191] 1912 3615+ 3615+ 3615+ 1986 3615+ 1091 3615+ 3615+ 3615+
[201] 3615+ 3615+ 1417 3615+ 3615+ 65 3615+ 3615+ 3615+ 3615+
[211] 3615+ 3615+ 1378 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 3615+
[221] 3615+ 3615+ 3615+ 679 866 3615+ 3615+ 3615+ 3615+ 3615+
[231] 330 3615+ 3615+ 885 3615+ 946 3615+ 3615+ 375 3615+
[241] 3615+ 3615+ 1133 3615+ 3615+ 561 3615+ 431 3615+ 2764
[251] 3615+ 3615+ 3615+ 3615+ 1696 3615+ 42 3615+ 59 0
[261] 3615+ 3615+ 3615+ 3615+ 3615+ 1034 1270 1200 2241 333
[271] 3615+ 3615+ 3615+ 3615+ 342 3615+ 722 3615+ 3615+ 1075
[281] 3615+ 3615+ 3615+ 3615+ 709 3615+ 3615+ 3615+ 3615+ 3615+
[291] 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 164 3615+
[301] 3615+ 3615+ 3615+ 3615+ 701 69 3615+ 3615+ 574 106
[311] 3615+ 3615+ 3615+ 3615+ 475 1019 3615+ 3615+ 3615+ 162
[321] 845 3615+ 3615+ 552 139 3615+ 3615+ 3615+ 3615+ 1589
[331] 3615+ 1045 735 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 3615+
[341] 1972 459 3615+ 2299 3615+ 3615+ 3615+ 819 1463 3615+
[351] 3615+ 1724 3615+ 3615+ 313 3615+ 3615+ 3615+ 183 3615+
[361] 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 1626 822 3615+ 206
[371] 932 1097 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 3615+
[381] 166 478 3615+ 1584 3615+ 3615+ 3615+ 3615+ 480 3615+
[391] 3615+ 311 3615+ 3615+ 3615+ 3615+ 202 3615+ 927 1343
[401] 1590 454 3615+ 834 3615+ 3615+ 3615+ 2090 1598 3615+
[411] 1639 878 992 3615+ 3615+ 3615+ 3615+ 43 3615+ 2419
[421] 3615+ 3615+ 3615+ 3615+ 1121 3615+ 3615+ 3615+ 3615+ 2343
[431] 485 110 3615+ 3615+ 313 204 3615+ 3615+ 3615+ 238
[441] 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 480 3615+ 168 3615+
[451] 73 3615+ 3615+ 101 3615+ 3615+ 3615+ 3615+ 446 3615
[461] 3615+ 645 3615+ 1567 3615+ 3554 3615+ 3615+ 3615+ 3615+
[471] 3615+ 139 3615+ 3615+ 3615+ 841 3615+ 3615+ 336 3615+
[481] 3615+ 3615+ 99 3615+ 3615+ 3615+ 3615+ 637 3615+ 3615+
[491] 3615+ 3615+ 1111 3615+ 3615+ 3615+ 77 770 3615+ 3615+
[501] 3615+ 3615+ 222 3615+ 3615+ 3615+ 3615+ 41 3615+ 3615+
[511] 3615+ 3615+ 3615+ 3615+ 3615+ 3615+ 1610 224 3615+ 1191
[521] 3615+ 93 3615+ 3615+ 307 3615+ 211 3615+ 3615+ 1625
[531] 3615+ 578 3615+ 3615+ 3615+ 3615+ 3615+ 3615+

> par(mfrow=c(2,5))
> for(i in 1:length(padj_top10_id)){
+ kmfit1 <- survfit(fit~km_plot_cancer[,i])
+ plot(kmfit1,lty=c("solid","solid"),col=c("blue","red"),lwd=c(2,2),cex.main=1.8,
+ xlab="survival time in days",ylab="survival probabilities",cex.lab=1.6,
+ cex.axis=1.8,main=padj_top10_symbol[i])
+ m <- survdiff(y~km_plot_cancer[,i])
+ p_value <- 1 - pchisq(m$chisq, length(m$n) -1)
+ text(2000,0.95,paste("P=", round(p_value,digit=4)),cex=1.2)
+ legend("bottom",c("High","Low"),lty=c("solid","solid"),seg.len = 1, col=c("red","blue"),border="white",bty="n",cex=1.2,lwd=c(2,2))
+ }

这个结果并不理想,只有前两个基因的表达量与生存时间有关联,MFSD4A高表达生存时间长,而PVT1低表达生存时间长。

我们可以换个角度思考同一个问题,如果以FoldChange来挑选TOP10基因,结果如何?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
> annot_order_FC <- result_select_annot[order(abs(result_select_annot$log2FoldChange),decreasing = T),]
> head(annot_order_FC)
ensembl_gene_id baseMean log2FoldChange lfcSE stat
3736 ENSG00000224490 1714.5650 8.442435 0.2386643 35.37369
271 ENSG00000074803 22193.6542 -8.275817 0.3329828 -24.85359
2856 ENSG00000178776 886.3988 7.706962 0.3261798 23.62795
3468 ENSG00000204362 354.1751 7.570398 0.2945341 25.70296
597 ENSG00000104327 2844.9568 -7.529189 0.2863865 -26.29031
2280 ENSG00000164434 11701.5196 7.440639 0.3247560 22.91148
pvalue padj hgnc_symbol
3736 4.335266e-274 2.619801e-270 TTC21B-AS1
271 2.364992e-136 4.660319e-134 SLC12A1
2856 1.989551e-123 2.632742e-121 C5orf46
3468 1.083083e-145 2.727112e-143 LINC02783
597 2.474996e-152 8.309111e-150 CALB1
2280 3.570399e-116 3.785249e-114 FABP7
> FC_top10_symbol <- head(annot_order_FC$hgnc_symbol,10)
> FC_top10_id <- head(annot_order_FC$ensembl_gene_id,10)
> FC_top10 <- data_select[match(FC_top10_id,rownames(data_select)),]
> colnames(FC_top10) <- substr(colnames(FC_top10),1,15)
> head(FC_top10)[,1:3]
TCGA-CZ-5465-01 TCGA-BP-4355-01 TCGA-CZ-5451-01
ENSG00000224490 8222 864 3772
ENSG00000074803 19360 100 10
ENSG00000178776 1165 97 423
ENSG00000204362 201 519 20
ENSG00000104327 24 35 6
ENSG00000164434 17120 6341 21755
> dim(FC_top10)
[1] 10 611

kidney_clinic$newID <- gsub("\\_diagnosis","\\-01",kidney_clinic$id)
dim(kidney_clinic)
head(kidney_clinic)
km_plot <- apply(FC_top10,1,function(x){ifelse(x>median(x),"up","down")})
dim(km_plot)
head(km_plot)
km_plot_cancer <- km_plot[grepl("TCGA\\S{9}01",rownames(km_plot)),]
dim(km_plot_cancer)
head(km_plot_cancer)
kidney_clinic_km <- kidney_clinic[match(rownames(km_plot_cancer),kidney_clinic$newID),]
dim(kidney_clinic_km)
head(kidney_clinic_km)
daysToDeath <- as.numeric(as.character(kidney_clinic_km$days_to_death))
daysToDeath
daysToDeathN <- ifelse(is.na(daysToDeath),max(na.omit(daysToDeath)),daysToDeath)
daysToDeathN
fit <- Surv(daysToDeathN,ifelse(kidney_clinic_km$deadORlive=="Dead",1,0)==1)
par(mfrow=c(2,5))
for(i in 1:length(FC_top10_id)){
kmfit1 <- survfit(fit~km_plot_cancer[,i])
plot(kmfit1,lty=c("solid","solid"),col=c("blue","red"),lwd=c(2,2),cex.main=1.8,
xlab="survival time in days",ylab="survival probabilities",cex.lab=1.6,
cex.axis=1.8,main=FC_top10_symbol[i])
m <- survdiff(y~km_plot_cancer[,i])
p_value <- 1 - pchisq(m$chisq, length(m$n) -1)
text(2000,0.95,paste("P=", round(p_value,digit=4)),cex=1.2)
legend("bottom",c("High","Low"),lty=c("solid","solid"),seg.len = 1, col=c("red","blue"),border="white",bty="n",cex=1.2,lwd=c(2,2))
}

结果还不如使用padj,可见,表达差异大的基因不一定与生存时间有关联。

  • 本文作者:括囊无誉
  • 本文链接: TCGA/TCGA11Survival/
  • 版权声明: 本博客所有文章均为原创作品,转载请注明出处!
------ 本文结束 ------
坚持原创文章分享,您的支持将鼓励我继续创作!