TCGA学习笔记10-受试者工作曲线

得到差异表达结果之后,我们可以根据padj来挑选排名前10的基因,来绘制受试者工作曲线,评估这些基因是否能作为判断肿瘤的标志基因。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
> annot_order <- result_select_annot[order(result_select_annot$padj),] # 按padj由小到大排序
> head(annot_order)
ensembl_gene_id baseMean log2FoldChange lfcSE stat
2718 ENSG00000174514 3783.7694 -5.246630 0.1256629 -41.75163
4214 ENSG00000249859 880.8528 4.645581 0.1208287 38.44768
3736 ENSG00000224490 1714.5650 8.442435 0.2386643 35.37369
190 ENSG00000061656 2539.8779 4.097458 0.1185899 34.55149
3632 ENSG00000213963 495.6315 -3.594074 0.1061639 -33.85401
3104 ENSG00000185633 84959.2141 5.793502 0.1737995 33.33440
pvalue padj hgnc_symbol
2718 0.000000e+00 0.000000e+00 MFSD4A
4214 0.000000e+00 0.000000e+00 PVT1
3736 4.335266e-274 2.619801e-270 TTC21B-AS1
190 1.353764e-261 6.135596e-258 SPAG4
3632 3.168412e-251 1.148803e-247 # 这个基因注释后没有名字
3104 1.226108e-243 3.704687e-240 NDUFA4L2
> padj_top10_symbol <- head(annot_order$hgnc_symbol,10) # TOP10基因名称
> padj_top10_symbol
[1] "MFSD4A" "PVT1" "TTC21B-AS1" "SPAG4" ""
[6] "NDUFA4L2" "ACP3" "GABRD" "EGLN3" "HILPDA"
> padj_top10_id <- head(annot_order$ensembl_gene_id,10) # TOP10基因ID
> padj_top10_id
[1] "ENSG00000174514" "ENSG00000249859" "ENSG00000224490"
[4] "ENSG00000061656" "ENSG00000213963" "ENSG00000185633"
[7] "ENSG00000014257" "ENSG00000187730" "ENSG00000129521"
[10] "ENSG00000135245"
> padj_top10 <- data_select[match(padj_top10_id,rownames(data_select)),] # TOP10表达数据
> head(padj_top10)[,1:3]
TCGA-CZ-5465-01A-01R-1503 TCGA-BP-4355-01A-01R-1289
ENSG00000174514 1784 295
ENSG00000249859 3335 1050
ENSG00000224490 8222 864
ENSG00000061656 3397 2055
ENSG00000213963 346 243
ENSG00000185633 153042 114967
TCGA-CZ-5451-01A-01R-1503
ENSG00000174514 511
ENSG00000249859 415
ENSG00000224490 3772
ENSG00000061656 3020
ENSG00000213963 124
ENSG00000185633 114580
> dim(padj_top10)
[1] 10 611
> colnames(padj_top10) <- substr(colnames(padj_top10),1,15) # 这里对列名作处理,为了后面与临床信息对应
> head(padj_top10)[,1:3]
TCGA-CZ-5465-01 TCGA-BP-4355-01 TCGA-CZ-5451-01
ENSG00000174514 1784 295 511
ENSG00000249859 3335 1050 415
ENSG00000224490 8222 864 3772
ENSG00000061656 3397 2055 3020
ENSG00000213963 346 243 124
ENSG00000185633 153042 114967 114580

> library(pROC) # 加载ROC软件包
> par(mfrow=c(2,5)) # 绘图区布局,2行5列
> group_number <- as.factor(ifelse(group=="cancer",1,0))
> for(i in 1:length(padj_top10_id)){
+ plot.roc(group_number,as.numeric(padj_top10[i,]),main=padj_top10_symbol[i],
+ print.auc=T, percent=T,cex.lab=1.5,print.auc.cex=1.5,col=i)
+ }
Setting levels: control = 0, case = 1
Setting direction: controls > cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls > cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls > cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases
Setting levels: control = 0, case = 1
Setting direction: controls < cases

换个角度,如果以FoldChange筛选排名前10的基因,结果如何?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
annot_order_FC <- result_select_annot[order(abs(result_select_annot$log2FoldChange),decreasing = T),]
head(annot_order_FC)
FC_top10_symbol <- head(annot_order_FC$hgnc_symbol,10)
FC_top10_id <- head(annot_order_FC$ensembl_gene_id,10)
FC_top10 <- data_select[match(FC_top10_id,rownames(data_select)),]
colnames(FC_top10) <- substr(colnames(FC_top10),1,15)
head(FC_top10)[,1:3]
dim(FC_top10)

par(mfrow=c(2,5))
group_number <- as.factor(ifelse(group=="cancer",1,0))
for(i in 1:length(FC_top10_id)){
plot.roc(group_number,as.numeric(FC_top10[i,]),main=FC_top10_symbol[i],
print.auc=T, percent=T,cex.lab=1.5,print.auc.cex=1.5,col=i)
}

  • 本文作者:括囊无誉
  • 本文链接: TCGA/TCGA10Roc/
  • 版权声明: 本博客所有文章均为原创作品,转载请注明出处!
------ 本文结束 ------
坚持原创文章分享,您的支持将鼓励我继续创作!