本文首发于“生信补给站”公众号 https://mp.weixin.qq.com/s/Wu61c-_MKsDlMj0IpQyVgQ
使用 XENA下载的TCGA-LAML.mutect2_snv.tsv文件绘制基因词云和突变景观图。
1.1 加载R包和数据
将从XENA下载的数据TCGA-LAML.mutect2_snv.tsv.gz解压,然后直接读入。
# Clear every object from the global environment.
# NOTE(review): kept from the original tutorial so the script's side effects
# are unchanged; restarting the R session is the cleaner way to start fresh.
rm(list = ls())

# Load packages
library(tidyverse)

# Read the decompressed mutation table (tab-separated, first row is the header)
mut <- read.table(
  "TCGA-LAML.mutect2_snv.tsv",
  sep = "\t",
  header = TRUE, # spelled out: `T` is an ordinary variable that can be reassigned
  stringsAsFactors = FALSE,
  check.names = FALSE # keep the column names exactly as they appear in the file
)

# Show the first two rows as a quick sanity check
head(mut, 2)
1.2 计算基因频次,绘制词云
# Count, per gene, the number of mutation records whose effect is a
# missense variant or an in-frame insertion, most frequent gene first
mut2 <- mut %>%
  filter(effect %in% c("missense_variant", "inframe_insertion")) %>%
  group_by(gene) %>%
  tally(name = "Freq", sort = TRUE)
head(mut2)

#### Gene word cloud ####
library(wordcloud2)

# Only draw genes that occur at least 5 times
da <- filter(mut2, Freq >= 5)
wordcloud2(da)
1.3 maf文件绘制词云图
如果使用maftools中的maf文件绘制呢?首先按照《maftools|TCGA肿瘤突变数据的汇总,分析和可视化》一文得到laml数据,然后可以用以下方式绘制基因词云图:
library(wordcloud2)

# Tabulate how often each gene symbol occurs in the `laml` MAF object and
# turn the table into a two-column data frame (Var1 = gene, Freq = count)
data2 <- as.data.frame(table(laml@data[["Hugo_Symbol"]]))

# Keep genes seen at least 3 times (3 plays the role of the minMut parameter)
da2 <- data2[data2$Freq >= 3, ]
wordcloud2(da2)

# 2. Waterfall plot (oncoplot) ----
2.1 提取基因
提取 1.2中突变频次较高的基因,进行绘制
# Keep the frequently mutated genes listed in `da`, restricted to the two
# effect types of interest ("missense_variant", "inframe_insertion"), with
# one row per distinct sample / gene / effect combination.
mut3 <- mut %>%
  filter(
    gene %in% da$gene,
    effect %in% c("missense_variant", "inframe_insertion")
  ) %>%
  select(Sample_ID, gene, effect) %>%
  distinct()

# Reshape to the wide layout used for the heatmap, then transpose so that
# genes are rows and samples are columns.
library(reshape2)
mut3_dcast <- mut3 %>%
  dcast(
    Sample_ID ~ gene,
    value.var = "effect",
    # A sample can carry both effect types in the same gene. Without an
    # explicit aggregator dcast() falls back to length() and the whole matrix
    # becomes counts; join the effects with ";" instead.
    fun.aggregate = function(x) paste(x, collapse = ";"),
    # Sample/gene pairs without a mutation stay NA, as before
    fill = NA_character_
  ) %>%
  # all_of() makes it explicit that the gene names come from an external vector
  dplyr::select(Sample_ID, all_of(da$gene)) %>%
  column_to_rownames("Sample_ID") %>%
  t()
2.2 ComplexHeatmap绘制突变景观图
library(ComplexHeatmap)
library(circlize)

# Work on a copy of the gene-by-sample matrix and replace NA (no mutation)
# with an empty string
mat <- mut3_dcast
mat[is.na(mat)] <- ""

# Preview the top-left corner. The indices are clamped so the preview also
# works when there are fewer than 6 genes or samples (a plain 1:6 would fail
# with "subscript out of bounds").
mat[
  seq_len(min(6L, nrow(mat))),
  seq_len(min(6L, ncol(mat))),
  drop = FALSE
]

# Draw the oncoplot with default settings
oncoPrint(mat)
2.3 景观图调整
# Colour for each alteration type; edit the colour codes to restyle the plot
col <- c(
  "missense_variant" = "blue",
  "inframe_insertion" = "green"
)

# How each alteration is drawn. Every function receives the cell position
# (x, y) together with its width (w) and height (h).
alter_fun <- local({
  gap <- unit(0.5, "mm")

  # Borderless rectangle filled with the given colour
  draw_rect <- function(x, y, w, h, fill) {
    grid.rect(x, y, w, h, gp = gpar(fill = fill, col = NA))
  }

  list(
    # Grey cell drawn behind every position
    background = function(x, y, w, h) {
      draw_rect(x, y, w - gap, h - gap, "#CCCCCC")
    },
    # Cell-sized rectangle (minus the gap)
    missense_variant = function(x, y, w, h) {
      draw_rect(x, y, w - gap, h - gap, col["missense_variant"])
    },
    # Shorter bar: one third of the cell height
    inframe_insertion = function(x, y, w, h) {
      draw_rect(x, y, w - gap, h * 0.33, col["inframe_insertion"])
    }
  )
})

# Legend entries; they must match the alteration types present in the data
alteration_types <- names(col)
heatmap_legend_param <- list(
  title = "Alternations",
  at = alteration_types,
  labels = alteration_types
)

# Plot title
column_title <- "This is Oncoplot "

oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  column_title = column_title,
  remove_empty_columns = TRUE, # drop empty columns
  remove_empty_rows = TRUE, # drop empty rows
  row_names_side = "left", # gene names on the left
  pct_side = "right",
  heatmap_legend_param = heatmap_legend_param
)
更多参数请参考《ComplexHeatmap|根据excel表绘制突变景观图(oncoplot)》。