超详细的R语言热图之complexheatmap系列07

# Load the example alteration table bundled with ComplexHeatmap.
mat <- read.table(
  system.file(
    "extdata",
    "tcga_lung_adenocarcinoma_provisional_ras_raf_mek_jnk_signalling.txt",
    package = "ComplexHeatmap"
  ),
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
# Cells that were parsed as NA become empty strings.
mat[is.na(mat)] <- ""
# Use the first column as row names, then remove it from the data.
rownames(mat) <- mat[, 1]
mat <- mat[, -1]
# Also remove the last column.
# NOTE(review): presumably an empty trailing column in the file -- confirm.
mat <- mat[, -ncol(mat)]
# Transpose into a character matrix; the preview below shows genes as rows
# and TCGA sample IDs as columns.
mat <- t(as.matrix(mat))
mat[1:3, 1:3]
##      TCGA-05-4384-01 TCGA-05-4390-01 TCGA-05-4425-01
## KRAS "  "            "MUT;"          "  "           
## HRAS "  "            "  "            "  "           
## BRAF "  "            "  "            "  "

首先为不同的变异类型设置颜色。

# Fill colour for each alteration type.
col <- c(HOMDEL = "blue", AMP = "red", MUT = "#008000")

# Drawing functions handed to oncoPrint(): one for the cell background and
# one per alteration type. Each is called with the cell position (x, y) and
# the cell size (w, h).
alter_fun <- list(
  # Grey rectangle, 2pt smaller than the cell in both directions.
  background = function(x, y, w, h) {
    grid.rect(
      x, y,
      width = w - unit(2, "pt"),
      height = h - unit(2, "pt"),
      gp = gpar(fill = "#CCCCCC", col = NA)
    )
  },

  # Same rectangle, filled with the HOMDEL colour.
  HOMDEL = function(x, y, w, h) {
    grid.rect(
      x, y,
      width = w - unit(2, "pt"),
      height = h - unit(2, "pt"),
      gp = gpar(fill = col["HOMDEL"], col = NA)
    )
  },

  # Same rectangle, filled with the AMP colour.
  AMP = function(x, y, w, h) {
    grid.rect(
      x, y,
      width = w - unit(2, "pt"),
      height = h - unit(2, "pt"),
      gp = gpar(fill = col["AMP"], col = NA)
    )
  },

  # Thinner band (0.33 of the cell height) so it can be drawn on top of the
  # other rectangles without hiding them.
  MUT = function(x, y, w, h) {
    grid.rect(
      x, y,
      width = w - unit(2, "pt"),
      height = h * 0.33,
      gp = gpar(fill = col["MUT"], col = NA)
    )
  }
)

也提供了另外一种设置颜色的方法：

# 另一种方法
library(ComplexHeatmap)
## 载入需要的程辑包：grid
## ========================================
## ComplexHeatmap version 2.8.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
# Same four drawing functions, built with the alter_graphic() helper
# instead of hand-written grid calls.
alter_fun <- list(
  background = alter_graphic("rect", fill = "#CCCCCC"),
  HOMDEL = alter_graphic("rect", fill = col["HOMDEL"]),
  AMP = alter_graphic("rect", fill = col["AMP"]),
  MUT = alter_graphic("rect", fill = col["MUT"], height = 0.33)
)

下面就是画图时刻：

# Title drawn above the oncoPrint.
column_title <- "OncoPrint for TCGA Lung Adenocarcinoma, genes in Ras Raf MEK JNK signalling"
# Legend settings: show the three alteration codes with readable labels.
heatmap_legend_param <- list(
  title = "Alternations",
  at = c("HOMDEL", "AMP", "MUT"),
  labels = c("Deep deletion", "Amplification", "Mutation")
)
oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
)
## All mutation types: MUT, AMP, HOMDEL.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.

plot of chunk unnamed-chunk-4

7.2.1 移除空的行和列

当一个样本或基因没有突变时，默认也会画出来，可以通过设置去除。

# Same plot, but without rows/columns that carry no alteration.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  remove_empty_columns = TRUE, # remove empty columns
  remove_empty_rows = TRUE, # remove empty rows
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
)
## All mutation types: MUT, AMP, HOMDEL.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.

plot of chunk unnamed-chunk-5

需要注意一个小问题：移除空行或空列后，矩阵的行数、列数会发生变化。当此图和其他注释条、热图等联合时，它们的长度必须与移除后的行数、列数一致，否则可能会出错，需要手动调整好数据。

7.2.2 重新排列瀑布图

可以通过自定义重新排列行或列的顺序。

# Custom column (sample) order read from a file shipped with ComplexHeatmap.
# system.file() builds the path itself; mustWork = TRUE makes a missing
# file fail with a clear error instead of passing a bad path to scan().
sample_order <- scan(
  system.file(
    "extdata", "sample_order.txt",
    package = "ComplexHeatmap", mustWork = TRUE
  ),
  what = "character"
)

# Keep the rows in their original order and the columns in `sample_order`.
# seq_len() is used instead of 1:nrow(mat), which would yield c(1, 0) for a
# matrix with zero rows.
oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  row_order = seq_len(nrow(mat)),
  column_order = sample_order,
  remove_empty_columns = TRUE,
  remove_empty_rows = TRUE,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
)
## All mutation types: MUT, AMP, HOMDEL.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.

plot of chunk unnamed-chunk-6

7.2.3瀑布图注释

默认的瀑布图包括了很多注释，比如上侧的条形图表示每个样本的突变情况，右侧条形图表示每个基因的突变情况，左侧数字表示某个基因突变样本占的比例。

条形图是通过anno_oncoprint_barplot()函数设置的。

# Replace the default barplot annotations with customised ones. The first
# argument of anno_oncoprint_barplot() names the alteration types to
# include: "MUT" on top, "AMP" and "HOMDEL" on the right.
oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  top_annotation = HeatmapAnnotation(
    column_barplot = anno_oncoprint_barplot(
      "MUT",
      border = TRUE,
      height = unit(4, "cm")
    )
  ),
  right_annotation = rowAnnotation(
    row_barplot = anno_oncoprint_barplot(
      c("AMP", "HOMDEL"),
      border = TRUE,
      height = unit(4, "cm"),
      axis_param = list(side = "bottom", labels_rot = 90)
    )
  ),
  remove_empty_columns = TRUE,
  remove_empty_rows = TRUE,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
)
## All mutation types: MUT, AMP, HOMDEL.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.

plot of chunk unnamed-chunk-7

条形图注释条默认显示数字，可以通过设置show_fraction = TRUE变为比例：

# Barplot annotations with show_fraction = TRUE on both the top and the
# right side.
oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  top_annotation = HeatmapAnnotation(
    column_barplot = anno_oncoprint_barplot(show_fraction = TRUE)
  ),
  right_annotation = rowAnnotation(
    row_barplot = anno_oncoprint_barplot(show_fraction = TRUE)
  ),
  remove_empty_columns = TRUE,
  remove_empty_rows = TRUE,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
)
## All mutation types: MUT, AMP, HOMDEL.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.

plot of chunk unnamed-chunk-8

通过pct_side = "right"，row_names_side = "left"设置左右两侧的数字和基因：

# Put the percentage text on the right and the row names on the left.
oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  remove_empty_columns = TRUE,
  remove_empty_rows = TRUE,
  pct_side = "right",
  row_names_side = "left",
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
)
## All mutation types: MUT, AMP, HOMDEL.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.

plot of chunk unnamed-chunk-9

还可以自己添加更多注释：

# Add extra annotations next to the oncoPrint barplot.
# NOTE(review): the column annotations have length 172 and the row
# annotations length 26 -- presumably the numbers of samples and genes left
# after empty columns/rows are removed; confirm if the data change.
oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  remove_empty_columns = TRUE,
  remove_empty_rows = TRUE,
  top_annotation = HeatmapAnnotation(
    cbar = anno_oncoprint_barplot(),
    foo1 = 1:172,
    bar1 = anno_points(1:172)
  ),
  left_annotation = rowAnnotation(foo2 = 1:26),
  right_annotation = rowAnnotation(bar2 = anno_barplot(1:26)),
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
)
## All mutation types: MUT, AMP, HOMDEL.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.

plot of chunk unnamed-chunk-10

7.2.4 瀑布图作为热图组图

oncoPrint返回的还是一个Heatmap类，当然可以和其他热图继续进行组合！

# Place an ordinary heatmap to the right of the oncoPrint with `+`.
# The extra heatmap holds random values, one row per row of `mat`.
ht_list <- oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
) +
  Heatmap(
    matrix(rnorm(nrow(mat) * 10), ncol = 10),
    name = "expr",
    width = unit(4, "cm")
  )
## All mutation types: MUT, AMP, HOMDEL.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.
# Render the combined heatmap list.
draw(ht_list)

plot of chunk unnamed-chunk-11

或者竖着组图：

# Stack an ordinary heatmap below the oncoPrint with `%v%`.
# The extra heatmap holds random values, one column per column of `mat`.
ht_list <- oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
) %v%
  Heatmap(
    matrix(rnorm(ncol(mat) * 10), nrow = 10),
    name = "expr",
    height = unit(4, "cm")
  )
## All mutation types: MUT, AMP, HOMDEL.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.
# Render the combined heatmap list.
draw(ht_list)

plot of chunk unnamed-chunk-12

当然也可以进行热图分割：

# Side-by-side heatmap list again, this time split by rows when drawn.
ht_list <- oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
) +
  Heatmap(
    matrix(rnorm(nrow(mat) * 10), ncol = 10),
    name = "expr",
    width = unit(4, "cm")
  )
## All mutation types: MUT, AMP, HOMDEL.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.
# Each row is randomly assigned to group "a" or "b" for the split.
draw(
  ht_list,
  row_split = sample(c("a", "b"), nrow(mat), replace = TRUE)
)

plot of chunk unnamed-chunk-13

上面就是画瀑布图的小例子，可以发挥你的想象力，进行各种自定义设置，大家可以自行探索。

下面是对一些参数和内置函数的解释，说明为什么要这样设置。

7.2 一般设置

7.2.1 输入数据的格式

输入数据是矩阵，行是基因，列是样本，内容是突变类型，类似以下矩阵：

# Toy alteration matrix: genes (g1-g3) in rows, samples (s1-s3) in columns.
# Each cell lists the alteration types for that gene/sample, separated by
# ";"; an empty cell means no alteration.
# Passing the data through `text =` lets read.table() open and close its own
# connection, instead of leaving an unclosed textConnection() behind.
mat <- read.table(
  text = "s1,s2,s3
g1,snv;indel,snv,indel
g2,,snv;indel,snv
g3,snv,,indel;snv",
  row.names = 1, header = TRUE, sep = ",", stringsAsFactors = FALSE
)
mat <- as.matrix(mat)
mat
##    s1          s2          s3         
## g1 "snv;indel" "snv"       "indel"    
## g2 ""          "snv;indel" "snv"      
## g3 "snv"       ""          "indel;snv"

在这个例子中，我们需要设置一个函数提取不同的突变类型，方便使用不同的颜色。但其实oncoPrint内置了这样的函数，不需要自己写，只需要你的分隔符是; : , |，其中的任意一种都可以。

# You do not actually need to write this function yourself; it is shown
# only to illustrate how a cell is broken into its alteration types.
# Splits `x` on ";" and returns the pieces of its first element.
get_type_fun <- function(x) {
  pieces <- strsplit(x, ";")
  pieces[[1]]
}
get_type_fun(mat[1, 1])
## [1] "snv"   "indel"

对于一个样本中的一个基因，不同的突变类型可能会画在同一个单元格中，因此我们需要为每种突变类型定义如何绘制对应的图形元素，主要包括元素的位置（x和y）以及宽度和高度。

# Colours for the two alteration types of the toy matrix.
col <- c(snv = "red", indel = "blue")
oncoPrint(
  mat,
  alter_fun = list(
    # snv: rectangle covering 90% of the cell in both directions.
    snv = function(x, y, w, h) {
      grid.rect(
        x, y,
        width = w * 0.9,
        height = h * 0.9,
        gp = gpar(fill = col["snv"], col = NA)
      )
    },
    # indel: same width, but only 40% of the cell height.
    indel = function(x, y, w, h) {
      grid.rect(
        x, y,
        width = w * 0.9,
        height = h * 0.4,
        gp = gpar(fill = col["indel"], col = NA)
      )
    }
  ),
  col = col
)
## All mutation types: snv, indel.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.

plot of chunk unnamed-chunk-16

输入数据也可以是列表格式。例如对于上面一幅图，输入数据应该是一个含有2个矩阵的列表，每个矩阵都是由0和1组成的，分别代表不同的突变类型（snv, indel）。两个矩阵的名字应该和突变类型吻合（snv, indel）。

# List input: one 0/1 matrix per alteration type, all sharing the same
# gene (row) and sample (column) names.
# NOTE(review): the character matrix `mat` earlier in the file has
# "indel;snv" at g3/s3, but the indel matrix here has 0 in that cell --
# confirm whether that is intended.
mat_list <- list(
  snv = matrix(
    c(1, 0, 1, 1, 1, 0, 0, 1, 1),
    nrow = 3,
    dimnames = list(c("g1", "g2", "g3"), c("s1", "s2", "s3"))
  ),
  indel = matrix(
    c(1, 0, 0, 0, 1, 0, 1, 0, 0),
    nrow = 3,
    dimnames = list(c("g1", "g2", "g3"), c("s1", "s2", "s3"))
  )
)
mat_list
## $snv
##    s1 s2 s3
## g1  1  1  0
## g2  0  1  1
## g3  1  0  1
## 
## $indel
##    s1 s2 s3
## g1  1  0  1
## g2  0  1  0
## g3  0  0  0

oncoPrint需要列表中的所有矩阵都有相同的行名和列名

# With list input the alteration types come from the list names, so no
# `get_type` function is needed.
oncoPrint(
  mat_list,
  alter_fun = list(
    snv = function(x, y, w, h) {
      grid.rect(
        x, y,
        width = w * 0.9,
        height = h * 0.9,
        gp = gpar(fill = col["snv"], col = NA)
      )
    },
    indel = function(x, y, w, h) {
      grid.rect(
        x, y,
        width = w * 0.9,
        height = h * 0.4,
        gp = gpar(fill = col["indel"], col = NA)
      )
    }
  ),
  col = col
)
## All mutation types: snv, indel.
## `alter_fun` is assumed vectorizable. If it does not generate correct
## plot, please set `alter_fun_is_vectorized = FALSE` in `oncoPrint()`.

plot of chunk unnamed-chunk-18

7.2.2 自定义`alter_fun()`

# Custom shapes: each cell is split along its diagonal into two triangles.
# snv fills the lower-left triangle, indel the upper-right one, and the
# background draws both in grey.
oncoPrint(
  mat,
  alter_fun = list(
    background = function(x, y, w, h) {
      half_w <- 0.5 * w
      half_h <- 0.5 * h
      # Lower-left triangle.
      grid.polygon(
        unit.c(x - half_w, x - half_w, x + half_w),
        unit.c(y - half_h, y + half_h, y - half_h),
        gp = gpar(fill = "grey", col = "white")
      )
      # Upper-right triangle.
      grid.polygon(
        unit.c(x + half_w, x + half_w, x - half_w),
        unit.c(y + half_h, y - half_h, y + half_h),
        gp = gpar(fill = "grey", col = "white")
      )
    },
    snv = function(x, y, w, h) {
      half_w <- 0.5 * w
      half_h <- 0.5 * h
      grid.polygon(
        unit.c(x - half_w, x - half_w, x + half_w),
        unit.c(y - half_h, y + half_h, y - half_h),
        gp = gpar(fill = col["snv"], col = "white")
      )
    },
    indel = function(x, y, w, h) {
      half_w <- 0.5 * w
      half_h <- 0.5 * h
      grid.polygon(
        unit.c(x + half_w, x + half_w, x - half_w),
        unit.c(y + half_h, y - half_h, y + half_h),
        gp = gpar(fill = col["indel"], col = "white")
      )
    }
  ),
  col = col
)
## All mutation types: snv, indel.

plot of chunk unnamed-chunk-19

以上就是今天的内容。

本系列所有代码会在本系列结束后给出获取方式，欢迎点赞，关注，转发！

欢迎关注我的公众号：医学和生信笔记

医学和生信笔记 公众号主要分享：1.医学小知识、肛肠科小知识；2.R语言和Python相关的数据分析、可视化、机器学习等；3.生物信息学学习资料和自己的学习笔记！

更多精彩内容

使用R语言美化PCA图

R语言可视化聚类树

R语言画多时间点ROC和多指标ROC曲线

这样的洞庭湖决堤，实在让人同情不起来

李尚福、魏凤和双双被拿下，与美国一份报告是否有关？

抗洪靠嘴，堵漏靠沙？印度官员真是绝了！

有的人走了，却永远活着

圈内疯传某谣言

超详细的R语言热图之complexheatmap系列07

第七章瀑布图

7.1 可视化突变数据的瀑布图

7.2.1 移除空的行和列

7.2.2 重新排列瀑布图

7.2.3瀑布图注释

7.2.4 瀑布图作为热图组图

7.2 一般设置

7.2.1 输入数据的格式

7.2.2 自定义`alter_fun()`

您可能也对以下帖子感兴趣

这样的洞庭湖决堤，实在让人同情不起来

李尚福、魏凤和双双被拿下，与美国一份报告是否有关？

抗洪靠嘴，堵漏靠沙？印度官员真是绝了！

有的人走了，却永远活着

圈内疯传某谣言

生成图片，分享到微信朋友圈

超详细的R语言热图之complexheatmap系列07

第七章 瀑布图

7.1 可视化突变数据的瀑布图

7.2.1 移除空的行和列

7.2.2 重新排列瀑布图

7.2.3瀑布图注释

7.2.4 瀑布图作为热图组图

7.2 一般设置

7.2.1 输入数据的格式

7.2.2 自定义alter_fun()

您可能也对以下帖子感兴趣

第七章瀑布图

7.2.2 自定义`alter_fun()`