pheatmap_advanced
Case 1
The datasets were provided by data-to-viz
library(tidyverse)
library(pheatmap)
library(ggplot2)
library(viridis)
library(kableExtra)
### dataset 1
<- read.table("https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/13_AdjacencyDirectedWeighted.csv", header=TRUE)
data # show data
%>% head(3) %>% select(1:3) %>% kable() %>%
data kable_styling(bootstrap_options = "striped", full_width = F)
Africa |
East.Asia |
Europe |
|
---|---|---|---|
Africa |
3.142471 |
0.000000 |
2.107883 |
East Asia |
0.000000 |
1.630997 |
0.601265 |
Europe |
0.000000 |
0.000000 |
2.401476 |
### the following function were embeded in pheatmap source code
= function(x){
scale_rows = apply(x, 1, mean, na.rm = T)
m = apply(x, 1, sd, na.rm = T)
s return((x - m) / s)
}
= function(mat, scale){
scale_mat if(!(scale %in% c("none", "row", "column"))){
stop("scale argument shoud take values: 'none', 'row' or 'column'")
}= switch(scale, none = mat, row = scale_rows(mat), column = t(scale_rows(t(mat))))
mat return(mat)
}
= function(x, n, center = F){
generate_breaks if(center){
= max(abs(c(min(x, na.rm = T), max(x, na.rm = T))))
m = seq(-m, m, length.out = n + 1)
res
}else{
= seq(min(x, na.rm = T), max(x, na.rm = T), length.out = n + 1)
res
}
return(res)
}
<- scale_mat(mat = data,scale = "column")
data.plot <- generate_breaks(data.plot,n = 256,center = F)
breaks
::pheatmap(mat = data.plot,
pheatmapcluster_cols = F,
cluster_rows = F,
scale = "column",border_color = "white",
color = viridis(n = 256, alpha = 1,
begin = 0, end = 1, option = "viridis"),
breaks = breaks)
case 2
the codes were adapted from slowkow Sort dendrogram is very important
set.seed(42)
<- function(n) {
random_string substr(paste(sample(letters), collapse = ""), 1, n)
}
<- matrix(rgamma(1000, shape = 1) * 5, ncol = 50)
mat
colnames(mat) <- paste(
rep(1:3, each = ncol(mat) / 3),
replicate(ncol(mat), random_string(5)),
sep = ""
)rownames(mat) <- replicate(nrow(mat), random_string(3))
%>% as.data.frame %>% head(3) %>% select(1:3) %>% kable() %>%
mat kable_styling(bootstrap_options = "striped", full_width = F)
1jrqxa |
1pskvw |
1ojvwz |
|
---|---|---|---|
abv |
9.6964789 |
9.172811 |
2.827695 |
nft |
0.9020955 |
15.575853 |
4.328376 |
xha |
2.6721643 |
3.127039 |
1.765077 |
split data into 3 groups, and increase the values in group1
<- substr(colnames(mat), 1, 1)
col_groups == "1"] <- mat[,col_groups == "1"] * 5 mat[,col_groups
making the heatmap
# install.packages("pheatmap", "RColorBrewer", "viridis")
library(pheatmap)
library(RColorBrewer)
library(viridis)
# Data frame with column annotations.
<- data.frame(group = col_groups)
mat_col rownames(mat_col) <- colnames(mat)
# List with colors for each annotation.
<- list(group = brewer.pal(3, "Set1"))
mat_colors names(mat_colors$group) <- unique(col_groups)
pheatmap(
mat = mat,
color = inferno(10),
border_color = NA,
show_colnames = FALSE,
show_rownames = FALSE,
annotation_col = mat_col,
annotation_colors = mat_colors,
drop_levels = TRUE,
fontsize = 14,
main = "Default Heatmap"
)
The default color breaks in pheatmap are uniformly distributed across the range of the data.
We can see that values in group 1 are larger than values in groups 2 and 3. However, we can’t distinguish different values within groups 2 and 3.
## ----uniform-color-breaks------------------------------------------------
<- seq(min(mat), max(mat), length.out = 10)
mat_breaks
<- data.frame(values = as.numeric(mat))
dat
## ----uniform-color-breaks-detail, fig.height=2, echo=FALSE---------------
<- data.frame(
dat_colors xmin = mat_breaks[1:(length(mat_breaks)-1)],
xmax = mat_breaks[2:length(mat_breaks)],
ymin = 0,
ymax = max(density(mat, bw = "SJ")$y),
fill = rev(inferno(length(mat_breaks) - 1)),
stringsAsFactors = FALSE
)ggplot() +
geom_rect(
data = dat_colors,
mapping = aes(
xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = fill
)+
) geom_density(
data = dat,
mapping = aes(values),
bw = "SJ", color = "cyan"
+
) scale_fill_manual(values = dat_colors$fill) +
::theme_cowplot()+
cowplottheme(legend.position = "none") +
labs(title = "Uniform breaks")
there are 6 data points greater than or equal to 100 are represented with 4 different colors.
<- as.data.frame(table(cut(
dat2
mat, mat_breaks
)))$fill <- inferno(nrow(dat2))
dat2ggplot() +
geom_bar(
data = dat2,
mapping = aes(x = Var1, weight = Freq, fill = Var1),
color = "black", size = 0.1
+
) coord_flip() +
scale_fill_manual(values = dat2$fill) +
::theme_cowplot()+
cowplottheme(legend.position = "none") +
labs(y = "data points", x = "breaks",
title = "Number of data points per color")
If we reposition the breaks at the quantiles of the data, then each color will represent an equal proportion of the data:
<- function(xs, n = 10) {
quantile_breaks <- quantile(xs, probs = seq(0, 1, length.out = n))
breaks !duplicated(breaks)]
breaks[
}
<- quantile_breaks(mat, n = 11) mat_breaks
lets see
<- data.frame(
dat_colors xmin = mat_breaks[1:(length(mat_breaks)-1)],
xmax = mat_breaks[2:length(mat_breaks)],
ymin = 0,
ymax = max(density(mat, bw = "SJ")$y),
fill = rev(inferno(length(mat_breaks) - 1)),
stringsAsFactors = FALSE
)ggplot() +
geom_rect(
data = dat_colors,
mapping = aes(
xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = fill
)+
) geom_density(
data = dat,
mapping = aes(values),
bw = "SJ", color = "cyan"
+
) scale_fill_manual(values = dat_colors$fill) +
theme(legend.position = "none") +
labs(title = "Quantile breaks")
<- as.data.frame(table(cut(
dat2
mat, mat_breaks
)))$fill <- inferno(nrow(dat2))
dat2ggplot() +
geom_bar(
data = dat2,
mapping = aes(x = Var1, weight = Freq, fill = Var1),
color = "black", size = 0.1
+
) coord_flip() +
scale_fill_manual(values = dat2$fill) +
theme(legend.position = "none") +
labs(y = "data points", x = "breaks",
title = "Number of data points per color")
When we use quantile breaks in the heatmap, we can clearly see that group 1 values are much larger than values in groups 2 and 3, and we can also distinguish different values within groups 2 and 3:
pheatmap(
mat = mat,
color = inferno(length(mat_breaks) - 1),
breaks = mat_breaks,
border_color = NA,
show_colnames = FALSE,
show_rownames = FALSE,
annotation_col = mat_col,
annotation_colors = mat_colors,
drop_levels = TRUE,
fontsize = 14,
main = "Quantile Color Scale"
)
We can also transform data
pheatmap(
mat = log10(mat),
color = inferno(10),
border_color = NA,
show_colnames = FALSE,
show_rownames = FALSE,
annotation_col = mat_col,
annotation_colors = mat_colors,
drop_levels = TRUE,
fontsize = 14,
main = "Log10 Transformed Values"
)
sort dendrograms
library(dendsort)
<- hclust(dist(t(mat)))
mat_cluster_cols
<- function(...) as.hclust(dendsort(as.dendrogram(...)))
sort_hclust
<- sort_hclust(mat_cluster_cols)
mat_cluster_cols plot(mat_cluster_cols, main = "Sorted Dendrogram", xlab = "", sub = "")
sort Dendrogram heatmap
<- sort_hclust(hclust(dist(mat)))
mat_cluster_rows pheatmap(
mat = mat,
color = inferno(length(mat_breaks) - 1),
breaks = mat_breaks,
border_color = NA,
cluster_cols = mat_cluster_cols,
cluster_rows = mat_cluster_rows,
show_colnames = FALSE,
show_rownames = FALSE,
annotation_col = mat_col,
annotation_colors = mat_colors,
drop_levels = TRUE,
fontsize = 14,
main = "Sorted Dendrograms"
)
change colnames angle
pheatmap(
mat = mat,
color = inferno(length(mat_breaks) - 1),
breaks = mat_breaks,
border_color = NA,
cluster_cols = mat_cluster_cols,
cluster_rows = mat_cluster_rows,
show_colnames = TRUE,
show_rownames = FALSE,
annotation_col = mat_col,
angle_col = 90,
fontsize_col = 8,
annotation_colors = mat_colors,
drop_levels = TRUE,
fontsize = 10,
main = "Sorted Dendrograms"
)