Set up the R environment by including a few necessary R libraries
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
library(reshape2)
library(RColorBrewer)
Get the median size and the number of SEs/StrEs/TEs in each cell line, and format the data as a data frame with additional information.
# Summarise the SE, TE, and StrE interval files of one sample.
#
# Reads "<name>.SE.bed", "<name>.TE.bed", and "<name>.StrE.bed" (in that
# order) with read.table() and returns a three-row data frame, one row per
# element type in the order SE, TE, StrE, with the columns:
#   sample      - `name`, exactly as supplied
#   type        - "SE", "TE", or "StrE"
#   number      - number of rows in the corresponding file
#   median_size - median of (column 3 - column 2) over those rows
.proc <- function(name) {
  kinds <- c("SE", "TE", "StrE")

  # Load every table up front so a missing file fails before any statistics
  # are computed.
  tables <- lapply(kinds, function(kind) {
    read.table(paste0(name, ".", kind, ".bed"))
  })

  counts <- unlist(lapply(tables, nrow), use.names = FALSE)
  medians <- unlist(
    lapply(tables, function(tab) median(tab[, 3] - tab[, 2])),
    use.names = FALSE
  )

  data.frame(
    sample = name,
    type = kinds,
    number = counts,
    median_size = medians
  )
}
# Sample identifiers; each is used as the file-name prefix passed to .proc().
samples <- c(
  "GM12878", "H1", "HepG2", "HMEC", "HSMM", "Huvec", "K562", "NHLF"
)

# Summarise every sample and stack the per-sample data frames row-wise.
# lapply() always returns a list, so no `simplify` flag is needed (the
# shorthand `F` is an ordinary, reassignable variable). Naming the input
# vector by itself keeps the list names, and therefore the "<sample>.<i>"
# row names produced by rbind().
data <- do.call(rbind, lapply(setNames(samples, samples), .proc))
Using box-plots to compare the median size of SEs, StrEs, and TEs. Here, the median size is in bp, and the values are plotted on a log scale.
# Box plot of the per-sample median element size for each element type,
# with the y axis on a log10 scale.
ggplot(data, aes(type, median_size, colour = type)) +
  geom_boxplot(outlier.size = 0.3) +
  # Overlay the individual per-sample values on top of the boxes.
  geom_jitter(size = 0.3, alpha = 0.3) +
  scale_y_log10() +
  # Entries 2, 4, and 3 of the 7-colour "Set1" palette, passed unnamed.
  scale_colour_manual(values = brewer.pal(7, "Set1")[c(2, 4, 3)]) +
  labs(x = "", y = "Median size (bp)") +
  theme(legend.title = element_blank())
Here, the values are plotted on a linear scale.
# Box plot of the per-sample median element size for each element type,
# with the y axis left on its default (linear) scale.
ggplot(data, aes(type, median_size, colour = type)) +
  geom_boxplot(outlier.size = 0.3) +
  # Overlay the individual per-sample values on top of the boxes.
  geom_jitter(size = 0.3, alpha = 0.3) +
  # Entries 2, 4, and 3 of the 7-colour "Set1" palette, passed unnamed.
  scale_colour_manual(values = brewer.pal(7, "Set1")[c(2, 4, 3)]) +
  labs(x = "", y = "Median size (bp)") +
  theme(legend.title = element_blank())
Using box-plots to compare the number of SEs, StrEs, and TEs across all samples (the samples are pooled here rather than split into cell lines, primary cells, and tissues). Here, the values are plotted on a log scale.
# Box plot of the per-sample element count for each element type,
# with the y axis on a log10 scale.
ggplot(data, aes(type, number, colour = type)) +
  geom_boxplot(outlier.size = 0.3) +
  # Overlay the individual per-sample values on top of the boxes.
  geom_jitter(size = 0.3, alpha = 0.3) +
  # Entries 2, 4, and 3 of the 7-colour "Set1" palette, passed unnamed.
  scale_colour_manual(values = brewer.pal(7, "Set1")[c(2, 4, 3)]) +
  scale_y_log10() +
  labs(x = "", y = "Number") +
  theme(legend.title = element_blank())
Here, the values are plotted on a linear scale.
# Box plot of the per-sample element count for each element type,
# with the y axis left on its default (linear) scale.
ggplot(data, aes(type, number, colour = type)) +
  geom_boxplot(outlier.size = 0.3) +
  # Overlay the individual per-sample values on top of the boxes.
  geom_jitter(size = 0.3, alpha = 0.3) +
  # Entries 2, 4, and 3 of the 7-colour "Set1" palette, passed unnamed.
  scale_colour_manual(values = brewer.pal(7, "Set1")[c(2, 4, 3)]) +
  labs(x = "", y = "Number") +
  theme(legend.title = element_blank())
# Print the R version, platform, and attached/loaded package versions used for this run.
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] RColorBrewer_1.1-2 reshape2_1.4.3 cowplot_0.9.4
## [4] ggplot2_3.1.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.1 pillar_1.4.1 compiler_3.6.0 plyr_1.8.4
## [5] tools_3.6.0 digest_0.6.19 evaluate_0.14 tibble_2.1.3
## [9] gtable_0.3.0 pkgconfig_2.0.2 rlang_0.3.4 rstudioapi_0.10
## [13] yaml_2.2.0 xfun_0.7 withr_2.1.2 stringr_1.4.0
## [17] dplyr_0.8.1 knitr_1.23 grid_3.6.0 tidyselect_0.2.5
## [21] glue_1.3.1 R6_2.4.0 rmarkdown_1.13 purrr_0.3.2
## [25] magrittr_1.5 scales_1.0.0 htmltools_0.3.6 assertthat_0.2.1
## [29] colorspace_1.4-1 labeling_0.3 stringi_1.4.3 lazyeval_0.2.2
## [33] munsell_0.5.0 crayon_1.3.4