Preparation

Set up the R environment by including a few necessary R libraries

library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
library(reshape2)
library(RColorBrewer)

Data import and process

Get the median size and the number of SEs/StrEs/TEs in each cell line, and format the data as a data frame with additional information.

# Summarise the enhancer calls of one sample.
#
# Reads "<name>.SE.bed", "<name>.TE.bed" and "<name>.StrE.bed" (in that
# order) with read.table() and returns a three-row data frame, one row per
# type in the order SE, TE, StrE, with columns:
#   sample      - the `name` argument
#   type        - "SE", "TE" or "StrE"
#   number      - number of rows in the corresponding file
#   median_size - median of (column 3 - column 2) over the rows of that file
.proc <- function(name) {
  types <- c("SE", "TE", "StrE")
  beds <- lapply(paste0(name, c(".SE.bed", ".TE.bed", ".StrE.bed")), read.table)

  # unlist() applies the same type coercion as c() on the individual scalars.
  counts <- unlist(lapply(beds, nrow), use.names = FALSE)
  medians <- unlist(
    lapply(beds, function(bed) median(bed[, 3] - bed[, 2])),
    use.names = FALSE
  )

  data.frame(
    sample = name,
    type = types,
    number = counts,
    median_size = medians
  )
}
# Samples to summarise. .proc() builds the file names from each entry, so
# "<sample>.SE.bed", "<sample>.TE.bed" and "<sample>.StrE.bed" must be
# readable relative to the working directory.
samples <- c("GM12878", "H1", "HepG2", "HMEC", "HSMM", "Huvec", "K562", "NHLF")
# One summary data frame per sample, stacked into a single data frame.
# Iterating over a named vector keeps the sample names on the list, so the
# row names of the result are prefixed with the sample name. lapply() always
# returns a list, so no simplify flag (and no reassignable `F`) is needed.
data <- do.call(rbind, lapply(setNames(samples, samples), .proc))

Median size

Using box-plots to compare the median size of SEs, StrEs, and TEs. Here, the median size is in bp, and the values are plotted on a log scale.

# Median size per type, one point per sample, on a log10 y-axis.
# Box-plot outliers are hidden (outlier.shape = NA) because geom_jitter()
# already draws every data point; otherwise each outlier would be drawn twice.
# height = 0 restricts the jitter to the x direction so the points stay at
# their true y values.
ggplot(data, aes(x = type, y = median_size, color = type)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(height = 0, alpha = 0.3, size = 0.3) +
  scale_colour_manual(values = brewer.pal(7, "Set1")[c(2, 4, 3)]) +
  scale_y_log10() +
  theme(legend.title = element_blank()) +
  xlab("") + ylab("Median size (bp)")

Here, the values are plotted on a linear scale.

# Median size per type, one point per sample, on a linear y-axis.
# Box-plot outliers are hidden (outlier.shape = NA) because geom_jitter()
# already draws every data point; otherwise each outlier would be drawn twice.
# height = 0 restricts the jitter to the x direction so the points stay at
# their true y values.
ggplot(data, aes(x = type, y = median_size, color = type)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(height = 0, alpha = 0.3, size = 0.3) +
  scale_colour_manual(values = brewer.pal(7, "Set1")[c(2, 4, 3)]) +
  theme(legend.title = element_blank()) +
  xlab("") + ylab("Median size (bp)")

Numbers

Using box-plots to compare the number of SEs, StrEs, and TEs, and split the samples into cell lines, primary cells, and tissues. Here, the values are plotted on a log scale.

# Number of elements per type, one point per sample, on a log10 y-axis.
# Box-plot outliers are hidden (outlier.shape = NA) because geom_jitter()
# already draws every data point; otherwise each outlier would be drawn twice.
# height = 0 restricts the jitter to the x direction so the points stay at
# their true y values.
ggplot(data, aes(x = type, y = number, color = type)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(height = 0, alpha = 0.3, size = 0.3) +
  scale_y_log10() +
  scale_colour_manual(values = brewer.pal(7, "Set1")[c(2, 4, 3)]) +
  theme(legend.title = element_blank()) +
  xlab("") + ylab("Number")

Here, the values are plotted on a linear scale.

# Number of elements per type, one point per sample, on a linear y-axis.
# Box-plot outliers are hidden (outlier.shape = NA) because geom_jitter()
# already draws every data point; otherwise each outlier would be drawn twice.
# height = 0 restricts the jitter to the x direction so the points stay at
# their true y values.
ggplot(data, aes(x = type, y = number, color = type)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(height = 0, alpha = 0.3, size = 0.3) +
  scale_colour_manual(values = brewer.pal(7, "Set1")[c(2, 4, 3)]) +
  theme(legend.title = element_blank()) +
  xlab("") + ylab("Number")

Show the analysis environment

# Print the R version, platform, and attached/loaded package versions so the
# analysis environment is recorded alongside the results.
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] RColorBrewer_1.1-2 reshape2_1.4.3     cowplot_0.9.4     
## [4] ggplot2_3.1.1     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1       pillar_1.4.1     compiler_3.6.0   plyr_1.8.4      
##  [5] tools_3.6.0      digest_0.6.19    evaluate_0.14    tibble_2.1.3    
##  [9] gtable_0.3.0     pkgconfig_2.0.2  rlang_0.3.4      rstudioapi_0.10 
## [13] yaml_2.2.0       xfun_0.7         withr_2.1.2      stringr_1.4.0   
## [17] dplyr_0.8.1      knitr_1.23       grid_3.6.0       tidyselect_0.2.5
## [21] glue_1.3.1       R6_2.4.0         rmarkdown_1.13   purrr_0.3.2     
## [25] magrittr_1.5     scales_1.0.0     htmltools_0.3.6  assertthat_0.2.1
## [29] colorspace_1.4-1 labeling_0.3     stringi_1.4.3    lazyeval_0.2.2  
## [33] munsell_0.5.0    crayon_1.3.4