Set up the R environment by loading the required R libraries.
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
library(reshape2)
library(RColorBrewer)
# Sample metadata table. read.csv2() expects ";"-separated fields and ","
# as the decimal mark.
sampleInfo <- read.csv2(file = "SE_samples.csv")

# Interval tables read with read.table() defaults (no header row, so the
# columns are addressed by position). The code below uses column 4 as the
# sample ID and column 5 as the size.
TE_enc <- read.table(file = "TE_enc.bed.gz")
SE_enc <- read.table(file = "SE_enc.bed.gz")
First, get the median size of SEs/TEs in each cell type/tissue, and format the data as a data frame with additional information.
# Per-sample median of column 5 (size), grouped by column 4 (sample ID),
# computed separately for the SE and TE tables.
SE_enc.median_size <- tapply(
  X = SE_enc[[5]],
  INDEX = as.factor(SE_enc[[4]]),
  FUN = median
)
TE_enc.median_size <- tapply(
  X = TE_enc[[5]],
  INDEX = as.factor(TE_enc[[4]]),
  FUN = median
)

# One row per sample, tagged with the element type it was computed from.
SE_enc.median_size.df <- data.frame(
  Sample.ID = names(SE_enc.median_size),
  type = "SE",
  size = SE_enc.median_size
)
TE_enc.median_size.df <- data.frame(
  Sample.ID = names(TE_enc.median_size),
  type = "TE",
  size = TE_enc.median_size
)

# Stack SE and TE rows, attach the sample metadata by sample ID, and keep
# only the three biosample types that are plotted.
median_size <- rbind(SE_enc.median_size.df, TE_enc.median_size.df)
data <- merge(x = median_size, y = sampleInfo, by = "Sample.ID")
data <- subset(
  x = data,
  subset = Biosample.type %in% c("Cell line", "Primary cell", "Tissue")
)
Use box plots to compare the median size of SEs and TEs, with the samples split into cell lines, primary cells, and tissues. Here, the median size is in bp, and the values are plotted on a log scale.
# Box plots of the per-sample median size (bp) for SEs vs. TEs, one panel per
# biosample type, with the y axis on a log10 scale.
ggplot(data, aes(x = type, y = size, colour = type)) +
  # The jitter layer below already draws every sample, so the box layer's own
  # outlier points are hidden; otherwise each outlier would be drawn twice.
  geom_boxplot(outlier.shape = NA) +
  # height = 0 limits the jitter to the x direction, so each point's y
  # position is its actual value.
  geom_jitter(alpha = 0.3, size = 0.3, height = 0) +
  facet_grid(~ Biosample.type) +
  # Second and third colours of the "Set1" palette, one per element type.
  scale_colour_manual(values = brewer.pal(7, "Set1")[2:3]) +
  scale_y_log10() +
  theme(legend.title = element_blank()) +
  xlab("") +
  ylab("Median size (bp)")
Here, the values are plotted on a linear scale.
# Box plots of the per-sample median size (bp) for SEs vs. TEs, one panel per
# biosample type, with the y axis on a linear scale.
ggplot(data, aes(x = type, y = size, colour = type)) +
  # The jitter layer below already draws every sample, so the box layer's own
  # outlier points are hidden; otherwise each outlier would be drawn twice.
  geom_boxplot(outlier.shape = NA) +
  # height = 0 limits the jitter to the x direction, so each point's y
  # position is its actual value.
  geom_jitter(alpha = 0.3, size = 0.3, height = 0) +
  facet_grid(~ Biosample.type) +
  # Second and third colours of the "Set1" palette, one per element type.
  scale_colour_manual(values = brewer.pal(7, "Set1")[2:3]) +
  theme(legend.title = element_blank()) +
  xlab("") +
  ylab("Median size (bp)")
Get the number of SEs/TEs in each cell type/tissue, and format the data as a data frame with additional information.
# Count the rows per sample ID (column 4) in each table.
.SE.num <- table(SE_enc[[4]])
.TE.num <- table(TE_enc[[4]])

# One row per sample; c() turns each table into a plain named vector of
# counts before it is stored in the data frame.
SE.num <- data.frame(
  Sample.ID = names(.SE.num),
  number = c(.SE.num),
  type = "SE"
)
TE.num <- data.frame(
  Sample.ID = names(.TE.num),
  number = c(.TE.num),
  type = "TE"
)

# Stack SE and TE rows, attach the sample metadata by sample ID, and keep
# only the three biosample types that are plotted.
num <- rbind(SE.num, TE.num)
data <- merge(x = num, y = sampleInfo, by = "Sample.ID")
data <- subset(
  x = data,
  subset = Biosample.type %in% c("Cell line", "Primary cell", "Tissue")
)
Use box plots to compare the number of SEs and TEs, with the samples split into cell lines, primary cells, and tissues. Here, the values are plotted on a log scale.
# Box plots of the per-sample number of SEs vs. TEs, one panel per biosample
# type, with the y axis on a log10 scale.
ggplot(data, aes(x = type, y = number, colour = type)) +
  # The jitter layer below already draws every sample, so the box layer's own
  # outlier points are hidden; otherwise each outlier would be drawn twice.
  geom_boxplot(outlier.shape = NA) +
  # height = 0 limits the jitter to the x direction, so each point's y
  # position is its actual count.
  geom_jitter(alpha = 0.3, size = 0.3, height = 0) +
  facet_grid(~ Biosample.type) +
  # Second and third colours of the "Set1" palette, one per element type.
  scale_colour_manual(values = brewer.pal(7, "Set1")[2:3]) +
  scale_y_log10() +
  theme(legend.title = element_blank()) +
  xlab("") +
  ylab("Number")
Here, the values are plotted on a linear scale.
# Box plots of the per-sample number of SEs vs. TEs, one panel per biosample
# type, with the y axis on a linear scale.
ggplot(data, aes(x = type, y = number, colour = type)) +
  # The jitter layer below already draws every sample, so the box layer's own
  # outlier points are hidden; otherwise each outlier would be drawn twice.
  geom_boxplot(outlier.shape = NA) +
  # height = 0 limits the jitter to the x direction, so each point's y
  # position is its actual count.
  geom_jitter(alpha = 0.3, size = 0.3, height = 0) +
  facet_grid(~ Biosample.type) +
  # Second and third colours of the "Set1" palette, one per element type.
  scale_colour_manual(values = brewer.pal(7, "Set1")[2:3]) +
  theme(legend.title = element_blank()) +
  xlab("") +
  ylab("Number")
# Report the R version, platform, and attached/loaded package versions used
# for this run.
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] RColorBrewer_1.1-2 reshape2_1.4.3 cowplot_0.9.4
## [4] ggplot2_3.1.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.1 pillar_1.4.1 compiler_3.6.0 plyr_1.8.4
## [5] tools_3.6.0 digest_0.6.19 evaluate_0.14 tibble_2.1.3
## [9] gtable_0.3.0 pkgconfig_2.0.2 rlang_0.3.4 rstudioapi_0.10
## [13] yaml_2.2.0 xfun_0.7 withr_2.1.2 stringr_1.4.0
## [17] dplyr_0.8.1 knitr_1.23 grid_3.6.0 tidyselect_0.2.5
## [21] glue_1.3.1 R6_2.4.0 rmarkdown_1.13 purrr_0.3.2
## [25] magrittr_1.5 scales_1.0.0 htmltools_0.3.6 assertthat_0.2.1
## [29] colorspace_1.4-1 labeling_0.3 stringi_1.4.3 lazyeval_0.2.2
## [33] munsell_0.5.0 crayon_1.3.4