Set up the R environment by including a few necessary R libraries
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
library(ape)
library(Seurat)
## Registered S3 method overwritten by 'R.oo':
## method from
## throw.default R.methodsS3
Data import and basic preprocessing
## Chromosome sizes: one row per sequence, columns `chr` and `stop`
chrsize <- read.table(
  "hg19.chr_size",
  header = FALSE,
  stringsAsFactors = TRUE,
  col.names = c("chr", "stop")
)
## Impose karyotype order (chr1..chr22, chrX, chrY, chrM); any sequence name
## that is not in this list becomes NA.
chrsize$chr <- factor(
  chrsize$chr,
  levels = paste0("chr", c(1:22, "X", "Y", "M"))
)
## Drop chrM and chrY. subset() also discards rows whose condition is NA,
## i.e. the sequences that did not match a level above.
chrsize <- subset(chrsize, !(chr == "chrM" | chr == "chrY"))
## Every chromosome is drawn from coordinate 0
chrsize$start <- 0
## Read one merged super-enhancer (SE) BED file and prepare it for plotting.
##
## path            : BED-like file with the five columns
##                   chr, start, stop, samples, id (no header line).
## chr_levels      : chromosome names, in display order; names not listed
##                   here become NA in the returned `chr` factor.
## sample_id_width : number of leading characters of each comma-separated
##                   entry in `id` that are compared when counting samples
##                   (presumably the sample-identifier prefix -- confirm
##                   against the ID scheme of the input files).
##
## Returns the data frame ordered by chromosome, with `samples` overwritten by
## the number of distinct `sample_id_width`-character prefixes found in `id`.
read_se_bed <- function(path,
                        chr_levels = paste0("chr", c(1:22, "X", "Y")),
                        sample_id_width = 13L) {
  se <- read.table(
    path,
    header = FALSE,
    stringsAsFactors = FALSE,
    col.names = c("chr", "start", "stop", "samples", "id")
  )
  se$chr <- factor(se$chr, levels = chr_levels)
  se <- se[order(se$chr), ]
  # vapply() guarantees an unnamed integer vector even for zero rows, whereas
  # sapply() on a character vector also stores every full id string as a name.
  se$samples <- vapply(
    strsplit(se$id, ",", fixed = TRUE),
    function(ids) length(unique(substr(ids, 1L, sample_id_width))),
    integer(1)
  )
  se
}

## SE of tissues
SE.ti <- read_se_bed("enc.tissue.SE_merged.bed")
## SE of cell lines
SE.cl <- read_se_bed("enc.cell_line.SE_merged.bed")
## SE of primary cells
SE.pc <- read_se_bed("enc.primary_cell.SE_merged.bed")
## Inputs for hierarchical clustering
## Sample annotation table, restricted to ENCODE cell lines, primary cells
## and tissues
sampleInfo <- read.csv2("SE_samples.csv")
sampleInfo <- sampleInfo[
  sampleInfo$Data.sources %in% "ENCODE" &
    sampleInfo$Biosample.type %in% c("Cell line", "Primary cell", "Tissue"),
  ,
  drop = FALSE
]
## Pairwise table (V1, V2 = sample IDs); keep only pairs in which both
## samples survived the annotation filter
Jaccard <- read.table("ENCODE_network/Jaccard.txt", stringsAsFactors = FALSE)
Jaccard <- Jaccard[
  Jaccard$V1 %in% sampleInfo$Sample.ID &
    Jaccard$V2 %in% sampleInfo$Sample.ID,
  ,
  drop = FALSE
]
## Input for PCA: SE count matrix, one column per sample
SE.occur <- read.table("ENCODE_network/enc_SE_merged.cnt.matrix")
## Keep only the columns whose name is a retained sample ID
SE.occur <- SE.occur[
  ,
  match(colnames(SE.occur), sampleInfo$Sample.ID, nomatch = 0L) > 0L
]
Showing the landscape of SEs on each chromosome. The vast majority of the SEs are not shared by different cell types.
## Chromosome-wide SE landscape: one facet row per chromosome, a grey bar
## spanning the chromosome length, and three tracks of SE tiles coloured by
## the number of samples sharing each SE (log10 colour scale).
ggplot(chrsize, aes(xmin = start / 1e6, xmax = stop / 1e6, ymin = 0, ymax = 3)) +
  facet_grid(chr ~ ., switch = "y") +
  ## chromosome background
  geom_rect(fill = "grey95", color = "grey95") +
  ## bottom track (y = 0.5): tissues
  geom_tile(
    data = SE.ti,
    aes(x = (start + stop) / 2 / 1e6, y = 0.5, fill = samples, color = samples)
  ) +
  ## middle track (y = 1.5): cell lines
  geom_tile(
    data = SE.cl,
    aes(x = (start + stop) / 2 / 1e6, y = 1.5, fill = samples, color = samples)
  ) +
  ## top track (y = 2.5): primary cells
  geom_tile(
    data = SE.pc,
    aes(x = (start + stop) / 2 / 1e6, y = 2.5, fill = samples, color = samples)
  ) +
  scale_fill_gradientn(colours = rev(rainbow(10)[1:8]), trans = "log10") +
  scale_color_gradientn(colours = rev(rainbow(10)[1:8]), trans = "log10") +
  labs(
    x = "Position on chromosome (Mb)",
    fill = "Shared by # samples",
    color = "Shared by # samples"
  ) +
  ## strip everything except the x axis line and the chromosome labels
  theme(
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_blank(),
    strip.text.y = element_text(angle = 180),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    axis.line.x = element_line(colour = "black")
  )
Hierarchical clustering of the samples based on pair-wise co-occurrence of SEs.
## Pivot the long pair list (column 1 = sample A, column 2 = sample B,
## column 3 = Jaccard index) into a square sample-by-sample table.
## Cells are addressed by sample ID, so the result does not depend on the row
## order of the input. Both triangles are filled because the Jaccard index is
## symmetric, so a one-triangle pair list works as well as a full one.
Jaccard.mat <- local({
  from <- as.character(Jaccard[[1]])
  to <- as.character(Jaccard[[2]])
  ids <- sort(unique(c(from, to)))
  mat <- matrix(
    NA_real_,
    nrow = length(ids), ncol = length(ids),
    dimnames = list(ids, ids)
  )
  mat[cbind(to, from)] <- Jaccard[[3]]
  mat[cbind(from, to)] <- Jaccard[[3]]
  diag(mat) <- 1 # a sample is identical to itself; as.dist() ignores it anyway
  if (anyNA(mat)) {
    stop("Jaccard table does not contain every pair of samples", call. = FALSE)
  }
  as.data.frame(mat)
})

## Bring the annotation rows into the same order as the matrix columns.
## merge() sorts by key, so the order is restored explicitly with match().
sample.id <- data.frame(Sample.ID = colnames(Jaccard.mat))
sampleInfo <- merge(sample.id, sampleInfo, by = "Sample.ID")
sampleInfo <- sampleInfo[match(sample.id$Sample.ID, sampleInfo$Sample.ID), ]
if (anyNA(sampleInfo$Sample.ID)) {
  stop("Some samples in the Jaccard table have no entry in sampleInfo",
       call. = FALSE)
}

## Readable, unique tip labels: <biosample type>_<tissue, 10 chars>_<index>
colnames(Jaccard.mat) <- paste(sampleInfo$Biosample.type,
                               substr(sampleInfo$Tissue.type, 1, 10),
                               seq_len(ncol(Jaccard.mat)), sep = "_")
rownames(Jaccard.mat) <- colnames(Jaccard.mat)

## Distance = 1 - sqrt(Jaccard), clustered with Ward's method
mat_dist <- as.dist(1 - sqrt(Jaccard.mat))
hc <- hclust(mat_dist, method = "ward.D2")

## Colour the tips of the fan dendrogram by membership in a 3-cluster cut
colors <- c("#FF00FF", "#8000FF", "#FF8000")
clus3 <- cutree(hc, 3)
plot(as.phylo(hc), type = "fan", tip.color = colors[clus3], cex = 0.6)
Principal component analysis of the occurrence of SEs in the investigated samples. The first two principal components are shown in the plot.
## Align the annotation rows with the columns (samples) of SE.occur.
## merge() sorts by key, so the column order is restored with match(); the
## PCA embeddings computed below follow the column order of SE.occur.
sample.id <- data.frame(Sample.ID = colnames(SE.occur))
sampleInfo <- merge(sample.id, sampleInfo, by = "Sample.ID")
sampleInfo <- sampleInfo[match(sample.id$Sample.ID, sampleInfo$Sample.ID), ]
if (anyNA(sampleInfo$Sample.ID)) {
  stop("Some samples in SE.occur have no entry in sampleInfo", call. = FALSE)
}
# using Seurat for PCA
obj <- CreateSeuratObject(SE.occur, assay = "SE")
# Use every SE as a feature and feed the count matrix to PCA as-is
# NOTE(review): the counts are neither scaled nor centred here -- confirm
# that this is intended.
obj@assays$SE@var.features <- rownames(obj@assays$SE@counts)
obj@assays$SE@scale.data <- as.matrix(obj@assays$SE@counts)
# `pc.genes` / `do.print` are Seurat v2 arguments; Seurat v3 silently ignores
# them, which is why the loadings summary was printed despite `do.print = F`.
# The v3 equivalents are `features` / `verbose`. `verbose = TRUE` keeps the
# summary that is shown below; set it to FALSE to silence the call.
obj <- RunPCA(object = obj, features = obj@assays$SE@var.features,
              verbose = TRUE)
## PC_ 1
## Positive: chr2-166421703-166504582, chr8-81776390-81793533, chr10-42383114-42397173, chr1-30721449-30734189, chr3-103209547-103219033, chr11-39239590-39245279, chr11-86901001-86913049, chr10-115603202-115617190, chr2-242743346-242777521, chr6-168567636-168583993
## chr15-45196707-45216056, chr9-90802437-90814884, chr2-223977387-223997135, chr8-127919066-127931147, chr11-102785327-102809961, chr13-21770057-21775046, chr10-71793488-71815578, chr12-17522467-17527121, chr4-117101251-117118475, chr5-144621479-144627988
## chr12-12389150-12405955, chr4-150265177-150271637, chr15-97594817-97637782, chr16-8620435-8632860, chr14-57508534-57539176, chr2-226656618-226661338, chr3-34296517-34337616, chr14-63055028-63082388, chr5-145375770-145385868, chr10-110948607-110964291
## Negative: chr1-234610051-235192382, chr2-43054277-43604458, chr1-149783661-149861972, chr5-171951314-172387461, chr1-8042255-8668389, chr11-65169424-65293668, chr2-238025629-238654437, chr17-79287388-79496032, chr17-75079810-75492078, chr20-52180179-52579524
## chr2-232435233-232593236, chr22-46390744-46514656, chr20-48836950-49212251, chr22-36616046-36905388, chr11-11963882-12311106, chr10-80787186-81120833, chr12-125092737-125438341, chr7-130551474-130811428, chr1-145073235-145292974, chr7-5558027-5750219
## chr10-3762505-4127780, chr17-38213886-38293387, chr5-149777956-150057665, chr12-124829518-125078062, chr9-130696640-130883276, chr9-72965156-73122240, chr14-68911015-69182461, chr10-73972293-74128717, chr1-27002723-27136764, chr14-50308058-50585122
## PC_ 2
## Positive: chr17-79287388-79496032, chr2-232435233-232593236, chr2-43054277-43604458, chr1-149783661-149861972, chr17-38213886-38293387, chr22-41738045-41846014, chr19-42744319-42791747, chr10-134177600-134460043, chr7-5558027-5750219, chr12-58207429-58297497
## chr1-2107059-2263623, chr20-35074896-35180260, chr9-130696640-130883276, chr17-48914346-49066625, chr11-113895366-114133874, chr8-38557958-38666550, chr5-148746350-148846783, chr2-66646090-66835375, chr9-131864504-131966327, chr1-27002723-27136764
## chr11-119167282-119253784, chr12-57449155-57582529, chr1-10681342-10886360, chr10-126656950-126852012, chr2-174762474-174928264, chr19-16362565-16584689, chr1-164526202-164776187, chr8-38175192-38330619, chr1-207961675-208116788, chr19-48092091-48249734
## Negative: chr20-52180179-52579524, chr12-76223928-76432927, chr11-11963882-12311106, chr12-13244020-13378157, chr2-161036644-161351389, chr10-3762505-4127780, chr12-46736228-46972021, chr11-121998354-122091940, chr8-128793698-128984602, chr1-94012053-94248992
## chr2-36555701-36826369, chr7-92237974-92485564, chr11-95765800-96077401, chr12-52526850-52685291, chr21-36165691-36432151, chr12-66204990-66378976, chr1-67979201-68322599, chr1-214534683-214788853, chr10-33387536-33667930, chr12-65980541-66069346
## chr17-57822089-57953405, chr3-171756078-172043341, chr10-33212002-33337288, chr20-45881084-46250308, chr14-61750166-62136627, chr3-149003339-149133711, chr7-55080902-55238061, chr5-14137984-14285271, chr8-118805533-119139654, chr14-51928223-52084667
## PC_ 3
## Positive: chr14-50308058-50585122, chr1-149783661-149861972, chr20-52180179-52579524, chr4-40151421-40339107, chr5-42981248-43050805, chr6-159029585-159293279, chr16-11636074-11919163, chr2-196996112-197160488, chr14-22878347-23053746, chr14-61750166-62136627
## chr2-70294838-70372074, chr21-34657990-34829079, chr11-65169424-65293668, chr2-64835970-65093395, chr17-62952499-63103462, chr4-185230731-185401813, chr17-61991008-62210507, chr15-75063015-75111821, chr17-56394563-56426848, chr17-73628003-73785724
## chrX-12964119-13052506, chr13-99843528-100041108, chr5-150378949-150639046, chr17-37892036-38054004, chr17-76680092-76823376, chr6-27739618-27809589, chr11-3845230-4029753, chr1-26551206-26655259, chr3-46956050-47079673, chr13-30902132-31050921
## Negative: chr10-29748217-30188048, chr11-11963882-12311106, chr2-238025629-238654437, chr12-2040792-2543978, chr5-148746350-148846783, chr8-124462285-124743503, chr4-169398456-169852902, chr3-187918733-188071492, chr1-85954066-86102776, chr7-134373321-134653428
## chr21-47439437-47564351, chr3-159477846-159666635, chr10-123746043-124013168, chr5-171951314-172387461, chr10-88408298-88591220, chr14-55030187-55276565, chr1-201404025-201578930, chr11-68764101-68857662, chr9-133630964-133763855, chr3-99551354-99701008
## chr3-57860735-58122482, chr2-33233409-33638371, chr2-36555701-36826369, chr8-38175192-38330619, chr11-119167282-119253784, chr2-1576127-1841989, chr1-8042255-8668389, chr1-203408193-203542290, chr5-146761781-146890764, chr3-134026398-134126541
## PC_ 4
## Positive: chr2-33233409-33638371, chr14-22878347-23053746, chr11-95765800-96077401, chr2-196996112-197160488, chr12-11800725-12060196, chr15-85922308-86332558, chr3-187918733-188071492, chr20-43146161-43378352, chr19-16362565-16584689, chr3-151900196-152162028
## chr11-72629770-73137985, chr10-29748217-30188048, chr7-101434256-101689718, chr2-238025629-238654437, chr3-16320102-16566364, chr9-132613230-132836938, chr20-55947556-56082461, chr1-85954066-86102776, chr18-60746093-61014803, chr5-71401030-71607145
## chr12-2040792-2543978, chr1-120488069-120697349, chr9-97533568-97718653, chr10-26718387-26818735, chr1-198563799-198699870, chr4-40151421-40339107, chr15-101631431-101808005, chr5-88019240-88249803, chr18-13351849-13630652, chr18-56494640-56763252
## Negative: chr12-52526850-52685291, chr1-149783661-149861972, chr12-53240197-53407679, chr17-73628003-73785724, chr14-68911015-69182461, chr22-46390744-46514656, chr12-6375774-6514845, chr2-85114821-85203184, chr8-134202536-134354869, chr11-34606605-34708425
## chr10-73972293-74128717, chr10-105301977-105684589, chr19-11172240-11321305, chr17-48914346-49066625, chr17-79287388-79496032, chr1-201974297-202136423, chr12-76223928-76432927, chr21-44720317-44943783, chr1-16425986-16554013, chr17-17704339-17789196
## chr20-48836950-49212251, chr1-151890956-151980362, chr6-10372185-10432729, chr7-130551474-130811428, chr15-90536059-90778807, chr18-67946326-68196329, chr6-43728126-44052993, chr17-39765942-39862962, chr19-41208215-41231665, chr12-46736228-46972021
## PC_ 5
## Positive: chr10-29748217-30188048, chr20-55947556-56082461, chr12-2040792-2543978, chr7-24850363-25022192, chr19-16362565-16584689, chr4-40151421-40339107, chr7-101434256-101689718, chr10-63482249-63874678, chr8-124462285-124743503, chr20-43146161-43378352
## chr2-173881734-174097506, chr4-144255760-144381376, chr10-88408298-88591220, chr14-59642956-59846076, chr2-28789090-29020323, chr5-52613920-52736716, chr3-73539202-73684272, chr10-71068273-71268191, chr12-52526850-52685291, chr9-522234-762996
## chr17-47642611-47862223, chr3-5163490-5256079, chr2-43054277-43604458, chr19-13093213-13217525, chr22-36121376-36319042, chr20-42733314-42858528, chr20-25170517-25318548, chr11-68764101-68857662, chr4-75222975-75324549, chr10-76929346-77036941
## Negative: chr11-95765800-96077401, chr8-128793698-128984602, chr16-29119830-29395565, chr7-47322089-47721406, chr21-46709650-46912365, chr2-102604838-102783705, chr16-11636074-11919163, chr16-89358961-89576370, chr3-11473793-11693721, chr19-1062413-1219498
## chr2-238025629-238654437, chr5-95033116-95227808, chr22-31598812-31704951, chr7-22599287-22771738, chr3-171756078-172043341, chr4-185230731-185401813, chr10-16975584-17131503, chr17-76218462-76425802, chr16-88217272-88316903, chr6-143139852-143275021
## chr15-78265092-78381598, chr9-127004676-127088085, chr1-85954066-86102776, chr9-37908158-38083981, chr9-112775234-112978986, chr5-42981248-43050805, chr4-2786592-2897361, chr13-114755129-114928596, chr6-43728126-44052993, chr11-121998354-122091940
## First two principal components per sample, annotated with biosample type.
## The annotation is looked up by sample ID (the row names of the embedding
## matrix) instead of by row position, so it stays correct whatever the row
## order of sampleInfo is.
pca.df <- data.frame(
  obj@reductions$pca@cell.embeddings[, 1:2],
  celltype = sampleInfo$Biosample.type[
    match(rownames(obj@reductions$pca@cell.embeddings), sampleInfo$Sample.ID)
  ]
)
if (anyNA(pca.df$celltype)) {
  stop("Some PCA samples have no biosample type in sampleInfo", call. = FALSE)
}
## Colours are named so that each biosample type always gets the same colour,
## independent of factor level order.
ggplot(pca.df, aes(PC_1, PC_2, color = celltype)) +
  geom_point() +
  scale_colour_manual(values = c("Cell line" = "#8000FF",
                                 "Primary cell" = "#FF00FF",
                                 "Tissue" = "#FF8000")) +
  theme(legend.title = element_blank()) +
  xlab("PC1") +
  ylab("PC2")
## Record the R version, platform and package versions used for this analysis
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] Seurat_3.0.1 ape_5.3 cowplot_0.9.4 ggplot2_3.1.1
##
## loaded via a namespace (and not attached):
## [1] tsne_0.1-3 nlme_3.1-140 bitops_1.0-6
## [4] RColorBrewer_1.1-2 httr_1.4.0 sctransform_0.2.0
## [7] tools_3.6.0 R6_2.4.0 irlba_2.3.3
## [10] KernSmooth_2.23-15 lazyeval_0.2.2 colorspace_1.4-1
## [13] npsurv_0.4-0 withr_2.1.2 gridExtra_2.3
## [16] tidyselect_0.2.5 compiler_3.6.0 plotly_4.9.0
## [19] labeling_0.3 caTools_1.17.1.2 scales_1.0.0
## [22] lmtest_0.9-37 ggridges_0.5.1 pbapply_1.4-0
## [25] stringr_1.4.0 digest_0.6.19 rmarkdown_1.13
## [28] R.utils_2.8.0 pkgconfig_2.0.2 htmltools_0.3.6
## [31] bibtex_0.4.2 htmlwidgets_1.3 rlang_0.3.4
## [34] rstudioapi_0.10 zoo_1.8-6 jsonlite_1.6
## [37] ica_1.0-2 gtools_3.8.1 dplyr_0.8.1
## [40] R.oo_1.22.0 magrittr_1.5 Matrix_1.2-17
## [43] Rcpp_1.0.1 munsell_0.5.0 reticulate_1.12
## [46] R.methodsS3_1.7.1 stringi_1.4.3 yaml_2.2.0
## [49] gbRd_0.4-11 MASS_7.3-51.4 gplots_3.0.1.1
## [52] Rtsne_0.15 plyr_1.8.4 grid_3.6.0
## [55] parallel_3.6.0 gdata_2.18.0 listenv_0.7.0
## [58] ggrepel_0.8.1 crayon_1.3.4 lattice_0.20-38
## [61] splines_3.6.0 SDMTools_1.1-221.1 knitr_1.23
## [64] pillar_1.4.1 igraph_1.2.4.1 future.apply_1.2.0
## [67] reshape2_1.4.3 codetools_0.2-16 glue_1.3.1
## [70] evaluate_0.14 lsei_1.2-0 metap_1.1
## [73] data.table_1.12.2 png_0.1-7 Rdpack_0.11-0
## [76] gtable_0.3.0 RANN_2.6.1 purrr_0.3.2
## [79] tidyr_0.8.3 future_1.13.0 assertthat_0.2.1
## [82] xfun_0.7 rsvd_1.0.1 survival_2.44-1.1
## [85] viridisLite_0.3.0 tibble_2.1.3 cluster_2.0.9
## [88] globals_0.12.4 fitdistrplus_1.0-14 ROCR_1.0-7