Last updated: 2018-09-28
Code version: e0ca9da
Here I read in a dataframe of counts summarized at the gene-level, then do some pre-processing and normalization to obtain a voom object to later run linear modeling analyses on. This is identical code to what was done in the case with HOMER, since this gene expression analysis is completely orthogonal to the Hi-C normalization and significance calling schemes employed.
#Read in counts data, create DGEList object out of them and convert to log counts per million (CPM). Also read in metadata.
counts <- fread("~/Desktop/Hi-C/gene_expression/counts_iPSC.txt", header=TRUE, data.table=FALSE, stringsAsFactors = FALSE, na.strings=c("NA",""))
colnames(counts) <- c("genes", "C-3649", "G-3624", "H-3651", "D-40300", "F-28834", "B-28126", "E-28815", "A-21792")
rownames(counts) <- counts$genes
counts <- counts[,-1]
dge <- DGEList(counts, genes=rownames(counts))
#Now, convert counts into RPKM to account for gene length differences between species. First load in and re-organize metadata, then the gene lengths for both species, and then the function to convert counts to RPKM.
meta_data <- fread("~/Desktop/Hi-C/gene_expression/Meta_data.txt", sep="\t",stringsAsFactors = FALSE,header=T,na.strings=c("NA",""))
meta_data$fullID <- c("C-3649", "H-3651", "B-28126", "D-40300", "G-3624", "A-21792", "E-28815", "F-28834")
ord <- data.frame(fullID=colnames(counts)) #Pull order of samples from expression object
left_join(ord, meta_data, by="fullID") -> group_ref #left join meta data to this to make sure sample IDs correct
Warning: Column `fullID` joining factor and character vector, coercing into
character vector
#Read in human and chimp gene lengths for the RPKM function:
human_lengths<- fread("~/Desktop/Hi-C/gene_expression/human_lengths.txt", sep="\t",stringsAsFactors = FALSE,header=T,na.strings=c("NA",""))
chimp_lengths<- fread("~/Desktop/Hi-C/gene_expression/chimp_lengths.txt", sep="\t",stringsAsFactors = FALSE,header=T,na.strings=c("NA",""))
#The function for RPKM conversion.
vRPKM <- function(expr.obj,chimp_lengths,human_lengths,meta4) {
if (is.null(expr.obj$E)) {
meta4%>%filter(SP=="C" & fullID %in% colnames(counts))->chimp_meta
meta4%>%filter(SP=="H" & fullID %in% colnames(counts))->human_meta
#using RPKM function:
#Put genes in correct order:
expr.obj$genes %>%select(Geneid=genes)%>%
left_join(.,chimp_lengths,by="Geneid")%>%select(Geneid,ch.length)->chlength
expr.obj$genes %>%select(Geneid=genes)%>%
left_join(.,human_lengths,by="Geneid")%>%select(Geneid,hu.length)->hulength
#Chimp RPKM
expr.obj$genes$Length<-(chlength$ch.length)
RPKMc=rpkm(expr.obj,normalized.lib.sizes=TRUE, log=TRUE)
RPKMc[,colnames(RPKMc) %in% chimp_meta$fullID]->rpkm_chimp
#Human RPKM
expr.obj$genes$Length<-hulength$hu.length
RPKMh=rpkm(expr.obj,normalized.lib.sizes=TRUE, log=TRUE)
RPKMh[,colnames(RPKMh) %in% human_meta$fullID]->rpkm_human
cbind(rpkm_chimp,rpkm_human)->allrpkm
expr.obj$E <- allrpkm
return(expr.obj)
}
else {
#Pull out gene order from voom object and add in gene lengths from feature counts file
#Put genes in correct order:
expr.obj$genes %>%select(Geneid=genes)%>%
left_join(.,chimp_lengths,by="Geneid")%>%select(Geneid,ch.length)->chlength
expr.obj$genes %>%select(Geneid=genes)%>%
left_join(.,human_lengths,by="Geneid")%>%select(Geneid,hu.length)->hulength
#Filter meta data to be able to separate human and chimp
meta4%>%filter(SP=="C")->chimp_meta
meta4%>%filter(SP=="H")->human_meta
#Pull out the expression data in cpm to convert to RPKM
expr.obj$E->forRPKM
forRPKM[,colnames(forRPKM) %in% chimp_meta$fullID]->rpkm_chimp
forRPKM[,colnames(forRPKM) %in% human_meta$fullID]->rpkm_human
#Make log2 in KB:
row.names(chlength)=chlength$Geneid
chlength %>% select(-Geneid)->chlength
as.matrix(chlength)->chlength
row.names(hulength)=hulength$Geneid
hulength %>% select(-Geneid)->hulength
as.matrix(hulength)->hulength
log2(hulength/1000)->l2hulength
log2(chlength/1000)->l2chlength
#Subtract out log2 kb:
sweep(rpkm_chimp, 1,l2chlength,"-")->chimp_rpkm
sweep(rpkm_human, 1,l2hulength,"-")->human_rpkm
colnames(forRPKM)->column_order
cbind(chimp_rpkm,human_rpkm)->vRPKMS
#Put RPKMS back into the VOOM object:
expr.obj$E <- (vRPKMS[,colnames(vRPKMS) %in% column_order])
return(expr.obj)
}
}
dge <- vRPKM(dge, chimp_lengths, human_lengths, group_ref) #Normalize via log2 RPKM.
#A typical low-expression filtering step: use default prior count adding (0.25), and filtering out anything that has fewer than half the individuals within each species having logCPM less than 1.5 (so want 2 humans AND 2 chimps with log2CPM >= 1.5)
lcpms <- cpm(dge$counts, log=TRUE) #Obtain log2CPM!
good.chimps <- which(rowSums(lcpms[,1:4]>=1.5)>=2) #Obtain good chimp indices
good.humans <- which(rowSums(lcpms[,5:8]>=1.5)>=2) #Obtain good human indices
filt <- good.humans[which(good.humans %in% good.chimps)] #Subsets us down to a solid 11,292 genes--will go for a similar percentage with RPKM cutoff vals! (25.6% of total)
#Repeat filtering step, this time on RPKMs. 0.4 was chosen as a cutoff as it obtains close to the same results as 1.5 lcpm (in terms of percentage of genes retained)
good.chimps <- which(rowSums(dge$E[,1:4]>=0.4)>=2) #Obtain good chimp indices.
good.humans <- which(rowSums(dge$E[,5:8]>=0.4)>=2) #Obtain good human indices.
RPKM_filt <- good.humans[which(good.humans %in% good.chimps)] #Still leaves us with 11,946 genes (27.1% of total)
#Do the actual filtering.
dge_filt <- dge[RPKM_filt,]
dge_filt$E <- dge$E[RPKM_filt,]
dge_filt$counts <- dge$counts[RPKM_filt,]
dge_final <- calcNormFactors(dge_filt, method="TMM") #Calculate normalization factors with trimmed mean of M-values (TMM).
dge_norm <- calcNormFactors(dge, method="TMM") #Calculate normalization factors with TMM on dge before filtering out lowly expressed genes, for normalization visualization.
#Quick visualization of the filtering I've just performed:
col <- brewer.pal(8, "Paired")
par(mfrow=c(1,2))
plot(density(dge$E[,1]), col=col[1], lwd=2, ylim=c(0,0.35), las=2,
main="", xlab="")
title(main="A. Raw data", xlab="Log-RPKM")
abline(v=1, lty=3)
for (i in 2:8){
den <- density(dge$E[,i])
lines(den$x, den$y, col=col[i], lwd=2)
}
legend("topright", colnames(dge$E[,1:8]), text.col=col, bty="n")
plot(density(dge_final$E[,1]), col=col[1], lwd=2, ylim=c(0,0.35), las=2,
main="", xlab="")
title(main="B. Filtered data", xlab="Log-RPKM")
abline(v=1, lty=3)
for (i in 2:8){
den <- density(dge_final$E[,i])
lines(den$x, den$y, col=col[i], lwd=2)
}
legend("topright", colnames(dge_final[,1:8]), text.col=col, bty="n")
#Quick visualization of the normalization on the whole set of genes.
col <- brewer.pal(8, "Paired")
raw <- as.data.frame(dge$E[,1:8])
normed <- as.data.frame(dge_norm$E[,1:8])
par(mfrow=c(1,2))
boxplot(raw, las=2, col=col, main="")
title(main="Unnormalized data",ylab="Log-RPKM")
boxplot(normed, las=2, col=col, main="")
title(main="Normalized data",ylab="Log-RPKM")
#Now, observe normalization on the filtered set of genes.
col <- brewer.pal(8, "Paired")
raw <- as.data.frame(dge_filt$E[,1:8])
normed <- as.data.frame(dge_final$E[,1:8])
par(mfrow=c(1,2))
boxplot(raw, las=2, col=col, main="")
title(main="Unnormalized data",ylab="Log-RPKM")
boxplot(normed, las=2, col=col, main="")
title(main="Normalized data",ylab="Log-RPKM")
#Now, do some quick MDS plotting to make sure this expression data separates out species properly.
species <- c("C", "C", "C", "C", "H", "H", "H", "H")
color <- c(rep("red", 4), rep("blue", 4))
par(mfrow=c(1,1))
plotMDS(dge_final$E[,1:8], labels=species, col=color, main="MDS Plot") #Shows separation of the species along the logFC dimension representing the majority of the variance--orthogonal check to PCA, and looks great!
###Now, apply voom to get quality weights.
meta.exp.data <- data.frame("SP"=c("C", "C", "C", "C", "H", "H", "H", "H"), "SX"=c("M","M" ,"F","F","F", "M","M","F"))
SP <- factor(meta.exp.data$SP,levels = c("H","C"))
exp.design <- model.matrix(~0+SP)#(~1+meta.exp.data$SP+meta.exp.data$SX)
colnames(exp.design) <- c("Human", "Chimp")
weighted.data <- voom(dge_final, exp.design, plot=TRUE, normalize.method = "cyclicloess")
##Obtain rest of LM results, with particular eye to DE table!
vfit <- lmFit(weighted.data, exp.design)
efit <- eBayes(vfit)
mycon <- makeContrasts(HvC = Human-Chimp, levels = exp.design)
diff_species <- contrasts.fit(efit, mycon)
finalfit <- eBayes(diff_species)
detable <- topTable(finalfit, coef = 1, adjust.method = "BH", number = Inf, sort.by="none")
plotSA(efit, main="Final model: Mean−variance trend")
#Get lists of the DE and non-DE genes so I can run separate analyses on them at any point.
DEgenes <- detable$genes[which(detable$adj.P.Val<=0.05)]
nonDEgenes <- detable$genes[-which(detable$adj.P.Val<=0.05)]
#Rearrange RPKM and weight columns in voom object to be similar to the rest of my setup throughout in other dataframes.
weighted.data$E <- weighted.data$E[,c(8, 6, 1, 4, 7, 5, 2, 3)]
weighted.data$weights <- weighted.data$weights[,c(8, 6, 1, 4, 7, 5, 2, 3)]
RPKM <- weighted.data$E
rownames(RPKM) <- NULL #Just to match what I had before on midway2, about to write this out.
saveRDS(RPKM, file="~/Desktop/Hi-C/HiC_covs/IEE.RPKM.RDS")
saveRDS(weighted.data, file="~/Desktop/Hi-C/HiC_covs/IEE_voom_object.RDS") #write this object out, can then be read in with readRDS.
In this section I find the overlap between the 2 final filtered set of Hi-C Juicer significant hits and genes picked up on by an orthogonal RNA-seq experiment in the same set of cell lines. I utilize an in-house curated set of orthologous genes between humans and chimpanzees. Given that the resolution of the data is 10kb, I choose a simple and conservative approach and use a 1-nucleotide interval at the start of each gene as a proxy for the promoter. I then take a conservative pass and only use genes that had direct overlap with a bin from the Hi-C significant hits data, with more motivation explained below.
#Now, read in filtered data from linear_modeling_QC.Rmd.
data.KR <- fread("~/Desktop/Hi-C/juicer.filt.KR.final", header=TRUE, data.table=FALSE, stringsAsFactors = FALSE, showProgress=FALSE)
data.VC <- fread("~/Desktop/Hi-C/juicer.filt.VC.final", header=TRUE, data.table=FALSE, stringsAsFactors = FALSE, showProgress = FALSE)
meta.data <- data.frame("SP"=c("H", "H", "C", "C", "H", "H", "C", "C"), "SX"=c("F", "M", "M", "F", "M", "F", "M", "F"), "Batch"=c(1, 1, 1, 1, 2, 2, 2, 2))
#####GENE Hi-C Hit overlap: Already have hgenes and cgenes properly formatted and in the necessary folder from the HOMER version of this analysis. See HOMER version of gene_expression analysis for more details on how I obtained orthologous genes.
#Now, what I will need to do is prep bed files from the 2 sets of filtered data for each bin, in order to run bedtools-closest on them with the human and chimp gene data. This is for getting each bin's proximity to TSS by overlapping with the dfs I was just referring to (humgenes and chimpgenes). In the end this set of bedfiles is fairly useless, because really it would be preferable to get rid of duplicates so that I can merely group_by on a given bin afterwards and left_join as necessary. So somewhat deprecated, but I keep it here still:
hbin1KR <- data.frame(chr=data.KR$Hchr, start=as.numeric(gsub("chr.*-", "", data.KR$H1)), end=as.numeric(gsub("chr.*-", "", data.KR$H1))+10000)
hbin2KR <- data.frame(chr=data.KR$Hchr, start=as.numeric(gsub("chr.*-", "", data.KR$H2)), end=as.numeric(gsub("chr.*-", "", data.KR$H2))+10000)
cbin1KR <- data.frame(chr=data.KR$Cchr, start=as.numeric(gsub("chr.*-", "", data.KR$C1)), end=as.numeric(gsub("chr.*-", "", data.KR$C1))+10000)
cbin2KR <- data.frame(chr=data.KR$Cchr, start=as.numeric(gsub("chr.*-", "", data.KR$C2)), end=as.numeric(gsub("chr.*-", "", data.KR$C2))+10000)
hbin1VC <- data.frame(chr=data.VC$Hchr, start=as.numeric(gsub("chr.*-", "", data.VC$H1)), end=as.numeric(gsub("chr.*-", "", data.VC$H1))+10000)
hbin2VC <- data.frame(chr=data.VC$Hchr, start=as.numeric(gsub("chr.*-", "", data.VC$H2)), end=as.numeric(gsub("chr.*-", "", data.VC$H2))+10000)
cbin1VC <- data.frame(chr=data.VC$Cchr, start=as.numeric(gsub("chr.*-", "", data.VC$C1)), end=as.numeric(gsub("chr.*-", "", data.VC$C1))+10000)
cbin2VC <- data.frame(chr=data.VC$Cchr, start=as.numeric(gsub("chr.*-", "", data.VC$C2)), end=as.numeric(gsub("chr.*-", "", data.VC$C2))+10000)
#In most analyses, it will make more sense to have a single bed file for both sets of bins, and remove all duplicates. I create that here:
hbinsKR <- rbind(hbin1KR[!duplicated(hbin1KR),], hbin2KR[!duplicated(hbin2KR),])
hbinsKR <- hbinsKR[!duplicated(hbinsKR),]
cbinsKR <- rbind(cbin1KR[!duplicated(cbin1KR),], cbin2KR[!duplicated(cbin2KR),])
cbinsKR <- cbinsKR[!duplicated(cbinsKR),]
hbinsVC <- rbind(hbin1VC[!duplicated(hbin1VC),], hbin2VC[!duplicated(hbin2VC),])
hbinsVC <- hbinsVC[!duplicated(hbinsVC),]
cbinsVC <- rbind(cbin1VC[!duplicated(cbin1VC),], cbin2VC[!duplicated(cbin2VC),])
cbinsVC <- cbinsVC[!duplicated(cbinsVC),]
#Now, write all of these files out for analysis with bedtools.
options(scipen=999) #Don't want any scientific notation in these BED files, will mess up some bedtools analyses at times
write.table(hbin1KR, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/hbin1KR.bed", quote = FALSE, sep="\t", row.names = FALSE, col.names=FALSE)
write.table(hbin2KR, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/hbin2KR.bed", quote = FALSE, sep="\t", row.names = FALSE, col.names=FALSE)
write.table(cbin1KR, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/cbin1KR.bed", quote = FALSE, sep="\t", row.names = FALSE, col.names=FALSE)
write.table(cbin2KR, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/cbin2KR.bed", quote = FALSE, sep="\t", row.names = FALSE, col.names=FALSE)
write.table(hbinsKR, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/hbinsKR.bed", quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE)
write.table(cbinsKR, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/cbinsKR.bed", quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE)
write.table(hbin1VC, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/hbin1VC.bed", quote = FALSE, sep="\t", row.names = FALSE, col.names=FALSE)
write.table(hbin2VC, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/hbin2VC.bed", quote = FALSE, sep="\t", row.names = FALSE, col.names=FALSE)
write.table(cbin1VC, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/cbin1VC.bed", quote = FALSE, sep="\t", row.names = FALSE, col.names=FALSE)
write.table(cbin2VC, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/cbin2VC.bed", quote = FALSE, sep="\t", row.names = FALSE, col.names=FALSE)
write.table(hbinsVC, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/hbinsVC.bed", quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE)
write.table(cbinsVC, "~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/unsorted/cbinsVC.bed", quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE)
options(scipen=0)
#Read in new, simpler bedtools closest files for genes. This is after running two commands, after sorting the files w/ sort -k1,1 -k2,2n in.bed > out.bed:
#bedtools closest -D a -a cgenes.sorted.bed -b cbins.sorted.bed > cgene.hic.overlap
#bedtools closest -D a -a hgenes.sorted.bed -b hbins.sorted.bed > hgene.hic.overlap
hgene.hic.KR <- fread("~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/hgene.hic.KR.overlap", header=FALSE, stringsAsFactors = FALSE, data.table=FALSE)
cgene.hic.KR <- fread("~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/cgene.hic.KR.overlap", header=FALSE, stringsAsFactors = FALSE, data.table=FALSE)
hgene.hic.VC <- fread("~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/hgene.hic.VC.overlap", header=FALSE, stringsAsFactors = FALSE, data.table=FALSE)
cgene.hic.VC <- fread("~/Desktop/Hi-C/gene_expression/juicer_10kb_filt_overlaps/cgene.hic.VC.overlap", header=FALSE, stringsAsFactors = FALSE, data.table=FALSE)
#Visualize the overlap of genes with bins and see how many genes we get back! Left out here b/c not super necessary, but code is collapsed here:
# hum.genelap <- data.frame(overlap=seq(0, 100000, 1000), perc.genes = NA, tot.genes=NA)
# for(row in 1:nrow(hum.genelap)){
# hum.genelap$perc.genes[row] <- sum(abs(hgene.hic$V10)<=hum.genelap$overlap[row])/length(hgene.hic$V10)
# hum.genelap$tot.genes[row] <- sum(abs(hgene.hic$V10)<=hum.genelap$overlap[row])
# }
#
# c.genelap <- data.frame(overlap=seq(0, 100000, 1000), perc.genes=NA, tot.genes=NA)
# for(row in 1:nrow(c.genelap)){
# c.genelap$perc.genes[row] <- sum(abs(cgene.hic$V10)<=c.genelap$overlap[row])/length(cgene.hic$V10)
# c.genelap$tot.genes[row] <- sum(abs(cgene.hic$V10)<=c.genelap$overlap[row])
# }
# c.genelap$type <- "chimp"
# hum.genelap$type <- "human"
#Examine what the potential gains are here if we are more lenient about the overlap/closeness to a TSS...
#ggoverlap <- rbind(hum.genelap, c.genelap)
#ggplot(data=ggoverlap) + geom_line(aes(x=overlap, y=perc.genes*100, color=type)) + ggtitle("Percent of Total Genes Picked Up | Min. Distance from TSS") + xlab("Distance from TSS") + ylab("Percentage of genes in ortho exon trios file (~44k)") + scale_color_discrete(guide=guide_legend(title="Species")) + coord_cartesian(xlim=c(0, 30000)) + scale_x_continuous(breaks=seq(0, 30000, 5000))
#ggplot(data=ggoverlap) + geom_line(aes(x=overlap, y=tot.genes, color=type)) + ggtitle("Total # Genes Picked Up | Min. Distance from TSS") + xlab("Distance from TSS") + ylab("Total # of Genes Picked up On (of ~44k)") + scale_color_discrete(guide=guide_legend(title="Species")) + coord_cartesian(xlim=c(0, 30000)) + scale_x_continuous(breaks=seq(0, 30000, 5000))
#Start with a conservative pass--only take those genes that had an actual overlap with a bin, not ones that were merely close to one. Allowing some leeway to include genes that are within 1kb, 2kb, 3kb etc. of a Hi-C bin adds an average of ~800 genes per 1kb. We can also examine the distribution manually to motivate this decision:
quantile(abs(hgene.hic.KR$V10), probs=seq(0, 1, 0.025))
0% 2.5% 5% 7.5% 10% 12.5% 15%
0.0 0.0 0.0 0.0 1.0 1.0 111.2
17.5% 20% 22.5% 25% 27.5% 30% 32.5%
2801.8 5837.6 9170.0 12712.0 16555.3 20657.4 25122.3
35% 37.5% 40% 42.5% 45% 47.5% 50%
30415.8 35854.0 41976.0 48686.7 56315.6 64775.6 74086.0
52.5% 55% 57.5% 60% 62.5% 65% 67.5%
84493.0 96003.4 109333.4 124395.4 141705.5 161762.6 185550.4
70% 72.5% 75% 77.5% 80% 82.5% 85%
213137.8 245623.6 284011.0 326249.0 378968.0 446350.3 531628.2
87.5% 90% 92.5% 95% 97.5% 100%
644495.0 813443.4 1066258.7 1411418.6 2117706.0 8009993.0
quantile(abs(cgene.hic.KR$V10), probs=seq(0, 1, 0.025))
0% 2.5% 5% 7.5% 10% 12.5% 15%
0.0 0.0 0.0 0.0 1.0 1.0 35.8
17.5% 20% 22.5% 25% 27.5% 30% 32.5%
2576.0 5543.2 9001.0 12613.0 16346.1 20659.2 25304.9
35% 37.5% 40% 42.5% 45% 47.5% 50%
30415.0 36090.5 42336.4 49350.3 57203.8 65644.9 74758.0
52.5% 55% 57.5% 60% 62.5% 65% 67.5%
85634.5 97537.4 110882.8 125779.8 143152.5 163577.6 187829.8
70% 72.5% 75% 77.5% 80% 82.5% 85%
216384.2 248610.8 288103.0 330921.3 384602.4 448229.0 536251.6
87.5% 90% 92.5% 95% 97.5% 100%
645908.0 806354.8 1046000.8 1398275.0 2071010.3 7529283.0
quantile(abs(hgene.hic.VC$V10), probs=seq(0, 1, 0.025))
0% 2.5% 5% 7.5% 10% 12.5% 15%
0.0 0.0 0.0 0.0 0.0 748.5 2939.6
17.5% 20% 22.5% 25% 27.5% 30% 32.5%
5319.8 8011.8 10835.5 13955.0 16964.2 20339.0 24054.6
35% 37.5% 40% 42.5% 45% 47.5% 50%
28368.6 33051.5 38083.0 43953.0 50389.2 57444.3 64971.0
52.5% 55% 57.5% 60% 62.5% 65% 67.5%
73526.5 83118.4 94055.9 106232.2 119220.5 135994.6 153922.6
70% 72.5% 75% 77.5% 80% 82.5% 85%
175048.2 199192.6 228645.0 261707.1 302957.2 351916.0 413198.4
87.5% 90% 92.5% 95% 97.5% 100%
501881.5 641720.2 865347.1 1205121.8 1836870.5 6456300.0
quantile(abs(cgene.hic.VC$V10), probs=seq(0, 1, 0.025))
0% 2.5% 5% 7.5% 10% 12.5% 15%
0.0 0.0 0.0 0.0 0.0 512.5 2649.0
17.5% 20% 22.5% 25% 27.5% 30% 32.5%
5010.2 7760.8 10585.5 13538.0 16670.4 20081.6 24187.6
35% 37.5% 40% 42.5% 45% 47.5% 50%
28398.8 33210.5 38408.6 44155.4 50593.8 57687.0 65471.0
52.5% 55% 57.5% 60% 62.5% 65% 67.5%
73931.1 83568.4 94835.9 107072.8 120344.5 136233.2 154759.7
70% 72.5% 75% 77.5% 80% 82.5% 85%
176847.0 202064.3 230537.0 264555.9 303717.0 353338.0 415156.6
87.5% 90% 92.5% 95% 97.5% 100%
501151.5 629124.0 840981.2 1183185.6 1767476.4 6500471.0
#So it looks as though we pick up on approximately 10% of 44k genes (~4.4k)
#Note I looked at proportion of overlap with DE and with non-DE genes just for curiosity, and roughly 66% of the DE genes have overlap with a Hi-C bin while roughly 70% of the non-DE genes do. Since this result isn't particularly interesting I have collapsed that analysis here.
#Also are interested in seeing how this differs for DE and non-DE genes.
sum(hgene.hic.KR$V10==0&(hgene.hic.KR$V4 %in% DEgenes)) #263 genes
[1] 263
sum(hgene.hic.KR$V10==0&(hgene.hic.KR$V4 %in% nonDEgenes)) #1011 genes
[1] 1011
sum(hgene.hic.KR$V10==0&(!hgene.hic.KR$V4 %in% DEgenes)&(!hgene.hic.KR$V4 %in% nonDEgenes)) #Checking which genes have direct overlap with bins, but were not picked up on in our RNAseq data. 2717
[1] 2717
sum(hgene.hic.VC$V10==0&(hgene.hic.VC$V4 %in% DEgenes)) #334 genes
[1] 334
sum(hgene.hic.VC$V10==0&(hgene.hic.VC$V4 %in% nonDEgenes)) #1296 genes
[1] 1296
sum(hgene.hic.VC$V10==0&(!hgene.hic.VC$V4 %in% DEgenes)&(!hgene.hic.VC$V4 %in% nonDEgenes)) #Checking which genes have direct overlap with bins, but were not picked up on in our RNAseq data. 3441
[1] 3441
In this next section I simply add information obtained from linear modeling on the Hi-C interaction frequencies to the appropriate genes having overlap with Hi-C bins. Because one Hi-C bin frequently shows up many times in the data, this means I must choose some kind of summary for Hi-C contact frequencies and linear modeling annotations for each gene. I toy with a variety of these summaries here, including choosing the minimum FDR contact, the maximum beta contact, the upstream contact, or summarizing all a bin’s contacts with the weighted Z-combine method or median FDR values.
hgene.hic.overlap.KR <- filter(hgene.hic.KR, V10==0) #Still leaves a solid ~4k genes.
cgene.hic.overlap.KR <- filter(cgene.hic.KR, V10==0) #Still leaves a solid ~4k genes.
hgene.hic.overlap.VC <- filter(hgene.hic.VC, V10==0) #Still leaves a solid ~5k genes.
cgene.hic.overlap.VC <- filter(cgene.hic.VC, V10==0) #Still leaves a solid ~5k genes.
#Add a column to both dfs indicating where along a bin the gene in question is found (from 0-10k):
hgene.hic.overlap.KR$bin_pos <- abs(hgene.hic.overlap.KR$V8-hgene.hic.overlap.KR$V2)
cgene.hic.overlap.KR$bin_pos <- abs(cgene.hic.overlap.KR$V8-cgene.hic.overlap.KR$V2)
hgene.hic.overlap.VC$bin_pos <- abs(hgene.hic.overlap.VC$V8-hgene.hic.overlap.VC$V2)
cgene.hic.overlap.VC$bin_pos <- abs(cgene.hic.overlap.VC$V8-cgene.hic.overlap.VC$V2)
#Rearrange columns and create another column of the bin ID.
hgene.hic.overlap.KR <- hgene.hic.overlap.KR[,c(4, 7:9, 6, 11, 1:2)]
hgene.hic.overlap.KR$HID <- paste(hgene.hic.overlap.KR$V7, hgene.hic.overlap.KR$V8, sep="-")
cgene.hic.overlap.KR <- cgene.hic.overlap.KR[,c(4, 7:9, 6, 11, 1:2)]
cgene.hic.overlap.KR$CID <- paste(cgene.hic.overlap.KR$V7, cgene.hic.overlap.KR$V8, sep="-")
colnames(hgene.hic.overlap.KR) <- c("genes", "HiC_chr", "H1start", "H1end", "Hstrand", "bin_pos", "genechr", "genepos", "HID")
colnames(cgene.hic.overlap.KR) <- c("genes", "HiC_chr", "C1start", "C1end", "Cstrand", "bin_pos", "genechr", "genepos", "CID")
hgene.hic.overlap.VC <- hgene.hic.overlap.VC[,c(4, 7:9, 6, 11, 1:2)]
hgene.hic.overlap.VC$HID <- paste(hgene.hic.overlap.VC$V7, hgene.hic.overlap.VC$V8, sep="-")
cgene.hic.overlap.VC <- cgene.hic.overlap.VC[,c(4, 7:9, 6, 11, 1:2)]
cgene.hic.overlap.VC$CID <- paste(cgene.hic.overlap.VC$V7, cgene.hic.overlap.VC$V8, sep="-")
colnames(hgene.hic.overlap.VC) <- c("genes", "HiC_chr", "H1start", "H1end", "Hstrand", "bin_pos", "genechr", "genepos", "HID")
colnames(cgene.hic.overlap.VC) <- c("genes", "HiC_chr", "C1start", "C1end", "Cstrand", "bin_pos", "genechr", "genepos", "CID")
#Before extracting some data from the data.KR and data.VC dfs, need to reformat their H1, H2, C1, and C2 columns to include chromosome (this was how homer data already was formatted so didn't have to think about it there)
data.KR$H1 <- paste(data.KR$Hchr, data.KR$H1, sep="-")
data.KR$H2 <- paste(data.KR$Hchr, data.KR$H2, sep="-")
data.VC$H1 <- paste(data.VC$Hchr, data.VC$H1, sep="-")
data.VC$H2 <- paste(data.VC$Hchr, data.VC$H2, sep="-")
data.KR$C1 <- paste(data.KR$Cchr, data.KR$C1, sep="-")
data.KR$C2 <- paste(data.KR$Cchr, data.KR$C2, sep="-")
data.VC$C1 <- paste(data.VC$Cchr, data.VC$C1, sep="-")
data.VC$C2 <- paste(data.VC$Cchr, data.VC$C2, sep="-")
hbindf.KR <- select(data.KR, "H1", "H2", "ALLvar", "SE", "sp_beta", "sp_pval", "sp_BH_pval", "Hdist")
names(hbindf.KR) <- c("HID", "HID2", "ALLvar", "SE", "sp_beta", "sp_pval", "sp_BH_pval", "distance") #I have confirmed that all the HID2s are higher numbered coordinates than the HID1s, the only instance in which this isn't the case is when the two bins are identical (this should have been filtered out long before now).
hbindf.VC <- select(data.VC, "H1", "H2", "ALLvar", "SE", "sp_beta", "sp_pval", "sp_BH_pval", "Hdist")
names(hbindf.VC) <- c("HID", "HID2", "ALLvar", "SE", "sp_beta", "sp_pval", "sp_BH_pval", "distance")
#This step is unnecessary in this paradigm as there are no hits like this, but keep it in here for the premise's sake:
hbindf.KR <- hbindf.KR[(which(hbindf.KR$distance!=0)),] #Removes pairs where the same bin represents both mates. These instances occur exclusively when liftOver of the genomic coordinates from one species to another, and the subsequent rounding to the nearest 10kb, results in a contact between adjacent bins in one species being mapped as a contact between the same bin in the other species. Because there are less than 50 instances of this total in the dataset I simply remove it here without further worry.
hbindf.VC <- hbindf.VC[(which(hbindf.VC$distance!=0)),]
#For explanations about the motivations and intents behind code in the rest of this chunk, please see the same file for the HOMER analysis.
group_by(hbindf.KR, HID) %>% summarise(DS_bin=HID2[which.min(distance)], DS_FDR=sp_BH_pval[which.min(distance)], DS_dist=distance[which.min(distance)]) -> hbin1.downstream.KR
group_by(hbindf.KR, HID2) %>% summarise(US_bin=HID[which.min(distance)], US_FDR=sp_BH_pval[which.min(distance)], US_dist=distance[which.min(distance)]) -> hbin2.upstream.KR
colnames(hbin2.upstream.KR) <- c("HID", "US_bin", "US_FDR", "US_dist")
Hstreams.KR <- full_join(hbin1.downstream.KR, hbin2.upstream.KR, by="HID")
hbindf.KR.flip <- hbindf.KR[,c(2, 1, 3:7)]
colnames(hbindf.KR.flip)[1:2] <- c("HID", "HID2")
hbindf.KR_x2 <- rbind(hbindf.KR[,1:7], hbindf.KR.flip)
as.data.frame(group_by(hbindf.KR_x2, HID) %>% summarise(min_FDR_bin=HID2[which.min(sp_BH_pval)], min_FDR=min(sp_BH_pval), min_FDR_pval=sp_pval[which.min(sp_BH_pval)], min_FDR_B=sp_beta[which.min(sp_BH_pval)], median_FDR=median(sp_BH_pval), weighted_Z.ALLvar=pnorm((sum((1/ALLvar)*((qnorm(1-sp_pval))))/sqrt(sum((1/ALLvar)^2))), lower.tail=FALSE), weighted_Z.s2post=pnorm(sum((1/(SE^2))*qnorm(1-sp_pval))/sqrt(sum(1/SE^2)), lower.tail=FALSE), fisher=-2*sum(log(sp_pval)), numcontacts=n(), max_B_bin=HID2[which.max(abs(sp_beta))], max_B_FDR=sp_BH_pval[which.max(abs(sp_beta))], max_B=sp_beta[which.max(abs(sp_beta))])) -> hbin.KR.info
full_join(hbin.KR.info, Hstreams.KR, by="HID") -> hbin.KR.full.info
left_join(hgene.hic.overlap.KR, hbin.KR.full.info, by="HID") -> humgenes.KR.full
colnames(humgenes.KR.full)[1:5] <- c("genes", "Hchr", "Hstart", "Hend", "Hstrand") #Fix column names for what was just created
group_by(hbindf.VC, HID) %>% summarise(DS_bin=HID2[which.min(distance)], DS_FDR=sp_BH_pval[which.min(distance)], DS_dist=distance[which.min(distance)]) -> hbin1.downstream.VC
group_by(hbindf.VC, HID2) %>% summarise(US_bin=HID[which.min(distance)], US_FDR=sp_BH_pval[which.min(distance)], US_dist=distance[which.min(distance)]) -> hbin2.upstream.VC
colnames(hbin2.upstream.VC) <- c("HID", "US_bin", "US_FDR", "US_dist")
Hstreams.VC <- full_join(hbin1.downstream.VC, hbin2.upstream.VC, by="HID")
hbindf.VC.flip <- hbindf.VC[,c(2, 1, 3:7)]
colnames(hbindf.VC.flip)[1:2] <- c("HID", "HID2")
hbindf.VC_x2 <- rbind(hbindf.VC[,1:7], hbindf.VC.flip)
as.data.frame(group_by(hbindf.VC_x2, HID) %>% summarise(min_FDR_bin=HID2[which.min(sp_BH_pval)], min_FDR=min(sp_BH_pval), min_FDR_pval=sp_pval[which.min(sp_BH_pval)], min_FDR_B=sp_beta[which.min(sp_BH_pval)], median_FDR=median(sp_BH_pval), weighted_Z.ALLvar=pnorm((sum((1/ALLvar)*((qnorm(1-sp_pval))))/sqrt(sum((1/ALLvar)^2))), lower.tail=FALSE), weighted_Z.s2post=pnorm(sum((1/(SE^2))*qnorm(1-sp_pval))/sqrt(sum(1/SE^2)), lower.tail=FALSE), fisher=-2*sum(log(sp_pval)), numcontacts=n(), max_B_bin=HID2[which.max(abs(sp_beta))], max_B_FDR=sp_BH_pval[which.max(abs(sp_beta))], max_B=sp_beta[which.max(abs(sp_beta))])) -> hbin.VC.info
full_join(hbin.VC.info, Hstreams.VC, by="HID") -> hbin.VC.full.info
left_join(hgene.hic.overlap.VC, hbin.VC.full.info, by="HID") -> humgenes.VC.full
colnames(humgenes.VC.full)[1:5] <- c("genes", "Hchr", "Hstart", "Hend", "Hstrand") #Fix column names for what was just created
###Do chimps in both as well, to maximize overlaps:
###KR
cbin.KR <- select(data.KR, "C1", "C2", "ALLvar", "SE", "sp_beta", "sp_pval", "sp_BH_pval", "Cdist")
names(cbin.KR) <- c("CID", "CID2", "ALLvar", "SE", "sp_beta", "sp_pval", "sp_BH_pval", "distance")
cbin.KR <- cbin.KR[(which(cbin.KR$dist!=0)),]
group_by(cbin.KR, CID) %>% summarise(DS_bin=CID2[which.min(distance)], DS_FDR=sp_BH_pval[which.min(distance)], DS_dist=distance[which.min(distance)]) -> cbin1.downstream.KR
group_by(cbin.KR, CID2) %>% summarise(US_bin=CID[which.min(distance)], US_FDR=sp_BH_pval[which.min(distance)], US_dist=distance[which.min(distance)]) -> cbin2.upstream.KR
colnames(cbin2.upstream.KR) <- c("CID", "US_bin", "US_FDR", "US_dist")
Cstreams.KR <- full_join(cbin1.downstream.KR, cbin2.upstream.KR, by="CID")
cbin.KR.flip <- cbin.KR[,c(2, 1, 3:7)]
colnames(cbin.KR.flip)[1:2] <- c("CID", "CID2")
cbin.KR_x2 <- rbind(cbin.KR[,1:7], cbin.KR.flip)
group_by(cbin.KR_x2, CID) %>% summarise(min_FDR_bin=CID2[which.min(sp_BH_pval)], min_FDR=min(sp_BH_pval), min_FDR_B=sp_beta[which.min(sp_BH_pval)], median_FDR=median(sp_BH_pval), weighted_Z.ALLvar=pnorm((sum((1/ALLvar)*((qnorm(1-sp_pval))))/sqrt(sum((1/ALLvar)^2))), lower.tail=FALSE), weighted_Z.s2post=pnorm(sum((1/(SE^2))*qnorm(1-sp_pval))/sqrt(sum(1/SE^2)), lower.tail=FALSE), fisher=-2*sum(log(sp_pval)), numcontacts=n(), max_B_bin=CID2[which.max(abs(sp_beta))], max_B_FDR=sp_BH_pval[which.max(abs(sp_beta))], max_B=sp_beta[which.max(abs(sp_beta))]) -> cbin.info.KR
full_join(cbin.info.KR, Cstreams.KR, by="CID") -> cbin.full.info.KR
left_join(cgene.hic.overlap.KR, cbin.full.info.KR, by="CID") -> chimpgenes.hic.KR
colnames(chimpgenes.hic.KR)[1:5] <- c("genes", "Hchr", "Hstart", "Hend", "Hstrand") #Fix column names for what was just created
###VC
cbin.VC <- select(data.VC, "C1", "C2", "ALLvar", "SE", "sp_beta", "sp_pval", "sp_BH_pval", "Cdist")
names(cbin.VC) <- c("CID", "CID2", "ALLvar", "SE", "sp_beta", "sp_pval", "sp_BH_pval", "distance")
cbin.VC <- cbin.VC[(which(cbin.VC$dist!=0)),]
group_by(cbin.VC, CID) %>% summarise(DS_bin=CID2[which.min(distance)], DS_FDR=sp_BH_pval[which.min(distance)], DS_dist=distance[which.min(distance)]) -> cbin1.downstream.VC
group_by(cbin.VC, CID2) %>% summarise(US_bin=CID[which.min(distance)], US_FDR=sp_BH_pval[which.min(distance)], US_dist=distance[which.min(distance)]) -> cbin2.upstream.VC
colnames(cbin2.upstream.VC) <- c("CID", "US_bin", "US_FDR", "US_dist")
Cstreams.VC <- full_join(cbin1.downstream.VC, cbin2.upstream.VC, by="CID")
cbin.VC.flip <- cbin.VC[,c(2, 1, 3:7)]
colnames(cbin.VC.flip)[1:2] <- c("CID", "CID2")
cbin.VC_x2 <- rbind(cbin.VC[,1:7], cbin.VC.flip)
group_by(cbin.VC_x2, CID) %>% summarise(min_FDR_bin=CID2[which.min(sp_BH_pval)], min_FDR=min(sp_BH_pval), min_FDR_B=sp_beta[which.min(sp_BH_pval)], median_FDR=median(sp_BH_pval), weighted_Z.ALLvar=pnorm((sum((1/ALLvar)*((qnorm(1-sp_pval))))/sqrt(sum((1/ALLvar)^2))), lower.tail=FALSE), weighted_Z.s2post=pnorm(sum((1/(SE^2))*qnorm(1-sp_pval))/sqrt(sum(1/SE^2)), lower.tail=FALSE), fisher=-2*sum(log(sp_pval)), numcontacts=n(), max_B_bin=CID2[which.max(abs(sp_beta))], max_B_FDR=sp_BH_pval[which.max(abs(sp_beta))], max_B=sp_beta[which.max(abs(sp_beta))]) -> cbin.info.VC
full_join(cbin.info.VC, Cstreams.VC, by="CID") -> cbin.full.info.VC
left_join(cgene.hic.overlap.VC, cbin.full.info.VC, by="CID") -> chimpgenes.hic.VC
colnames(chimpgenes.hic.VC)[1:5] <- c("genes", "Hchr", "Hstart", "Hend", "Hstrand") #Fix column names for what was just created
#Now, combine chimpgenes.hic.full and humgenes.hic.full before a final left_join on detable:
full_join(humgenes.VC.full, chimpgenes.hic.VC, by="genes", suffix=c(".H", ".C")) -> genes.hic.VC
full_join(humgenes.KR.full, chimpgenes.hic.KR, by="genes", suffix=c(".H", ".C")) -> genes.hic.KR
###Final join of human and chimp values for both:
left_join(detable, genes.hic.VC, by="genes") -> gene.hic.VC
left_join(detable, genes.hic.KR, by="genes") -> gene.hic.KR
#Clean this dataframe up, removing rows where there is absolutely no Hi-C information for the gene.
filt.VC <- rowSums(is.na(gene.hic.VC)) #51 NA values are found when there is absolutely no Hi-C information.
filt.KR <- rowSums(is.na(gene.hic.KR)) #same.
filt.VC <- which(filt.VC==51)
filt.KR <- which(filt.KR==51)
gene.hic.VC <- gene.hic.VC[-filt.VC,]
gene.hic.KR <- gene.hic.KR[-filt.KR,]
saveRDS(gene.hic.KR, "~/Desktop/Hi-C/gene.hic.filt.KR.RDS")
saveRDS(gene.hic.VC, "~/Desktop/Hi-C/gene.hic.filt.VC.RDS")
Now I look for enrichment of DHi-C in DE genes using a variety of different metrics to call DHi-C. I now look to see if genes that are differentially expressed are also differential in Hi-C contacts (DHi-C). That is to say, are differentially expressed genes enriched in their overlapping bins for Hi-C contacts that are also differential between the species? To do this I utilize p-values from my prior linear modeling as well as previous RNA-seq analysis. I construct a function to calculate proportions of DE and DHi-C genes, as well as a function to plot this out in a variety of different ways.
####Enrichment analyses!
#A function for calculating proportion of DE genes that are DHi-C under a variety of different paradigms. Accounts for when no genes are DHi-C and when all genes are DHi-C. Returns the proportion of DE genes that are also DHi-C, as well as the expected proportion based on conditional probability alone.
prop.calculator <- function(de.vec, hic.vec, i, k){
my.result <- data.frame(prop=NA, exp.prop=NA, chisq.p=NA, Dneither=NA, DE=NA, DHiC=NA, Dboth=NA)
bad.indices <- which(is.na(hic.vec)) #First obtain indices where Hi-C info is missing, if there are any, then remove from both vectors.
if(length(bad.indices>0)){
de.vec <- de.vec[-bad.indices]
hic.vec <- hic.vec[-bad.indices]}
de.vec <- ifelse(de.vec<=i, 1, 0)
hic.vec <- ifelse(hic.vec<=k, 1, 0)
if(sum(hic.vec, na.rm=TRUE)==0){#The case where no genes show up as DHi-C.
my.result[1,] <- c(0, 0, 0, sum(de.vec==0, na.rm=TRUE), sum(de.vec==1, na.rm=TRUE), 0, 0) #Since no genes are DHi-C, the proportion is 0 and our expectation is 0, set p-val=0 since it's irrelevant.
}
else if(sum(hic.vec)==length(hic.vec)){ #The case where every gene shows up as DHi-C
my.result[1,] <- c(1, 1, 0, 0, 0, sum(hic.vec==1&de.vec==0, na.rm = TRUE), sum(de.vec==1&hic.vec==1, na.rm=TRUE)) #If every gene is DHi-C, the observed proportion of DE genes DHi-C is 1, and the expected proportion of DE genes also DHi-C would also be 1 (all DE genes are DHi-C, since all genes are). Again set p-val to 0 since irrelevant comparison.
}
else{#The typical case, where we get an actual table
mytable <- table(as.data.frame(cbind(de.vec, hic.vec)))
my.result[1,1] <- mytable[2,2]/sum(mytable[2,]) #The observed proportion of DE genes that are also DHi-C. # that are both/total # DE
my.result[1,2] <- (((sum(mytable[2,])/sum(mytable))*((sum(mytable[,2])/sum(mytable))))*sum(mytable))/sum(mytable[2,]) #The expected proportion: (p(DE) * p(DHiC)) * total # genes / # DE genes
my.result[1,3] <- chisq.test(mytable)$p.value
my.result[1,4] <- mytable[1,1]
my.result[1,5] <- mytable[2,1]
my.result[1,6] <- mytable[1,2]
my.result[1,7] <- mytable[2,2]
}
return(my.result)
}
#This is a function that computes observed and expected proportions of DE and DHiC enrichments, and spits out a variety of different visualizations for them. As input it takes a dataframe, the names of its DHiC and DE p-value columns, and a name to represent the type of Hi-C contact summary for the gene that ends up on the x-axis of all the plots.
enrichment.plotter <- function(df, HiC_col, DE_col, xlab, xmax=0.3, i=c(0.01, 0.025, 0.05, 0.075, 0.1), k=seq(0.01, 1, 0.01)){
enrich.table <- data.frame(DEFDR = c(rep(i[1], 100), rep(i[2], 100), rep(i[3], 100), rep(i[4], 100), rep(i[5], 100)), DHICFDR=rep(k, 5), prop.obs=NA, prop.exp=NA, chisq.p=NA, Dneither=NA, DE=NA, DHiC=NA, Dboth=NA)
for(de.FDR in i){
for(hic.FDR in k){
enrich.table[which(enrich.table$DEFDR==de.FDR&enrich.table$DHICFDR==hic.FDR), 3:9] <- prop.calculator(df[,DE_col], df[,HiC_col], de.FDR, hic.FDR)
}
}
des.enriched <- ggplot(data=enrich.table, aes(x=DHICFDR, y=prop.obs, group=as.factor(DEFDR), color=as.factor(DEFDR))) +geom_line()+ geom_line(aes(y=prop.exp), linetype="dashed", size=0.5) + ggtitle("Enrichment of DC in DE Genes") + xlab(xlab) + ylab("Proportion of DE genes that are DC") + guides(color=guide_legend(title="FDR for DE Genes"))
dhics.enriched <- ggplot(data=enrich.table, aes(x=DHICFDR, y=Dboth/(Dboth+DHiC), group=as.factor(DEFDR), color=as.factor(DEFDR))) + geom_line() + geom_line(aes(y=(((((DE+Dboth)/(Dneither+DE+DHiC+Dboth))*((DHiC+Dboth)/(Dneither+DE+DHiC+Dboth)))*(Dneither+DE+DHiC+Dboth))/(DHiC+Dboth))), linetype="dashed") + ylab("Proportion of DC genes that are DE") +xlab(xlab) + ggtitle("Enrichment of DE in DC Genes") + coord_cartesian(xlim=c(0, xmax)) + guides(color=guide_legend(title="DE FDR"))
joint.enriched <- ggplot(data=enrich.table, aes(x=DHICFDR, y=Dboth/(Dneither+DE+DHiC+Dboth), group=as.factor(DEFDR), color=as.factor(DEFDR))) + geom_line() + ylab("Proportion of ALL Genes both DE & DHi-C") + xlab(xlab) + geom_line(aes(y=((DE+Dboth)/(Dneither+DE+DHiC+Dboth))*((DHiC+Dboth)/(Dneither+DE+DHiC+Dboth))), linetype="dashed") + ggtitle("Enrichment of Joint DE & DHi-C in All Genes")
chisq.p <- ggplot(data=enrich.table, aes(x=DHICFDR, y=-log10(chisq.p), group=as.factor(DEFDR), color=as.factor(DEFDR))) + geom_point() + geom_hline(yintercept=-log10(0.05), color="red") + ggtitle("Chi-squared Test P-values for Enrichment of DC in DE Genes") + xlab(xlab) + ylab("-log10(chi-squared p-values)") + coord_cartesian(xlim=c(0, xmax)) + guides(color=guide_legend(title="DE FDR"))
print(des.enriched)
print(dhics.enriched)
print(joint.enriched)
print(chisq.p)
print(enrich.table[which(enrich.table$DEFDR==0.025),]) #Added to figure out comparison for the paper.
}
#Visualization of enrichment of DE/DC in one another. For most of these, using the gene.hic.filt df is sufficient as their Hi-C FDR numbers are the same. For the upstream genes it's a little more complicated because gene.hic.filt doesn't incorporate strand information on the genes, so use the specific US dfs for that, with the USFDR column.
enrichment.plotter(gene.hic.VC, "min_FDR.H", "adj.P.Val", "Minimum FDR of VC Hi-C Contacts Overlapping Gene", xmax=1)
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
DEFDR DHICFDR prop.obs prop.exp chisq.p Dneither DE DHiC Dboth
101 0.025 0.01 0.01754386 0.01779141 1.00000000 1377 224 25 4
102 0.025 0.02 0.04385965 0.03558282 0.59285521 1354 218 48 10
103 0.025 0.03 0.07017544 0.06319018 0.74844939 1315 212 87 16
104 0.025 0.04 0.07894737 0.07546012 0.93641307 1297 210 105 18
105 0.025 0.05 0.09210526 0.09386503 1.00000000 1270 207 132 21
106 0.025 0.06 0.10964912 0.11288344 0.95727298 1243 203 159 25
107 0.025 0.07 0.13596491 0.13312883 0.97541215 1216 197 186 31
108 0.025 0.08 0.15789474 0.15337423 0.91624724 1188 192 214 36
109 0.025 0.09 0.17982456 0.16993865 0.73876738 1166 187 236 41
110 0.025 0.10 0.19298246 0.18834356 0.91887460 1139 184 263 44
111 0.025 0.11 0.20175439 0.19754601 0.93431729 1126 182 276 46
112 0.025 0.12 0.22807018 0.20736196 0.45714447 1116 176 286 52
113 0.025 0.13 0.24561404 0.21533742 0.26599015 1107 172 295 56
114 0.025 0.14 0.25877193 0.22944785 0.29347111 1087 169 315 59
115 0.025 0.15 0.26754386 0.23742331 0.28524810 1076 167 326 61
116 0.025 0.16 0.29385965 0.24969325 0.11437228 1062 161 340 67
117 0.025 0.17 0.31578947 0.26503067 0.07320091 1042 156 360 72
118 0.025 0.18 0.33333333 0.27852761 0.05601957 1024 152 378 76
119 0.025 0.19 0.35087719 0.28711656 0.02671540 1014 148 388 80
120 0.025 0.20 0.36842105 0.29631902 0.01268109 1003 144 399 84
121 0.025 0.21 0.36842105 0.30245399 0.02378674 993 144 409 84
122 0.025 0.22 0.37280702 0.31411043 0.04748361 975 143 427 85
123 0.025 0.23 0.39473684 0.32638037 0.02159607 960 138 442 90
124 0.025 0.24 0.40350877 0.34049080 0.03663515 939 136 463 92
125 0.025 0.25 0.40789474 0.34846626 0.05049965 927 135 475 93
126 0.025 0.26 0.42105263 0.36196319 0.05390464 908 132 494 96
127 0.025 0.27 0.43859649 0.36932515 0.02364280 900 128 502 100
128 0.025 0.28 0.44298246 0.37914110 0.03856780 885 127 517 101
129 0.025 0.29 0.45175439 0.38895706 0.04297312 871 125 531 103
130 0.025 0.30 0.46491228 0.40306748 0.04770651 851 122 551 106
131 0.025 0.31 0.47368421 0.41411043 0.05787378 835 120 567 108
132 0.025 0.32 0.47807018 0.42760736 0.11216822 814 119 588 109
133 0.025 0.33 0.48245614 0.44478528 0.24509083 787 118 615 110
134 0.025 0.34 0.49122807 0.45092025 0.21234769 779 116 623 112
135 0.025 0.35 0.50000000 0.46380368 0.26693651 760 114 642 114
136 0.025 0.36 0.50877193 0.47361963 0.28249414 746 112 656 116
137 0.025 0.37 0.51754386 0.48036810 0.25428445 737 110 665 118
138 0.025 0.38 0.52631579 0.48895706 0.25205891 725 108 677 120
139 0.025 0.39 0.53070175 0.50245399 0.39620517 704 107 698 121
140 0.025 0.40 0.53947368 0.51288344 0.42678869 689 105 713 123
141 0.025 0.41 0.54824561 0.51840491 0.36764763 682 103 720 125
142 0.025 0.42 0.56140351 0.52822086 0.31215477 669 100 733 128
143 0.025 0.43 0.57017544 0.53190184 0.23908682 665 98 737 130
144 0.025 0.44 0.58333333 0.53803681 0.15923876 658 95 744 133
145 0.025 0.45 0.60087719 0.54969325 0.10888967 643 91 759 137
146 0.025 0.46 0.60526316 0.55889571 0.14747350 629 90 773 138
147 0.025 0.47 0.61842105 0.57177914 0.14360002 611 87 791 141
148 0.025 0.48 0.62719298 0.57852761 0.12545552 602 85 800 143
149 0.025 0.49 0.63157895 0.59141104 0.20848034 582 84 820 144
150 0.025 0.50 0.64035088 0.60245399 0.23490791 566 82 836 146
151 0.025 0.51 0.64912281 0.61226994 0.24678832 552 80 850 148
152 0.025 0.52 0.65789474 0.61963190 0.22641009 542 78 860 150
153 0.025 0.53 0.67105263 0.62699387 0.15869530 533 75 869 153
154 0.025 0.54 0.67982456 0.63619632 0.16083799 520 73 882 155
155 0.025 0.55 0.69298246 0.64355828 0.10836950 511 70 891 158
156 0.025 0.56 0.71052632 0.65398773 0.06288059 498 66 904 162
157 0.025 0.57 0.71052632 0.66196319 0.11049384 485 66 917 162
158 0.025 0.58 0.72368421 0.67177914 0.08476833 472 63 930 165
159 0.025 0.59 0.72807018 0.68036810 0.11208973 459 62 943 166
160 0.025 0.60 0.73245614 0.68466258 0.11008002 453 61 949 167
161 0.025 0.61 0.73684211 0.69202454 0.13277696 442 60 960 168
162 0.025 0.62 0.75000000 0.69938650 0.08555825 433 57 969 171
163 0.025 0.63 0.75877193 0.70858896 0.08553381 420 55 982 173
164 0.025 0.64 0.76315789 0.71963190 0.13408581 403 54 999 174
165 0.025 0.65 0.78070175 0.72638037 0.05694501 396 50 1006 178
166 0.025 0.66 0.78508772 0.73619632 0.08448235 381 49 1021 179
167 0.025 0.67 0.78947368 0.74478528 0.11252583 368 48 1034 180
168 0.025 0.68 0.78947368 0.75030675 0.16429036 359 48 1043 180
169 0.025 0.69 0.79824561 0.76012270 0.17070008 345 46 1057 182
170 0.025 0.70 0.79824561 0.76503067 0.23354632 337 46 1065 182
171 0.025 0.71 0.80263158 0.77300613 0.28631683 325 45 1077 183
172 0.025 0.72 0.80701754 0.77730061 0.28144899 319 44 1083 184
173 0.025 0.73 0.80701754 0.78159509 0.35998888 312 44 1090 184
174 0.025 0.74 0.81578947 0.78650307 0.28171362 306 42 1096 186
175 0.025 0.75 0.81578947 0.79570552 0.47000885 291 42 1111 186
176 0.025 0.76 0.82894737 0.80061350 0.28676648 286 39 1116 189
177 0.025 0.77 0.83771930 0.81165644 0.32023462 270 37 1132 191
178 0.025 0.78 0.85087719 0.81533742 0.16174984 267 34 1135 194
179 0.025 0.79 0.85087719 0.82331288 0.27878955 254 34 1148 194
180 0.025 0.80 0.85964912 0.83312883 0.28811359 240 32 1162 196
181 0.025 0.81 0.86842105 0.84049080 0.25244409 230 30 1172 198
182 0.025 0.82 0.88157895 0.85030675 0.18449850 217 27 1185 201
183 0.025 0.83 0.89473684 0.85828221 0.10972262 207 24 1195 204
184 0.025 0.84 0.89912281 0.86441718 0.12204781 198 23 1204 205
185 0.025 0.85 0.90350877 0.87239264 0.15813738 186 22 1216 206
186 0.025 0.86 0.92105263 0.88343558 0.07229154 172 18 1230 210
187 0.025 0.87 0.92543860 0.89141104 0.09572916 160 17 1242 211
188 0.025 0.88 0.92982456 0.90245399 0.16709282 143 16 1259 212
189 0.025 0.89 0.93421053 0.91840491 0.41815957 118 15 1284 213
190 0.025 0.90 0.93421053 0.92883436 0.84024590 101 15 1301 213
191 0.025 0.91 0.94736842 0.93926380 0.68696721 87 12 1315 216
192 0.025 0.92 0.95614035 0.94478528 0.51368387 80 10 1322 218
193 0.025 0.93 0.96929825 0.95153374 0.23777935 72 7 1330 221
194 0.025 0.94 0.97368421 0.95766871 0.26368339 63 6 1339 222
195 0.025 0.95 0.98245614 0.96809816 0.25973393 48 4 1354 224
196 0.025 0.96 0.98684211 0.97177914 0.20576095 43 3 1359 225
197 0.025 0.97 0.98684211 0.97791411 0.45558545 33 3 1369 225
198 0.025 0.98 0.99561404 0.98711656 0.36271687 20 1 1382 227
199 0.025 0.99 0.99561404 0.99018405 0.59294582 15 1 1387 227
200 0.025 1.00 1.00000000 1.00000000 0.00000000 0 0 1402 228
enrichment.plotter(gene.hic.KR, "min_FDR.H", "adj.P.Val", "Minimum FDR of KR Hi-C Contacts Overlapping Gene, Chimp", xmax=1)
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning: Removed 5 rows containing missing values (geom_path).
Warning: Removed 5 rows containing missing values (geom_path).
DEFDR DHICFDR prop.obs prop.exp chisq.p Dneither DE DHiC Dboth
101 0.025 0.01 0.00000000 0.000000000 0.0000000 1100 174 0 0
102 0.025 0.02 0.00000000 0.003139717 0.9461552 1096 174 4 0
103 0.025 0.03 0.00000000 0.009419152 0.3360654 1088 174 12 0
104 0.025 0.04 0.02873563 0.033751962 0.8662375 1062 169 38 5
105 0.025 0.05 0.05172414 0.063579278 0.6012921 1028 165 72 9
106 0.025 0.06 0.09195402 0.098116170 0.8752942 991 158 109 16
107 0.025 0.07 0.13218391 0.119309262 0.6613957 971 151 129 23
108 0.025 0.08 0.13793103 0.142072214 0.9588927 943 150 157 24
109 0.025 0.09 0.15517241 0.173469388 0.5631071 906 147 194 27
110 0.025 0.10 0.19540230 0.203296703 0.8594275 875 140 225 34
111 0.025 0.11 0.20689655 0.229199372 0.5116904 844 138 256 36
112 0.025 0.12 0.25862069 0.256671900 1.0000000 818 129 282 45
113 0.025 0.13 0.29310345 0.279434851 0.7327157 795 123 305 51
114 0.025 0.14 0.33333333 0.296703297 0.2941624 780 116 320 58
115 0.025 0.15 0.33908046 0.306907378 0.3671477 768 115 332 59
116 0.025 0.16 0.36206897 0.322605965 0.2665157 752 111 348 63
117 0.025 0.17 0.37931034 0.343014129 0.3175669 729 108 371 66
118 0.025 0.18 0.40229885 0.357927786 0.2191319 714 104 386 70
119 0.025 0.19 0.41379310 0.380690738 0.3768152 687 102 413 72
120 0.025 0.20 0.43678161 0.395604396 0.2661303 672 98 428 76
121 0.025 0.21 0.46551724 0.414442700 0.1648331 653 93 447 81
122 0.025 0.22 0.47126437 0.430926217 0.2828294 633 92 467 82
123 0.025 0.23 0.47701149 0.441130298 0.3453178 621 91 479 83
124 0.025 0.24 0.47701149 0.445839874 0.4189822 615 91 485 83
125 0.025 0.25 0.48275862 0.465463108 0.6814792 591 90 509 84
126 0.025 0.26 0.51149425 0.484301413 0.4896858 572 85 528 89
127 0.025 0.27 0.51149425 0.489795918 0.5929399 565 85 535 89
128 0.025 0.28 0.53448276 0.501569859 0.3937285 554 81 546 93
129 0.025 0.29 0.54022989 0.510204082 0.4406704 544 80 556 94
130 0.025 0.30 0.54597701 0.521193093 0.5335250 531 79 569 95
131 0.025 0.31 0.54597701 0.532182104 0.7560128 517 79 583 95
132 0.025 0.32 0.55747126 0.540031397 0.6782184 509 77 591 97
133 0.025 0.33 0.56896552 0.550235479 0.6509207 498 75 602 99
134 0.025 0.34 0.56896552 0.554160126 0.7332758 493 75 607 99
135 0.025 0.35 0.56896552 0.559654631 0.8539491 486 75 614 99
136 0.025 0.36 0.59195402 0.575353218 0.6934009 470 71 630 103
137 0.025 0.37 0.60919540 0.585557300 0.5495944 460 68 640 106
138 0.025 0.38 0.60919540 0.596546311 0.7772778 446 68 654 106
139 0.025 0.39 0.61494253 0.604395604 0.8237142 437 67 663 107
140 0.025 0.40 0.62643678 0.616954474 0.8469684 423 65 677 109
141 0.025 0.41 0.63793103 0.627158556 0.8166244 412 63 688 111
142 0.025 0.42 0.65517241 0.635792779 0.6263015 404 60 696 114
143 0.025 0.43 0.66091954 0.644427002 0.6862983 394 59 706 115
144 0.025 0.44 0.66091954 0.656985871 0.9747102 378 59 722 115
145 0.025 0.45 0.67241379 0.671899529 1.0000000 361 57 739 117
146 0.025 0.46 0.68965517 0.679748823 0.8305636 354 54 746 120
147 0.025 0.47 0.70689655 0.685243328 0.5659419 350 51 750 123
148 0.025 0.48 0.71839080 0.691522763 0.4608209 344 49 756 125
149 0.025 0.49 0.71839080 0.696232339 0.5516460 338 49 762 125
150 0.025 0.50 0.71839080 0.699372057 0.6171827 334 49 766 125
151 0.025 0.51 0.72988506 0.705651491 0.5058383 328 47 772 127
152 0.025 0.52 0.72988506 0.711930926 0.6364056 320 47 780 127
153 0.025 0.53 0.72988506 0.716640502 0.7438865 314 47 786 127
154 0.025 0.54 0.76436782 0.729199372 0.3022179 304 41 796 133
155 0.025 0.55 0.76436782 0.731554160 0.3375076 301 41 799 133
156 0.025 0.56 0.76436782 0.732339089 0.3498790 300 41 800 133
157 0.025 0.57 0.76436782 0.737833595 0.4450468 293 41 807 133
158 0.025 0.58 0.76436782 0.744113030 0.5717628 285 41 815 133
159 0.025 0.59 0.77011494 0.750392465 0.5804929 278 40 822 134
160 0.025 0.60 0.77011494 0.758241758 0.7654011 268 40 832 134
161 0.025 0.61 0.77586207 0.766091052 0.8170766 259 39 841 135
162 0.025 0.62 0.78735632 0.771585557 0.6627507 254 37 846 137
163 0.025 0.63 0.79310345 0.779434851 0.7116826 245 36 855 138
164 0.025 0.64 0.80459770 0.785714286 0.5796560 239 34 861 140
165 0.025 0.65 0.81034483 0.790423862 0.5521151 234 33 866 141
166 0.025 0.66 0.81609195 0.795918367 0.5422863 228 32 872 142
167 0.025 0.67 0.81609195 0.799843014 0.6351083 223 32 877 142
168 0.025 0.68 0.82183908 0.803767661 0.5869620 219 31 881 143
169 0.025 0.69 0.82758621 0.810047096 0.5955978 212 30 888 144
170 0.025 0.70 0.83333333 0.816326531 0.6043586 205 29 895 145
171 0.025 0.71 0.83908046 0.824960754 0.6743915 195 28 905 146
172 0.025 0.72 0.83908046 0.838304553 1.0000000 178 28 922 146
173 0.025 0.73 0.83908046 0.840659341 1.0000000 175 28 925 146
174 0.025 0.74 0.85632184 0.848508634 0.8449347 168 25 932 149
175 0.025 0.75 0.85632184 0.851648352 0.9426931 164 25 936 149
176 0.025 0.76 0.86206897 0.854003140 0.8346465 162 24 938 150
177 0.025 0.77 0.86781609 0.858712716 0.7995729 157 23 943 151
178 0.025 0.78 0.87931034 0.866562009 0.6801633 149 21 951 153
179 0.025 0.79 0.87931034 0.870486656 0.8013771 144 21 956 153
180 0.025 0.80 0.88505747 0.879905808 0.9207534 133 20 967 154
181 0.025 0.81 0.89655172 0.888540031 0.8167113 124 18 976 156
182 0.025 0.82 0.89655172 0.890894819 0.8991508 121 18 979 156
183 0.025 0.83 0.90804598 0.896389325 0.6824454 116 16 984 158
184 0.025 0.84 0.91379310 0.900313972 0.6152778 112 15 988 159
185 0.025 0.85 0.91954023 0.906593407 0.6231413 105 14 995 160
186 0.025 0.86 0.92528736 0.915227630 0.7141844 95 13 1005 161
187 0.025 0.87 0.93678161 0.925431711 0.6469097 84 11 1016 163
188 0.025 0.88 0.94252874 0.932496075 0.6854239 76 10 1024 164
189 0.025 0.89 0.94252874 0.934850863 0.7822749 73 10 1027 164
190 0.025 0.90 0.94252874 0.941915228 1.0000000 64 10 1036 164
191 0.025 0.91 0.94827586 0.949764521 1.0000000 55 9 1045 165
192 0.025 0.92 0.95977011 0.955259027 0.9104711 50 7 1050 167
193 0.025 0.93 0.95977011 0.962323391 1.0000000 41 7 1059 167
194 0.025 0.94 0.97126437 0.966248038 0.8662375 38 5 1062 169
195 0.025 0.95 0.97126437 0.970172684 1.0000000 33 5 1067 169
196 0.025 0.96 0.97701149 0.975667190 1.0000000 27 4 1073 170
197 0.025 0.97 0.98275862 0.985871272 0.9770576 15 3 1085 171
198 0.025 0.98 0.98275862 0.992935636 0.2157441 6 3 1094 171
199 0.025 0.99 0.99425287 0.998430141 0.6401642 1 1 1099 173
200 0.025 1.00 1.00000000 1.000000000 0.0000000 0 0 1100 174
#enrichment.plotter(h_US, "USFDR", "adj.P.Val", "FDR for Closest Upstream Hi-C Contact Overlapping Gene, Human") #These two are ugly, and can't be run anyway until next chunk is complete to create their DFs. It's OK without them.
#enrichment.plotter(c_US, "USFDR", "adj.P.Val", "FDR for Closest Upstream Hi-C Contact Overlapping Gene, Chimp")
#enrichment.plotter(gene.hic.filt, "max_B_FDR.H", "adj.P.Val", "FDR for Hi-C Contact Overlapping Gene w/ Strongest Effect Size, Human")
#enrichment.plotter(gene.hic.filt, "max_B_FDR.C", "adj.P.Val", "FDR for Hi-C Contact Overlapping Gene w/ Strongest Effect Size, Chimp")
#enrichment.plotter(gene.hic.filt, "median_FDR.H", "adj.P.Val", "Median FDR of Hi-C Contacts Overlapping Gene, Human")
#enrichment.plotter(gene.hic.filt, "median_FDR.C", "adj.P.Val", "Median FDR of Hi-C Contacts Overlapping Gene, Chimp")
enrichment.plotter(gene.hic.VC, "weighted_Z.s2post.H", "adj.P.Val", "FDR for Weighted p-val Combine of VC Hi-C Contacts Overlapping Gene", xmax=1)
DEFDR DHICFDR prop.obs prop.exp chisq.p Dneither DE DHiC Dboth
101 0.025 0.01 0.5219298 0.4828221 0.22907489 734 109 668 119
102 0.025 0.02 0.5526316 0.5036810 0.12786275 707 102 695 126
103 0.025 0.03 0.5745614 0.5226994 0.10544398 681 97 721 131
104 0.025 0.04 0.5877193 0.5337423 0.09101518 666 94 736 134
105 0.025 0.05 0.5964912 0.5466258 0.11896633 647 92 755 136
106 0.025 0.06 0.6096491 0.5539877 0.07989392 638 89 764 139
107 0.025 0.07 0.6271930 0.5631902 0.04246358 627 85 775 143
108 0.025 0.08 0.6403509 0.5699387 0.02486832 619 82 783 146
109 0.025 0.09 0.6535088 0.5791411 0.01730370 607 79 795 149
110 0.025 0.10 0.6578947 0.5815951 0.01444978 604 78 798 150
111 0.025 0.11 0.6622807 0.5858896 0.01418556 598 77 804 151
112 0.025 0.12 0.6622807 0.5901840 0.02065730 591 77 811 151
113 0.025 0.13 0.6622807 0.5944785 0.02958599 584 77 818 151
114 0.025 0.14 0.6710526 0.6018405 0.02581156 574 75 828 153
115 0.025 0.15 0.6710526 0.6055215 0.03486092 568 75 834 153
116 0.025 0.16 0.6710526 0.6104294 0.05108028 560 75 842 153
117 0.025 0.17 0.6710526 0.6141104 0.06708667 554 75 848 153
118 0.025 0.18 0.6798246 0.6190184 0.04940579 548 73 854 155
119 0.025 0.19 0.6842105 0.6239264 0.05087720 541 72 861 156
120 0.025 0.20 0.6842105 0.6257669 0.05842344 538 72 864 156
121 0.025 0.21 0.6842105 0.6269939 0.06396079 536 72 866 156
122 0.025 0.22 0.6885965 0.6306748 0.06010761 531 71 871 157
123 0.025 0.23 0.6929825 0.6368098 0.06763219 522 70 880 158
124 0.025 0.24 0.6929825 0.6423313 0.09976023 513 70 889 158
125 0.025 0.25 0.6973684 0.6453988 0.09025385 509 69 893 159
126 0.025 0.26 0.7017544 0.6478528 0.07797052 506 68 896 160
127 0.025 0.27 0.7061404 0.6503067 0.06704398 503 67 899 161
128 0.025 0.28 0.7236842 0.6546012 0.02200172 500 63 902 165
129 0.025 0.29 0.7324561 0.6588957 0.01424774 495 61 907 167
130 0.025 0.30 0.7324561 0.6607362 0.01680808 492 61 910 167
131 0.025 0.31 0.7368421 0.6638037 0.01462045 488 60 914 168
132 0.025 0.32 0.7368421 0.6650307 0.01632663 486 60 916 168
133 0.025 0.33 0.7368421 0.6656442 0.01724451 485 60 917 168
134 0.025 0.34 0.7368421 0.6687117 0.02255725 480 60 922 168
135 0.025 0.35 0.7412281 0.6699387 0.01674004 479 59 923 169
136 0.025 0.36 0.7412281 0.6717791 0.01970266 476 59 926 169
137 0.025 0.37 0.7412281 0.6748466 0.02568183 471 59 931 169
138 0.025 0.38 0.7456140 0.6785276 0.02368458 466 58 936 170
139 0.025 0.39 0.7456140 0.6803681 0.02770891 463 58 939 170
140 0.025 0.40 0.7456140 0.6822086 0.03232147 460 58 942 170
141 0.025 0.41 0.7500000 0.6840491 0.02555596 458 57 944 171
142 0.025 0.42 0.7543860 0.6871166 0.02230723 454 56 948 172
143 0.025 0.43 0.7587719 0.6883436 0.01645828 453 55 949 173
144 0.025 0.44 0.7587719 0.6895706 0.01837320 451 55 951 173
145 0.025 0.45 0.7587719 0.6938650 0.02673086 444 55 958 173
146 0.025 0.46 0.7587719 0.6950920 0.02966572 442 55 960 173
147 0.025 0.47 0.7587719 0.6987730 0.04023061 436 55 966 173
148 0.025 0.48 0.7587719 0.7012270 0.04896639 432 55 970 173
149 0.025 0.49 0.7631579 0.7036810 0.04110648 429 54 973 174
150 0.025 0.50 0.7631579 0.7061350 0.05003213 425 54 977 174
151 0.025 0.51 0.7631579 0.7067485 0.05250793 424 54 978 174
152 0.025 0.52 0.7631579 0.7079755 0.05777576 422 54 980 174
153 0.025 0.53 0.7631579 0.7092025 0.06348788 420 54 982 174
154 0.025 0.54 0.7631579 0.7104294 0.06967225 418 54 984 174
155 0.025 0.55 0.7631579 0.7122699 0.07989711 415 54 987 174
156 0.025 0.56 0.7631579 0.7141104 0.09134851 412 54 990 174
157 0.025 0.57 0.7675439 0.7159509 0.07450337 410 53 992 175
158 0.025 0.58 0.7675439 0.7208589 0.10634338 402 53 1000 175
159 0.025 0.59 0.7675439 0.7233129 0.12603400 398 53 1004 175
160 0.025 0.60 0.7763158 0.7263804 0.08123637 395 51 1007 177
161 0.025 0.61 0.7807018 0.7282209 0.06571016 393 50 1009 178
162 0.025 0.62 0.7807018 0.7306748 0.07915871 389 50 1013 178
163 0.025 0.63 0.7807018 0.7331288 0.09484812 385 50 1017 178
164 0.025 0.64 0.7807018 0.7343558 0.10361308 383 50 1019 178
165 0.025 0.65 0.7807018 0.7349693 0.10823986 382 50 1020 178
166 0.025 0.66 0.7807018 0.7368098 0.12314653 379 50 1023 178
167 0.025 0.67 0.7807018 0.7386503 0.13967850 376 50 1026 178
168 0.025 0.68 0.7850877 0.7417178 0.12559558 372 49 1030 179
169 0.025 0.69 0.7850877 0.7429448 0.13665236 370 49 1032 179
170 0.025 0.70 0.7894737 0.7447853 0.11252583 368 48 1034 180
171 0.025 0.71 0.7894737 0.7472393 0.13359564 364 48 1038 180
172 0.025 0.72 0.7938596 0.7527607 0.14202181 356 47 1046 181
173 0.025 0.73 0.7938596 0.7539877 0.15433701 354 47 1048 181
174 0.025 0.74 0.7938596 0.7558282 0.17438696 351 47 1051 181
175 0.025 0.75 0.7938596 0.7588957 0.21227493 346 47 1056 181
176 0.025 0.76 0.7982456 0.7619632 0.19249935 342 46 1060 182
177 0.025 0.77 0.8026316 0.7644172 0.16696881 339 45 1063 183
178 0.025 0.78 0.8026316 0.7644172 0.16696881 339 45 1063 183
179 0.025 0.79 0.8070175 0.7674847 0.15011400 335 44 1067 184
180 0.025 0.80 0.8114035 0.7711656 0.14034315 330 43 1072 185
181 0.025 0.81 0.8114035 0.7717791 0.14647156 329 43 1073 185
182 0.025 0.82 0.8114035 0.7742331 0.17316659 325 43 1077 185
183 0.025 0.83 0.8114035 0.7748466 0.18040810 324 43 1078 185
184 0.025 0.84 0.8157895 0.7766871 0.14904141 322 42 1080 186
185 0.025 0.85 0.8201754 0.7809816 0.14522864 316 41 1086 187
186 0.025 0.86 0.8201754 0.7871166 0.21957730 306 41 1096 187
187 0.025 0.87 0.8201754 0.7907975 0.27651749 300 41 1102 187
188 0.025 0.88 0.8245614 0.7963190 0.29229895 292 40 1110 188
189 0.025 0.89 0.8245614 0.8030675 0.42941752 281 40 1121 188
190 0.025 0.90 0.8245614 0.8055215 0.48830870 277 40 1125 188
191 0.025 0.91 0.8289474 0.8079755 0.43762365 274 39 1128 189
192 0.025 0.92 0.8333333 0.8159509 0.52336925 262 38 1140 190
193 0.025 0.93 0.8377193 0.8276074 0.73284982 244 37 1158 191
194 0.025 0.94 0.8377193 0.8337423 0.93781706 234 37 1168 191
195 0.025 0.95 0.8552632 0.8392638 0.54052911 229 33 1173 195
196 0.025 0.96 0.8552632 0.8435583 0.66988567 222 33 1180 195
197 0.025 0.97 0.8640351 0.8539877 0.71724625 207 31 1195 197
198 0.025 0.98 0.8859649 0.8644172 0.35732592 195 26 1207 202
199 0.025 0.99 0.9122807 0.8858896 0.21529733 166 20 1236 208
200 0.025 1.00 1.0000000 1.0000000 0.00000000 0 0 1402 228
enrichment.plotter(gene.hic.KR, "weighted_Z.s2post.H", "adj.P.Val", "FDR for Weighted p-val Combine of KR Hi-C Contacts Overlapping Gene", xmax=1)
DEFDR DHICFDR prop.obs prop.exp chisq.p Dneither DE DHiC Dboth
101 0.025 0.01 0.5919540 0.5518053 0.2873130 500 71 600 103
102 0.025 0.02 0.6206897 0.5745683 0.2143213 476 66 624 108
103 0.025 0.03 0.6436782 0.5902669 0.1446126 460 62 640 112
104 0.025 0.04 0.6494253 0.6036107 0.2126821 444 61 656 113
105 0.025 0.05 0.6494253 0.6083203 0.2661959 438 61 662 113
106 0.025 0.06 0.6551724 0.6106750 0.2255754 436 60 664 114
107 0.025 0.07 0.6666667 0.6169545 0.1713818 430 58 670 116
108 0.025 0.08 0.6666667 0.6216641 0.2175070 424 58 676 116
109 0.025 0.09 0.6724138 0.6271586 0.2134258 418 57 682 117
110 0.025 0.10 0.6896552 0.6357928 0.1325295 410 54 690 120
111 0.025 0.11 0.6954023 0.6405024 0.1237700 405 53 695 121
112 0.025 0.12 0.6954023 0.6444270 0.1537234 400 53 700 121
113 0.025 0.13 0.6954023 0.6514914 0.2214840 391 53 709 121
114 0.025 0.14 0.6954023 0.6514914 0.2214840 391 53 709 121
115 0.025 0.15 0.6954023 0.6554160 0.2675974 386 53 714 121
116 0.025 0.16 0.7011494 0.6593407 0.2435148 382 52 718 122
117 0.025 0.17 0.7011494 0.6616954 0.2723963 379 52 721 122
118 0.025 0.18 0.7068966 0.6679749 0.2772023 372 51 728 123
119 0.025 0.19 0.7068966 0.6703297 0.3089288 369 51 731 123
120 0.025 0.20 0.7068966 0.6734694 0.3550071 365 51 735 123
121 0.025 0.21 0.7126437 0.6781790 0.3370799 360 50 740 124
122 0.025 0.22 0.7126437 0.6852433 0.4534289 351 50 749 124
123 0.025 0.23 0.7126437 0.6868132 0.4822571 349 50 751 124
124 0.025 0.24 0.7183908 0.6883830 0.4055917 348 49 752 125
125 0.025 0.25 0.7183908 0.6907378 0.4466055 345 49 755 125
126 0.025 0.26 0.7241379 0.6962323 0.4396997 339 48 761 126
127 0.025 0.27 0.7298851 0.6978022 0.3665430 338 47 762 127
128 0.025 0.28 0.7356322 0.6993721 0.3013083 337 46 763 128
129 0.025 0.29 0.7356322 0.7040816 0.3724654 331 46 769 128
130 0.025 0.30 0.7356322 0.7072214 0.4256303 327 46 773 128
131 0.025 0.31 0.7413793 0.7080063 0.3409714 327 45 773 129
132 0.025 0.32 0.7471264 0.7103611 0.2888295 325 44 775 130
133 0.025 0.33 0.7471264 0.7103611 0.2888295 325 44 775 130
134 0.025 0.34 0.7528736 0.7127159 0.2421232 323 43 777 131
135 0.025 0.35 0.7586207 0.7142857 0.1926150 322 42 778 132
136 0.025 0.36 0.7586207 0.7158556 0.2092502 320 42 780 132
137 0.025 0.37 0.7586207 0.7189953 0.2457615 316 42 784 132
138 0.025 0.38 0.7586207 0.7205651 0.2656961 314 42 786 132
139 0.025 0.39 0.7586207 0.7221350 0.2867836 312 42 788 132
140 0.025 0.40 0.7586207 0.7221350 0.2867836 312 42 788 132
141 0.025 0.41 0.7586207 0.7252747 0.3324947 308 42 792 132
142 0.025 0.42 0.7586207 0.7260597 0.3446697 307 42 793 132
143 0.025 0.43 0.7643678 0.7284144 0.2910577 305 41 795 133
144 0.025 0.44 0.7701149 0.7307692 0.2431008 303 40 797 134
145 0.025 0.45 0.7701149 0.7315542 0.2529535 302 40 798 134
146 0.025 0.46 0.7701149 0.7339089 0.2842756 299 40 801 134
147 0.025 0.47 0.7701149 0.7386185 0.3550914 293 40 807 134
148 0.025 0.48 0.7701149 0.7394035 0.3679744 292 40 808 134
149 0.025 0.49 0.7758621 0.7409733 0.2995502 291 39 809 135
150 0.025 0.50 0.7758621 0.7417582 0.3110771 290 39 810 135
151 0.025 0.51 0.7758621 0.7425432 0.3229145 289 39 811 135
152 0.025 0.52 0.7816092 0.7441130 0.2600110 288 38 812 136
153 0.025 0.53 0.7816092 0.7441130 0.2600110 288 38 812 136
154 0.025 0.54 0.7816092 0.7448980 0.2704876 287 38 813 136
155 0.025 0.55 0.7816092 0.7472527 0.3037634 284 38 816 136
156 0.025 0.56 0.7816092 0.7511774 0.3655209 279 38 821 136
157 0.025 0.57 0.7873563 0.7527473 0.2963600 278 37 822 137
158 0.025 0.58 0.7873563 0.7543171 0.3198609 276 37 824 137
159 0.025 0.59 0.7873563 0.7551020 0.3320930 275 37 825 137
160 0.025 0.60 0.7873563 0.7566719 0.3575289 273 37 827 137
161 0.025 0.61 0.7873563 0.7590267 0.3981275 270 37 830 137
162 0.025 0.62 0.7873563 0.7613815 0.4416625 267 37 833 137
163 0.025 0.63 0.7873563 0.7621664 0.4568228 266 37 834 137
164 0.025 0.64 0.7873563 0.7629513 0.4723044 265 37 835 137
165 0.025 0.65 0.7873563 0.7668760 0.5544299 260 37 840 137
166 0.025 0.66 0.7931034 0.7676609 0.4480750 260 36 840 138
167 0.025 0.67 0.7988506 0.7692308 0.3674960 259 35 841 139
168 0.025 0.68 0.8045977 0.7715856 0.3081386 257 34 843 140
169 0.025 0.69 0.8045977 0.7770801 0.4005951 250 34 850 140
170 0.025 0.70 0.8045977 0.7778650 0.4151801 249 34 851 140
171 0.025 0.71 0.8045977 0.7802198 0.4609993 246 34 854 140
172 0.025 0.72 0.8045977 0.7833595 0.5268371 242 34 858 140
173 0.025 0.73 0.8045977 0.7841444 0.5441232 241 34 859 140
174 0.025 0.74 0.8103448 0.7872841 0.4837525 238 33 862 141
175 0.025 0.75 0.8103448 0.7888540 0.5172609 236 33 864 141
176 0.025 0.76 0.8103448 0.7904239 0.5521151 234 33 866 141
177 0.025 0.77 0.8103448 0.7935636 0.6256979 230 33 870 141
178 0.025 0.78 0.8103448 0.7967033 0.7040755 226 33 874 141
179 0.025 0.79 0.8103448 0.7974882 0.7243542 225 33 875 141
180 0.025 0.80 0.8218391 0.8037677 0.5869620 219 31 881 143
181 0.025 0.81 0.8218391 0.8045526 0.6058766 218 31 882 143
182 0.025 0.82 0.8218391 0.8045526 0.6058766 218 31 882 143
183 0.025 0.83 0.8218391 0.8092622 0.7258825 212 31 888 143
184 0.025 0.84 0.8218391 0.8139717 0.8554424 206 31 894 143
185 0.025 0.85 0.8275862 0.8163265 0.7585041 204 30 896 144
186 0.025 0.86 0.8333333 0.8194662 0.6849286 201 29 899 145
187 0.025 0.87 0.8333333 0.8233909 0.7924348 196 29 904 145
188 0.025 0.88 0.8390805 0.8257457 0.6954288 194 28 906 146
189 0.025 0.89 0.8390805 0.8288854 0.7825661 190 28 910 146
190 0.025 0.90 0.8390805 0.8351648 0.9681968 182 28 918 146
191 0.025 0.91 0.8448276 0.8383046 0.8880944 179 27 921 147
192 0.025 0.92 0.8448276 0.8398744 0.9358379 177 27 923 147
193 0.025 0.93 0.8563218 0.8430141 0.6838856 175 25 925 149
194 0.025 0.94 0.8678161 0.8516484 0.5954581 166 23 934 151
195 0.025 0.95 0.8735632 0.8571429 0.5826152 160 22 940 152
196 0.025 0.96 0.8735632 0.8618524 0.7161775 154 22 946 152
197 0.025 0.97 0.8793103 0.8642072 0.6123034 152 21 948 153
198 0.025 0.98 0.8850575 0.8720565 0.6668997 143 20 957 154
199 0.025 0.99 0.8908046 0.8838305 0.8558436 129 19 971 155
200 0.025 1.00 1.0000000 1.0000000 0.0000000 0 0 1100 174
enrichment.plotter(gene.hic.VC, "weighted_Z.ALLvar.H", "adj.P.Val", "FDR for Weighted p-val Combine of VC Hi-C Contacts Overlapping Gene", xmax=1)
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
DEFDR DHICFDR prop.obs prop.exp chisq.p Dneither DE DHiC Dboth
101 0.025 0.01 0.1622807 0.1361963 0.25676655 1217 191 185 37
102 0.025 0.02 0.1929825 0.1766871 0.54717641 1158 184 244 44
103 0.025 0.03 0.2192982 0.1993865 0.47026993 1127 178 275 50
104 0.025 0.04 0.2719298 0.2263804 0.09164595 1095 166 307 62
105 0.025 0.05 0.2938596 0.2441718 0.07185914 1071 161 331 67
106 0.025 0.06 0.3026316 0.2558282 0.09599076 1054 159 348 69
107 0.025 0.07 0.3201754 0.2742331 0.11035166 1028 155 374 73
108 0.025 0.08 0.3289474 0.2852761 0.13476600 1012 153 390 75
109 0.025 0.09 0.3421053 0.3006135 0.16289065 990 150 412 78
110 0.025 0.10 0.3552632 0.3141104 0.17175699 971 147 431 81
111 0.025 0.11 0.3684211 0.3276074 0.18033127 952 144 450 84
112 0.025 0.12 0.3815789 0.3466258 0.26237939 924 141 478 87
113 0.025 0.13 0.3903509 0.3668712 0.47207520 893 139 509 89
114 0.025 0.14 0.4122807 0.3791411 0.29904069 878 134 524 94
115 0.025 0.15 0.4210526 0.3871166 0.28867753 867 132 535 96
116 0.025 0.16 0.4342105 0.3975460 0.25145941 853 129 549 99
117 0.025 0.17 0.4429825 0.4128834 0.35610932 830 127 572 101
118 0.025 0.18 0.4561404 0.4202454 0.26628774 821 124 581 104
119 0.025 0.19 0.4649123 0.4343558 0.35151820 800 122 602 106
120 0.025 0.20 0.4692982 0.4392638 0.36105852 793 121 609 107
121 0.025 0.21 0.4868421 0.4472393 0.22057666 784 117 618 111
122 0.025 0.22 0.4956140 0.4564417 0.22676521 771 115 631 113
123 0.025 0.23 0.5175439 0.4687117 0.12809178 756 110 646 118
124 0.025 0.24 0.5263158 0.4803681 0.15390862 739 108 663 120
125 0.025 0.25 0.5394737 0.4901840 0.12505954 726 105 676 123
126 0.025 0.26 0.5482456 0.5061350 0.19363005 702 103 700 125
127 0.025 0.27 0.5482456 0.5134969 0.28892624 690 103 712 125
128 0.025 0.28 0.5614035 0.5233129 0.24192522 677 100 725 128
129 0.025 0.29 0.5833333 0.5368098 0.14777399 660 95 742 133
130 0.025 0.30 0.5833333 0.5435583 0.21928372 649 95 753 133
131 0.025 0.31 0.5877193 0.5496933 0.24094883 640 94 762 134
132 0.025 0.32 0.6008772 0.5582822 0.18529777 629 91 773 137
133 0.025 0.33 0.6096491 0.5674847 0.18898445 616 89 786 139
134 0.025 0.34 0.6184211 0.5736196 0.16069915 608 87 794 141
135 0.025 0.35 0.6228070 0.5809816 0.19094222 597 86 805 142
136 0.025 0.36 0.6271930 0.5852761 0.18926982 591 85 811 143
137 0.025 0.37 0.6359649 0.5938650 0.18583914 579 83 823 145
138 0.025 0.38 0.6403509 0.6042945 0.25954094 563 82 839 146
139 0.025 0.39 0.6491228 0.6110429 0.23072437 554 80 848 148
140 0.025 0.40 0.6578947 0.6165644 0.19002059 547 78 855 150
141 0.025 0.41 0.6666667 0.6233129 0.16665957 538 76 864 152
142 0.025 0.42 0.6754386 0.6331288 0.17534683 524 74 878 154
143 0.025 0.43 0.6929825 0.6417178 0.09566872 514 70 888 158
144 0.025 0.44 0.6973684 0.6472393 0.10239636 506 69 896 159
145 0.025 0.45 0.7105263 0.6552147 0.06882517 496 66 906 162
146 0.025 0.46 0.7105263 0.6619632 0.11049384 485 66 917 162
147 0.025 0.47 0.7149123 0.6668712 0.11325464 478 65 924 163
148 0.025 0.48 0.7236842 0.6748466 0.10497047 467 63 935 165
149 0.025 0.49 0.7324561 0.6822086 0.09289489 457 61 945 167
150 0.025 0.50 0.7368421 0.6895706 0.11267026 446 60 956 168
151 0.025 0.51 0.7456140 0.6932515 0.07651190 442 58 960 170
152 0.025 0.52 0.7456140 0.6993865 0.11791809 432 58 970 170
153 0.025 0.53 0.7631579 0.7073620 0.05508802 423 54 979 174
154 0.025 0.54 0.7631579 0.7104294 0.06967225 418 54 984 174
155 0.025 0.55 0.7675439 0.7184049 0.08924920 406 53 996 175
156 0.025 0.56 0.7719298 0.7239264 0.09524241 398 52 1004 176
157 0.025 0.57 0.7719298 0.7300613 0.14563742 388 52 1014 176
158 0.025 0.58 0.7807018 0.7368098 0.12314653 379 50 1023 178
159 0.025 0.59 0.7894737 0.7411043 0.08609842 374 48 1028 180
160 0.025 0.60 0.7894737 0.7453988 0.11751957 367 48 1035 180
161 0.025 0.61 0.7894737 0.7546012 0.21630109 352 48 1050 180
162 0.025 0.62 0.7982456 0.7619632 0.19249935 342 46 1060 182
163 0.025 0.63 0.8070175 0.7687117 0.16319330 333 44 1069 184
164 0.025 0.64 0.8114035 0.7742331 0.17316659 325 43 1077 185
165 0.025 0.65 0.8157895 0.7822086 0.21566617 313 42 1089 186
166 0.025 0.66 0.8201754 0.7895706 0.25643777 302 41 1100 187
167 0.025 0.67 0.8245614 0.7957055 0.28161943 293 40 1109 188
168 0.025 0.68 0.8245614 0.7987730 0.33795762 288 40 1114 188
169 0.025 0.69 0.8289474 0.8042945 0.35668894 280 39 1122 189
170 0.025 0.70 0.8333333 0.8098160 0.37633186 272 38 1130 190
171 0.025 0.71 0.8333333 0.8184049 0.59067725 258 38 1144 190
172 0.025 0.72 0.8333333 0.8208589 0.66245528 254 38 1148 190
173 0.025 0.73 0.8508772 0.8282209 0.37707699 246 34 1156 194
174 0.025 0.74 0.8552632 0.8331288 0.38388918 239 33 1163 195
175 0.025 0.75 0.8640351 0.8404908 0.34241374 229 31 1173 197
176 0.025 0.76 0.8684211 0.8490798 0.43542672 216 30 1186 198
177 0.025 0.77 0.8728070 0.8552147 0.47615129 207 29 1195 199
178 0.025 0.78 0.8815789 0.8656442 0.51179611 192 27 1210 201
179 0.025 0.79 0.8815789 0.8711656 0.68952958 183 27 1219 201
180 0.025 0.80 0.8903509 0.8809816 0.71822985 169 25 1233 203
181 0.025 0.81 0.8991228 0.8877301 0.63517810 160 23 1242 205
182 0.025 0.82 0.9078947 0.8944785 0.55199675 151 21 1251 207
183 0.025 0.83 0.9210526 0.9042945 0.42019543 138 18 1264 210
184 0.025 0.84 0.9298246 0.9092025 0.29634739 132 16 1270 212
185 0.025 0.85 0.9342105 0.9128834 0.26929726 127 15 1275 213
186 0.025 0.86 0.9385965 0.9202454 0.33151616 116 14 1286 214
187 0.025 0.87 0.9385965 0.9251534 0.48638596 108 14 1294 214
188 0.025 0.88 0.9473684 0.9319018 0.39096247 99 12 1303 216
189 0.025 0.89 0.9473684 0.9374233 0.60228642 90 12 1312 216
190 0.025 0.90 0.9517544 0.9423313 0.61358405 83 11 1319 217
191 0.025 0.91 0.9561404 0.9472393 0.62516609 76 10 1326 218
192 0.025 0.92 0.9561404 0.9533742 0.96469811 66 10 1336 218
193 0.025 0.93 0.9561404 0.9546012 1.00000000 64 10 1338 218
194 0.025 0.94 0.9649123 0.9625767 0.99023930 53 8 1349 220
195 0.025 0.95 0.9736842 0.9674847 0.71303401 47 6 1355 222
196 0.025 0.96 0.9868421 0.9748466 0.30810997 38 3 1364 225
197 0.025 0.97 0.9912281 0.9773006 0.19959242 35 2 1367 226
198 0.025 0.98 0.9912281 0.9822086 0.40047575 27 2 1375 226
199 0.025 0.99 0.9956140 0.9938650 1.00000000 9 1 1393 227
200 0.025 1.00 1.0000000 1.0000000 0.00000000 0 0 1402 228
enrichment.plotter(gene.hic.KR, "weighted_Z.ALLvar.H", "adj.P.Val", "FDR for Weighted p-val Combine of KR Hi-C Contacts Overlapping Gene", xmax=1)
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
DEFDR DHICFDR prop.obs prop.exp chisq.p Dneither DE DHiC Dboth
101 0.025 0.01 0.1149425 0.1145997 1.0000000 974 154 126 20
102 0.025 0.02 0.1666667 0.1742543 0.8599647 907 145 193 29
103 0.025 0.03 0.2241379 0.2189953 0.9379175 860 135 240 39
104 0.025 0.04 0.2758621 0.2519623 0.4917466 827 126 273 48
105 0.025 0.05 0.3103448 0.2723705 0.2630143 807 120 293 54
106 0.025 0.06 0.3275862 0.2990581 0.4263562 776 117 324 57
107 0.025 0.07 0.3563218 0.3265306 0.4151562 746 112 354 62
108 0.025 0.08 0.3678161 0.3524333 0.7101029 715 110 385 64
109 0.025 0.09 0.3793103 0.3673469 0.7889540 698 108 402 66
110 0.025 0.10 0.3793103 0.3806907 1.0000000 681 108 419 66
111 0.025 0.11 0.4080460 0.3940345 0.7462602 669 103 431 71
112 0.025 0.12 0.4252874 0.4058085 0.6311919 657 100 443 74
113 0.025 0.13 0.4367816 0.4230769 0.7556332 637 98 463 76
114 0.025 0.14 0.4367816 0.4324961 0.9677273 625 98 475 76
115 0.025 0.15 0.4540230 0.4427002 0.8091824 615 95 485 79
116 0.025 0.16 0.4885057 0.4552590 0.3865878 605 89 495 85
117 0.025 0.17 0.4942529 0.4615385 0.3954622 598 88 502 86
118 0.025 0.18 0.5000000 0.4733124 0.4983497 584 87 516 87
119 0.025 0.19 0.5287356 0.4843014 0.2377760 575 82 525 92
120 0.025 0.20 0.5287356 0.4952904 0.3853819 561 82 539 92
121 0.025 0.21 0.5287356 0.5015699 0.4903809 553 82 547 92
122 0.025 0.22 0.5402299 0.5156986 0.5384190 537 80 563 94
123 0.025 0.23 0.5574713 0.5337520 0.5530465 517 77 583 97
124 0.025 0.24 0.5574713 0.5392465 0.6619677 510 77 590 97
125 0.025 0.25 0.5632184 0.5455259 0.6726667 503 76 597 98
126 0.025 0.26 0.5632184 0.5572998 0.9306509 488 76 612 98
127 0.025 0.27 0.5747126 0.5698587 0.9547179 474 74 626 100
128 0.025 0.28 0.5919540 0.5816327 0.8302877 462 71 638 103
129 0.025 0.29 0.6149425 0.5879121 0.4859837 458 67 642 107
130 0.025 0.30 0.6149425 0.5973312 0.6696771 446 67 654 107
131 0.025 0.31 0.6264368 0.6043956 0.5778913 439 65 661 109
132 0.025 0.32 0.6379310 0.6106750 0.4777848 433 63 667 111
133 0.025 0.33 0.6436782 0.6177394 0.5004319 425 62 675 112
134 0.025 0.34 0.6436782 0.6240188 0.6227538 417 62 683 112
135 0.025 0.35 0.6551724 0.6310832 0.5325076 410 60 690 114
136 0.025 0.36 0.6609195 0.6389325 0.5721334 401 59 699 115
137 0.025 0.37 0.6666667 0.6444270 0.5657513 395 58 705 116
138 0.025 0.38 0.6724138 0.6491366 0.5439018 390 57 710 117
139 0.025 0.39 0.6724138 0.6538462 0.6395684 384 57 716 117
140 0.025 0.40 0.6896552 0.6624804 0.4656651 376 54 724 120
141 0.025 0.41 0.6954023 0.6687598 0.4734283 369 53 731 121
142 0.025 0.42 0.6954023 0.6726845 0.5482686 364 53 736 121
143 0.025 0.43 0.6954023 0.6805338 0.7149655 354 53 746 121
144 0.025 0.44 0.7068966 0.6844584 0.5500873 351 51 749 123
145 0.025 0.45 0.7183908 0.6891680 0.4189891 347 49 753 125
146 0.025 0.46 0.7298851 0.6915228 0.2753680 346 47 754 127
147 0.025 0.47 0.7356322 0.7009419 0.3238852 335 46 765 128
148 0.025 0.48 0.7413793 0.7040816 0.2843481 332 45 768 129
149 0.025 0.49 0.7471264 0.7072214 0.2479759 329 44 771 130
150 0.025 0.50 0.7471264 0.7158556 0.3714097 318 44 782 130
151 0.025 0.51 0.7586207 0.7213501 0.2760944 313 42 787 132
152 0.025 0.52 0.7643678 0.7299843 0.3136785 303 41 797 133
153 0.025 0.53 0.7643678 0.7339089 0.3755397 298 41 802 133
154 0.025 0.54 0.7758621 0.7448980 0.3603058 286 39 814 135
155 0.025 0.55 0.7758621 0.7503925 0.4585858 279 39 821 135
156 0.025 0.56 0.7758621 0.7574568 0.6069666 270 39 830 135
157 0.025 0.57 0.7816092 0.7653061 0.6528283 261 38 839 136
158 0.025 0.58 0.7816092 0.7692308 0.7487775 256 38 844 136
159 0.025 0.59 0.7873563 0.7731554 0.7010032 252 37 848 137
160 0.025 0.60 0.7931034 0.7802198 0.7314768 244 36 856 138
161 0.025 0.61 0.7931034 0.7841444 0.8336838 239 36 861 138
162 0.025 0.62 0.7931034 0.7849294 0.8546904 238 36 862 138
163 0.025 0.63 0.7988506 0.7888540 0.8043177 234 35 866 139
164 0.025 0.64 0.8045977 0.7967033 0.8594275 225 34 875 140
165 0.025 0.65 0.8218391 0.8061224 0.6446717 216 31 884 143
166 0.025 0.66 0.8218391 0.8092622 0.7258825 212 31 888 143
167 0.025 0.67 0.8275862 0.8186813 0.8241371 201 30 899 144
168 0.025 0.68 0.8390805 0.8281005 0.7603588 191 28 909 146
169 0.025 0.69 0.8505747 0.8351648 0.6314783 184 26 916 148
170 0.025 0.70 0.8505747 0.8367347 0.6736090 182 26 918 148
171 0.025 0.71 0.8505747 0.8414443 0.8078706 176 26 924 148
172 0.025 0.72 0.8620690 0.8485086 0.6721908 169 24 931 150
173 0.025 0.73 0.8620690 0.8547881 0.8590482 161 24 939 150
174 0.025 0.74 0.8678161 0.8594976 0.8239815 156 23 944 151
175 0.025 0.75 0.8735632 0.8634223 0.7638518 152 22 948 152
176 0.025 0.76 0.8793103 0.8681319 0.7275054 147 21 953 153
177 0.025 0.77 0.8850575 0.8744113 0.7391619 140 20 960 154
178 0.025 0.78 0.8850575 0.8806907 0.9478620 132 20 968 154
179 0.025 0.79 0.8850575 0.8822606 1.0000000 130 20 970 154
180 0.025 0.80 0.8965517 0.8893250 0.8438452 123 18 977 156
181 0.025 0.81 0.9022989 0.8924647 0.7497552 120 17 980 157
182 0.025 0.82 0.9022989 0.8956044 0.8592023 116 17 984 157
183 0.025 0.83 0.9022989 0.9018838 1.0000000 108 17 992 157
184 0.025 0.84 0.9022989 0.9089482 0.8521950 99 17 1001 157
185 0.025 0.85 0.9195402 0.9152276 0.9415352 94 14 1006 160
186 0.025 0.86 0.9252874 0.9207221 0.9291722 88 13 1012 161
187 0.025 0.87 0.9252874 0.9301413 0.9121818 76 13 1024 161
188 0.025 0.88 0.9425287 0.9356358 0.8161399 72 10 1028 164
189 0.025 0.89 0.9425287 0.9379906 0.9219472 69 10 1031 164
190 0.025 0.90 0.9597701 0.9442700 0.4345871 64 7 1036 167
191 0.025 0.91 0.9597701 0.9481947 0.5772773 59 7 1041 167
192 0.025 0.92 0.9597701 0.9521193 0.7507696 54 7 1046 167
193 0.025 0.93 0.9655172 0.9591837 0.8039487 46 6 1054 168
194 0.025 0.94 0.9770115 0.9686028 0.6522932 36 4 1064 170
195 0.025 0.95 0.9827586 0.9740973 0.6049841 30 3 1070 171
196 0.025 0.96 0.9827586 0.9819466 1.0000000 20 3 1080 171
197 0.025 0.97 0.9827586 0.9866562 0.8991846 14 3 1086 171
198 0.025 0.98 0.9942529 0.9937206 1.0000000 7 1 1093 173
199 0.025 0.99 1.0000000 0.9968603 0.9461552 4 0 1096 174
200 0.025 1.00 1.0000000 1.0000000 0.0000000 0 0 1100 174
#VCs appear to be better on both, but we'll see in final combining of it--pretty sure it's the second set of doing it that's wthe way I did it for HOMER, but double-check.
Now that I’ve seen some enrichment of differential contact (DC) in differential expression (DE) based on my linear modeling results from before, I would like to further quantify this effect. In order to do so, I now extract log2 observed/expected homer-normalized contact frequency values and RPKM expression values for each gene/bin set, so I can look at correlations of these values and assess their explanatory power.
In this section, I proceed to create a function in order to extract the Hi-C interaction frequency values for the different types of summaries I’ve made gene overlaps with above. This allows for operations to be performed separately utilizing different summaries of Hi-C contacts.
######Get a df with the H and C coordinates of the hits, and the IF values from homer. This subset df makes things easier to extract. Both for VC and KR.
###VC
contacts.VC <- data.frame(h1=data.VC$H1, h2=data.VC$H2, c1=data.VC$C1, c2=data.VC$C2, A_21792_HIC=data.VC$`A-21792_VC`, B_28126_HIC=data.VC$`B-28126_VC`, C_3649_HIC=data.VC$`C-3649_VC`, D_40300_HIC=data.VC$`D-40300_VC`, E_28815_HIC=data.VC$`E-28815_VC`, F_28834_HIC=data.VC$`F-28834_VC`, G_3624_HIC=data.VC$`G-3624_VC`, H_3651_HIC=data.VC$`H-3651_VC`, stringsAsFactors = FALSE)
newH1 <- as.numeric(gsub(".*-", "", contacts.VC$h1))
newH2 <- as.numeric(gsub(".*-", "", contacts.VC$h2))
lower.HID <- ifelse(newH1<newH2, contacts.VC$h1, contacts.VC$h2)
higher.HID2 <- ifelse(newH1<newH2, contacts.VC$h2, contacts.VC$h1)
contacts.VC$hpair <- paste(lower.HID, higher.HID2, sep="_")
newC1 <- as.numeric(gsub(".*-", "", contacts.VC$c1))
newC2 <- as.numeric(gsub(".*-", "", contacts.VC$c2))
lower.CID <- ifelse(newC1<newC2, contacts.VC$c1, contacts.VC$c2)
higher.CID2 <- ifelse(newC1<newC2, contacts.VC$c2, contacts.VC$c1)
contacts.VC$cpair <- paste(lower.CID, higher.CID2, sep="_")
###KR
contacts.KR <- data.frame(h1=data.KR$H1, h2=data.KR$H2, c1=data.KR$C1, c2=data.KR$C2, A_21792_HIC=data.KR$`A-21792_KR`, B_28126_HIC=data.KR$`B-28126_KR`, C_3649_HIC=data.KR$`C-3649_KR`, D_40300_HIC=data.KR$`D-40300_KR`, E_28815_HIC=data.KR$`E-28815_KR`, F_28834_HIC=data.KR$`F-28834_KR`, G_3624_HIC=data.KR$`G-3624_KR`, H_3651_HIC=data.KR$`H-3651_KR`, stringsAsFactors = FALSE)
newH1 <- as.numeric(gsub(".*-", "", contacts.KR$h1))
newH2 <- as.numeric(gsub(".*-", "", contacts.KR$h2))
lower.HID <- ifelse(newH1<newH2, contacts.KR$h1, contacts.KR$h2)
higher.HID2 <- ifelse(newH1<newH2, contacts.KR$h2, contacts.KR$h1)
contacts.KR$hpair <- paste(lower.HID, higher.HID2, sep="_")
newC1 <- as.numeric(gsub(".*-", "", contacts.KR$c1))
newC2 <- as.numeric(gsub(".*-", "", contacts.KR$c2))
lower.CID <- ifelse(newC1<newC2, contacts.KR$c1, contacts.KR$c2)
higher.CID2 <- ifelse(newC1<newC2, contacts.KR$c2, contacts.KR$c1)
contacts.KR$cpair <- paste(lower.CID, higher.CID2, sep="_")
#Fix column names for both contacts.VC and contacts.KR
colnames(contacts.KR)[5:12] <- colnames(contacts.VC)[5:12] <- c("A_21792_HIC", "B_28126_HIC", "C_3649_HIC", "D_40300_HIC", "E_28815_HIC", "F_28834_HIC", "G_3624_HIC", "H_3651_HIC")
#A function that takes a dataframe (like gene.hic.filt) and two columns from the dataframe to create a pair vector for the given interaction. First ensures the first bin in a pair is always lowest to make this easier. Then extracts the IF values for that vector from the contacts df created above. This provides me with the appropriate Hi-C data values for the different bin classes we're examining here, so that I can later test them with linear modeling to quantify their effect on expression.
IF.extractor <- function(dataframe, col1, col2, contacts, species, strand=FALSE){
new1 <- as.numeric(gsub(".*-", "", dataframe[,col1]))
if(strand==FALSE){#In the case where I'm not worried about strand, I just work with the second column selected.
new2 <- as.numeric(gsub(".*-", "", dataframe[,col2]))
lower1 <- ifelse(new1<new2, dataframe[,col1], dataframe[,col2])
higher2 <- ifelse(new1<new2, dataframe[,col2], dataframe[,col1]) #Fix all the columns first
if(species=="H"){ #Then depending on species create the pair column and merge to contact info.
dataframe[,"hpair"] <- paste(lower1, higher2, sep="_")
finaldf <- left_join(dataframe, contacts[,c("A_21792_HIC", "B_28126_HIC", "C_3649_HIC", "D_40300_HIC", "E_28815_HIC", "F_28834_HIC", "G_3624_HIC", "H_3651_HIC", "hpair")], by="hpair")
}
else if(species=="C"){
dataframe[,"cpair"] <- paste(lower1, higher2, sep="_")
finaldf <- left_join(dataframe, contacts[,c("A_21792_HIC", "B_28126_HIC", "C_3649_HIC", "D_40300_HIC", "E_28815_HIC", "F_28834_HIC", "G_3624_HIC", "H_3651_HIC", "cpair")], by="cpair")
}
}
else if(strand==TRUE){#If dealing with upstream and downstream hits, need to do things separately for genes on the + and - strand.
if(species=="H"){
US <- ifelse(dataframe[,"Hstrand.H"]=="+", dataframe[,"US_bin.H"], dataframe[,"DS_bin.H"]) #Obtain upstream bins depending on strand.
new2 <- as.numeric(gsub(".*-", "", US)) #Now rearrange the pairs to ensure regardless of stream we can find the pair (first mate lower coordinates than 2nd).
lower1 <- ifelse(new1<new2, dataframe[,col1], US)
higher2 <- ifelse(new1<new2, US, dataframe[,col1])
dataframe[,"hpair"] <- paste(lower1, higher2, sep="_")
dataframe[,"USFDR"] <- ifelse(dataframe[,"Hstrand.H"]=="+", dataframe[,"US_FDR.H"], dataframe[,"DS_FDR.H"])
finaldf <- left_join(dataframe, contacts[,c("A_21792_HIC", "B_28126_HIC", "C_3649_HIC", "D_40300_HIC", "E_28815_HIC", "F_28834_HIC", "G_3624_HIC", "H_3651_HIC", "hpair")], by="hpair")
}
else if(species=="C"){
US <- ifelse(dataframe[,"Hstrand.C"]=="+", dataframe[,"US_bin.C"], dataframe[,"DS_bin.C"]) #Obtain upstream bins depending on strand.
new2 <- as.numeric(gsub(".*-", "", US)) #Now rearrange the pairs to ensure regardless of stream we can find the pair (first mate lower coordinates than 2nd).
lower1 <- ifelse(new1<new2, dataframe[,col1], US)
higher2 <- ifelse(new1<new2, US, dataframe[,col1])
dataframe[,"cpair"] <- paste(lower1, higher2, sep="_")
dataframe[,"USFDR"] <- ifelse(dataframe[,"Hstrand.C"]=="+", dataframe[,"US_FDR.C"], dataframe[,"DS_FDR.C"])
finaldf <- left_join(dataframe, contacts[,c("A_21792_HIC", "B_28126_HIC", "C_3649_HIC", "D_40300_HIC", "E_28815_HIC", "F_28834_HIC", "G_3624_HIC", "H_3651_HIC", "cpair")], by="cpair")
}
}
#before finally returning, remove rows where we don't have full Hi-C data.
finaldf <- finaldf[complete.cases(finaldf[,c("A_21792_HIC", "B_28126_HIC", "C_3649_HIC", "D_40300_HIC", "E_28815_HIC", "F_28834_HIC", "G_3624_HIC", "H_3651_HIC")]),]
return(finaldf)
}
#Now I can use the IF.extractor function to make a number of different dataframes for actually testing different IF values with the RPKM expression values.
h_minFDR.KR <- IF.extractor(gene.hic.KR, "HID", "min_FDR_bin.H", contacts.KR, "H")
h_minFDR.VC <- IF.extractor(gene.hic.VC, "HID", "min_FDR_bin.H", contacts.VC, "H")
#h_maxB <- IF.extractor(gene.hic.filt, "HID", "max_B_bin.H", contacts, "H")
#c_maxB <- IF.extractor(gene.hic.filt, "CID", "max_B_bin.C", contacts, "C")
#h_US <- IF.extractor(gene.hic.filt, "HID", "US_bin.H", contacts, "H", TRUE)
#c_US <- IF.extractor(gene.hic.filt, "CID", "US_bin.C", contacts, "C", TRUE)
#Write these out so they can be permuted upon on midway2.
fwrite(h_minFDR.KR, "~/Desktop/Hi-C/HiC_covs/juicer/h_minFDR.KR", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
fwrite(h_minFDR.VC, "~/Desktop/Hi-C/HiC_covs/juicer/h_minFDR.VC", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
#fwrite(h_maxB, "~/Desktop/Hi-C/HiC_covs/h_maxB", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
#fwrite(c_maxB, "~/Desktop/Hi-C/HiC_covs/c_maxB", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
#fwrite(h_US, "~/Desktop/Hi-C/HiC_covs/h_US", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
#fwrite(c_US, "~/Desktop/Hi-C/HiC_covs/c_US", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
#Now the same thing, but subsetting down to ONLY the genes that show evidence for DE at 5% FDR.
h_minFDR_DE.KR <- filter(h_minFDR.KR, adj.P.Val<=0.05)
h_minFDR_DE.VC <- filter(h_minFDR.VC, adj.P.Val<=0.05)
#h_maxB_DE <- filter(h_maxB, adj.P.Val<=0.05)
#c_maxB_DE <- filter(c_maxB, adj.P.Val<=0.05)
#h_US_DE <- filter(h_US, adj.P.Val<=0.05)
#c_US_DE <- filter(c_US, adj.P.Val<=0.05)
fwrite(h_minFDR_DE.KR, "~/Desktop/Hi-C/HiC_covs/juicer/h_minFDR_DE.KR", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
fwrite(h_minFDR_DE.VC, "~/Desktop/Hi-C/HiC_covs/juicer/h_minFDR_DE.VC", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
#fwrite(h_maxB_DE, "~/Desktop/Hi-C/HiC_covs/h_maxB_DE", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
#fwrite(c_maxB_DE, "~/Desktop/Hi-C/HiC_covs/c_maxB_DE", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
#fwrite(h_US_DE, "~/Desktop/Hi-C/HiC_covs/h_US_DE", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
#fwrite(c_US_DE, "~/Desktop/Hi-C/HiC_covs/c_US_DE", quote = FALSE, sep = "\t", na = "NA", col.names = TRUE)
Now I use these data frames and join each again on the RPKM table by genes, in order to obtain RPKM expression values for each gene-Hi-C bin set.
In this next section I join each of the data frames created above on the RPKM table by genes, in order to have a merged table with expression values and contact frequencies for each gene-Hi-C bin set. I move then to explore correlations between the values.
#Join all of the previously-made Hi-C interaction frequency tables to the RPKM table by genes, to obtain RPKM values in concert with contact frequency values.
RPKM <- as.data.frame(weighted.data$E)
RPKM$genes <- rownames(RPKM)
hmin.KR <- left_join(h_minFDR.KR, RPKM, by="genes")
hmin.VC <- left_join(h_minFDR.VC, RPKM, by="genes")
#hmaxB <- left_join(h_maxB, RPKM, by="genes")
#cmaxB <- left_join(c_maxB, RPKM, by="genes")
#hUS <- left_join(h_US, RPKM, by="genes")
#cUS <- left_join(c_US, RPKM, by="genes")
#Get the same thing filtered for only DE genes.
hminDE.KR <- filter(hmin.KR, adj.P.Val<=0.05)
hminDE.VC <- filter(hmin.VC, adj.P.Val<=0.05)
#hmaxBDE <- filter(hmaxB, adj.P.Val<=0.05)
#cmaxBDE <- filter(cmaxB, adj.P.Val <=0.05)
#hUSDE <- filter(hUS, adj.P.Val<=0.05)
#cUSDE <- filter(cUS, adj.P.Val<=0.05)
hmin_noDE.KR <- filter(hmin.KR, adj.P.Val>0.05) #Pull out specific set of non-DE hits.
hmin_noDE.VC <- filter(hmin.VC, adj.P.Val>0.05)
#Extract contacts and expression for the contact with the lowest FDR from linear modeling.
KRcontacts <- hmin.KR[,61:68]
KRexprs <- hmin.KR[,69:76]
KRcontactsDE <- hminDE.KR[,61:68]
KRexprsDE <- hminDE.KR[,69:76]
nonDEcontactsKR <- hmin_noDE.KR[,61:68]
nonDEexprsKR <- hmin_noDE.KR[,69:76]
VCcontacts <- hmin.VC[,61:68]
VCexprs <- hmin.VC[,69:76]
VCcontactsDE <- hminDE.VC[,61:68]
VCexprsDE <- hminDE.VC[,69:76]
nonDEcontactsVC <- hmin_noDE.VC[,61:68]
nonDEexprsVC <- hmin_noDE.VC[,69:76]
#Function for calculating gene-wise correlations.
cor.calc <- function(hicdata, exprdata){
cor.exp <- vector(length=nrow(hicdata))
for(i in 1:nrow(hicdata)){
cor.exp[i] <- cor(as.numeric(hicdata[i,]), as.numeric(exprdata[i,]))
}
return(cor.exp)
}
#Calculate correlations for different sets! I've commented out the species-specific calculations here because they just aren't that interesting. If you look at DE dynamics within each species you do NOT get a gorgeous bimodal as you do for across--looks more uniform and messy.
fullcorsKR <- data.frame(cor=cor.calc(KRcontacts, KRexprs), type="all")
fullDEcorsKR <- data.frame(cor=cor.calc(KRcontactsDE, KRexprsDE), type="DE")
fullnoDEcorsKR <- data.frame(cor=cor.calc(nonDEcontactsKR, nonDEexprsKR), type="non-DE")
#VC now
fullcorsVC <- data.frame(cor=cor.calc(VCcontacts, VCexprs), type="all")
fullDEcorsVC <- data.frame(cor=cor.calc(VCcontactsDE, VCexprsDE), type="DE")
fullnoDEcorsVC <- data.frame(cor=cor.calc(nonDEcontactsVC, nonDEexprsVC), type="non-DE")
#Combine these dfs to plot in one gorgeous ggplot!
ggcorsKR <- rbind(fullcorsKR, fullDEcorsKR, fullnoDEcorsKR)
ggcorsVC <- rbind(fullcorsVC, fullDEcorsVC, fullnoDEcorsVC)
#VC then KR
ggplot(data=filter(ggcorsVC, type=="all"|type=="DE"|type=="non-DE")) + stat_density(aes(x=cor, group=type, color=type, y=..scaled..), position="identity", geom="line") + ggtitle("Correlation b/t RPKM Expression and VC Hi-C Contact Frequency") + xlab("Pearson Correlations b/t RPKM Expression and VC Hi-C Contact Frequency") + ylab("Density") + scale_color_manual("Gene Set", values=c("red", "blue", "green"), labels=c("All genes", "DE genes", "non-DE genes"))
ggplot(data=filter(ggcorsKR, type=="all"|type=="DE"|type=="non-DE")) + stat_density(aes(x=cor, group=type, color=type, y=..scaled..), position="identity", geom="line") + ggtitle("Correlation b/t RPKM Expression and KR Hi-C Contact Frequency") + xlab("Pearson Correlations b/t RPKM Expression and KR Hi-C Contact Frequency") + ylab("Density") + scale_color_manual("Gene Set", values=c("red", "blue", "green"), labels=c("All genes", "DE genes", "non-DE genes"))
###This looks great, showing strong bimodal distribution for the DE genes and broader distributions with a peak at 0 for the non-DE set and the full set. To assess if this is legit at all, I now re-run this correlation analysis after permuting the Hi-C values. I shuffle sample IDs on a gene-by-gene basis to accomplish this:
cor.permuter <- function(hicdata, exprdata, nperm){
result <- data.frame(cor=NA, type=rep(1:nperm, each=nrow(exprdata)))
for(perm in 1:nperm){
permute <- hicdata
for(row in 1:nrow(hicdata)){
permute[row,] <- sample(hicdata[row,])
}
myindices <- which(result$type==perm)
result[myindices,1] <- cor.calc(permute, exprdata)
}
return(result)
}
#Just do it with 5 permutations to see the general effect quickly:
full.perm.KR <- cor.permuter(KRcontacts, KRexprs, 5)
DE.perm.KR <- cor.permuter(KRcontactsDE, KRexprsDE, 5)
nonDE.perm.KR <- cor.permuter(nonDEcontactsKR, nonDEexprsKR, 5)
full.perm.VC <- cor.permuter(VCcontacts, VCexprs, 5)
DE.perm.VC <- cor.permuter(VCcontactsDE, VCexprsDE, 5)
nonDE.perm.VC <- cor.permuter(nonDEcontactsVC, nonDEexprsVC, 5)
#Now visualize.
ggplot(data=filter(ggcorsKR, type=="all"|type=="DE"|type=="non-DE")) + stat_density(aes(x=cor, group=type, color=type, y=..scaled..), position="identity", geom="line") + stat_density(data=full.perm.KR, aes(x=cor, group=type), geom="line", linetype="dotted", position="identity") + stat_density(data=DE.perm.KR, aes(x=cor, group=type), geom="line", linetype="dashed", position="identity") + stat_density(data=nonDE.perm.KR, aes(x=cor, group=type), geom="line", linetype="twodash", position="identity") + ggtitle("Correlation b/t RPKM Expression and KR Hi-C Contact Frequency") + xlab("Pearson Correlations b/t RPKM Expression and KR Hi-C Contact Frequency") + ylab("Density") + scale_color_manual("Gene Set", values=c("red", "blue", "green"), labels=c("All genes", "DE genes", "non-DE genes"))
ggplot(data=filter(ggcorsVC, type=="all"|type=="DE"|type=="non-DE")) + stat_density(aes(x=cor, group=type, color=type, y=..scaled..), position="identity", geom="line") + stat_density(data=full.perm.VC, aes(x=cor, group=type), geom="line", linetype="dotted", position="identity") + stat_density(data=DE.perm.VC, aes(x=cor, group=type), geom="line", linetype="dashed", position="identity") + stat_density(data=nonDE.perm.VC, aes(x=cor, group=type), geom="line", linetype="twodash", position="identity") + ggtitle("Correlation b/t RPKM Expression and VC Hi-C Contact Frequency") + xlab("Pearson Correlations b/t RPKM Expression and VC Hi-C Contact Frequency") + ylab("Density") + scale_color_manual("Gene Set", values=c("red", "blue", "green"), labels=c("All genes", "DE genes", "non-DE genes"))
#See that permuted datasets have a tighter correlation distribution with a strong peak at 0. Reassuring. In the future I will repeat this analysis on Midway2, running 10000 permutations to see the full range of permuted data possible.
Now that I’ve seen some nice effects in the correlation between expression and contact for different sets of genes, and for permutations on those sets, I move to see what kind of quantitiative explanatory power differential contact (DC) might actually have for differential expression (DE).
In this next section I “regress out” the effect of Hi-C contacts from their overlapping genes’ RPKM expression values, comparing a linear model run on the base values to one run on the residuals of expression after regressing out Hi-C data. Comparing the p-values before and after this regression can give some sense of whether the DE is being driven by differential Hi-C contacts (DC).
###A function to calculate gene-wise correlations between Hi-C data and expression data, but for spearman correlations. Pearson correlation calculator function is in the chunk above.
cor.calc.spear <- function(hicdata, exprdata){
cor.exp <- vector(length=nrow(hicdata))
for(i in 1:nrow(hicdata)){
cor.exp[i] <- cor(as.numeric(hicdata[i,]), as.numeric(exprdata[i,]), method="spearman")
}
return(cor.exp)
}
###A function to permute a Hi-C df by going gene-by-gene (row-by-row) and shuffling all sample IDs.
shuffler <- function(hicdata){
for(row in 1:nrow(hicdata)){
hicdata[row,] <- sample(hicdata[row,])
}
return(hicdata)
}
###A function that runs a linear model, both with and without Hi-C corrected expression values, and returns a dataframe of hit classes (DE or not before and after correction). Also spits back out p-values before and after correction, as well as correlations between Hi-C data and expression data. Since the former is a one-row data frame of 4 points and the latter is a 3-column data frame with the number of genes rows, returns a list.
lmcorrect <- function(voom.obj, exprs, cov_matrix, meta_df){
mygenes <- cov_matrix$genes #Pull out the relevant genes here; used for subsetting the voom.obj in a bit.
cov_matrix <- cov_matrix[, -9] #Remove genes from the cov matrix
hic_present <- sapply(1:nrow(cov_matrix), function(i) !any(is.na(cov_matrix[i,]))) #First, remove any rows w/ missing Hi-C data.
exprs <- data.matrix(exprs[hic_present,]) #Filter expression with this
cov_matrix <- data.matrix(cov_matrix[hic_present,]) #Filter Hi-C data with this
#Now, prepare to run the actual models. First run a model w/ Hi-C as a covariate to evaluate
SP <- factor(meta_df$SP,levels = c("H","C"))
design <- model.matrix(~0+SP)
colnames(design) <- c("Human", "Chimp")
resid_hic <- array(0, dim=c(nrow(exprs), ncol(cov_matrix))) #Initialize a dataframe for storing the residuals.
for(i in 1:nrow(exprs)){#Loop through rows of the expression df, running linear modeling w/ Hi-C to obtain residuals.
resid_hic[i,] <- lm(exprs[i,]~cov_matrix[i,])$resid
}
mycon <- makeContrasts(HvC = Human-Chimp, levels = design)
#Filter the voom object to only contain genes that had Hi-C information here.
good.indices <- which(rownames(voom.obj$E) %in% mygenes)
voom.obj <- voom.obj[good.indices,]
#Now, replace the RPKM values in the voom object for after linear modeling with the residuals.
voom.obj.after <- voom.obj
voom.obj.after$E <- resid_hic
lmFit(voom.obj, design=design) %>% eBayes(.) %>% contrasts.fit(., mycon) %>% eBayes(.) %>% topTable(., coef = 1, adjust.method = "BH", number = Inf, sort.by="none") -> fit_before
lmFit(voom.obj.after, design=design) %>% eBayes(.) %>% contrasts.fit(., mycon) %>% eBayes(.) %>% topTable(., coef = 1, adjust.method = "BH", number = Inf, sort.by="none") -> fit_after
result.DE.cats <- data.frame(DEneither=sum(fit_before$adj.P.Val>0.05&fit_after$adj.P.Val>0.05), DEbefore=sum(fit_before$adj.P.Val<=0.05&fit_after$adj.P.Val>0.05), DEafter=sum(fit_before$adj.P.Val>0.05&fit_after$adj.P.Val<=0.05), DEboth=sum(fit_before$adj.P.Val<=0.05&fit_after$adj.P.Val<=0.05))
result.DE.stats <- data.frame(cor.pear=cor.calc(cov_matrix, exprs), cor.spear=cor.calc.spear(cov_matrix, exprs), pval.before=fit_before$adj.P.Val, pval.after=fit_after$adj.P.Val)
result <- list("categories"=result.DE.cats, "stats"=result.DE.stats)
return(result)
}
#Proceed to use the function on the different dataframes I've created with different sets of overlapping Hi-C contacts:
h_minFDR_pvals_KR <- lmcorrect(weighted.data, hmin.KR[,c("A-21792", "B-28126", "C-3649", "D-40300", "E-28815", "F-28834", "G-3624", "H-3651")], hmin.KR[,c("A_21792_HIC", "B_28126_HIC", "C_3649_HIC", "D_40300_HIC", "E_28815_HIC", "F_28834_HIC", "G_3624_HIC", "H_3651_HIC", "genes")], meta.data)
h_minFDR_pvals_VC <- lmcorrect(weighted.data, hmin.VC[,c("A-21792", "B-28126", "C-3649", "D-40300", "E-28815", "F-28834", "G-3624", "H-3651")], hmin.VC[,c("A_21792_HIC", "B_28126_HIC", "C_3649_HIC", "D_40300_HIC", "E_28815_HIC", "F_28834_HIC", "G_3624_HIC", "H_3651_HIC", "genes")], meta.data)
#I now proceed to visualize the difference in p-values for expression before and after "correcting" for the Hi-C data. I am hoping to see many hits in the bottom right quadrant of the following plots, indicating genes that showed up as DE before Hi-C correction, but not after. I also expect that p-values falling farther away from the null line of expectation for p-values being identical between models will have higher correlations, which I color here for the pearson correlation (results look similar for spearman).
#ggplot(data=h_minFDR_pvals_KR$stats, aes(x=-log10(pval.before), y=-log10(pval.after), color=abs(cor.pear))) + geom_point(size=0.01) + geom_hline(yintercept=-log10(0.05), color="red") + geom_vline(xintercept=-log10(0.05), color="red") + geom_abline(slope=1, intercept=0, color="green", linetype="dashed") + ggtitle("Evidence for DE before vs. after regressing out Hi-C min. FDR, KR") + xlab("-log10(p-value of DE before Hi-C regression)") + ylab("-log10(p-value of DE after Hi-C regression")
ggplot(data=h_minFDR_pvals_KR$stats, aes(x=-log10(pval.before), y=-log10(pval.after))) + geom_point(size=0.01) + geom_hline(yintercept=-log10(0.05), color="red") + geom_vline(xintercept=-log10(0.05), color="red") + geom_abline(slope=1, intercept=0, color="green", linetype="dashed") + ggtitle("Evidence for DE Before vs. After Regressing out Hi-C Contact Frequency") + xlab("-log10(p-value of DE before KR Hi-C regression)") + ylab("-log10(p-value of DE after KR Hi-C regression)") + theme(plot.title=element_text(hjust=1))
ggplot(data=h_minFDR_pvals_VC$stats, aes(x=-log10(pval.before), y=-log10(pval.after))) + geom_point(size=0.01) + geom_hline(yintercept=-log10(0.05), color="red") + geom_vline(xintercept=-log10(0.05), color="red") + geom_abline(slope=1, intercept=0, color="green", linetype="dashed") + ggtitle("Evidence for DE Before vs. After Regressing out Hi-C Contact Frequency") + xlab("-log10(p-value of DE before VC Hi-C regression)") + ylab("-log10(p-value of DE after VC Hi-C regression)") + theme(plot.title=element_text(hjust=1))
Critically, here I see that “regressing out” Hi-C data and trying to model expression again moves many genes from being significantly differentially expressed (at FDR of 5%) to no longer showing differential expression. Seeing most of the hits in the bottom right corner of these visualizations is what confirms this. Reassuringly, I also see stronger correlations between RPKM expression values and normalized Hi-C interaction frequency values for points farther away from the diagonal green line of expectation. However, since this is not a statistical test, I have no assessment of significance. To accomplish this, I compare these results to running the same kind of analysis on permuted data in the next section.
This next section would be to look at differences in effect size and sign changes when doing the two separate lms (one before and one after regressing out Hi-C data from RPKM expression). Unfortunately the ash results I get have all their svalues as 1 so this is boring/not interesting right now. Hence it is not included. But let’s give it another go!
#######FRESH CODE 6-21-18#########
design <- model.matrix(~meta.data$SP)
exprs <- select(hmin.KR, "A-21792", "B-28126", "C-3649", "D-40300", "E-28815", "F-28834", "G-3624", "H-3651", genes) #Should be done on just DE genes, filter with adj.P.Val<=0.05
hic <- select(hmin.KR,"A_21792_HIC", "B_28126_HIC", "C_3649_HIC", "D_40300_HIC", "E_28815_HIC", "F_28834_HIC", "G_3624_HIC", "H_3651_HIC")
present <- sapply(1:nrow(hic), function(i) !any(is.na(hic[i,])))
genes <- exprs$genes
DEgenes <- filter(hmin.KR, adj.P.Val<=0.05) %>% select(., genes)
myDEindices <- which(genes %in% DEgenes[,1])
exprs <- exprs[,-9]
exprs <- data.matrix(exprs[present,])
hic <- data.matrix(hic[present,])
designy <- as.factor(c("human", "human", "chimp", "chimp", "human", "human", "chimp", "chimp"))
colnames(hic) <- colnames(exprs)
hic <- as.data.table(hic)
exprs <- as.data.table(exprs)
test <- mediate.test.regressing(exprs, designy, hic)
Attaching package: 'assertthat'
The following object is masked from 'package:tibble':
has_name
vshrDE <- vash(test$d_se[myDEindices], df=6)
vshrnonDE <- vash(test$d_se[-myDEindices], df=6)
vshrink <- vash(test$d_se, df=6)
DEtest <- test[myDEindices,]
nonDEtest <- test[-myDEindices,]
ash_reg <- ash(as.vector(DEtest$d), as.vector(vshrDE$sd.post), mixcompdist = "normal") #Now it works with this design coding. Use 6 df, as 2-group comparison and 8 individual samples (samples - 2)
Due to absence of package REBayes, switching to EM algorithm
summary(ash_reg$result$svalue<=0.05) #Incredible result, looks too good to be true.
Mode FALSE TRUE
logical 245 18
ind_sig <- (ash_reg$result$svalue < 0.05)
ind_sig[ind_sig == TRUE] <- "red"
ind_sig[ind_sig == FALSE] <- "black"
plot(x=ash_reg$result$betahat, y=ash_reg$result$sebetahat, ylab="Standard error of the difference", xlab="Difference in effect size from 2 lms, KR", main="Difference of effect size (DE genes)", pch=16, cex=0.6, col=ind_sig)#, xlim=c(-2.5, 2.5), ylim=c(0, 1.3))
legend("topleft", legend=c("s < 0.05", "s >= 0.05"), col=c("red", "black"), pch=16:16, cex=0.8)
ash_reg_nonDE <- ash(as.vector(nonDEtest$d), as.vector(vshrnonDE$sd.post), mixcompdist = "normal") #Now it works with this design coding. Use 6 df, as 2-group comparison and 8 individual samples (samples - 2)
Due to absence of package REBayes, switching to EM algorithm
summary(ash_reg_nonDE$result$svalue<=0.05) #Incredible result, looks too good to be true.
Mode FALSE
logical 1011
ind_sig <- (ash_reg_nonDE$result$svalue < 0.05)
ind_sig[ind_sig == TRUE] <- "red"
ind_sig[ind_sig == FALSE] <- "black"
plot(x=ash_reg_nonDE$result$betahat, y=ash_reg_nonDE$result$sebetahat, ylab="Standard error of the difference", xlab="Difference in effect size from 2 lms, KR", main="Difference of effect size (non-DE genes)", pch=16, cex=0.6, col=ind_sig)#, xlim=c(-2.5, 2.5), ylim=c(0, 1.3))
legend("topleft", legend=c("s < 0.05", "s >= 0.05"), col=c("red", "black"), pch=16:16, cex=0.8)
exprs <- select(hmin.VC, "A-21792", "B-28126", "C-3649", "D-40300", "E-28815", "F-28834", "G-3624", "H-3651", genes) #Should be done on just DE genes, filter with adj.P.Val<=0.05
hic <- select(hmin.VC,"A_21792_HIC", "B_28126_HIC", "C_3649_HIC", "D_40300_HIC", "E_28815_HIC", "F_28834_HIC", "G_3624_HIC", "H_3651_HIC")
present <- sapply(1:nrow(hic), function(i) !any(is.na(hic[i,])))
genes <- exprs$genes
DEgenes <- filter(hmin.KR, adj.P.Val<=0.05) %>% select(., genes)
myDEindices <- which(genes %in% DEgenes[,1])
exprs <- exprs[,-9]
exprs <- data.matrix(exprs[present,])
hic <- data.matrix(hic[present,])
designy <- as.factor(c("human", "human", "chimp", "chimp", "human", "human", "chimp", "chimp"))
colnames(hic) <- colnames(exprs)
hic <- as.data.table(hic)
exprs <- as.data.table(exprs)
test <- mediate.test.regressing(exprs, designy, hic)
vshrDE <- vash(test$d_se[myDEindices], df=6)
vshrnonDE <- vash(test$d_se[-myDEindices], df=6)
vshrink <- vash(test$d_se, df=6)
DEtest <- test[myDEindices,]
nonDEtest <- test[-myDEindices,]
ash_reg <- ash(as.vector(DEtest$d), as.vector(vshrDE$sd.post), mixcompdist = "normal") #Now it works with this design coding. Use 6 df, as 2-group comparison and 8 individual samples (samples - 2)
Due to absence of package REBayes, switching to EM algorithm
summary(ash_reg$result$svalue<=0.05) #Incredible result, looks too good to be true.
Mode FALSE TRUE
logical 223 13
ind_sig <- (ash_reg$result$svalue < 0.05)
ind_sig[ind_sig == TRUE] <- "red"
ind_sig[ind_sig == FALSE] <- "black"
plot(x=ash_reg$result$betahat, y=ash_reg$result$sebetahat, ylab="Standard error of the difference", xlab="Difference in effect size from 2 lms, VC", main="Difference of effect size (DE genes)", pch=16, cex=0.6, col=ind_sig)#, xlim=c(-2.5, 2.5), ylim=c(0, 1.3))
legend("topleft", legend=c("s < 0.05", "s >= 0.05"), col=c("red", "black"), pch=16:16, cex=0.8)
ash_reg_nonDE <- ash(as.vector(nonDEtest$d), as.vector(vshrnonDE$sd.post), mixcompdist = "normal") #Now it works with this design coding. Use 6 df, as 2-group comparison and 8 individual samples (samples - 2)
Due to absence of package REBayes, switching to EM algorithm
summary(ash_reg_nonDE$result$svalue<=0.05) #Incredible result, looks too good to be true.
Mode FALSE
logical 1394
ind_sig <- (ash_reg_nonDE$result$svalue < 0.05)
ind_sig[ind_sig == TRUE] <- "red"
ind_sig[ind_sig == FALSE] <- "black"
plot(x=ash_reg_nonDE$result$betahat, y=ash_reg_nonDE$result$sebetahat, ylab="Standard error of the difference", xlab="Difference in effect size from 2 lms, VC", main="Difference of effect size (non-DE genes)", pch=16, cex=0.6, col=ind_sig)#, xlim=c(-2.5, 2.5), ylim=c(0, 1.3))
legend("topleft", legend=c("s < 0.05", "s >= 0.05"), col=c("red", "black"), pch=16:16, cex=0.8)
sessionInfo()
R version 3.4.0 (2017-04-21)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: OS X El Capitan 10.11.6
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] compiler stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] assertthat_0.2.0 bindrcpp_0.2 glmnet_2.0-13
[4] foreach_1.4.4 Matrix_1.2-12 medinome_0.0.1
[7] vashr_0.99.1 qvalue_2.10.0 SQUAREM_2017.10-1
[10] ashr_2.2-7 bedr_1.0.4 forcats_0.3.0
[13] purrr_0.2.4 readr_1.1.1 tibble_1.4.2
[16] tidyverse_1.2.1 edgeR_3.20.9 RColorBrewer_1.1-2
[19] heatmaply_0.14.1 viridis_0.5.0 viridisLite_0.3.0
[22] stringr_1.3.0 gplots_3.0.1 Hmisc_4.1-1
[25] Formula_1.2-2 survival_2.41-3 lattice_0.20-35
[28] dplyr_0.7.4 plotly_4.7.1 cowplot_0.9.3
[31] ggplot2_2.2.1 reshape2_1.4.3 data.table_1.10.4-3
[34] tidyr_0.8.0 plyr_1.8.4 limma_3.34.9
loaded via a namespace (and not attached):
[1] colorspace_1.3-2 class_7.3-14 modeltools_0.2-21
[4] mclust_5.4 rprojroot_1.3-2 htmlTable_1.11.2
[7] futile.logger_1.4.3 base64enc_0.1-3 rstudioapi_0.7
[10] flexmix_2.3-14 mvtnorm_1.0-7 lubridate_1.7.3
[13] xml2_1.2.0 R.methodsS3_1.7.1 codetools_0.2-15
[16] splines_3.4.0 pscl_1.5.2 doParallel_1.0.11
[19] mnormt_1.5-5 robustbase_0.92-8 knitr_1.20
[22] jsonlite_1.5 broom_0.4.3 cluster_2.0.6
[25] kernlab_0.9-25 R.oo_1.21.0 httr_1.3.1
[28] backports_1.1.2 lazyeval_0.2.1 cli_1.0.0
[31] acepack_1.4.1 htmltools_0.3.6 tools_3.4.0
[34] gtable_0.2.0 glue_1.2.0 Rcpp_0.12.16
[37] cellranger_1.1.0 trimcluster_0.1-2 gdata_2.18.0
[40] nlme_3.1-131.1 iterators_1.0.9 fpc_2.1-11
[43] psych_1.7.8 testthat_2.0.0 rvest_0.3.2
[46] gtools_3.5.0 dendextend_1.7.0 DEoptimR_1.0-8
[49] MASS_7.3-49 scales_0.5.0 TSP_1.1-5
[52] hms_0.4.2 parallel_3.4.0 lambda.r_1.2
[55] yaml_2.1.18 gridExtra_2.3 rpart_4.1-13
[58] latticeExtra_0.6-28 stringi_1.1.7 gclus_1.3.1
[61] checkmate_1.8.5 seriation_1.2-3 caTools_1.17.1
[64] truncnorm_1.0-8 rlang_0.2.0 pkgconfig_2.0.1
[67] prabclus_2.2-6 bitops_1.0-6 evaluate_0.10.1
[70] bindr_0.1.1 labeling_0.3 htmlwidgets_1.0
[73] magrittr_1.5 R6_2.2.2 pillar_1.2.1
[76] haven_1.1.1 whisker_0.3-2 foreign_0.8-69
[79] nnet_7.3-12 modelr_0.1.1 crayon_1.3.4
[82] futile.options_1.0.0 KernSmooth_2.23-15 rmarkdown_1.9
[85] locfit_1.5-9.1 grid_3.4.0 readxl_1.0.0
[88] git2r_0.21.0 digest_0.6.15 diptest_0.75-7
[91] webshot_0.5.0 VennDiagram_1.6.19 R.utils_2.6.0
[94] stats4_3.4.0 munsell_0.4.3 registry_0.5
This R Markdown site was created with workflowr