source("../script/beanplot_fixed.R") library(DESeq) #library(colorspace) load("vsd.RData") annotations.unipeak <- function(x) x[,1:kurtosis.col(x)] # should fix this cells <- rev(c("HeLa-S3", "H1-hESC", "HepG2", "K562", "GM12878", "tf", "all")) annotations <- sapply(cells, function(cell) { with(envs[[cell]], { counts.histone <- regions[,grep("Bernstein", colnames(regions))] cell.type.histone <- factor(sapply(strsplit(colnames(counts.histone), " "), function(x) x[1])) chip.histone <- factor(sapply(strsplit(colnames(counts.histone), " "), function(x) x[2])) vsd.histone <- getVarianceStabilizedData(estimateDispersions(estimateSizeFactors(newCountDataSet(counts.histone, data.frame(cell.type.histone, chip.histone))), method="pooled", sharingMode="fit-only", fitType="local")) means.histone <- do.call(cbind, t(by(t(vsd.histone), chip.histone, colMeans))) colnames(means.histone) <- levels(chip.histone) cbind(annotations.unipeak(regions), means.histone) }) }) promoter <- lapply(annotations, function(x) rowSums(is.na(x[,grep("RNA-seq", colnames(x))])) == 0 & x[,"nearest CAGE peak"] != "" & x[,"nearest Pol2 peak"] != "") color.sequence <- c(hsv(0, 0, 1:4/5), rep(NA, 2 * (length(cells) - 2))) color.sequence[3 + 2 * 1:(length(cells) - 2)] <- rainbow(length(cells) - 2, v=0.5) color.sequence[4 + 2 * 1:(length(cells) - 2)] <- rainbow(length(cells) - 2) pretty.beanplot <- function(column, ylab=column, ...) { pretty.list <- list(annotations[[1]][! promoter[[1]], column], annotations[[1]][promoter[[1]], column]) for (i in 2:length(cells)) { # pretty.list[[length(pretty.list) + 1]] <- NA pretty.list[[length(pretty.list) + 1]] <- annotations[[i]][! promoter[[i]], column] pretty.list[[length(pretty.list) + 1]] <- annotations[[i]][promoter[[i]], column] } beanplot.fixed(pretty.list, log="", what=c(F,T,T,F), side="both", col=as.list(color.sequence), ylab=ylab, xlim=c(1-0.5,length(pretty.list)/2+0.5), ...) } pdf("../figure/annotations.pdf", width=(11-2)/2, height=8.5-2) par(bty="n", xaxt="n", mar=c(0,4,0,0), cex.axis=0.9) layout(1:8, heights=c(0.15, 0.15, rep(1, 6))) # experiment labels plot.new() plot.window(xlim=c(1-0.5, 7+0.5), ylim=c(0,1)) text(1:7, 0.5, c("All proteins", "TFs only", rev(c("HeLa-S3", "H1-hESC", "HepG2", "K562", "GM12878")))) # promoter labels plot.new() plot.window(xlim=c(1-0.5, 7+0.5), ylim=c(0,1)) text(c(1:7 - 0.25, 1:7 + 0.25), 0.5, rep(c("-", "+"), each=7)) mtext("promoters", side=2, las=1, adj=0.75, cex=.6) pretty.beanplot("GC", ylab="GC content") pretty.beanplot("CpG", ylab="CpG content") pretty.beanplot("constrained", ylab="Constrained bases") pretty.beanplot("H3K27ac") pretty.beanplot("H3K4me1") pretty.beanplot("H3K4me3") dev.off() # for diagnostic use pretty.test <- function(column) sapply(cells, function(cell) t.test(annotations[[cell]][! promoter[[cell]], column], annotations[[cell]][promoter[[cell]], column]))