load("vsd_promoter.RData") cells <- rev(c("HeLa-S3", "H1-hESC", "HepG2", "K562", "GM12878")) motif.table <- read.table("../analysis/mast/tf_to_motif.txt", stringsAsFactors=F) tf.tested <- sort(unique(unlist(sapply(cells, function(cell) with(envs[[cell]], sapply(colnames(vsd), function(name) strsplit(name, " ")[[1]][2])))))) motif.table.tested <- motif.table[motif.table[,1] %in% tf.tested,] for (cell in cells) with(envs[[cell]], { split.strings <- strsplit(regions[,"motifs"], ",") names(split.strings) <- rownames(regions) terms <- levels(factor(gsub("_.*", "", unlist(split.strings)))) motif <- t(sapply(split.strings, function(fields) { result <- rep(F, length(terms)) names(result) <- terms result[gsub("_.*", "", fields)] <- T result })) motif <- motif[,colnames(motif) %in% motif.table.tested[,2]] }) save.image("../figure/motif.RData") max.motifs.per.region <- max(sapply(cells, function(cell) with(envs[[cell]], max(rowSums(motif))))) motifs.per.region <- t(sapply(cells, function(cell) with(envs[[cell]], { motif.counts <- rowSums(motif) sapply(0:max.motifs.per.region, function(i) sum(motif.counts == i)) }))) colnames(motifs.per.region) <- 0:max.motifs.per.region motifs.per.region.scaled <- motifs.per.region / sapply(cells, function(cell) with(envs[[cell]], nrow(motif))) color.table <- rainbow(length(cells), v=0.75) names(color.table) <- cells pdf("../figure/motifSetsPerRegion.pdf", height=4, width=6) par(mar=c(5,4,0,0)) plot(0:max.motifs.per.region, motifs.per.region.scaled[1,], ylim=c(0, max(motifs.per.region.scaled)), type="n", bty="n", xlab="Motif sets in consensus promoter", ylab="Proportion of consensus promoters") for (cell in cells) points(0:max.motifs.per.region, motifs.per.region.scaled[cell,], type="b", col=color.table[cell], pch=20) dev.off() embedFonts("../figure/motifSetsPerRegion.pdf") # some viewers don't comply with standards and missing Dingbats motif.names <- sort(unique(as.vector(sapply(cells, function(cell) with(envs[[cell]], colnames(motif)))))) # in case they didn't all appear in every cell regions.per.motif <- t(sapply(cells, function(cell) with(envs[[cell]], { region.counts <- colSums(motif) sapply(motif.names, function(name) if (name %in% names(region.counts)) region.counts[name] else 0) }))) colnames(regions.per.motif) <- sub("known", "", motif.names) regions.per.motif.scaled <- regions.per.motif / sapply(cells, function(cell) with(envs[[cell]], nrow(motif))) motif.ranks <- order(colSums(regions.per.motif.scaled), decreasing=T) motifs.to.plot <- length(motif.names) # all of them! pdf("../figure/regionsPerMotifSet.pdf", height=8.5-2*1, width=11-2*1) par(mar=c(0,4,0,0)) plot(regions.per.motif.scaled[1,motif.ranks[1:motifs.to.plot]], ylim=c(0, max(regions.per.motif.scaled[,motif.ranks[1:motifs.to.plot]]) + 0.02), type="n", bty="n", xaxt="n", ylab="Proportion of consensus promoters with motif set", xlim=c(0,motifs.to.plot + 1), xaxs="i") segments(1:motifs.to.plot, apply(regions.per.motif.scaled, 2, min)[motif.ranks[1:motifs.to.plot]], 1:motifs.to.plot, apply(regions.per.motif.scaled, 2, max)[motif.ranks[1:motifs.to.plot]], col="lightgray") for (cell in cells) points(regions.per.motif.scaled[cell,motif.ranks[1:motifs.to.plot]], col=color.table[cell], pch=20) text(apply(regions.per.motif.scaled[,motif.ranks[1:motifs.to.plot]], 2, max) + 0.005, colnames(regions.per.motif.scaled)[motif.ranks[1:motifs.to.plot]], srt=90, pos=4, offset=0, cex=.75) legend("topright", legend=cells, fill=color.table, bty="n") dev.off() embedFonts("../figure/regionsPerMotifSet.pdf") # some viewers don't comply with standards and missing Dingbats