Details

  • Original publication:

Ramos-Rodríguez, M., Raurell-Vila, H., Colli, M.L. et al. The impact of proinflammatory cytokines on the β-cell regulatory landscape provides insights into the genetics of type 1 diabetes. Nat Genet. 51, 1588–1595 (2019) https://doi.org/10.1038/s41588-019-0524-6

  • Contents: Analyses and figures contained in this document correspond to the following figures/sections of the original publication:

    • Results: “Primed and neo-regulatory elements mediate cytokine response”.
    • Figure 2: “The β-cell response to proinflammatory cytokines unveils neo and primed IREs”. Panels c to f.
    • Extended Data Figure 3: “Characterization of \(\beta\)-cell IREs”. Panels a to k.

Subclassification of IREs

To determine which IREs were not accessible in control conditions, we call peaks from merged control BAM file with a relaxed threshold: P<0.05.

macs2 callpeak -f BAM -t data/CYT/ATAC/BAM/merged/endoc_ctrl.offset.bam -g hs --outdir data/CYT/ATAC/Peaks/relaxed/ -n endoc_ctrl_relaxed --tempdir data/CYT/ATAC/Peaks/tmp/ -p 0.05 --nomodel --shift -100 --extsize 200
load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges.rda")
re <- re[!is.na(re$type),]

## Save first subgroup
re$subgroup1 <- NA
re$subgroup1[grep("SRE", re$type)] <- "SRE"
re$subgroup1[re$atac.type=="gained" & re$h3k27ac.type=="gained"] <- "Opening IRE"
re$subgroup1[re$atac.type=="stable" & re$h3k27ac.type=="gained"] <- "Primed IRE"

## Save 2nd subgroup
re$subgroup2 <- re$subgroup1

relaxed <- regioneR::toGRanges(paste0("../../data/ATAC/Peaks/relaxed/endoc_ctrl_relaxed_peaks.narrowPeak"))
ols <- subsetByOverlaps(re[re$subgroup1=="Opening IRE",],
                        relaxed)$atac.GeneID

re$subgroup2[re$atac.GeneID %in% ols & 
               re$subgroup1=="Opening IRE"] <- "Other IRE"
re$subgroup2[!(re$atac.GeneID %in% ols) &
               re$subgroup1=="Opening IRE"] <- "Neo IRE"

save(re, file="../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_subgroup.rda")

## With less stringent threshold --- 
load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_k27.8.rda")
re <- re[!is.na(re$type),]

## Save first subgroup
re$subgroup1 <- NA
re$subgroup1[grep("SRE", re$type)] <- "SRE"
re$subgroup1[re$atac.type=="gained" & re$h3k27ac.type=="gained"] <- "Opening IRE"
re$subgroup1[re$atac.type=="stable" & re$h3k27ac.type=="gained"] <- "Primed IRE"

## Save 2nd subgroup
re$subgroup2 <- re$subgroup1

relaxed <- regioneR::toGRanges(paste0("../data/CYT/ATAC/Peaks/relaxed/endoc_ctrl_relaxed_peaks.narrowPeak"))
ols <- subsetByOverlaps(re[re$subgroup1=="Opening IRE",],
                        relaxed)$atac.GeneID

re$subgroup2[re$atac.GeneID %in% ols & 
               re$subgroup1=="Opening IRE"] <- "Other IRE"
re$subgroup2[!(re$atac.GeneID %in% ols) &
               re$subgroup1=="Opening IRE"] <- "Neo IRE"

save(re, file="../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_subgroup_k27.8.rda")
load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_subgroup.rda")
re <- data.frame(mcols(re)[,c(1,13,17,18)])

load("../data/CYT/ATAC/diffAnalysis/ATAC_endoc_normCountsBatch.rda")
counts <- data.frame(counts)
counts$atac.GeneID <- rownames(counts)
counts <- reshape2::melt(counts,
                         id.var=11,
                         value.var=1:10,
                         value.name="counts",
                         variable.name="sample")
counts$treatment <- unlist(lapply(strsplit(as.character(counts$sample), "_"),
                                  function(x) x[2]))

counts <- counts %>%
  group_by(atac.GeneID, treatment) %>%
  summarise(counts=mean(counts))

colnames(counts)[3] <- "ATAC-seq"

re.counts <- dplyr::left_join(re, counts)

load("../data/CYT/H3K27ac/diffAnalysis/H3K27ac_endoc_normCountsBatch.rda")
counts <- data.frame(counts)
counts$h3k27ac.GeneID <- rownames(counts)
counts <- reshape2::melt(counts,
                         id.var=9,
                         value.var=1:8,
                         value.name="counts",
                         variable.name="sample")
counts$treatment <- unlist(lapply(strsplit(as.character(counts$sample), "_"),
                                  function(x) x[2]))

counts <- counts %>%
  group_by(h3k27ac.GeneID, treatment) %>%
  summarise(counts=mean(counts))

colnames(counts)[3] <- "H3K27ac"

re.counts <- dplyr::left_join(re.counts, counts)

re.counts <- reshape2::melt(re.counts,
                            id.vars=1:5,
                            value.vars=6:7,
                            value.name="counts",
                            variable.name="experiment")

distr.reads <- 
  ggplot(re.counts[!grepl("Other", re.counts$subgroup2),],
       aes(subgroup2, log2(counts+1))) +
  geom_boxplot(aes(color=treatment), notch=TRUE, lwd=0.7) +
  scale_color_manual(values=pals$treatment,
                     labels=function(x) toupper(x),
                     name="Treatment") +
  facet_wrap(~experiment) +
  ylab(expression(Log[2]*" normalized counts")) +
  scale_x_discrete(labels=function(x) paste0(x, "s")) +
  theme(axis.text.x=element_text(angle=30, hjust=1),
        axis.title.x=element_blank(),
        strip.background = element_rect(fill="white", color="black",
                                        size=0.5, linetype=1))

distr.reads

Version Author Date
4dbeb03 Mireia Ramos 2020-05-13

Characterization

win.width=30e3

load("../data/CYT/RNA/diffAnalysis/RNA_endoc_GRangesBatch.rda")

rna <- res.gr[res.gr$gene_biotype=="protein_coding",]
win <- promoters(rna)
win <- resize(rna, width=win.width, fix="center")

load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_subgroup.rda")
ols <- findOverlaps(re, win)

anno <- cbind(mcols(re)[queryHits(ols), c(1,17,18)],
              mcols(win)[subjectHits(ols), c(1,2,5)])

load("../data/CYT/RNA/diffAnalysis/RNA_endoc_normCountsBatch.rda")
counts <- data.frame(counts)
counts$GeneID <- rownames(counts)
counts$GeneID <- gsub("\\.[[:digit:]]*", "", counts$GeneID)

counts <- reshape2::melt(counts,
                         id.vars=11,
                         value.vars=1:10,
                         value.name="counts",
                         variable.name="sample")
counts$treatment <- unlist(lapply(strsplit(as.character(counts$sample), "_"),
                                  function(x) x[2]))

counts.mean <- counts %>%
  group_by(GeneID, treatment) %>%
  summarise(counts=mean(counts))

anno.counts <- left_join(data.frame(anno), counts.mean)
anno.counts <- unique(anno.counts[,-1])


test <- anno.counts[anno.counts$subgroup2!="SRE",] %>%
  group_by(subgroup1) %>%
  summarise(pval=wilcox.test(counts[treatment=="ctrl"],
                             counts[treatment=="cyt"],
                             paired=F)$p.value)

test$lab <- ""
test$lab[test$pval<0.05] <- "*"
test$lab[test$pval<0.01] <- "**"
test$lab[test$pval<0.001] <- "***"

ggplot(anno.counts[anno.counts$subgroup2!="SRE",],
       aes(subgroup1, log2(counts + 1))) +
  geom_boxplot(aes(color=treatment), notch=TRUE, 
               outlier.shape=NA, lwd=1) +
  geom_text(data=test,
            aes(x=subgroup1, y=20, label=lab),
            size=5) +
  scale_color_manual(values=pals$treatment,
                     name="Treatment",
                     labels=function(x) toupper(x)) +
  ylab(expression("RNA-seq "*log[2]*" counts")) +
  scale_x_discrete(labels=function(x) paste0(x, "s")) +
  theme(legend.position="top",
        axis.title.x=element_blank())

Version Author Date
4dbeb03 Mireia Ramos 2020-05-13
load(paste0("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_subgroup.rda"))
scope=1e3
bin=50

## Opening IREs
op <- re[grep("Opening", re$subgroup1),]
rnd <- regioneR::randomizeRegions(op)

op.cons <- pipelineNGS::calculateMeanCons(op,
                                           scope=scope,
                                           bin=bin)
op.cons$re_type <- "Opening IRE"
rnd.cons <- pipelineNGS::calculateMeanCons(rnd,
                                           scope=scope,
                                           bin=bin)
rnd.cons$re_type <- "Random Opening IRE"

ire.cons <- rbind(op.cons, rnd.cons)

## Primed IREs
pr <- re[grep("Primed", re$subgroup1),]
rnd <- regioneR::randomizeRegions(pr)

pr.cons <- pipelineNGS::calculateMeanCons(pr,
                                           scope=scope,
                                           bin=bin)
pr.cons$re_type <- "Primed IRE"
rnd.cons <- pipelineNGS::calculateMeanCons(rnd,
                                           scope=scope,
                                           bin=bin)
rnd.cons$re_type <- "Random Primed IRE"

## Final dataset
ire.cons <- rbind(ire.cons, pr.cons, rnd.cons)
save(ire.cons, file=file.path(out_dir, "CONS_IREs_sub.rda"))
load(file.path(out_dir, "CONS_IREs_sub.rda"))

ire.cons$color <- gsub("Random ", "", ire.cons$re_type)
ire.cons$lty <- gsub("Opening ", "", gsub("Primed ", "", ire.cons$re_type))

cons.plot <- 
  ggplot(ire.cons,
       aes(position, meanCons)) +
  geom_line(aes(lty=lty, color=color), 
            lwd=1) +
  scale_linetype_discrete(name="Region type") +
  scale_color_manual(values=pals$re,
                     name="Region type",
                     labels=function(x) paste0(x, "s")) +
  geom_vline(xintercept=0, lty=2, color="grey") +
  ylab("Mean PhastCons46way Score") +
  xlab("Position from peak center (bp)") +
  theme(legend.position = "top") +
  guides(linetype=guide_legend(nrow=2),
         color=guide_legend(nrow=2))

cons.plot

Version Author Date
4dbeb03 Mireia Ramos 2020-05-13
load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_subgroup.rda")
re.df <- data.frame(re)[,-c(1:5)]

## Make groups
re.tss <- unique(re.df[!is.na(re.df$type),])

re.tss$anno.group <- NA
re.tss$anno.group[abs(re.tss$atac.distanceToTSS)>200e3] <- ">200kb"
re.tss$anno.group[abs(re.tss$atac.distanceToTSS)<=200e3 &
                    abs(re.tss$atac.distanceToTSS)>20e3] <- "20-200kb"
re.tss$anno.group[abs(re.tss$atac.distanceToTSS)<=20e3 &
                    abs(re.tss$atac.distanceToTSS)>2e3] <- "2-20kb"
re.tss$anno.group[abs(re.tss$atac.distanceToTSS)<=2e3] <- "0-2kb"

len.neo <- sum(grepl("Neo", re.tss$subgroup2))
len.primed <- sum(grepl("Primed", re.tss$subgroup2))
len.sre <- sum(grepl("SRE", re.tss$subgroup2))

sum.tss <- re.tss %>%
  group_by(subgroup2, anno.group) %>%
  summarise(num=length(unique(atac.GeneID)))

sum.tss$perc <- NA
sum.tss$perc[grep("Neo", sum.tss$subgroup2)] <- sum.tss$num[grep("Neo", sum.tss$subgroup2)]/len.neo*100
sum.tss$perc[grep("Primed", sum.tss$subgroup2)] <- sum.tss$num[grep("Primed", sum.tss$subgroup2)]/len.primed*100
sum.tss$perc[grep("SRE", sum.tss$subgroup2)] <- sum.tss$num[grep("SRE", sum.tss$subgroup2)]/len.sre*100

sum.tss <- sum.tss[!is.na(sum.tss$perc),]
sum.tss$anno.group <- factor(sum.tss$anno.group,
                             levels=c("0-2kb", "2-20kb", "20-200kb", ">200kb"))

tss.plot <- 
  ggplot(sum.tss,
       aes(anno.group, perc)) +
  geom_bar(aes(fill=subgroup2), color="black", lwd=0.7, stat="identity", position="dodge") +
  geom_vline(xintercept=1.5, lty=2, color="dark red") +
  scale_fill_manual(values=pals$re,
                    name="RE type", labels=function(x) paste0(x, "s")) +
  annotate("text", x=c(1,3), y=c(60, 60), label=c("Promoters", "Enhancers")) +
  theme(legend.position="top") +
  xlab("Distance to TSS") + 
  scale_y_continuous(name="Percentage of RE",
                     labels=function(x) paste0(x, "%"),
                     breaks=scales::pretty_breaks()) +
  guides(fill=guide_legend(ncol=1))

tss.plot

Version Author Date
4dbeb03 Mireia Ramos 2020-05-13
load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_subgroup.rda")

## Do it on enhancers IREs
sel <- re[grep("IRE", re$type),]


files <- list.files("~/data/ChromHMM_ENCODE-Broad/",
                    pattern="*.bed",
                    full.names=TRUE)
names <- pipelineNGS::getNameFromPath(files,
                                      prefix="wgEncodeBroadHmm", 
                                      suffix="HMM.bed")

chrom <- lapply(files, regioneR::toGRanges)
names(chrom) <- names

overlaps <- data.frame()
for (i in 1:length(chrom)) {
  hmm <- chrom[[i]]
  ols <- findOverlaps(sel,
                      hmm)
  df <- cbind(sel$type[queryHits(ols)],
              sel$atac.GeneID[queryHits(ols)],
              hmm$name[subjectHits(ols)])
  df <- as.data.frame(df)
  colnames(df) <- c("type", "peakID", "chromHMM")
  df$ENCODE <- names(chrom)[i]  
  overlaps <- rbind(overlaps, df)
}

ids.enh <- unique(overlaps$peakID[grep("Enhancer", overlaps$chromHMM)])
length(ids.enh)
ids.prom <- unique(overlaps$peakID[grep("Promoter", overlaps$chromHMM)])
length(ids.prom)
ids.het <- unique(overlaps$peakID[grep("Hetero", overlaps$chromHMM)])
length(ids.het)

ids.het.enh <- ids.het[ids.het %in% ids.enh]
length(ids.het.enh)

## Test with a randomized set
rndm <- regioneR::randomizeRegions(sel)
rndm$geneID <- paste0("random_", 1:length(rndm))

overlaps.rndm <- data.frame()
for (i in 1:length(chrom)) {
  hmm <- chrom[[i]]
  ols <- findOverlaps(rndm,
                      hmm)
  df <- cbind(rndm$geneID[queryHits(ols)],
              hmm$name[subjectHits(ols)])
  df <- as.data.frame(df)
  colnames(df) <- c("peakID", "chromHMM")
  df$ENCODE <- names(chrom)[i]  
  overlaps.rndm <- rbind(overlaps.rndm, df)
}

ids.enh.rndm <- unique(overlaps.rndm$peakID[grep("Enhancer", overlaps.rndm$chromHMM)])
length(ids.enh.rndm)
ids.prom.rndm <- unique(overlaps.rndm$peakID[grep("Promoter", overlaps.rndm$chromHMM)])
length(ids.prom.rndm)
ids.het.rndm <- unique(overlaps.rndm$peakID[grep("Hetero", overlaps.rndm$chromHMM)])
length(ids.het.rndm)

ids.het.enh.rndm <- ids.het.rndm[ids.het.rndm %in% ids.enh.rndm]
length(ids.het.enh.rndm)

df <- data.frame("class"=rep(c("Enhancer", "Promoter"), 2),
                 "regions"=c(rep("IREs", 2), rep("Random", 2)),
                 "overlaps"=c(length(ids.enh), length(ids.prom), 
                              length(ids.enh.rndm), length(ids.prom.rndm)))

mat <- matrix(df$overlaps, ncol=2)

save(df, file=file.path(out_dir, "ENH_chromHMM_IREs.rda"))
load(file.path(out_dir, "ENH_chromHMM_IREs.rda"))

bar.chrom <- 
  ggplot(df[df$class!="Promoter",],
       aes(class, overlaps)) +
  geom_bar(aes(fill=regions), stat="identity",
           position="dodge", color="black",
           width=0.7) +
  xlab("ChromHMM class") +
  scale_y_continuous(labels=scales::comma,
                     breaks=scales::pretty_breaks(),
                     name="# of overlapping IREs") +
  scale_fill_manual(values=c("#e37418", "#0c3a5c"),
                    name="Source") +
  annotate("text", 1, 2800, label="*", size=8) +
  theme(legend.position=c(0.55, 0.85))

bar.chrom

Version Author Date
4dbeb03 Mireia Ramos 2020-05-13

Transcription factor analysis

de novo motif analysis

library(maRge)

deNovoMotifHOMER(bed="../data/CYT/bedfiles/IREs_endoc_fc1_padj0.05_opening_distal.bed",
                 path_output=file.path(out_dir, "HOMER_IREs_endoc_opening_distal_mask"),
                 other_param="-mask",
                 path_homer="~/tools/homer/")

deNovoMotifHOMER(bed="../data/CYT/bedfiles/IREs_endoc_fc1_padj0.05_primed_distal.bed",
                 path_output=file.path(out_dir, "HOMER_IREs_endoc_primed_distal_mask"),
                 other_param="-mask",
                 path_homer="~/tools/homer/")

deNovoMotifHOMER(bed="../data/CYT/bedfiles/IREs_endoc_fc1_padj0.05_neo_distal.bed",
                 path_output=file.path(out_dir, "HOMER_IREs_endoc_neo_distal_mask"),
                 other_param="-mask",
                 path_homer="~/tools/homer/")
htmltools::includeHTML(file.path(out_dir, "HOMER_IREs_endoc_opening_distal_mask/homerResults.html"))

Homer de novo Motif Results (HOMER_IREs_endoc_fc1_opening_distal_mask//)

Known Motif Enrichment Results
Gene Ontology Enrichment Results
If Homer is having trouble matching a motif to a known motif, try copy/pasting the matrix file into STAMP
More information on motif finding results: HOMER | Description of Results | Tips
Total target sequences = 1841
Total background sequences = 48084
* - possible false positive
RankMotifP-valuelog P-pvalue% of Targets% of Background STD(Bg STD) Best Match/DetailsMotif File
1 C T A G C G T A C G T A T G C A T A C G G A C T C T A G C T G A G C T A C G T A T A C G G A C T 1e-1270-2.925e+0364.86%3.25%176.3bp (187.2bp)IRF1(IRF)/PBMC-IRF1-ChIP-Seq(GSE43036)/Homer(0.987)
More Information | Similar Motifs Found
motif file (matrix)
2 G A T C A C G T C T A G G C A T A C G T A G C T C T G A G A T C 1e-27-6.289e+0154.16%41.48%240.1bp (183.2bp)Foxo1(Forkhead)/RAW-Foxo1-ChIP-Seq(Fan_et_al.)/Homer(0.981)
More Information | Similar Motifs Found
motif file (matrix)
3 A C G T A G C T T A G C G A T C T G A C G C T A A C T G C G A T T G C A C T G A 1e-20-4.786e+0120.91%12.95%256.5bp (183.8bp)STAT3/MA0144.2/Jaspar(0.785)
More Information | Similar Motifs Found
motif file (matrix)
4 T A C G T G C A A G C T G C A T C T G A C G A T T C A G A T G C 1e-17-4.046e+0131.99%23.14%263.6bp (180.4bp)GSC(Homeobox)/FrogEmbryos-GSC-ChIP-Seq(DRA000576)/Homer(0.798)
More Information | Similar Motifs Found
motif file (matrix)
5 T C A G A C G T A T C G C G T A A T C G G C A T A G T C C T G A 1e-16-3.878e+0121.73%14.37%256.3bp (186.7bp)FOS::JUN/MA0099.2/Jaspar(0.925)
More Information | Similar Motifs Found
motif file (matrix)
6 C G T A A C G T A C T G C G T A A C T G A C T G A G T C C G T A A C T G C T G A A G T C A G T C 1e-15-3.675e+010.43%0.00%162.4bp (8.4bp)FOS/MA0476.1/Jaspar(0.652)
More Information | Similar Motifs Found
motif file (matrix)
7 A C T G A G T C A G T C C G T A A G T C A C G T A T G C A C T G A C G T G T C A A C T G A C G T 1e-15-3.675e+010.43%0.00%189.9bp (0.0bp)PH0171.1_Nkx2-1/Jaspar(0.597)
More Information | Similar Motifs Found
motif file (matrix)
8 A G T C C G T A C T A G A C G T A G T C C G T A A C G T A C G T A C T G A C G T A C G T C G T A 1e-15-3.589e+010.60%0.01%183.2bp (139.5bp)PB0172.1_Sox1_2/Jaspar(0.743)
More Information | Similar Motifs Found
motif file (matrix)
9 G C T A T A G C A G T C G C T A A C T G A G T C C G A T C T A G 1e-14-3.324e+0124.55%17.34%261.5bp (189.0bp)Ap4(bHLH)/AML-Tfap4-ChIP-Seq(GSE45738)/Homer(0.912)
More Information | Similar Motifs Found
motif file (matrix)
10 A G T C A C G T A C T G C G T A C T G A A C G T G T A C A C G T A C T G A C T G C G T A A C G T 1e-13-3.082e+010.60%0.02%117.3bp (209.4bp)Hand1::Tcf3/MA0092.1/Jaspar(0.623)
More Information | Similar Motifs Found
motif file (matrix)
11 A G T C A T C G C T A G A C G T A C T G A G T C A C T G C G A T T C G A A G C T 1e-12-2.984e+010.49%0.01%98.4bp (127.7bp)Ahr::Arnt/MA0006.1/Jaspar(0.615)
More Information | Similar Motifs Found
motif file (matrix)
12 A C T G A C G T C G T A A G T C A C G T A G T C A C G T A G T C A C G T C G T A A C G T A C T G 1e-12-2.984e+010.49%0.01%138.6bp (103.8bp)PB0139.1_Irf5_2/Jaspar(0.652)
More Information | Similar Motifs Found
motif file (matrix)
13 T A C G T A G C G C A T C G A T T C A G A G T C G C T A C G T A C A T G A C T G C G T A G C T A 1e-12-2.803e+010.43%0.01%123.4bp (36.2bp)Bcl6/MA0463.1/Jaspar(0.649)
More Information | Similar Motifs Found
motif file (matrix)
14 A C T G C G T A A C T G C G T A A C G T C G T A A G T C C G T A A C G T A G T C A C G T A G T C 1e-12-2.803e+010.43%0.01%267.3bp (89.3bp)DMRT3/MA0610.1/Jaspar(0.628)
More Information | Similar Motifs Found
motif file (matrix)
15 T C A G A T C G C T A G C G T A G C T A C G A T C G A T A G T C G T A C T A G C 1e-12-2.772e+018.37%4.55%251.7bp (175.5bp)NFkB-p65(RHD)/GM12787-p65-ChIP-Seq(GSE19485)/Homer(0.856)
More Information | Similar Motifs Found
motif file (matrix)
16 * C T A G C G A T A C G T C G T A A G T C A C G T C G T A A G T C A C T G A G T C 1e-11-2.659e+010.38%0.01%137.8bp (135.5bp)PB0056.1_Rfxdc2_1/Jaspar(0.716)
More Information | Similar Motifs Found
motif file (matrix)
17 * C G T A A G T C A C G T C G T A A G T C A C G T A G T C A G T C A G T C A C G T A C T G A C T G 1e-11-2.659e+010.38%0.01%193.3bp (181.5bp)Znf263(Zf)/K562-Znf263-ChIP-Seq(GSE31477)/Homer(0.620)
More Information | Similar Motifs Found
motif file (matrix)
18 * A C T G A C T G A C G T A C G T A G T C C G T A A C T G C G T A A G T C C G T A A C T G C G T A 1e-11-2.619e+010.33%0.00%125.2bp (195.5bp)Smad2(MAD)/ES-SMAD2-ChIP-Seq(GSE29422)/Homer(0.655)
More Information | Similar Motifs Found
motif file (matrix)
19 * A C G T C G T A A C T G C G A T A C T G A G T C C G T A A C T G A G T C A G T C A C G T A C T G 1e-11-2.619e+010.33%0.00%172.6bp (0.0bp)Smad3(MAD)/NPC-Smad3-ChIP-Seq(GSE36673)/Homer(0.560)
More Information | Similar Motifs Found
motif file (matrix)
20 * A C G T A G T C A C T G A C T G C G T A A C G T A G T C A C T G A G T C A C G T A C T G A C G T 1e-11-2.619e+010.33%0.00%73.9bp (8.7bp)POL009.1_DCE_S_II/Jaspar(0.579)
More Information | Similar Motifs Found
motif file (matrix)
21 * A C T G A C T G C G T A A G T C A G T C A G T C C G T A C G T A A G T C A G T C A C G T C G T A 1e-11-2.619e+010.33%0.00%304.2bp (72.7bp)PH0026.1_Duxbl/Jaspar(0.589)
More Information | Similar Motifs Found
motif file (matrix)
22 * C G T A A C T G A C G T A C T G A G T C C G T A A G T C C G T A A G T C A C G T A T G C C G T A 1e-11-2.619e+010.33%0.00%126.0bp (0.0bp)PB0104.1_Zscan4_1/Jaspar(0.687)
More Information | Similar Motifs Found
motif file (matrix)
23 * A T C G A C T G A C G T A C G T A C G T A C G T A C T G A T C G 1e-10-2.499e+017.44%4.03%218.5bp (180.3bp)PB0121.1_Foxj3_2/Jaspar(0.686)
More Information | Similar Motifs Found
motif file (matrix)
24 * A G T C A G T C A G T C A G T C C A T G A T C G A C T G T A C G C T A G T G C A 1e-10-2.445e+012.23%0.64%250.2bp (176.3bp)EBF1(EBF)/Near-E2A-ChIP-Seq(GSE21512)/Homer(0.882)
More Information | Similar Motifs Found
motif file (matrix)
25 * A C G T A C G T A G T C A C G T A G T C C G T A A G T C A C G T A C T G C G T A A T C G A C T G 1e-10-2.401e+010.43%0.01%321.4bp (98.2bp)PB0139.1_Irf5_2/Jaspar(0.573)
More Information | Similar Motifs Found
motif file (matrix)
26 * A C G T C G T A A G T C C G T A A C G T A G T C C G T A A T C G A C G T A C T G A C G T A G T C 1e-10-2.378e+010.38%0.01%247.4bp (126.9bp)Chop(bZIP)/MEF-Chop-ChIP-Seq(GSE35681)/Homer(0.642)
More Information | Similar Motifs Found
motif file (matrix)
27 * A C T G A G T C C G T A C T G A T G A C G T C A A G C T C G T A T A G C A G T C 1e-10-2.324e+013.26%1.26%263.5bp (180.7bp)CEBP:AP1(bZIP)/ThioMac-CEBPb-ChIP-Seq(GSE21512)/Homer(0.597)
More Information | Similar Motifs Found
motif file (matrix)
28 * A C G T A C T G A C T G A C G T A C T G A C G T A C G T A G T C A C G T A C T G A G T C A G T C 1e-9-2.207e+010.33%0.01%78.6bp (254.1bp)PB0120.1_Foxj1_2/Jaspar(0.623)
More Information | Similar Motifs Found
motif file (matrix)
29 * A C G T A C G T C G T A A C G T C G T A A G T C A C G T A C G T A G T C C G T A A C T G A G T C 1e-9-2.207e+010.33%0.01%130.5bp (31.8bp)ETV6/MA0645.1/Jaspar(0.631)
More Information | Similar Motifs Found
motif file (matrix)
30 * A C T G A C T G A G T C A G T C A C T G C G T A A G T C C G T A A G T C A C T G 1e-9-2.113e+010.27%0.00%248.9bp (0.0bp)PB0117.1_Eomes_2/Jaspar(0.632)
More Information | Similar Motifs Found
motif file (matrix)
31 * C G T A C G T A A C T G C G T A C G T A C G T A C G T A C G T A A C T G A G T C A C T G A G T C 1e-9-2.113e+010.27%0.00%60.0bp (0.0bp)PB0148.1_Mtf1_2/Jaspar(0.662)
More Information | Similar Motifs Found
motif file (matrix)
32 * A C G T A G T C A C G T A C G T A G T C A C T G A C T G C G T A A C G T A C G T 1e-8-1.967e+010.33%0.01%80.0bp (174.1bp)OTX1/MA0711.1/Jaspar(0.649)
More Information | Similar Motifs Found
motif file (matrix)
33 * A C T G A G T C C G T A A C T G A G T C A C G T A G T C C G T A A C T G A C T G A G T C A C G T 1e-8-1.903e+010.38%0.01%136.7bp (198.8bp)ZNF519(Zf)/HEK293-ZNF519.GFP-ChIP-Seq(GSE58341)/Homer(0.674)
More Information | Similar Motifs Found
motif file (matrix)
34 * A G T C C G A T G A T C T C A G T A G C A T C G G T A C G C T A A G C T C T G A 1e-7-1.770e+010.27%0.00%195.2bp (24.9bp)GCM2/MA0767.1/Jaspar(0.620)
More Information | Similar Motifs Found
motif file (matrix)
35 * A C T G C G T A A G T C A G T C C G T A C G T A A G T C A C T G A G T C A G T C 1e-6-1.571e+010.27%0.01%29.9bp (124.7bp)ZBTB7C/MA0695.1/Jaspar(0.640)
More Information | Similar Motifs Found
motif file (matrix)
36 * A C G T C G T A A G T C C G T A A C T G C G T A A C T G A G T C A C T G A G T C 1e-6-1.430e+010.27%0.01%161.2bp (111.6bp)Sox9(HMG)/Limb-SOX9-ChIP-Seq(GSE73225)/Homer(0.582)
More Information | Similar Motifs Found
motif file (matrix)
37 * A G T C A G T C A G T C A G T C A G T C C G T A A C T G A C G T A G T C A C T G 1e-5-1.233e+010.27%0.01%95.7bp (56.9bp)PB0201.1_Zfp281_2/Jaspar(0.703)
More Information | Similar Motifs Found
motif file (matrix)

htmltools::includeHTML(file.path(out_dir, "HOMER_IREs_endoc_primed_distal_mask/homerResults.html"))

Homer de novo Motif Results (HOMER_IREs_endoc_fc1_padj0.05_primed_distal_mask//)

Known Motif Enrichment Results
Gene Ontology Enrichment Results
If Homer is having trouble matching a motif to a known motif, try copy/pasting the matrix file into STAMP
More information on motif finding results: HOMER | Description of Results | Tips
Total target sequences = 979
Total background sequences = 48465
* - possible false positive
RankMotifP-valuelog P-pvalue% of Targets% of Background STD(Bg STD) Best Match/DetailsMotif File
1 T C G A T A C G A G C T A C G T A G C T A G T C C T G A A T G C A G C T G A C T A C G T A G T C 1e-180-4.151e+0229.83%3.44%328.3bp (176.1bp)IRF1(IRF)/PBMC-IRF1-ChIP-Seq(GSE43036)/Homer(0.985)
More Information | Similar Motifs Found
motif file (matrix)
2 C A T G G C A T A G C T G T C A C G T A C G A T A T G C G C T A C A G T A G C T G C T A C G T A 1e-17-4.137e+018.99%3.06%283.3bp (180.0bp)HNF1B/MA0153.2/Jaspar(0.958)
More Information | Similar Motifs Found
motif file (matrix)
3 A C T G A G T C A C G T T C A G A C G T G A C T C G A T T A C G A G T C G A C T 1e-17-4.008e+0117.26%8.58%342.0bp (184.9bp)FOXM1(Forkhead)/MCF7-FOXM1-ChIP-Seq(GSE72977)/Homer(0.773)
More Information | Similar Motifs Found
motif file (matrix)
4 A G C T A C G T T A C G A G T C A G T C G A C T C T G A T A C G A T G C G T C A C G T A A G T C 1e-16-3.815e+014.09%0.75%359.7bp (180.9bp)Rfx5(HTH)/GM12878-Rfx5-ChIP-Seq(GSE31477)/Homer(0.774)
More Information | Similar Motifs Found
motif file (matrix)
5 G C A T T A C G C A G T T A G C G T C A A C T G T A G C C T G A A C T G G C T A 1e-15-3.565e+0119.82%10.94%364.4bp (188.3bp)NRL/MA0842.1/Jaspar(0.837)
More Information | Similar Motifs Found
motif file (matrix)
6 C G A T G T A C G T C A G C A T G A C T C G T A C A T G A T G C G C T A C A T G 1e-15-3.474e+0133.91%22.69%343.6bp (184.7bp)PDX1/Human-Islets(0.785)
More Information | Similar Motifs Found
motif file (matrix)
7 A G T C C G T A C A T G C T A G C T A G G C T A A G C T C G T A C G T A C G A T 1e-14-3.364e+0115.12%7.63%339.1bp (176.8bp)Pit1+1bp(Homeobox)/GCrat-Pit1-ChIP-Seq(GSE58009)/Homer(0.658)
More Information | Similar Motifs Found
motif file (matrix)
8 G A C T A G C T T A G C A G T C G A T C C T A G C A T G G T C A C T G A G C T A 1e-14-3.280e+0115.53%8.01%330.1bp (178.4bp)STAT1/MA0137.3/Jaspar(0.934)
More Information | Similar Motifs Found
motif file (matrix)
9 G T A C G T C A C A G T A G T C C A G T T C A G G A C T G C A T G T C A A G T C 1e-13-3.125e+0119.92%11.53%323.9bp (185.2bp)NeuroD1(bHLH)/Islet-NeuroD1-ChIP-Seq(GSE30298)/Homer(0.786)
More Information | Similar Motifs Found
motif file (matrix)
10 * C T G A T G A C G A C T A C T G A G T C A G T C T G A C A G C T T A G C A C G T 1e-11-2.723e+0110.42%4.89%401.2bp (181.9bp)THAP1/MA0597.1/Jaspar(0.690)
More Information | Similar Motifs Found
motif file (matrix)
11 * C A G T A T G C A G T C T C A G A C G T C G A T C G A T G T C A A G C T A C G T 1e-11-2.658e+019.30%4.18%345.8bp (176.9bp)BARHL2/MA0635.1/Jaspar(0.745)
More Information | Similar Motifs Found
motif file (matrix)
12 * A C G T A C G T A C T G A C T G A C G T A G T C A C G T A C T G A C T G A G T C A G T C C G T A 1e-11-2.589e+010.61%0.01%254.7bp (73.8bp)Hand1::Tcf3/MA0092.1/Jaspar(0.634)
More Information | Similar Motifs Found
motif file (matrix)
13 * A C T G A G C T A G T C C G T A C G A T C G A T C G T A T C G A A C G T A G T C 1e-11-2.549e+013.98%1.08%284.6bp (178.7bp)PH0073.1_Hoxc9/Jaspar(0.836)
More Information | Similar Motifs Found
motif file (matrix)
14 * A C G T A C G T A C G T A G T C G T A C A G T C G A C T A C G T C T G A C G T A A C T G A G T C 1e-10-2.487e+011.23%0.07%240.1bp (187.7bp)Stat4/MA0518.1/Jaspar(0.665)
More Information | Similar Motifs Found
motif file (matrix)
15 * A C T G C G T A A C T G A C T G C G A T A C G T A C G T A C T G A C G T A C T G A G T C C G T A 1e-10-2.433e+010.51%0.00%125.8bp (225.7bp)PB0145.1_Mafb_2/Jaspar(0.649)
More Information | Similar Motifs Found
motif file (matrix)
16 * A C T G A C T G C G T A A C G T A C G T C T A G C G T A C G T A C G T A A G T C A C G T A C G T 1e-10-2.347e+010.61%0.01%200.6bp (144.7bp)CHR(?)/Hela-CellCycle-Expression/Homer(0.673)
More Information | Similar Motifs Found
motif file (matrix)
17 * A C T G A C G T A C T G A G T C C G T A A C G T A C T G C G T A A C T G C G T A C G T A A G T C 1e-9-2.088e+010.51%0.00%126.5bp (86.4bp)PB0194.1_Zbtb12_2/Jaspar(0.633)
More Information | Similar Motifs Found
motif file (matrix)
18 * A C T G C G T A A C T G A C G T A C G T A G T C C G T A A G T C C G T A A G T C A C T G A C T G 1e-9-2.088e+010.51%0.01%156.3bp (32.3bp)Rbpj1(?)/Panc1-Rbpj1-ChIP-Seq(GSE47459)/Homer(0.634)
More Information | Similar Motifs Found
motif file (matrix)
19 * A G C T C G T A C T G A C G T A C G A T C A T G C T A G G A T C G C A T T A G C 1e-8-2.049e+017.35%3.37%416.8bp (184.1bp)PH0005.1_Barhl1/Jaspar(0.700)
More Information | Similar Motifs Found
motif file (matrix)
20 * G T A C A T C G A G T C A T C G A C T G A T C G A G T C A C G T A C T G A C T G 1e-8-1.910e+013.78%1.24%508.1bp (190.8bp)POL010.1_DCE_S_III/Jaspar(0.646)
More Information | Similar Motifs Found
motif file (matrix)
21 * G T C A C A G T C A T G C G T A G A T C G A T C A C T G C G A T C G A T G A C T 1e-8-1.906e+014.49%1.65%314.9bp (187.1bp)PB0049.1_Nr2f2_1/Jaspar(0.711)
More Information | Similar Motifs Found
motif file (matrix)
22 * A C T G A C T G A C T G A G T C C G T A C G T A C G T A A C T G A C T G A G T C C G T A A C G T 1e-8-1.887e+010.51%0.01%98.2bp (203.2bp)RXR(NR),DR1/3T3L1-RXR-ChIP-Seq(GSE13511)/Homer(0.655)
More Information | Similar Motifs Found
motif file (matrix)
23 * A G T C A G T C A G T C A C G T A C T G A C T G C G T A A C T G A C G T C G T A A C G T A G C T 1e-8-1.887e+010.51%0.01%87.9bp (199.2bp)ELF4/MA0641.1/Jaspar(0.581)
More Information | Similar Motifs Found
motif file (matrix)
24 * A C T G C G T A A G T C C G T A A G T C A C G T A C T G C G T A A C G T C T A G A C G T C G T A 1e-8-1.887e+010.51%0.01%79.6bp (117.3bp)Chop(bZIP)/MEF-Chop-ChIP-Seq(GSE35681)/Homer(0.618)
More Information | Similar Motifs Found
motif file (matrix)
25 * C G T A A G T C A C G T A C G T A G T C A G T C A G T C A G T C A C G T A C T G A G T C A C G T 1e-8-1.881e+010.41%0.00%85.4bp (52.8bp)PU.1(ETS)/ThioMac-PU.1-ChIP-Seq(GSE21512)/Homer(0.712)
More Information | Similar Motifs Found
motif file (matrix)
26 * A C T G A C G T A C G T A C T G C G T A A C T G A G T C A G T C C G T A A C T G A C T G A G T C 1e-8-1.881e+010.41%0.00%85.8bp (64.1bp)ZNF165(Zf)/WHIM12-ZNF165-ChIP-Seq(GSE65937)/Homer(0.653)
More Information | Similar Motifs Found
motif file (matrix)
27 * A C T G A C T G A C G T C G T A A C T G A C G T A G T C A C T G A C G T A C T G 1e-6-1.605e+010.41%0.01%112.2bp (109.2bp)ZBTB7A/MA0750.1/Jaspar(0.677)
More Information | Similar Motifs Found
motif file (matrix)
28 * A C T G A G T C A G T C A C T G C G T A A G T C C G T A A G T C A C T G A G T C 1e-6-1.605e+010.41%0.01%87.1bp (183.9bp)Ahr::Arnt/MA0006.1/Jaspar(0.697)
More Information | Similar Motifs Found
motif file (matrix)
29 * A C T G A C T G A G T C C G T A A C T G A G T C A G T C A C G T A C T G A C G T A C G T A G T C 1e-6-1.605e+010.41%0.00%187.3bp (14.1bp)PR(NR)/T47D-PR-ChIP-Seq(GSE31130)/Homer(0.620)
More Information | Similar Motifs Found
motif file (matrix)
30 * A C G T A G T C A C G T A G T C C G T A A C T G A C G T A G T C A G T C C G T A A G T C A C G T 1e-6-1.605e+010.41%0.00%92.9bp (134.1bp)Srebp2(bHLH)/HepG2-Srebp2-ChIP-Seq(GSE31477)/Homer(0.625)
More Information | Similar Motifs Found
motif file (matrix)
31 * A C G T C G T A C T A G C G T A A G T C A C T G C G A T A G T C A G T C C G T A 1e-6-1.559e+010.72%0.04%184.6bp (182.8bp)PB0143.1_Klf7_2/Jaspar(0.632)
More Information | Similar Motifs Found
motif file (matrix)
32 * C G T A A G T C A C T G A C T G A G T C A C G T A C G T A C G T A G T C A C T G 1e-6-1.445e+010.41%0.01%179.5bp (223.8bp)PB0136.1_IRC900814_2/Jaspar(0.676)
More Information | Similar Motifs Found
motif file (matrix)
33 * T C A G T G A C G T C A C T G A C A T G T G A C G T A C C G A T T C G A T C G A 1e-6-1.432e+013.68%1.44%246.7bp (176.1bp)Barhl1/MA0877.1/Jaspar(0.586)
More Information | Similar Motifs Found
motif file (matrix)
34 * A G T C A C T G A C T G A C T G A C G T A C T G A G T C A C G T 1e-5-1.325e+0136.47%29.52%345.9bp (188.2bp)TCF3/MA0522.2/Jaspar(0.678)
More Information | Similar Motifs Found
motif file (matrix)
35 * A T G C T A C G A T G C C A T G A T G C T A G C A T G C G A T C A T G C A C T G 1e-5-1.176e+0110.42%6.68%356.7bp (188.9bp)KLF16/MA0741.1/Jaspar(0.833)
More Information | Similar Motifs Found
motif file (matrix)
36 * A G T C A C G T A G T C C G T A A G T C C G A T C T G A A C G T 1e-4-1.109e+0117.88%13.13%333.2bp (183.2bp)PH0152.1_Pou6f1_2/Jaspar(0.676)
More Information | Similar Motifs Found
motif file (matrix)
37 * A G T C A T G C T A G C A T C G A T C G T G A C A C T G T G A C 1e-4-1.053e+0123.49%18.30%404.5bp (193.9bp)POL013.1_MED-1/Jaspar(0.647)
More Information | Similar Motifs Found
motif file (matrix)
38 * A C G T A C T G C G T A A C T G A C T G A G C T A C T G A C G T A C T G C G T A A G T C A C G T 1e-2-5.781e+000.20%0.01%88.4bp (129.6bp)TBX15/MA0803.1/Jaspar(0.808)
More Information | Similar Motifs Found
motif file (matrix)
39 * C G T A A G T C C G T A A G T C C G T A A G T C C G T A A G T C A C T G A C T G A C T G A C G T 1e-1-3.230e+000.10%0.01%212.0bp (185.9bp)PB0130.1_Gm397_2/Jaspar(0.666)
More Information | Similar Motifs Found
motif file (matrix)
40 * A C T G A C G T A C T G C G T A A C T G A C T G A G T C A C T G A C G T A C T G A C T G C G T A 1e-1-3.230e+000.10%0.01%62.5bp (161.8bp)PB0180.1_Sp4_2/Jaspar(0.648)
More Information | Similar Motifs Found
motif file (matrix)
41 * A G T C A C T G A C T G A C T G A C G T A G T C A C G T C G T A 1e-1-3.013e+0011.03%9.41%337.0bp (184.1bp)PB0153.1_Nr2f2_2/Jaspar(0.684)
More Information | Similar Motifs Found
motif file (matrix)

Footprint of ISRE motif

Rscript code/CYT_runFootprintAnalysis.R
load(file.path(out_dir, "FOOT_ISRE_neo_distal_cov.rda"))
cov$condition <- gsub("endoc_", "", gsub(".5Tag", "", cov$condition))

df <- XML::readHTMLTable(file.path(out_dir, "HOMER_IREs_endoc_neo_distal_mask/homerResults.html"))[[1]]
df <- df[1,]

stats <- cov %>%
  group_by(condition) %>%
  summarise(median=median(tags5),
            max=max(tags5),
            cent=max/2)

stats$width <- nchar(as.character(df$Motif))/4

num <- 1179

foot.neo <- 
  ggplot(cov,
       aes(Distance, tags5))+
  geom_tile(data=stats,
            aes(x=0, y=cent,
                width=width, height=max),
            color=NA, fill="grey") +
  geom_hline(data=stats,
             aes(yintercept=median),
             lty=2, color="dark grey") +
  geom_line(aes(color=condition), lwd=0.7) +
  scale_color_manual(values=pals$treatment,
                     name="Treatment", labels=function(x) toupper(x)) +
  annotate("text", 100, 0, label=num) +
  ylab("Normalized cleavage\nevents per bp") +
  ggtitle("Neo enhancers")

foot.neo

Version Author Date
a825cc2 Mireia Ramos 2020-08-11
load(file.path(out_dir, "FOOT_ISRE_primed_distal_cov.rda"))
cov$condition <- gsub("endoc_", "", gsub(".5Tag", "", cov$condition))

df <- XML::readHTMLTable(file.path(out_dir, "HOMER_IREs_endoc_primed_distal_mask/homerResults.html"))[[1]]
df <- df[1,]

stats <- cov %>%
  group_by(condition) %>%
  summarise(median=median(tags5),
            max=max(tags5),
            cent=max/2)

stats$width <- nchar(as.character(df$Motif))/4

num <- 403

foot.prim <- 
  ggplot(cov,
       aes(Distance, tags5))+
  geom_tile(data=stats,
            aes(x=0, y=cent,
                width=width, height=max),
            color=NA, fill="grey") +
  geom_hline(data=stats,
             aes(yintercept=median),
             lty=2, color="dark grey") +
  geom_line(aes(color=condition), lwd=0.7) +
  scale_color_manual(values=pals$treatment,
                     name="Treatment", labels=function(x) toupper(x)) +
  annotate("text", 100, 0, label=num) +
  ylab("Normalized cleavage\nevents per bp") +
  ggtitle("Primed enhancers")

foot.prim

Version Author Date
a825cc2 Mireia Ramos 2020-08-11

Overlap Primed IREs with islet regulome ChIP-seq TF data

  • Overlap with peaks
load("~/data/isletRegulome_TF.rda")

load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_subgroup.rda")
re <- re[re$atac.annotation=="Distal",]
type <- c("Neo IRE", "Primed IRE", "SRE")
re <- re[re$subgroup2 %in% type,]

## Find overlaps
ols <- findOverlaps(re, tfs.islet)

tfs <- cbind(data.frame(mcols(re))[queryHits(ols),c(1,18)],
             data.frame(mcols(tfs.islet))[subjectHits(ols),])
tfs <- tfs[,-3]

no <- data.frame(mcols(re))[!(re$atac.GeneID %in% tfs$atac.GeneID),c(1,18)]
no$TF <- "None"

tfs <- rbind(tfs, no)

tfs$TF <- factor(tfs$TF,
                 levels=rev(unique(tfs$TF)[c(3,2,1,4,5,6)]))

save(tfs,
     file=file.path(out_dir, "IREs_fc1_padj0.05_isletSpecific_TFs_peaks.rda"))
load(file.path(out_dir, "IREs_fc1_padj0.05_isletSpecific_TFs_peaks.rda"))

bar.tfs <- 
  ggplot(tfs,
       aes(subgroup2, ..count..)) +
  geom_bar(aes(fill=TF), position="fill",
           color="black") +
  scale_fill_manual(values = c("FOXA2"="plum3", "MAFB"="steelblue3", "NKX2.2"="olivedrab3",
                               "NKX6.1"="sienna3", "PDX1"="khaki3", 
                               "None" = "grey")) +
  coord_flip() +
  scale_y_continuous(labels=function(x) paste0(x*100, "%"),
                     name="Percentage of regions") +
  scale_x_discrete(labels=function(x) paste0(x, "s")) +
  theme(legend.position="top",
        axis.title.y=element_blank()) +
  guides(fill=guide_legend(nrow=2, reverse=TRUE))

bar.tfs

Version Author Date
a825cc2 Mireia Ramos 2020-08-11
  • Density of reads
## Create SAF
load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_subgroup.rda")
re <- re[re$atac.annotation=="Distal",]
type <- c("Neo IRE", "Primed IRE", "SRE")
re <- re[re$subgroup2 %in% type,]

re <- resize(re, width=2e3, fix="center")
re.bin <- unlist(tile(re, n=201))
pos <- seq(-1000, 1000, by=10)
re.bin$position <- rep(pos, length(re))
re.bin$atac.GeneID <- rep(re$atac.GeneID, each=length(pos))
re.bin$GeneID <- paste0(re.bin$atac.GeneID, "_", re.bin$position)

saf <- data.frame(re.bin)[,c(1:3,6:8)]
colnames(saf)[1:3] <- c("Chr", "Start", "End")
saf$Strand <- "+"

files <- list.files("~/data/TFsIslet_hg19/BAM",
                    pattern="*.bam$",
                    full.names=TRUE)
files <- files[!grepl("CTCF", files)]

counts <- Rsubread::featureCounts(files,
                                  annot.ext=saf,
                                  allowMultiOverlap = TRUE,
                                  nthreads=6)

names <- pipelineNGS::getNameFromPath(files, suffix=".bam")
colnames(counts$counts) <- names

mat <- preprocessCore::normalize.quantiles(counts$counts)
colnames(mat) <- names
rownames(mat) <- rownames(counts$counts)

##
df <- data.frame(mat)
df$GeneID <- rownames(mat)

df <- dplyr::left_join(df,
                       saf)
df <- dplyr::left_join(df,
                       data.frame(mcols(re))[,c(1,18)])

df <- reshape2::melt(df,
                     id.vars=6:13,
                     value.vars=1:5,
                     variable.name="TF",
                     value.name="counts")

df <- df[,c(5,8:10)]

tfs <- df %>%
  group_by(position, TF, subgroup2) %>%
  summarise(mean=mean(counts, na.rm=T))

save(tfs, file=file.path(out_dir, "IREs_fc1_padj0.05_isletSpecific_TFs_bam.rda"))
load(file.path(out_dir, "IREs_fc1_padj0.05_isletSpecific_TFs_bam.rda"))

tfs.dens <- 
  ggplot(tfs,
       aes(position, mean)) +
  geom_smooth(aes(group=subgroup2, color=subgroup2)) +
  scale_color_manual(values=c("olivedrab3", "#80E8C0", "dark grey"),
                     name="") +
  scale_y_continuous(breaks=scales::pretty_breaks(),
                     name="Islet TFs read counts") +
  coord_cartesian(xlim=c(-500, 500)) + 
  xlab("Position relative to peak center (bp)") +
  ggtitle("Islet-specific TFs occupancy") +
  theme(legend.position=c(0.7, 0.9))

tfs.dens

Version Author Date
a825cc2 Mireia Ramos 2020-08-11

Co-localization of inflammatory with islet-specific TFs at Primed IREs

Rscript code/CYT_colocalizationMotifs.R
load(file.path(out_dir, "MOT-COLOC_df.rda"))
mot.coloc <- mot.coloc[grep("primed", mot.coloc$type),]

infl <- c("ISRE", "STAT")
isl <- c("HNF", "PDX", "NEURO", "MAF", "NKX")

vert <- data.frame(Var1=unique(mot.coloc$Var1),
                   TF_type="Other",
                   stringsAsFactors = FALSE)
vert$TF_type[sapply(infl, grep, toupper(vert$Var1))] <- "Inflammatory"
vert$TF_type[sapply(isl, grep, toupper(vert$Var1))] <- "Islet"
vert <- dplyr::left_join(vert, unique(mot.coloc[mot.coloc$Var1==mot.coloc$Var2,c(1,4)]))

mot.coloc <- mot.coloc[order(mot.coloc$percentage),]

igraph <- graph_from_data_frame(mot.coloc,
                                vertices=vert)
cols <- c("hotpink3", "goldenrod3", "grey")

graphPrimed <-
  ggraph(igraph, layout = 'linear', circular = TRUE) + 
    geom_edge_arc(aes(edge_width=percentage, color=percentage), alpha=0.8) +
    scale_edge_width_continuous(guide=FALSE, limits=c(0,30)) +
    scale_edge_colour_gradient(low="white", high="grey15",
                               name="% Overlap", limits=c(0,30)) +
    geom_node_label(aes(label = name, fill=TF_type, size=overlaps)) +
    scale_fill_manual(values=cols, name="Motif Type") +
    scale_size_continuous(range=c(3,6), guide=FALSE) + 
    theme_void() +
    xlim(-1.3, 1.3) + ylim(-1.15, 1.15) +
    theme(legend.position="bottom") +
  ggtitle("Primed enhancers")

graphPrimed

Version Author Date
a825cc2 Mireia Ramos 2020-08-11
load(file.path(out_dir, "MOT-COLOC_df.rda"))
mot.coloc <- mot.coloc[grep("SRE", mot.coloc$type),]

infl <- c("ISRE", "STAT")
isl <- c("HNF", "PDX", "NEURO", "MAF", "NKX")

vert <- data.frame(Var1=unique(mot.coloc$Var1),
                   TF_type="Other",
                   stringsAsFactors = FALSE)
vert$TF_type[sapply(infl, grep, toupper(vert$Var1))] <- "Inflammatory"
vert$TF_type[sapply(isl, grep, toupper(vert$Var1))] <- "Islet"
vert <- dplyr::left_join(vert, unique(mot.coloc[mot.coloc$Var1==mot.coloc$Var2,c(1,4)]))

mot.coloc <- mot.coloc[order(mot.coloc$percentage),]

igraph <- graph_from_data_frame(mot.coloc,
                                vertices=vert)

cols <- c("hotpink3", "goldenrod3", "grey")

graphSRE <-
  ggraph(igraph, layout = 'linear', circular = TRUE) + 
    geom_edge_arc(aes(edge_width=percentage, color=percentage), alpha=0.8) +
    scale_edge_width_continuous(guide=FALSE, limits=c(0,30)) +
    scale_edge_colour_gradient(low="white", high="grey15",
                               name="% Overlap", limits=c(0,30)) +
    geom_node_label(aes(label = name, fill=TF_type, size=overlaps)) +
    scale_fill_manual(values=cols, name="Motif Type") +
    scale_size_continuous(range=c(3,6), guide=FALSE) + 
    theme_void() +
    xlim(-1.3, 1.3) + ylim(-1.15, 1.15) +
    theme(legend.position="bottom") +
  ggtitle("SREs enhancers")

graphSRE

Version Author Date
a825cc2 Mireia Ramos 2020-08-11
  • Fisher test
load(file.path(out_dir, "MOT-COLOC_df_rmRedundant.rda"))

comb <- unique(mot.coloc[,c(1:2)])

## fisher test
comb$estimate <- NA
comb$p.value <- NA
comb$confIntMin <- NA
comb$confIntMax <- NA

for (i in 1:nrow(comb)) {
  sel <- mot.coloc[mot.coloc$Var1==comb$Var1[i] &
                     mot.coloc$Var2==comb$Var2[i],]
  
  overlapping.primed <- sel$overlaps[grepl("primed", sel$type)]
  overlapping.stable <- sel$overlaps[grepl("SREs", sel$type)]
  nonOverlapping.primed <- sel$total[grepl("primed", sel$type)] - overlapping.primed
  nonOverlapping.stable <- sel$total[grepl("SREs", sel$type)] - overlapping.stable
    
  mat.test <- matrix(c(overlapping.primed, overlapping.stable,
                       nonOverlapping.primed, nonOverlapping.stable),
                     byrow=T, ncol=2)
    
  test <- fisher.test(mat.test)
  
  comb$estimate[i] <- test$estimate
  comb$p.value[i] <- test$p.value
  comb$confIntMin[i] <- test$conf.int[1]
  comb$confIntMax[i] <- test$conf.int[2]
}

comb$padj <-  p.adjust(comb$p.value, method="fdr")

comb$name <- paste(comb$Var1, "+", comb$Var2)

comb.sel <- comb[comb$padj<=0.001,]
comb.sel <- comb.sel[order(comb.sel$estimate),]

comb.sel$name <- factor(comb.sel$name,
                        levels=unique(comb.sel$name))

comb.sel$TFtype1 <- "Other"
comb.sel$TFtype1[unlist(sapply(infl, grep, comb.sel$Var1))] <- "Inflammatory"
comb.sel$TFtype1[unlist(sapply(isl, grep, comb.sel$Var1))] <- "Islet"

comb.sel$TFtype2 <- "Other"
comb.sel$TFtype2[unlist(sapply(infl, grep, comb.sel$Var2))] <- "Inflammatory"
comb.sel$TFtype2[unlist(sapply(isl, grep, comb.sel$Var2))] <- "Islet"

comb.sel$combined <- FALSE
comb.sel$combined[comb.sel$TFtype1=="Inflammatory" & comb.sel$TFtype2=="Islet"] <- TRUE
comb.sel$combined[comb.sel$TFtype2=="Inflammatory" & comb.sel$TFtype1=="Islet"] <- TRUE

comb.sel$color <- "black"
comb.sel$color[comb.sel$combined] <- "violetred3"

fisher.coloc <-
  ggplot(comb.sel, aes(estimate, name)) + 
  geom_point(size=3, color=comb.sel$color) +
  geom_segment(aes(x=confIntMin, xend=confIntMax,
                   y=name, yend=name), color=comb.sel$color) +
  geom_vline(xintercept=1, color="dark red") +
  # scale_color_manual(values=pals$tfs,
  #                    name="Motif Pair Type",
  #                    labels=function(x) Hmisc::capitalize(x)) +
  # scale_x_log10() +
  xlab("Odds Ratio") +
  ylab("Motif Pairs") +
  theme(panel.grid.major.y=element_line(color="grey", linetype=2)) +
  guides(color=guide_legend(nrow=3))

fisher.coloc

Version Author Date
a825cc2 Mireia Ramos 2020-08-11

DNA methylation

Rscript code/CYT_runMethylationAnalysis.R
betas <- read.csv("../data/CYT/methylationEPIC/reports/differential_methylation_data/diffMethTable_site_cmp2.csv",
                  stringsAsFactors = F)

betas <- regioneR::toGRanges(betas[,c(3,4,4,2,6:18)])

## Overlap with IREs
load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_subgroup.rda")
re <- re[grep("Distal", re$atac.annotation),]

ols <- findOverlaps(re,
                    betas)

betas.re <- cbind(data.frame(re)[queryHits(ols),c(6,21:23)],
                  data.frame(betas)[subjectHits(ols),c(6:19)])

betas.re$diffmeth.p.adj.fdr.2 <- p.adjust(betas.re$diffmeth.p.val, method="fdr")

length(unique(betas.re$cgid[betas.re$mean.diff>0.2 & betas.re$diffmeth.p.adj.fdr<=0.05]))
length(unique(betas.re$cgid[betas.re$mean.diff>0.2 & betas.re$diffmeth.p.adj.fdr.2<=0.05]))

betas.re$methType <- "stable"
betas.re$methType[betas.re$mean.diff>0.2 & betas.re$diffmeth.p.adj.fdr<=0.05] <- "lost"

save(betas.re,
     file=file.path(out_dir, "METH_betas_REs_endoc.rda"))

betas <- read.csv("../data//CYT/methylationEPIC/reports/differential_methylation_data/diffMethTable_site_cmp2.csv",
                  stringsAsFactors = F)

betas <- betas[,c(3,4,4,2,6:18)]

length(unique(betas$cgid[betas$mean.diff>0.2 & betas$diffmeth.p.adj.fdr<=0.05]))

betas$methType <- "stable"
betas$methType[betas$mean.diff>0.2 & betas$diffmeth.p.adj.fdr<=0.05] <- "lost"

save(betas,
     file=file.path(out_dir, "METH_betas_all_endoc.rda"))
load(file.path(out_dir, "METH_betas_REs_endoc.rda"))

betas.distr <- unique(betas.re[!grepl("Other", betas.re$subgroup2),c(1,4,5:7)])
betas.distr <- betas.distr[!grepl("SRE", betas.distr$subgroup2),]
betas.distr <- betas.distr[!is.na(betas.distr$subgroup2),]

betas.distr <- reshape2::melt(betas.distr,
                              id.vars=1:3,
                              value.vars=4:5,
                              value.name="beta",
                              variable.name="treatment")
betas.distr$treatment <- gsub("mean.", "", tolower(as.character(betas.distr$treatment)))

sum <- betas.distr %>%
  group_by(subgroup2, treatment) %>%
  summarise(mean=mean(beta),
            median=median(beta))

test <- betas.distr %>%
  group_by(subgroup2) %>%
  summarise(pval=wilcox.test(beta[grep("ctrl", treatment)],
                             beta[grep("cyt", treatment)],
                             paired=F)$p.value)
test$sym <- ""
test$sym[test$pval<0.05] <- "*"
test$sym[test$pval<0.01] <- "**"
test$sym[test$pval<0.001] <- "***"

violin <- 
  ggplot(betas.distr,
       aes(treatment, beta)) +
  geom_violin(aes(fill=treatment)) +
  geom_point(data=sum, 
             aes(treatment, median), size=3) +
  scale_fill_manual(values=pals$treatment,
                    name="Treatment", labels=function(x) toupper(x)) +
  geom_text(data=test,
            aes(x=1.5,
                y=0.9, label=sym), size=8) +
  facet_wrap(~gsub("IRE", "enhancers", subgroup2)) +
  scale_y_continuous(name=expression("DNA methylation "*beta*"-value"),
                     limits=c(0,1), expand=c(0,0)) +
  theme(legend.position="bottom",
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.line.x=element_blank(),
        strip.background=element_rect(fill="white", color="black", linetype=1,
                                      size=0.5))

violin

Version Author Date
a825cc2 Mireia Ramos 2020-08-11
load(file.path(out_dir, "METH_betas_all_endoc.rda"))

volc <- ggplot(betas, 
       aes(-mean.diff, -log10(diffmeth.p.adj.fdr))) +
  geom_point(aes(color=methType)) +
  scale_color_manual(values=c(stable="grey", lost="tomato3")) +
  geom_vline(xintercept=c(-0.2, 0.2), lty=2, color="dark grey") +
  geom_hline(yintercept=-log10(0.05), lty=2, color="dark grey") +
  xlab(expression(beta*cyt-beta*ctrl)) +
  ylab(expression("DNA methlyation -log"[10]*" FDR")) +
  theme(legend.position="none")

volc

Version Author Date
a825cc2 Mireia Ramos 2020-08-11
load(file.path(out_dir, "METH_betas_REs_endoc.rda"))

bar <- 
  ggplot(betas.re,#[!grepl("Other", betas.re$subgroup2),],
       aes(methType, ..count..)) +
  geom_bar(aes(fill=subgroup2), position="fill", color="black") +
  scale_fill_manual(values=pals$re,
                    name="RE type",
                    labels=function(x) paste(gsub(" IRE", "", x), "enh")) +
  scale_y_continuous(name="Percentage of CpGs",
                     labels=function(x) paste0(x*100, "%")) +
  scale_x_discrete(name="DNA methylation change",
                   labels=c("Demethylated", "Stable")) +
  theme(axis.text.x=element_text(angle=20, hjust=1))

bar

Version Author Date
a825cc2 Mireia Ramos 2020-08-11

sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.1 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_ES.UTF-8        LC_COLLATE=C              
 [5] LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=es_ES.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] ggraph_2.0.3         igraph_1.2.5         dplyr_1.0.1         
 [4] kableExtra_1.1.0     cowplot_1.0.0        ggplot2_3.3.2       
 [7] GenomicRanges_1.41.5 GenomeInfoDb_1.25.8  IRanges_2.23.10     
[10] S4Vectors_0.27.12    BiocGenerics_0.35.4  workflowr_1.6.2     

loaded via a namespace (and not attached):
 [1] viridis_0.5.1          httr_1.4.2             tidyr_1.1.1           
 [4] splines_4.0.2          tidygraph_1.2.0        viridisLite_0.3.0     
 [7] GenomeInfoDbData_1.2.3 yaml_2.2.1             ggrepel_0.8.2         
[10] lattice_0.20-41        pillar_1.4.6           backports_1.1.8       
[13] glue_1.4.1             digest_0.6.25          promises_1.1.1        
[16] XVector_0.29.3         polyclip_1.10-0        rvest_0.3.6           
[19] colorspace_1.4-1       Matrix_1.2-18          htmltools_0.5.0       
[22] httpuv_1.5.4           plyr_1.8.6             XML_3.99-0.5          
[25] pkgconfig_2.0.3        bookdown_0.20          zlibbioc_1.35.0       
[28] purrr_0.3.4            scales_1.1.1           webshot_0.5.2         
[31] tweenr_1.0.1           whisker_0.4            later_1.1.0.1         
[34] ggforce_0.3.2          git2r_0.27.1           tibble_3.0.3          
[37] mgcv_1.8-31            generics_0.0.2         farver_2.0.3          
[40] ellipsis_0.3.1         withr_2.2.0            magrittr_1.5          
[43] crayon_1.3.4           evaluate_0.14          fs_1.5.0              
[46] nlme_3.1-148           MASS_7.3-51.6          xml2_1.3.2            
[49] tools_4.0.2            hms_0.5.3              lifecycle_0.2.0       
[52] stringr_1.4.0          munsell_0.5.0          compiler_4.0.2        
[55] rlang_0.4.7            grid_4.0.2             RCurl_1.98-1.2        
[58] rstudioapi_0.11        bitops_1.0-6           labeling_0.3          
[61] rmarkdown_2.3          gtable_0.3.0           reshape2_1.4.4        
[64] graphlayouts_0.7.0     R6_2.4.1               gridExtra_2.3         
[67] knitr_1.29             rprojroot_1.3-2        readr_1.3.1           
[70] stringi_1.4.6          Rcpp_1.0.5             vctrs_0.3.2           
[73] tidyselect_1.1.0       xfun_0.16