Ramos-Rodríguez, M., Raurell-Vila, H., Colli, M.L. et al. The impact of proinflammatory cytokines on the β-cell regulatory landscape provides insights into the genetics of type 1 diabetes. Nat Genet. 51, 1588–1595 (2019) https://doi.org/10.1038/s41588-019-0524-6
Contents: Analyses and figures contained in this document correspond to the following figures/sections of the original publication:
Run Rscript for generating the necessary quality control measures.
Rscript code/CYT_QC_ATAC.R
load("../data/CYT/ATAC/QC/ATAC_stats.rda")
load("../data/CYT/ATAC/QC/QC_scores.rda")
stats <- dplyr::left_join(stats, txt)
lib <-
ggplot(stats,
aes("Lib Size",
sampleID)) +
geom_tile(aes(fill=total_reads),
color="black", size=1) +
geom_text(aes(label=round(total_reads/1e6, 1))) +
scale_fill_gradient(low="white",
high="purple3",
limits=c(0, max(stats$total_reads)),
breaks=c(0, max(stats$total_reads)),
labels=c("0", "Max"),
name="Library size (x10^6)") +
guides(fill=guide_colourbar(ticks=FALSE,
frame.colour = "black",
frame.linewidth = 1)) +
theme(axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title = element_blank())
align <-
ggplot(stats,
aes("% Align",
sampleID)) +
geom_tile(aes(fill=alignment_rate),
color="black", size=1) +
geom_text(aes(label=round(alignment_rate, 1))) +
scale_fill_gradient(low="white",
high="darkorange3",
limits=c(0, max(stats$alignment_rate)),
breaks=c(0, max(stats$alignment_rate)),
labels=c(0, "Max"),
name="% alignment") +
guides(fill=guide_colourbar(ticks=FALSE,
frame.colour = "black",
frame.linewidth = 1)) +
theme(axis.text.y=element_blank(),
axis.title=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
plot.margin=margin(0,0,0,0, unit='pt'))
mit <-
ggplot(stats,
aes("% Mito",
sampleID)) +
geom_tile(aes(fill=chrM_rate),
color="black", size=1) +
geom_text(aes(label=round(chrM_rate, 1))) +
scale_fill_gradient(low="dodgerblue3",
high="white",
limits=c(0, max(stats$chrM_rate)),
breaks=c(0, max(stats$chrM_rate)),
labels=c("0", "Max"),
name="% Mitochondrial") +
guides(fill=guide_colourbar(ticks=FALSE,
frame.colour = "black",
frame.linewidth = 1)) +
theme(axis.text.y=element_blank(),
axis.title=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
plot.margin=margin(0,0,0,0, unit='pt'))
nsc <-
ggplot(stats,
aes("NSC",
sampleID)) +
geom_tile(aes(fill=NSC),
color="black", size=1) +
geom_text(aes(label=round(NSC, 1))) +
scale_fill_gradient(low="white",
high="indianred4",
limits=c(1, max(stats$NSC)),
breaks=c(1, max(stats$NSC)),
labels=c("1", "Max"),
name="NSC") +
guides(fill=guide_colourbar(ticks=FALSE,
frame.colour = "black",
frame.linewidth = 1)) +
theme(axis.text.y=element_blank(),
axis.title=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
plot.margin=margin(0,0,0,0, unit='pt'))
rsc <-
ggplot(stats,
aes("RSC",
sampleID)) +
geom_tile(aes(fill=RSC),
color="black", size=1) +
geom_text(aes(label=round(RSC, 1))) +
scale_fill_gradient(low="white",
high="turquoise4",
limits=c(0, max(stats$RSC)),
breaks=c(0, max(stats$RSC)),
labels=c("0", "Max"),
name="RSC") +
guides(fill=guide_colourbar(ticks=FALSE,
frame.colour = "black",
frame.linewidth = 1)) +
theme(axis.text.y=element_blank(),
axis.title=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
plot.margin=margin(0,0,0,0, unit='pt'))
## Get legend -----------------------
lib.leg <- get_legend(lib)
al.leg <- get_legend(align)
mit.leg <- get_legend(mit)
nsc.leg <- get_legend(nsc)
rsc.leg <- get_legend(rsc)
leg <- plot_grid(lib.leg,
al.leg,
mit.leg,
nsc.leg,
rsc.leg,
nrow=1)
## Create heatmap --------------------------
heat <- plot_grid(lib + theme(legend.position="none"),
align + theme(legend.position="none"),
mit + theme(legend.position="none"),
nsc + theme(legend.position="none"),
rsc + theme(legend.position="none"),
nrow=1,
align='h',
rel_widths=c(0.30, rep(0.1750, 4))
)
## Final plot ----------------
qc <-
plot_grid(heat, leg, nrow=2, rel_heights = c(0.7, 0.3))
qc
load("../data/CYT/ATAC/QC/ATAC_tss_enrichment.rda")
# tss <- ungroup(tss)
# save(tss, file="../data/CYT/ATAC/QC/ATAC_tss_enrichment.rda")
tss$dataset <- factor(tss$dataset, levels=c("TSS annotation", "Random control"))
tss$group <- gsub("[[:digit:]]*_", "_", tss$sample)
tss <- tss %>%
group_by(dataset, group, Position) %>%
summarise(mean=mean(mean))
tss.plot <-
ggplot(tss,
aes(Position, mean)) +
geom_line(aes(group=dataset, color=dataset), lwd=0.7) +
scale_color_manual(values=c("seagreen4", "goldenrod3"), name="Dataset") +
facet_wrap(~group, scales="free_y") +
theme(legend.position="top") +
ylab("Mean ATAC-seq read counts") + xlab("Position relative to TSS (bp)")
tss.plot
load("../data/CYT/ATAC/QC/ATAC_noise.rda")
# stats <- ungroup(stats)
# save(stats, file="../data/CYT/ATAC/QC/ATAC_noise.rda")
stats$mean.errorbar <- stats$mean
stats <- stats[order(rev(stats$Annotation)),]
stats <- stats[stats$Annotation!="Unassigned",] %>%
group_by(group) %>%
mutate(cumsum=cumsum(mean))
noise <-
ggplot(stats[stats$Annotation!="Unassigned",],
aes(group, mean)) +
geom_bar(aes(fill=Annotation), stat="identity",
position="stack",
color="black", lwd=0.7) +
geom_errorbar(aes(ymin=cumsum, ymax=cumsum+sd,
group=Annotation),
width=.3, lwd=0.5) +
scale_fill_manual(values=c("violetred", "dark orange")) +
scale_y_continuous(name="Percentage of reads in peaks (%)") +
theme(axis.text.x=element_text(angle=30, hjust=1),
legend.position="top",
axis.title.x=element_blank())
noise
Rscript code/QC_CORR_genome.R data/CYT/ATAC/BAM/ data/CYT/ATAC/QC/
get_upper_tri <- function(cormat){
cormat[lower.tri(cormat)]<- NA
return(cormat)
}
get_lower_tri <- function(cormat){
cormat[upper.tri(cormat)]<- NA
return(cormat)
}
load("../data/CYT/ATAC/QC/COR_10kb_norm.rda")
mat <- counts
cor.mat.ctrl <- get_lower_tri(cor(mat[,grep("ctrl", colnames(mat))], method="pearson"))
ctrl.m <- reshape2::melt(cor.mat.ctrl, na.rm=TRUE)
c.ctrl.atac <-
ggplot(data = ctrl.m, aes(Var2, Var1, fill = value))+
geom_tile(color = "black", lwd=0.7)+
scale_fill_gradient2(low = "white", high = "slateblue4", mid = "skyblue2",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Pearson\nCorrelation") +
geom_text(aes(label=round(value, 2)), size=3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1),
axis.title=element_blank(),
panel.grid.major = element_blank(),
legend.position="none") +
coord_fixed() +
ggtitle("ATAC-seq genome-wide correlation")
cor.mat.cyt <- get_upper_tri(cor(mat[,grep("cyt", colnames(mat))], method="pearson"))
cyt.m <- reshape2::melt(cor.mat.cyt, na.rm=TRUE)
c.cyt.atac <-
ggplot(data = cyt.m, aes(Var2, Var1, fill = value))+
geom_tile(color = "black", lwd=0.7)+
scale_fill_gradient2(low = "white", high = "slateblue4", mid = "skyblue2",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Pearson\nCorrelation") +
geom_text(aes(label=round(value, 2)), size=3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1),
axis.title=element_blank(),
legend.justification = c(1, 0),
legend.position = c(0.6, 0.75),
legend.direction = "horizontal",
panel.grid.major = element_blank()) +
coord_fixed() +
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
cor.rep.atac <- plot_grid(c.ctrl.atac, c.cyt.atac)
cor.rep.atac
cd data/CYT/ATAC
Rscript ../../code/CYT_diffAnalysis_DESeq2_chrom.R -f 1 -q 0.05 -b TRUE -s hi -e ATAC
Rscript ../../code/CYT_diffAnalysis_DESeq2_chrom.R -f 1 -q 0.05 -b TRUE -s endoc -e ATAC
load("../data/CYT/ATAC/diffAnalysis/ATAC_endoc_fc1_padj0.05_GRangesBatch.rda")
table(res.gr$type, res.gr$annotation) %>%
knitr::kable(format="html",
format.args = list(big.mark = ","),
caption = "Number of regions classified according to significance and distance to TSS in ATAC-seq EndoC samples.") %>%
kable_styling(full_width = FALSE) %>%
add_header_above(c("Region type" = 1, "Location respect TSS" = 2))
Distal | Promoter | |
---|---|---|
gained | 12,240 | 267 |
lost | 679 | 9 |
stable | 171,672 | 13,354 |
volc_ec <-
ggplot(data.frame(res.gr),
aes(log2FoldChange, -log10(padj))) +
geom_point(aes(color=type), size=0.4) +
scale_color_manual(values=pals$differential,
name="RE type") +
geom_vline(xintercept=c(1,-1), linetype=2, color="dark grey") +
geom_hline(yintercept=-log10(0.05), linetype=2, color="dark grey") +
xlab(expression(Log[2]*" fold-change")) + ylab(expression(-Log[10]*" FDR adjusted P")) +
ggtitle(expression("ATAC-seq EndoC-"*beta*H1)) +
theme(legend.position="none")
load("../data/CYT/ATAC/diffAnalysis/ATAC_hi_fc1_padj0.05_GRangesBatch.rda")
table(res.gr$type, res.gr$annotation) %>%
knitr::kable(format="html",
format.args = list(big.mark = ","),
caption = "Number of regions classified according to significance and distance to TSS in ATAC-seq HI samples.") %>%
kable_styling(full_width = FALSE) %>%
add_header_above(c("Region type" = 1, "Location respect TSS" = 2))
Distal | Promoter | |
---|---|---|
gained | 14,505 | 462 |
lost | 4,544 | 115 |
stable | 295,383 | 18,090 |
volc_hi <-
ggplot(data.frame(res.gr),
aes(log2FoldChange, -log10(padj))) +
geom_point(aes(color=type), size=0.4) +
scale_color_manual(values=pals$differential,
name="RE type") +
geom_vline(xintercept=c(1,-1), linetype=2, color="dark grey") +
geom_hline(yintercept=-log10(0.05), linetype=2, color="dark grey") +
xlab(expression(Log[2]*" fold-change")) + ylab(expression(-Log[10]*" FDR adjusted P")) +
ggtitle(expression("ATAC-seq HI")) +
theme(legend.position="none")
plot_grid(volc_ec,
volc_hi,
ncol=2)
## Load regions
load("../data/CYT/ATAC/diffAnalysis/ATAC_endoc_fc1_padj0.05_GRangesBatch.rda")
ec <- res.gr
load("../data/CYT/ATAC/diffAnalysis/ATAC_hi_fc1_padj0.05_GRangesBatch.rda")
hi <- res.gr
## Find overlaps
ols <- findOverlaps(hi, ec)
## Get fold-change in HI vs type in EndoC
df <- cbind(data.frame(hi)[queryHits(ols),c(6,8)],
data.frame(ec)[subjectHits(ols),c(13)])
colnames(df)[3] <- "type_ec"
## Plot results
ggplot(df,
aes(type_ec, log2FoldChange)) +
geom_hline(yintercept=c(1,-1), lty=2, color="grey") +
geom_hline(yintercept=0, lty=1, color="grey") +
geom_boxplot(aes(color=type_ec), notch=F, outlier.shape=NA,
lwd=1) +
scale_color_manual(values=pals$differential,
name="RE type") +
scale_y_continuous(name=expression("HI "*log[2]*" FC")) +
theme(legend.position="none",
strip.background = element_rect(fill="white", linetype=1, size=.5, color="black")) +
scale_x_discrete(name=expression("EndoC-"*beta*"H1 region type"),
labels=function(x) Hmisc::capitalize(x)) +
coord_cartesian(ylim=c(-2,3))
Rscript code/QC_CORR_genome.R data/CYT/H3K27ac/BAM/ data/CYT/H3K27ac/QC/
load("../data/CYT/H3K27ac/QC/COR_10kb_norm.rda")
mat <- counts
cor.mat.ctrl <- get_lower_tri(cor(mat[,grep("ctrl", colnames(mat))], method="pearson"))
ctrl.m <- reshape2::melt(cor.mat.ctrl, na.rm=TRUE)
c.ctrl.H3K27ac <-
ggplot(data = ctrl.m, aes(Var2, Var1, fill = value))+
geom_tile(color = "black", lwd=0.7)+
scale_fill_gradient2(low = "white", high = "slateblue4", mid = "skyblue2",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Pearson\nCorrelation") +
geom_text(aes(label=round(value, 2)), size=3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1),
axis.title=element_blank(),
panel.grid.major = element_blank(),
legend.position="none") +
coord_fixed() +
ggtitle("H3k27ac genome-wide correlation")
cor.mat.cyt <- get_upper_tri(cor(mat[,grep("cyt", colnames(mat))], method="pearson"))
cyt.m <- reshape2::melt(cor.mat.cyt, na.rm=TRUE)
c.cyt.H3K27ac <-
ggplot(data = cyt.m, aes(Var2, Var1, fill = value))+
geom_tile(color = "black", lwd=0.7)+
scale_fill_gradient2(low = "white", high = "slateblue4", mid = "skyblue2",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Pearson\nCorrelation") +
geom_text(aes(label=round(value, 2)), size=3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1),
axis.title=element_blank(),
legend.justification = c(1, 0),
legend.position = c(0.6, 0.75),
legend.direction = "horizontal",
panel.grid.major = element_blank()) +
coord_fixed() +
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
cor.rep.H3K27ac <- plot_grid(c.ctrl.H3K27ac, c.cyt.H3K27ac)
cor.rep.H3K27ac
cd data/CYT/H3K27ac
Rscript ../../code/CYT_diffAnalysis_DESeq2_chrom.R -f 1 -q 0.05 -b TRUE -s hi -e H3K27ac
Rscript ../../code/CYT_diffAnalysis_DESeq2_chrom.R -f 1 -q 0.05 -b TRUE -s endoc -e H3K27ac
load("../data/CYT/H3K27ac/diffAnalysis/H3K27ac_endoc_fc1_padj0.05_GRangesBatch.rda")
table(res.gr$type, res.gr$annotation) %>%
knitr::kable(format="html",
format.args = list(big.mark = ","),
caption = "Number of regions classified according to significance and distance to TSS in H3K27ac EndoC samples.") %>%
kable_styling(full_width = FALSE) %>%
add_header_above(c("Region type" = 1, "Location respect TSS" = 2))
Distal | Promoter | |
---|---|---|
gained | 3,180 | 201 |
lost | 19 | 0 |
stable | 134,512 | 11,930 |
volc_ec <-
ggplot(data.frame(res.gr),
aes(log2FoldChange, -log10(padj))) +
geom_point(aes(color=type), size=0.4) +
scale_color_manual(values=pals$differential,
name="RE type") +
geom_vline(xintercept=c(1,-1), linetype=2, color="dark grey") +
geom_hline(yintercept=-log10(0.05), linetype=2, color="dark grey") +
xlab(expression(Log[2]*" fold-change")) + ylab(expression(-Log[10]*" FDR adjusted P")) +
ggtitle(expression("H3K27ac EndoC-"*beta*H1)) +
theme(legend.position="none")
load("../data/CYT/H3K27ac/diffAnalysis/H3K27ac_hi_fc1_padj0.05_GRangesBatch.rda")
table(res.gr$type, res.gr$annotation) %>%
knitr::kable(format="html",
format.args = list(big.mark = ","),
caption = "Number of regions classified according to significance and distance to TSS in H3K27ac HI samples.") %>%
kable_styling(full_width = FALSE) %>%
add_header_above(c("Region type" = 1, "Location respect TSS" = 2))
Distal | Promoter | |
---|---|---|
gained | 4,022 | 219 |
lost | 5,895 | 250 |
stable | 110,543 | 11,006 |
volc_hi <-
ggplot(data.frame(res.gr),
aes(log2FoldChange, -log10(padj))) +
geom_point(aes(color=type), size=0.4) +
scale_color_manual(values=pals$differential,
name="RE type") +
geom_vline(xintercept=c(1,-1), linetype=2, color="dark grey") +
geom_hline(yintercept=-log10(0.05), linetype=2, color="dark grey") +
xlab(expression(Log[2]*" fold-change")) + ylab(expression(-Log[10]*" FDR adjusted P")) +
ggtitle(expression("H3K27ac HI")) +
theme(legend.position="none")
plot_grid(volc_ec,
volc_hi,
ncol=2)
## Load regions
load("../data/CYT/H3K27ac/diffAnalysis/H3K27ac_endoc_fc1_padj0.05_GRangesBatch.rda")
ec <- res.gr
load("../data/CYT/H3K27ac/diffAnalysis/H3K27ac_hi_fc1_padj0.05_GRangesBatch.rda")
hi <- res.gr
## Find overlaps
ols <- findOverlaps(hi, ec)
## Get fold-change in HI vs type in EndoC
df <- cbind(data.frame(hi)[queryHits(ols),c(6,8)],
data.frame(ec)[subjectHits(ols),c(13)])
colnames(df)[3] <- "type_ec"
## Plot results
ggplot(df,
aes(type_ec, log2FoldChange)) +
geom_hline(yintercept=c(1,-1), lty=2, color="grey") +
geom_hline(yintercept=0, lty=1, color="grey") +
geom_boxplot(aes(color=type_ec), notch=F, outlier.shape=NA,
lwd=1) +
scale_color_manual(values=pals$differential,
name="RE type") +
scale_y_continuous(name=expression("HI "*log[2]*" FC")) +
scale_x_discrete(name=expression("EndoC-"*beta*"H1 region type"),
labels=function(x) Hmisc::capitalize(x)) +
theme(legend.position="none",
strip.background = element_rect(fill="white", linetype=1, size=.5, color="black")) +
coord_cartesian(ylim=c(-2,3))
load("../data/CYT/ATAC/diffAnalysis/ATAC_endoc_fc1_padj0.05_GRangesBatch.rda")
at <- res.gr
load("../data/CYT/H3K27ac/diffAnalysis/H3K27ac_endoc_fc1_padj0.05_GRangesBatch.rda")
k27 <- res.gr
ols <- findOverlaps(at, k27, maxgap=200)
re <- at[queryHits(ols),]
colnames(mcols(re)) <- paste0("atac.", colnames(mcols(re)))
mcols(k27) <- mcols(k27)[,c(1,3,7,8)]
colnames(mcols(k27)) <- paste0("h3k27ac.", colnames(mcols(k27)))
mcols(re) <- cbind(mcols(re),
mcols(k27)[subjectHits(ols),])
## Create new types of IREs
re$type <- NA
re$type[re$h3k27ac.type=="stable" & re$atac.type=="stable"] <- "SRE"
re$type[re$h3k27ac.type=="gained" & (re$atac.type %in% c("gained", "stable"))] <- "IRE"
dir.create("../data/CYT/REs", F)
save(re, file="../data/CYT/REs/REs_endoc_fc1_padj0.05_granges.rda")
## Create less stringent IREs
re$type <- NA
re$h3k27ac.type[re$h3k27ac.log2FoldChange > 0.8 & re$h3k27ac.padj <= 0.05] <- "gained"
re$type[re$h3k27ac.type=="stable" & re$atac.type=="stable"] <- "SRE"
re$type[re$h3k27ac.type=="gained" & (re$atac.type %in% c("gained", "stable"))] <- "IRE"
save(re, file="../data/CYT/REs/REs_endoc_fc1_padj0.05_granges_k27.8.rda")
load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges.rda")
cor <- cor.test(re$atac.log2FoldChange, re$h3k27ac.log2FoldChange)
re.df <- data.frame(re)[,-c(1:5)]
re.df$type <- factor(re.df$type,
levels=c("SRE", "IRE"))
re.df <- re.df[order(re.df$type),]
ggplot(re.df,
aes(atac.log2FoldChange, h3k27ac.log2FoldChange)) +
geom_point(aes(color=h3k27ac.type,
fill=atac.type), pch=21) +
scale_fill_manual(values=pals$differential) +
scale_color_manual(values=pals$differential) +
geom_vline(xintercept=c(-1,1), lty=2, color="grey") +
geom_hline(yintercept=c(-1,1), lty=2, color="grey") +
annotate("text", x=4, y=-2, label=paste0("r2=", round(cor$estimate, 2),
"\nP<2x10-16")) +
theme(legend.position="none") +
scale_y_continuous(limits=c(-3,6),
name=expression("H3K27ac "*log[2]*" FC")) +
scale_x_continuous(limits=c(-3,6),
name=expression("ATAC-seq "*log[2]*" FC"))
Version | Author | Date |
---|---|---|
2b4820d | Mireia Ramos | 2020-05-06 |
load("../data/CYT/ATAC/diffAnalysis/ATAC_hi_fc1_padj0.05_GRangesBatch.rda")
at <- res.gr
load("../data/CYT/H3K27ac/diffAnalysis/H3K27ac_hi_fc1_padj0.05_GRangesBatch.rda")
k27 <- res.gr
ols <- findOverlaps(at, k27, maxgap=200)
re <- at[queryHits(ols),]
colnames(mcols(re)) <- paste0("atac.", colnames(mcols(re)))
mcols(k27) <- mcols(k27)[,c(1,3,7,8)]
colnames(mcols(k27)) <- paste0("h3k27ac.", colnames(mcols(k27)))
mcols(re) <- cbind(mcols(re),
mcols(k27)[subjectHits(ols),])
## Create new types of IREs
re$type <- NA
re$type[re$h3k27ac.type=="stable" & re$atac.type=="stable"] <- "SRE"
re$type[re$h3k27ac.type=="gained" & (re$atac.type %in% c("gained", "stable"))] <- "IRE"
dir.create("../data/CYT/REs", F)
save(re, file="../data/CYT/REs/REs_hi_fc1_padj0.05_granges.rda")
## Create less stringent IREs
re$type <- NA
re$h3k27ac.type[re$h3k27ac.log2FoldChange > 0.8 & re$h3k27ac.padj <= 0.05] <- "gained"
re$type[re$h3k27ac.type=="stable" & re$atac.type=="stable"] <- "SRE"
re$type[re$h3k27ac.type=="gained" & (re$atac.type %in% c("gained", "stable"))] <- "IRE"
save(re, file="../data/CYT/REs/REs_hi_fc1_padj0.05_granges_k27.8.rda")
load("../data/CYT/REs/REs_hi_fc1_padj0.05_granges.rda")
cor <- cor.test(re$atac.log2FoldChange, re$h3k27ac.log2FoldChange)
re.df <- data.frame(re)[,-c(1:5)]
re.df$type <- factor(re.df$type,
levels=c("SRE", "IRE"))
re.df <- re.df[order(re.df$type),]
ggplot(re.df,
aes(atac.log2FoldChange, h3k27ac.log2FoldChange)) +
geom_point(aes(color=h3k27ac.type,
fill=atac.type), pch=21) +
scale_fill_manual(values=pals$differential) +
scale_color_manual(values=pals$differential) +
geom_vline(xintercept=c(-1,1), lty=2, color="grey") +
geom_hline(yintercept=c(-1,1), lty=2, color="grey") +
annotate("text", x=4, y=-2, label=paste0("r2=", round(cor$estimate, 2),
"\nP<2x10-16")) +
theme(legend.position="none") +
scale_y_continuous(limits=c(-3,6),
name=expression("H3K27ac "*log[2]*" FC")) +
scale_x_continuous(limits=c(-3,6),
name=expression("ATAC-seq "*log[2]*" FC"))
Version | Author | Date |
---|---|---|
2b4820d | Mireia Ramos | 2020-05-06 |
Interestingly, the set of IREs identified in EndoC-\(\beta\)H1 cells allows us to cluster both EndoC-\(\beta\)H1 and Human Islet ATAC-seq and H3K27ac regions according to the suffered treatment and not sample type.
################################################
## Clustering samples according to EndoC IREs ##
################################################
library(DESeq2)
library(BiocParallel)
register(MulticoreParam(4))
## ATAC-seq ------------------------------------
# Create SAF
load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges.rda")
saf <- data.frame(re)[grep("IRE", re$type),c(1:3,6)]
colnames(saf) <- c("Chr", "Start", "End", "GeneID")
saf$Strand <- "+"
# Get counts at IREs
files <- list.files("../data/CYT/ATAC/BAM",
pattern=".offset.bam$",
full.names=TRUE)
atac <- Rsubread::featureCounts(files,
annot.ext=saf,
nthreads=10)
names <- pipelineNGS::getNameFromPath(files, suffix=".offset.bam")
colnames(atac$counts) <- names
# Normalize with DESeq2
coldata <- data.frame(sample=names,
treatment=unlist(lapply(strsplit(names, "_"),
function(x) x[2])),
batch=unlist(lapply(strsplit(names, "_"),
function(x) x[1])))
coldata$tissue <- gsub("[[:digit:]]*", "", coldata$batch)
dds <- DESeqDataSetFromMatrix(countData=atac$counts,
colData=coldata,
design=~batch+treatment)
dds <- DESeq(dds,
parallel=TRUE)
rld <- rlog(dds)
vsd <- vst(dds)
save(rld,
file=file.path(out_dir, "IREs_ATAC_clustering_rld.rda"))
## H3K27ac -------------------------------------------
# Create SAF
load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges.rda")
id <- unique(re$h3k27ac.GeneID[grep("IRE", re$type)])
load("../data/CYT/H3K27ac/diffAnalysis/H3K27ac_endoc_fc1_padj0.05_GRangesBatch.rda")
res.gr <- res.gr[res.gr$GeneID %in% id,]
saf <- data.frame(res.gr)[,c(1:3,6)]
colnames(saf) <- c("Chr", "Start", "End", "GeneID")
saf$Strand <- "+"
# Get counts at IREs
files <- list.files("../../data/CYT/H3K27ac/BAM",
pattern=".bam$",
full.names=TRUE)
files <- files[!grepl("raw", files)]
atac <- Rsubread::featureCounts(files,
annot.ext=saf,
nthreads=10)
names <- pipelineNGS::getNameFromPath(files, suffix=".bam")
colnames(atac$counts) <- names
# Normalize with DESeq2
coldata <- data.frame(sample=names,
treatment=unlist(lapply(strsplit(names, "_"),
function(x) x[2])),
batch=unlist(lapply(strsplit(names, "_"),
function(x) x[1])))
coldata$tissue <- gsub("[[:digit:]]*", "", coldata$batch)
dds <- DESeqDataSetFromMatrix(countData=atac$counts,
colData=coldata,
design=~batch+treatment)
dds <- DESeq(dds,
parallel=TRUE)
rld <- rlog(dds)
save(rld,
file=file.path(out_dir, "IREs_H3K27ac_clustering_rld.rda"))
load(file.path(out_dir, "IREs_ATAC_clustering_rld.rda"))
mat <- assay(rld)
mat <- limma::removeBatchEffect(mat, rld$batch)
colors <- colorRampPalette( rev(RColorBrewer::brewer.pal(9, "BuPu")) )(255)
rows <- data.frame("Treatment"=rld$treatment,
"Tissue"=rld$tissue)
ha = HeatmapAnnotation(df = rows,
col = list(Treatment = c(cyt="dark orange", ctrl="steelblue3"),
Tissue = c(endoc="violetred3", hi="palegreen3")))
sampleDists <- dist(t(mat), method="euclidean")
sampleDistMatrix <- as.matrix(sampleDists)
hclust <- hclust(sampleDists)
heat <- Heatmap(sampleDistMatrix+1,
col=colors,
heatmap_legend_param = list(title = "Distance",
color_bar = "continuous",
legend_direction="horizontal"),
top_annotation=ha,
rect_gp = gpar(col= "black"),
cluster_rows=hclust,
cluster_columns=hclust)
draw(heat, heatmap_legend_side = "bottom", annotation_legend_side = "right")
Version | Author | Date |
---|---|---|
2b4820d | Mireia Ramos | 2020-05-06 |
load(file.path(out_dir, "IREs_H3K27ac_clustering_rld.rda"))
rld$batch[1] <- "endoc3"
mat <- assay(rld)
mat <- limma::removeBatchEffect(mat, rld$batch)
Coefficients not estimable: batch8
colors <- colorRampPalette( rev(RColorBrewer::brewer.pal(9, "BuPu")) )(255)
rows <- data.frame("Treatment"=rld$treatment,
"Tissue"=rld$tissue)
ha = HeatmapAnnotation(df = rows,
col = list(Treatment = c(cyt="dark orange", ctrl="steelblue3"),
Tissue = c(endoc="violetred3", hi="palegreen3")))
sampleDists <- dist(t(mat), method="euclidean")
sampleDistMatrix <- as.matrix(sampleDists)
hclust <- hclust(sampleDists)
heat <- Heatmap(sampleDistMatrix+1,
col=colors,
heatmap_legend_param = list(title = "Distance",
color_bar = "continuous",
legend_direction="horizontal"),
top_annotation=ha,
rect_gp = gpar(col= "black"),
cluster_rows=hclust,
cluster_columns=hclust)
draw(heat, heatmap_legend_side = "bottom", annotation_legend_side = "right")
Version | Author | Date |
---|---|---|
2b4820d | Mireia Ramos | 2020-05-06 |
load("../data/CYT/REs/REs_endoc_fc1_padj0.05_granges.rda")
re.df <- data.frame(re)[,-c(1:5)]
t <- table(re.df$type, re.df$atac.annotation)
t
Distal Promoter
IRE 3594 204
SRE 70391 9982
## Make groups
re.tss <- unique(re.df[!is.na(re.df$type),])
re.tss$anno.group <- NA
re.tss$anno.group[abs(re.tss$atac.distanceToTSS)>200e3] <- ">200kb"
re.tss$anno.group[abs(re.tss$atac.distanceToTSS)<=200e3 &
abs(re.tss$atac.distanceToTSS)>20e3] <- "20-200kb"
re.tss$anno.group[abs(re.tss$atac.distanceToTSS)<=20e3 &
abs(re.tss$atac.distanceToTSS)>2e3] <- "2-20kb"
re.tss$anno.group[abs(re.tss$atac.distanceToTSS)<=2e3] <- "0-2kb"
len.ire <- sum(grepl("IRE", re.tss$type))
len.sre <- sum(grepl("SRE", re.tss$type))
sum.tss <- re.tss %>%
group_by(type, anno.group) %>%
summarise(num=length(unique(atac.GeneID)))
sum.tss$perc <- NA
sum.tss$perc[grep("IRE", sum.tss$type)] <- sum.tss$num[grep("IRE", sum.tss$type)]/len.ire*100
sum.tss$perc[grep("SRE", sum.tss$type)] <- sum.tss$num[grep("SRE", sum.tss$type)]/len.sre*100
sum.tss$anno.group <- factor(sum.tss$anno.group,
levels=c("0-2kb", "2-20kb", "20-200kb", ">200kb"))
tss.plot <-
ggplot(sum.tss,
aes(anno.group, perc)) +
geom_bar(aes(fill=type), color="black", lwd=0.7, stat="identity", position="dodge") +
geom_vline(xintercept=1.5, lty=2, color="dark red") +
scale_fill_manual(values=pals$re,
name="RE type", labels=function(x) paste0(x, "s")) +
theme(legend.position="top") +
xlab("Distance to TSS") +
scale_y_continuous(name="Percentage of RE",
labels=function(x) paste0(x, "%"),
breaks=scales::pretty_breaks())
tss.plot
scope=1e3
bin=50
ire <- re[grep("IRE", re$type),]
rnd <- regioneR::randomizeRegions(ire)
ire.cons <- pipelineNGS::calculateMeanCons(ire,
scope=scope,
bin=bin)
ire.cons$re_type <- "IREs"
rnd.cons <- pipelineNGS::calculateMeanCons(rnd,
scope=scope,
bin=bin)
rnd.cons$re_type <- "Random IREs"
ire.cons <- rbind(ire.cons, rnd.cons)
save(ire.cons, file=file.path(out_path, "IRE_endoc_fc1_conservation.rda"))
load(file.path(out_dir, "IRE_endoc_fc1_conservation.rda"))
cons.plot <-
ggplot(ire.cons,
aes(position, meanCons)) +
geom_line(aes(lty=re_type), lwd=0.7, color=pals$re["IRE"]) +
scale_linetype_discrete(name="Region type") +
geom_vline(xintercept=0, lty=2, color="grey") +
ylab("Mean PhastCons46way score") +
xlab("Position from peak center (bp)") +
theme(legend.position = "top") +
guides(linetype=guide_legend(nrow=2))
cons.plot
library(maRge)
out_homer <- file.path(out_dir, "HOMER_IREs_endoc_fc1_padj0.05_mask/")
deNovoMotifHOMER(bed=paste0("../data/CYT/bedfiles/IREs_endoc_fc1_padj0.05.bed"),
path_output=out_homer,
other_param="-mask",
path_homer="~/tools/homer/")
htmltools::includeHTML(file.path(out_dir, "HOMER_IREs_endoc_fc1_padj0.05_mask/homerResults.html"))
Rank | Motif | P-value | log P-pvalue | % of Targets | % of Background | STD(Bg STD) | Best Match/Details | Motif File |
1 | 1e-1396 | -3.215e+03 | 51.71% | 3.44% | 229.2bp (184.8bp) | IRF1(IRF)/PBMC-IRF1-ChIP-Seq(GSE43036)/Homer(0.972) More Information | Similar Motifs Found | motif file (matrix) | |
2 | 1e-39 | -9.077e+01 | 44.13% | 32.56% | 278.3bp (183.6bp) | FOXP3/MA0850.1/Jaspar(0.918) More Information | Similar Motifs Found | motif file (matrix) | |
3 | 1e-31 | -7.239e+01 | 15.29% | 8.67% | 299.7bp (184.1bp) | STAT4(Stat)/CD4-Stat4-ChIP-Seq(GSE22104)/Homer(0.934) More Information | Similar Motifs Found | motif file (matrix) | |
4 | 1e-29 | -6.687e+01 | 4.62% | 1.50% | 263.4bp (177.4bp) | HNF1B/MA0153.2/Jaspar(0.896) More Information | Similar Motifs Found | motif file (matrix) | |
5 | 1e-25 | -5.806e+01 | 29.88% | 21.69% | 315.3bp (184.6bp) | NeuroD1(bHLH)/Islet-NeuroD1-ChIP-Seq(GSE30298)/Homer(0.947) More Information | Similar Motifs Found | motif file (matrix) | |
6 | 1e-21 | -4.880e+01 | 5.85% | 2.62% | 293.7bp (169.2bp) | MF0003.1_REL_class/Jaspar(0.966) More Information | Similar Motifs Found | motif file (matrix) | |
7 | 1e-20 | -4.775e+01 | 0.37% | 0.00% | 185.7bp (189.2bp) | Zfx/MA0146.2/Jaspar(0.618) More Information | Similar Motifs Found | motif file (matrix) | |
8 | 1e-19 | -4.473e+01 | 0.40% | 0.00% | 220.6bp (67.7bp) | Srebp2(bHLH)/HepG2-Srebp2-ChIP-Seq(GSE31477)/Homer(0.638) More Information | Similar Motifs Found | motif file (matrix) | |
9 | 1e-18 | -4.261e+01 | 0.33% | 0.00% | 194.0bp (0.0bp) | PB0194.1_Zbtb12_2/Jaspar(0.637) More Information | Similar Motifs Found | motif file (matrix) | |
10 | 1e-18 | -4.186e+01 | 17.35% | 11.84% | 306.5bp (185.9bp) | FOSL1/MA0477.1/Jaspar(0.902) More Information | Similar Motifs Found | motif file (matrix) | |
11 | 1e-17 | -4.019e+01 | 0.37% | 0.00% | 188.7bp (17.4bp) | ZNF382(Zf)/HEK293-ZNF382.GFP-ChIP-Seq(GSE58341)/Homer(0.597) More Information | Similar Motifs Found | motif file (matrix) | |
12 | 1e-16 | -3.825e+01 | 16.38% | 11.25% | 315.7bp (187.8bp) | NRL/MA0842.1/Jaspar(0.871) More Information | Similar Motifs Found | motif file (matrix) | |
13 | 1e-16 | -3.756e+01 | 0.30% | 0.00% | 164.6bp (0.0bp) | PB0180.1_Sp4_2/Jaspar(0.656) More Information | Similar Motifs Found | motif file (matrix) | |
14 | 1e-16 | -3.756e+01 | 0.30% | 0.00% | 176.8bp (176.1bp) | Hand1::Tcf3/MA0092.1/Jaspar(0.647) More Information | Similar Motifs Found | motif file (matrix) | |
15 | 1e-16 | -3.756e+01 | 0.30% | 0.00% | 194.1bp (0.0bp) | NKX2.2/Human-Islets(0.604) More Information | Similar Motifs Found | motif file (matrix) | |
16 | 1e-16 | -3.756e+01 | 0.30% | 0.00% | 208.2bp (29.9bp) | Chop(bZIP)/MEF-Chop-ChIP-Seq(GSE35681)/Homer(0.778) More Information | Similar Motifs Found | motif file (matrix) | |
17 | 1e-15 | -3.680e+01 | 0.47% | 0.02% | 215.3bp (177.0bp) | PU.1-IRF(ETS:IRF)/Bcell-PU.1-ChIP-Seq(GSE21512)/Homer(0.778) More Information | Similar Motifs Found | motif file (matrix) | |
18 | 1e-15 | -3.579e+01 | 0.37% | 0.01% | 336.1bp (230.6bp) | SD0003.1_at_AC_acceptor/Jaspar(0.614) More Information | Similar Motifs Found | motif file (matrix) | |
19 | 1e-15 | -3.574e+01 | 0.33% | 0.01% | 158.6bp (149.0bp) | ZNF528(Zf)/HEK293-ZNF528.GFP-ChIP-Seq(GSE58341)/Homer(0.681) More Information | Similar Motifs Found | motif file (matrix) | |
20 | 1e-14 | -3.262e+01 | 0.27% | 0.00% | 159.5bp (92.2bp) | PB0026.1_Gm397_1/Jaspar(0.724) More Information | Similar Motifs Found | motif file (matrix) | |
21 | 1e-13 | -3.210e+01 | 11.67% | 7.69% | 302.8bp (184.2bp) | PB0036.1_Irf6_1/Jaspar(0.818) More Information | Similar Motifs Found | motif file (matrix) | |
22 | 1e-13 | -3.138e+01 | 0.30% | 0.00% | 182.1bp (124.4bp) | ZBTB7A/MA0750.1/Jaspar(0.677) More Information | Similar Motifs Found | motif file (matrix) | |
23 | 1e-13 | -3.138e+01 | 0.30% | 0.01% | 223.9bp (167.0bp) | MyoG(bHLH)/C2C12-MyoG-ChIP-Seq(GSE36024)/Homer(0.640) More Information | Similar Motifs Found | motif file (matrix) | |
24 | 1e-13 | -3.138e+01 | 0.30% | 0.01% | 147.7bp (224.1bp) | CHR(?)/Hela-CellCycle-Expression/Homer(0.663) More Information | Similar Motifs Found | motif file (matrix) | |
25 * | 1e-11 | -2.727e+01 | 17.95% | 13.41% | 325.0bp (178.4bp) | Rfx5(HTH)/GM12878-Rfx5-ChIP-Seq(GSE31477)/Homer(0.708) More Information | Similar Motifs Found | motif file (matrix) | |
26 * | 1e-11 | -2.713e+01 | 0.27% | 0.01% | 261.1bp (102.8bp) | PB0083.1_Tcf7_1/Jaspar(0.598) More Information | Similar Motifs Found | motif file (matrix) | |
27 * | 1e-11 | -2.713e+01 | 0.27% | 0.00% | 163.6bp (34.9bp) | GFY(?)/Promoter/Homer(0.671) More Information | Similar Motifs Found | motif file (matrix) | |
28 * | 1e-11 | -2.713e+01 | 0.27% | 0.00% | 128.4bp (160.8bp) | DMRT3/MA0610.1/Jaspar(0.621) More Information | Similar Motifs Found | motif file (matrix) | |
29 * | 1e-11 | -2.602e+01 | 2.69% | 1.14% | 328.7bp (186.6bp) | ETV6/MA0645.1/Jaspar(0.743) More Information | Similar Motifs Found | motif file (matrix) | |
30 * | 1e-11 | -2.575e+01 | 1.46% | 0.43% | 188.7bp (183.0bp) | SPDEF(ETS)/VCaP-SPDEF-ChIP-Seq(SRA014231)/Homer(0.758) More Information | Similar Motifs Found | motif file (matrix) | |
31 * | 1e-10 | -2.315e+01 | 0.43% | 0.03% | 177.6bp (162.4bp) | PB0146.1_Mafk_2/Jaspar(0.661) More Information | Similar Motifs Found | motif file (matrix) | |
32 * | 1e-8 | -2.072e+01 | 0.60% | 0.09% | 169.9bp (145.3bp) | PB0172.1_Sox1_2/Jaspar(0.802) More Information | Similar Motifs Found | motif file (matrix) | |
33 * | 1e-8 | -2.039e+01 | 0.30% | 0.02% | 125.2bp (92.7bp) | Ahr::Arnt/MA0006.1/Jaspar(0.782) More Information | Similar Motifs Found | motif file (matrix) | |
34 * | 1e-7 | -1.767e+01 | 0.83% | 0.22% | 216.9bp (177.2bp) | Pit1+1bp(Homeobox)/GCrat-Pit1-ChIP-Seq(GSE58009)/Homer(0.765) More Information | Similar Motifs Found | motif file (matrix) | |
35 * | 1e-7 | -1.702e+01 | 1.96% | 0.89% | 392.9bp (180.7bp) | POL006.1_BREu/Jaspar(0.676) More Information | Similar Motifs Found | motif file (matrix) | |
36 * | 1e-7 | -1.639e+01 | 0.53% | 0.10% | 259.8bp (154.0bp) | Hes1/MA1099.1/Jaspar(0.668) More Information | Similar Motifs Found | motif file (matrix) | |
37 * | 1e-4 | -1.148e+01 | 0.13% | 0.01% | 230.9bp (101.0bp) | ZBTB12(Zf)/HEK293-ZBTB12.GFP-ChIP-Seq(GSE58341)/Homer(0.740) More Information | Similar Motifs Found | motif file (matrix) | |
38 * | 1e-4 | -1.148e+01 | 0.13% | 0.01% | 110.8bp (157.1bp) | Pknox1(Homeobox)/ES-Prep1-ChIP-Seq(GSE63282)/Homer(0.586) More Information | Similar Motifs Found | motif file (matrix) | |
39 * | 1e-4 | -1.032e+01 | 0.40% | 0.09% | 152.3bp (187.6bp) | PB0056.1_Rfxdc2_1/Jaspar(0.567) More Information | Similar Motifs Found | motif file (matrix) | |
40 * | 1e-3 | -7.026e+00 | 0.40% | 0.13% | 245.9bp (171.3bp) | Gata1(Zf)/K562-GATA1-ChIP-Seq(GSE18829)/Homer(0.643) More Information | Similar Motifs Found | motif file (matrix) |