How to access Conc value in DiffBind(待完成)
我以文章里面的ATAC-Seq为例
library(readr)
library(dplyr)
Diff_ATAC <- read_csv("https://github.com/WangLab-SIPPE/2020STARProtocols_pipeline/raw/main/DownStreamAnalysis/File/E50h_GM3D_DiffPeakAnno.csv") %>%
select(Conc_E50h, Conc_GM3D, feature_id)
githubURL <- "https://github.com/shangguandong1996/myOwnBlog_DataLink/raw/main/202102/DiffBind_dba_count.rda"
download.file(githubURL,"DiffBind_dba_count.rda")
load("DiffBind_dba_count.rda")
peakCount_list <- dba_count$peaks
names(peakCount_list) <- dba_count$samples$SampleID
str(peakCount_list)
lapply(peakCount_list, function(x) x$Reads) %>%
do.call(cbind, . ) -> rawCount
rownames(rawCount) <- paste0("E50h_GM3D_", 1:nrow(rawCount))
reads_number <- as.numeric(dba_count$class["Reads", ])
(new_SF <- reads_number / min(reads_number))
normCount <- sweep(rawCount, 2, new_SF, FUN = "/")
normCount_mean <- (normCount[, c(1, 3)] +
normCount[, c(2, 4)]) / 2
normCount_mean_log <- log2(normCount_mean)
# 可以看到几乎是没有偏差的
# 那些偏差是来源于DiffBind自己用了个round函数来近似输出Conc的结果
normCount_mean_log %>%
as_tibble(rownames = "feature_id") %>%
inner_join(Diff_ChIP) %>%
mutate(E5 = Conc_E50h - E50h_R1,
G3 = Conc_GM3D - GM3D_R1) %>%
filter(E5 > 1e-2 | G3 > 1e-2)