How to access Conc value in DiffBind(待完成)

我以文章里面的ATAC-Seq为例

library(readr)
library(dplyr)
Diff_ATAC <- read_csv("https://github.com/WangLab-SIPPE/2020STARProtocols_pipeline/raw/main/DownStreamAnalysis/File/E50h_GM3D_DiffPeakAnno.csv") %>% 
  select(Conc_E50h, Conc_GM3D, feature_id)
githubURL <- "https://github.com/shangguandong1996/myOwnBlog_DataLink/raw/main/202102/DiffBind_dba_count.rda"

download.file(githubURL,"DiffBind_dba_count.rda")
load("DiffBind_dba_count.rda")
peakCount_list <- dba_count$peaks
names(peakCount_list) <- dba_count$samples$SampleID

str(peakCount_list)
lapply(peakCount_list, function(x) x$Reads) %>% 
  do.call(cbind, . ) -> rawCount

rownames(rawCount) <- paste0("E50h_GM3D_", 1:nrow(rawCount))
reads_number <- as.numeric(dba_count$class["Reads", ])
(new_SF <- reads_number / min(reads_number))

normCount <- sweep(rawCount, 2, new_SF, FUN = "/")

normCount_mean <- (normCount[, c(1, 3)] +
                     normCount[, c(2, 4)]) / 2

normCount_mean_log <- log2(normCount_mean)
# 可以看到几乎是没有偏差的
# 那些偏差是来源于DiffBind自己用了个round函数来近似输出Conc的结果
normCount_mean_log %>% 
  as_tibble(rownames = "feature_id") %>% 
  inner_join(Diff_ChIP) %>% 
  mutate(E5 = Conc_E50h - E50h_R1, 
         G3 = Conc_GM3D - GM3D_R1) %>% 
  filter(E5 > 1e-2 | G3 > 1e-2) 
Guandong Shang
Guandong Shang
PhD Candidate

My research interests include rstats, epigenomics, bioinformatics and evo-devo.

Related