minfi:Analyze Illumina Infinium DNA methylation arrays
1. 数据读入
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("minfi",force = TRUE)
BiocManager::install("minfiData")
library(minfi)
library(minfiData)
### 1. 数据读入
# RGsetEx
# ## RGsetEx: RGChannelSet, 622,399 features
# class(RGsetEx) #"RGChannelSet"
# MsetEx <- preprocessRaw(RGsetEx)
# class(MsetEx) # "MethylSet"
# ## MsetEx: MethylSet, 485,512 features
# GMsetEx <- mapToGenome(MsetEx)
# class(GMsetEx) #"GenomicMethylSet"
# ## GMsetEx: GenomicMethylSet, 485,512 features
## ----baseDir------------------------------------------------------------------
baseDir <- system.file("extdata", package = "minfiData")
list.files(baseDir)
list.files(file.path(baseDir, "5723646052"))
## 读Illumina methylation sample sheet(csv结尾的文件),含有样本信息以及文件位置
targets <- read.metharray.sheet(baseDir) # targets 为data.frame
# Usage
# read.metharray.sheet(base, pattern = "csv$", ignore.case = TRUE,
# recursive = TRUE, verbose = TRUE)
# sub(baseDir, "", targets$Basename) # 替换
## 通过样本信息(targets为data.frame)读取甲基化芯片数据idat
RGset <- read.metharray.exp(targets = targets)
class(RGset) # "RGChannelSet"
# 只读取某一文件夹下面的idat文件
RGset2 <- read.metharray.exp(file.path(baseDir, "5723646052"))
# 读取baseDir下所有子文件夹下面的idat文件
RGset3 <- read.metharray.exp(baseDir, recursive = TRUE)
2. 质控
### 2. 质控
## 甲基化β值的豆状密度图。
par(mar=c(5,6,4,2)) # c(bottom, left, top, right)
densityBeanPlot(RGset, sampNames=names, sampGroups=group)
## β值密度曲线
densityPlot(RGset,sampGroups=group)
# 检测失败的位点
detP<- detectionP(RGset, type = "m+u")
failed <- detP>0.01
colMeans(failed) # Fraction of failed positions per sample
sum(rowMeans(failed)>0.5) # How many positions failed in >50% of samples
3.数据类型转化
### 3.数据类型转化
# 产生没有标准的MethylSet类型
Mset <- preprocessRaw(RGset)
# quantile normalization
gmSet <- preprocessQuantile(Mset)
# GMset <- mapToGenome(Mset)
# getMeth(GMset)
# getUnmeth(GMset)
## 对于RGset,Mset,GMset, gmSet对象 都有这些函数
pd <- pData(gmSet) # 表型数据
getBeta(gmSet)
group <- pd$Sample_Group # 样品分组
annotation(gmSet) # 注释信息
getAnnotatio(gmSet)
ids <- rownames(gmSet) # 探针id名
names <- colnames(gmSet) # 样品名
beta <- getBeta(gmSet) #β值
#assays(RGset)[[1]] # assays(RGset)$Green
#assays(RGset)[2] # assays(RGset)$Red
4. 读入TCGA数据和GEO数据
### 4. 读入TCGA数据和GEO数据
data_file <- "/Users/zhengxueming/test/test_READ.methylation450.tsv"
gmSet_tcga <- readTCGA(data_file)
class(gmSet_tcga) #"GenomicRatioSet"
?readGEORawFile
?read.metharray
?read.metharray.exp
?read.metharray.sheet
5.甲基化差异化分析
### 5.甲基化差异化分析
## 检测差异甲基化位点
# 要用normalization后的gmSet数据
pd <- pData(gmSet)
beta <- getBeta(gmSet)
group <- pd$Sample_Group
dmp <- dmpFinder(beta, pheno = group, type = "categorical")
class(dump) # "data.frame"
# Usage
# dmpFinder(dat, pheno, type = c("categorical", "continuous"),
# qCutoff = 1, shrinkVar = FALSE)
# Continuous phenotypes are tested with linear regression,
# while an F-test is used for categorical phenotypes.
head(dmp)
## 选择统计学差异的位点
# sum(dmp$qval < 0.05, na.rm=TRUE)
dmp_sig <- dmp[which(dmp$qval < 0.05),]
head(dmp_sig)
##检测差异甲基化区域
designMatrix <- model.matrix(~ gmSet$status)
#运行时间较长,并行处理
require(doParallel)
registerDoParallel(cores = 2)
bumps <- bumphunter(gmSet, design = designMatrix, B = 10,
type = "Beta", cutoff = 0.25)
class(bumps) #"list"
names(bumps)
dim(bumps$table)
head(bumps$coef)
length(bumps$coef)
head(bumps$pvaluesMarginal)
bumps$pvaluesMarginal
head(bumps$fitted)
bumps_table <- bumps$table
dim(bumps_table)
head(bumps_table)
summary(bumps_table$fwer)
bumps_table_sig <- bumps_table[which(bumps_table$p.value < 0.01),]
head(bumps_table_sig)
dim(bumps_table_sig)
bumps_table_sig[which(bumps_table_sig$chr=="chr7"),]
# dat <- dummyData()
# # Enable parallelization
# require(doParallel)
# registerDoParallel(cores = 2)
# # Find bumps
# dmrs <- bumphunter(dat$mat, design=dat$design, chr=dat$chr, pos=dat$pos,
# cluster=dat$cluster, coef=2, cutoff= 0.28, nullMethod="bootstrap",
# smooth=TRUE, B=250, verbose=TRUE,
# smoothFunction=loessByCluster)
# head(dmrs$table[,1:4], n =3)
参考:
https://www.bioconductor.org/packages/release/bioc/manuals/minfi/man/minfi.pdf