90%以上的审稿人比问!多个数据集怎么联合分析?看完就懂了!
整合多个数据集获取表达差异基因
Veen,RAA与SVA
▌整合多个不同平台测序/芯片数据集
考虑到批次效应对样本表达差异的影响,整合多个数据集往往不建议合并数据集,比较常用的方法有两个,第一是多个数据集表达差异基因取交集,第二是使用RobustRankAggreg包进行表达差异基因结果整合。下面以 GSE13507, GSE37815和GSE65635 这3个膀胱癌数据集为例进行说明。
首先我们基于R包、GEO2R、仙桃学术、Networkanalyst或其他方法进行表达差异分析,获得3个数据集分别对应的表达差异基因,基于GSE13507获得820个下调基因256个上调基因,基于GSE37815获得465个下调基因、175个上调基因,基于GSE65635获得2366个下调基因 1126个上调基因。
library(tidyverse)
# 加载 GSE13507,GSE37815,GSE65635表达差异基因
load(file = "GSE13507_DE_result_all.Rdata") # 820个下调基因 256个上调基因
DEG_GSE13507 <- DE_result1
table(DEG_GSE13507$Type)
# down ns up
# 820 23281 256
load(file = "GSE37815_DE_result_all.Rdata") # 465个下调基因 175个上调基因
DEG_GSE37815 <- DE_result1
table(DEG_GSE37815$Type)
# down ns up
# 465 23717 175
load(file = "GSE65635_DE_result_all.Rdata") # 2366个下调基因 1126个上调基因
DEG_GSE65635 <- DE_result1
table(DEG_GSE65635$Type)
# down ns up
# 2366 15924 1126
方法一
将3个数据集表达差异基因取交集,绘制Veen图展示,获得表达差异基因共有259个,上调基因42个下调基因217个。
###### Veen图 #####
Up_GSE13507 <- DEG_GSE13507 %>% filter(Type == "up")
Dw_GSE13507 <- DEG_GSE13507 %>% filter(Type == "down")
Up_GSE37815 <- DEG_GSE37815 %>% filter(Type == "up")
Dw_GSE37815 <- DEG_GSE37815 %>% filter(Type == "down")
Up_GSE65635 <- DEG_GSE65635 %>% filter(Type == "up")
Dw_GSE65635 <- DEG_GSE65635 %>% filter(Type == "down")
library(VennDiagram)
venn.plot <-venn.diagram(
x = list(Up_GSE13507=Up_GSE13507$Gene_Symbol,
Up_GSE37815=Up_GSE37815$Gene_Symbol,
Up_GSE65635=Up_GSE65635$Gene_Symbol),
filename ="Venn_UP.tif",
lty ="dotted",
lwd =0.5,
col ="black", #"transparent"
fill =c("dodgerblue", "goldenrod1", "darkorange1"),
alpha =0.60,
cat.col =c("dodgerblue", "goldenrod1", "darkorange1"),
cat.cex =1,
cat.fontface="bold",
margin =0.05,
cex =1)
venn.plot <-venn.diagram(
x = list(Dw_GSE13507=Dw_GSE13507$Gene_Symbol,
Dw_GSE37815=Dw_GSE37815$Gene_Symbol,
Dw_GSE65635=Dw_GSE65635$Gene_Symbol),
filename ="Venn_DW.tif",
lty ="dotted",
lwd =0.5,
col ="black", #"transparent"
fill =c("dodgerblue", "goldenrod1", "darkorange1"),
alpha =0.60,
cat.col =c("dodgerblue", "goldenrod1", "darkorange1"),
cat.cex =1,
cat.fontface="bold",
margin =0.05,
cex =1)
# 提取交集
# 上调基因42个
Up_venn_list <- list(Up_GSE13507=Up_GSE13507$Gene_Symbol,
Up_GSE37815=Up_GSE37815$Gene_Symbol,
Up_GSE65635=Up_GSE65635$Gene_Symbol)
for (i in 1:length(Up_venn_list)) {
if(i == 1){Up_interDEGs <- Up_venn_list[[1]]}
else{Up_interDEGs <- intersect(Up_interDEGs, Up_venn_list[[i]])}
}
Up_interDEGs
save(Up_interDEGs,file = "Up_interDEGs.Rdata")
# [1] "WDR72" "NUSAP1" "CCNB2" "CDCA5" "UHRF1" "TPX2"
# [7] "PRC1" "KIF20A" "ASPM" "E2F2" "GINS2" "AURKA"
# [13] "NCAPG" "KRT7" "CEP55" "HAS3" "DTL" "CKAP2L"
# [19] "FAM83H" "BUB1B" "HOOK1" "CDS1" "LAD1" "TIMELESS"
# [25] "ZNF165" "PMM2" "BUB1" "HMMR" "KNTC1" "CCNB1"
# [31] "STAP2" "PLXNB1" "SHMT2" "SOX4" "LMNB2" "ZNF593"
# [37] "MIF" "MARVELD3" "VEGFA" "FANCD2" "PUS1" "SDC1"
# 下调基因217个
Dw_venn_list <- list(Dw_GSE13507=Dw_GSE13507$Gene_Symbol,
Dw_GSE37815=Dw_GSE37815$Gene_Symbol,
Dw_GSE65635=Dw_GSE65635$Gene_Symbol)
for (i in 1:length(Dw_venn_list)) {
if(i == 1){Dw_interDEGs <- Dw_venn_list[[1]]}
else{Dw_interDEGs <- intersect(Dw_interDEGs, Dw_venn_list[[i]])}
}
Dw_interDEGs
save(Dw_interDEGs,file = "Dw_interDEGs.Rdata")
# [1] "ALDH1A1" "SRPX" "PCP4" "FLNC" "MFAP4"
# [6] "ABCA8" "CNN1" "MOXD1" "FAM107A" "DES"
# [11] "PGM5" "TCEAL2" "SPON1" "C7" "APOD"
# [16] "SPARCL1" "PLAC9" "CRYAB" "SLIT2" "DCN"
# [21] "HSPB6" "CFD" "ATP1A2" "CYBRD1" "SERPINA3"
# [26] "SH3GL2" "FHL1" "MYLK" "RGS2" "COX7A1"
# [31] "ITM2A" "NR2F1" "SORBS1" "SMOC2" "VIM"
# [36] "PMP22" "CLIP3" "CALD1" "COL16A1" "CCND2"
# [41] "PALLD" "DPT" "TIMP2" "CDH11" "RASL12"
# [46] "ZEB2" "BIN1" "JAM3" "TMOD1" "AEBP1"
# [51] "LTBP4" "ANTXR2" "DPYSL3" "IGFBP6" "PDLIM3"
# [56] "DACT3" "A2M" "SERPINF1" "C1S" "OLFM1"
# [61] "FGF9" "DIXDC1" "GNG11" "PI16" "CLEC3B"
# [66] "KLF9" "PRPH" "DPYSL2" "WIPF1" "OLFML3"
# [71] "MAOB" "CYP1B1" "CRISPLD2" "PLSCR4" "EMP3"
# [76] "SPARC" "ACOX2" "CYP27A1" "SERPINE2" "GPM6B"
# [81] "GHR" "ALDH2" "RNASE4" "TNC" "LIFR"
# [86] "FXYD6" "MAMDC2" "FGL2" "RGS1" "SCARA5"
# [91] "GLT8D2" "ADAMTS8" "TSHZ3" "OLFML1" "TGFBR2"
# [96] "MAP1B" "RGL1" "MSRB3" "AP1S2" "NBEA"
# [101] "MEG3" "ABI3BP" "COL14A1" "CXCL12" "COL15A1"
# [106] "EDNRA" "PRRT2" "JAZF1" "SNCA" "TGFB3"
# [111] "SORCS1" "PAM" "EFEMP1" "PID1" "LDB2"
# [116] "ROR2" "SFRP2" "EMILIN1" "COLEC12" "RGS11"
# [121] "PDGFC" "ARHGAP15" "PRICKLE1" "PRICKLE2" "FBLN5"
# [126] "RNF150" "EPB41L2" "RBP1" "ZNF521" "SERPING1"
# [131] "NFIX" "RAB3IL1" "GNG7" "INMT" "GSTM5"
# [136] "PLA2G4C" "CSRP1" "RHOJ" "SLC9A9" "RARRES2"
# [141] "NLGN1" "STOM" "SYT11" "CNN3" "STAB1"
# [146] "SPRY1" "GYPC" "BOC" "DPYD" "JAM2"
# [151] "CPXM2" "GAS1" "PRDM8" "TCF4" "PELI2"
# [156] "FLRT2" "BCL2" "ISLR" "SFRP1" "HDC"
# [161] "AXL" "SCRG1" "FILIP1L" "MGLL" "MYOT"
# [166] "TMEFF2" "MAPRE2" "CTSG" "GSN" "CYP2U1"
# [171] "XKR4" "TSC22D3" "NES" "ATP8B4" "FXYD1"
# [176] "GIMAP8" "TCEAL7" "F10" "SYNPO2" "SORCS2"
# [181] "DACT1" "HSPB2" "HHIP" "FNBP1" "MXRA7"
# [186] "FAIM2" "COL21A1" "RECK" "LIMS2" "ZBTB16"
# [191] "VIPR2" "TNFSF12" "MYADM" "GABARAPL1" "RRAS"
# [196] "ZBTB20" "TNFAIP8L3" "GFPT2" "CNTNAP1" "CLIC4"
# [201] "KLHL13" "PTCH1" "DENND2A" "TSHZ1" "REEP2"
# [206] "NAALAD2" "MYOZ3" "ADCY4" "CLDN5" "STX2"
# [211] "DOCK11" "GFRA2" "PCSK5" "TMEM100" "ZFHX4"
# [216] "PPARGC1A" "SCARA3"
方法二
基于RobustRankAggreg包整合3个数据集的上下调基因,获得表达差异基因共316个,上调基因88个,下调基因228个,提取Gene symbol和logFC绘制热图展示。
# RobustRankAggreg整合上调基因 88个
glist_up=list(Up_GSE13507$Gene_Symbol,
Up_GSE37815$Gene_Symbol,
Up_GSE65635$Gene_Symbol)
ups=aggregateRanks(glist = glist_up,
N = length(unique(unlist(glist_up))))
tmp_up=as.data.frame(table(unlist(glist_up)))
ups$Freq=tmp_up[match(ups$Name,tmp_up[,1]),2]
head(ups)
save(ups,file = "ups.Rdata")
ups_genes <- ups[ups$Score < 0.05,1]
ups_genes
# [1] "CDC20" "TOP2A" "KRT7" "WDR72"
# [5] "TCN1" "NUSAP1" "UBE2C" "ESM1"
# [9] "CCNB2" "IQGAP3" "UHRF1" "LAD1"
# [13] "PRC1" "TTK" "ASPM" "C20orf46"
# [17] "CDCA5" "GJB2" "KIF20A" "TPX2"
# [21] "GINS2" "SHMT2" "LOC440030" "TRIP13"
# [25] "NCAPG" "E2F2" "AURKB" "SLC4A11"
# [29] "SPINK1" "TK1" "ASF1B" "CA9"
# [33] "SNORD33" "ZNF593" "IGFBP3" "AURKA"
# [37] "MIF" "CEP55" "MTP18" "HAS3"
# [41] "PODXL2" "TACSTD2" "FAM83H" "POLQ"
# [45] "SOX4" "ETV4" "CDT1" "CENPF"
# [49] "C19orf46" "SFN" "DTL" "SDC1"
# [53] "LOC647954" "CKAP2L" "SPAG5" "CAMK2N1"
# [57] "SBK1" "CSTB" "DKFZp762E1312" "FUT3"
# [61] "C9orf140" "KIAA0101" "FABP6" "CENPM"
# [65] "LOC284837" "PAFAH1B3" "S100P" "BUB1B"
# [69] "TROAP" "SLC6A8" "DSCR6" "LOC650803"
# [73] "DLG7" "SPAG4" "ADIRF" "CDCA3"
# [77] "CDS1" "CBLC" "GPR172A" "KRT19"
# [81] "EPS8L1" "MGC2408" "LOC440421" "LOC648526"
# [85] "CDCA8" "RPL39" "GSK3B" "TIMELESS"
# RobustRankAggreg整合下调基因 228个
glist_dw=list(Dw_GSE13507$Gene_Symbol,
Dw_GSE37815$Gene_Symbol,
Dw_GSE65635$Gene_Symbol)
dws=aggregateRanks(glist = glist_dw,
N = length(unique(unlist(glist_dw))))
tmp_dw=as.data.frame(table(unlist(glist_dw)))
dws$Freq=tmp_dw[match(dws$Name,tmp_dw[,1]),2]
head(dws)
save(dws,file = "dws.Rdata")
dws_genes <- dws[dws$Score < 0.05,1]
dws_genes
# [1] "FLNC" "CNN1" "PCP4" "ALDH1A1"
# [5] "DES" "PGM5" "CFD" "DCN"
# [9] "RGS2" "MFAP4" "HSPB6" "CLIP3"
# [13] "SRPX" "FHL1" "VIM" "SPARCL1"
# [17] "CALD1" "PALLD" "MYH11" "ISL1"
# [21] "CCND2" "IGJ" "ABCA8" "BIN1"
# [25] "DPYSL3" "COX7A1" "IGFBP6" "MYLK"
# [29] "JAM3" "MOXD1" "DACT3" "PRAC"
# [33] "PI16" "C7" "CRYAB" "DPYSL2"
# [37] "KIAA0367" "TIMP2" "PDLIM3" "APOD"
# [41] "ANTXR2" "ACTG2" "SERPINE2" "FAM107A"
# [45] "TCEAL2" "CYBRD1" "AEBP1" "FXYD6"
# [49] "RASL12" "SEPP1" "DMN" "DPT"
# [53] "A2M" "PTGDS" "CRISPLD2" "SMOC2"
# [57] "SLIT2" "PLAC9" "EMP3" "SPARC"
# [61] "SPON1" "CLEC3B" "ATP1A2" "SORBS1"
# [65] "FLJ21986" "LUM" "SERPINA3" "HLA-DPA1"
# [69] "SH3GL2" "FGL2" "COL16A1" "RGS1"
# [73] "KLF9" "RNF150" "NR2F1" "PTGS1"
# [77] "ACTC1" "SCARA5" "GNG11" "PRICKLE2"
# [81] "NFIX" "ACTA2" "MAMDC2" "SFRP2"
# [85] "PMP22" "EPB41L3" "ABI3BP" "MSRB3"
# [89] "CSRP1" "EDNRA" "RARRES2" "C2orf40"
# [93] "HLA-DRB4" "STOM" "TSHZ3" "SYT11"
# [97] "ALDH2" "PAM" "CNN3" "TGFB3"
# [101] "GATM" "LTBP4" "GYPC" "C1S"
# [105] "KCNMB1" "CPXM2" "ITM2A" "HLA-DRB3"
# [109] "RBP1" "EMILIN1" "C4orf31" "SGCE"
# [113] "EFEMP1" "IGFBP5" "ZEB2" "TAGLN"
# [117] "LOC652493" "PRICKLE1" "AXL" "GAS1"
# [121] "FBLN5" "TMOD1" "DKK3" "SERPINF1"
# [125] "MYL9" "RGL1" "LPPR4" "OLFM1"
# [129] "MAP1B" "FGF9" "TCF4" "GSN"
# [133] "MAOB" "MGP" "CAVIN1" "DKFZP586H2123"
# [137] "CDH11" "FILIP1L" "LDB2" "ALOX5AP"
# [141] "C2orf32" "PRPH" "LOC652694" "WIPF1"
# [145] "OLFML3" "CYP1B1" "CTSG" "SERPING1"
# [149] "DIXDC1" "FNBP1" "MXRA7" "TPM1"
# [153] "RBPMS2" "TPM2" "PDK4" "ACOX2"
# [157] "CPE" "CYP27A1" "SELM" "SPRY1"
# [161] "JAZF1" "LIMS2" "AMICA1" "ROR2"
# [165] "COLEC12" "RAB3IL1" "TGFBR2" "TNFSF12"
# [169] "INMT" "PLEKHC1" "RNASE4" "GPM6B"
# [173] "PLSCR4" "LOC730456" "CGNL1" "MYADM"
# [177] "GABARAPL1" "LHFP" "RRAS" "LOC647450"
# [181] "ADH1A" "ASPA" "ZBTB20" "TNC"
# [185] "TNFAIP8L3" "LOC647436" "COL5A1" "CXCL12"
# [189] "C10orf56" "DARC" "PTRF" "CLIC4"
# [193] "ENPP2" "AGR3" "ADAMTS8" "ANKRD25"
# [197] "SDPR" "CSF1R" "AP1S2" "FLJ22655"
# [201] "PROS1" "HLA-DRB1" "LPP" "MEG3"
# [205] "ALDH1A3" "COL6A1" "PRRT2" "C1QA"
# [209] "LOC641750" "PRDM8" "DENND2A" "COL14A1"
# [213] "MGC24103" "C9orf19" "GPX3" "OLFM4"
# [217] "GLT8D2" "MAGT1" "NELL2" "IL7R"
# [221] "SFRP1" "PTGIS" "SNCA" "CLDN5"
# [225] "MYL6" "SORCS1" "MS4A6A" "LOC642113"
# 差异基因可视化
dws_genes2 <- dws[dws$Score < 0.05,1]
ups_genes2 <- ups[ups$Score < 0.05,1]
deg <- c(ups_genes2,dws_genes2)
RAA_data<- data.frame(GSE13507 = DEG_GSE13507[deg,"logFC"],
GSE37815 = DEG_GSE37815[deg,"logFC"],
GSE65635 = DEG_GSE65635[deg,"logFC"])
rownames(RAA_data) <- deg
library(pheatmap)
head(RAA_data)
pheatmap(RAA_data,
color = colorRampPalette(c("#1E90FF","#FF0000"))(100),
cluster_rows = F,
cluster_cols = F,
display_numbers=T)
接下来,尝试下将RRA结果与Veen结果取交集,共有表达差异基因共178个,上调基因28个下调基因150个,可见RRA结果与Veen结果还是有差异的。
# RRA与Veen取交集
Veen_upDEGs <- Up_interDEGs
Veen_dwDEGs <- Dw_interDEGs
RAA_upDEGs <- ups_genes
RAA_dwDEGs <- dws_genes
# 上调基因
upgenes <- intersect(Veen_upDEGs,RAA_upDEGs)
upgenes
library(VennDiagram)
venn.plot <-venn.diagram(
x = list(Veen_upDEGs=Veen_upDEGs,
RAA_upDEGs=RAA_upDEGs),
filename ="Venn_UP2.tif",
lty ="dotted",
lwd =0.5,
col ="black", #"transparent"
fill =c("dodgerblue", "goldenrod1"),
alpha =0.60,
cat.col =c("dodgerblue", "goldenrod1"),
cat.cex =1,
cat.fontface="bold",
margin =0.05,
cex =1)
# 下调基因
dwgenes <- intersect(Veen_dwDEGs,RAA_dwDEGs)
dwgenes
venn.plot <-venn.diagram(
x = list(Veen_dwDEGs=Veen_dwDEGs,
RAA_dwDEGs=RAA_dwDEGs),
filename ="Venn_DW2.tif",
lty ="dotted",
lwd =0.5,
col ="black", #"transparent"
fill =c("dodgerblue", "goldenrod1"),
alpha =0.60,
cat.col =c("dodgerblue", "goldenrod1"),
cat.cex =1,
cat.fontface="bold",
margin =0.05,
cex =1)
▌整合多个相同平台测序/芯片数据集
对于同一平台的芯片/测序数据集,可以通过校正批次效应进行整合,如limma包removeBatchEffect()函数、SVA包ComBat()函数,以ComBat()函数为例整合GSE13507和GSE37815数据集,随后进行差异分析,结果获得表达差异基因1051个,下调基因772个上调基因279个。
# 加载标准化处理的数据集
load(file = "GSE13507_gene_exp_res.Rdata")
gene_exp1 <- gene_exp_res %>%
column_to_rownames("Gene_Symbol")
load(file = "GSE37815_gene_exp_res.Rdata")
gene_exp2 <- gene_exp_res %>%
column_to_rownames("Gene_Symbol")
# GSE13507_clinical添加batch信息
load(file = "GSE13507_clinical.Rdata")
sample_info1 <- sample_info %>%
mutate(batch = rep(1,nrow(sample_info))) %>%
filter(DE_group != "others")
gene_exp1 <- gene_exp1[rownames(sample_info1)]
# GSE13507_clinical添加batch信息
load(file = "GSE37815_clinical.Rdata")
sample_info2 <- sample_info %>%
mutate(batch = rep(2,nrow(sample_info))) %>%
filter(DE_group != "NA")
gene_exp2 <- gene_exp2[rownames(sample_info2)]
# intersect取交集
gene_names <- intersect(rownames(gene_exp1),rownames(gene_exp2))
# cbind 进行合并
expr <- cbind(gene_exp1[gene_names,],gene_exp2[gene_names,])
sample <- rbind(sample_info1,sample_info2)
# ComBat去除批次效应
BiocManager::install("sva")
library("sva")
??ComBat
mod <- model.matrix(~sample$DE_group)
expr_batch <- ComBat(dat = expr,
batch = sample$batch,
mod = mod)
# 构建对比矩阵
design <- model.matrix(~ 0 + sample$DE_group)
colnames(design) <- levels(factor(sample$DE_group))
rownames(design) <- rownames(sample)
contrasts <- makeContrasts(DE_group = cancer - control,
levels = design)
contrasts # treatment 1;control -1.
# 进行表达差异分析
fit <- lmFit(expr_batch,
design)
fit <- contrasts.fit(fit,
contrasts)
fit <- eBayes(fit)
DE_result <- topTable(fit,
number = Inf) # 保留所有基因
head(DE_result)
dim(DE_result)
# 整理差异分析数据 以logFC筛选
DE_result1 <- DE_result %>%
rownames_to_column("Gene_Symbol") %>%
mutate(Type = if_else(adj.P.Val >= 0.05,
"ns",
if_else(abs(logFC) < 1, "ns",
if_else(logFC >= 1, "up", "down")))) %>%
arrange(desc(abs(logFC)))
dim(DE_result1)
write_tsv(DE_result1, "expr_batch_DE_result_all.txt")
save(DE_result1, file = "expr_batch_DE_result_all.Rdata")
load(file = "expr_batch_DE_result_all.Rdata")
# 统计结果
table(DE_result1$Type)
# down ns up
# 772 23306 279
GSE13507和GSE37815数据集独立进行差异分析,若取交集,获得表达差异基因550个,下调基因409个上调基因141个;若RRA整合,获得表达差异基因127个,下调基因97个上调基因30个。而ComBat()函数整合后再进行差异分析,获得表达差异基因1051个,下调基因772个上调基因279个。分别绘制Veen图展示3种方法上、下调基因。
###### 与Veen、RRA比较 #####
# Veen
load(file = "GSE13507_DE_result_all.Rdata")
DEG_GSE13507 <- DE_result1 %>%
mutate(symbol = Gene_Symbol) %>%
column_to_rownames("symbol")
Up_GSE13507 <- DEG_GSE13507 %>% filter(Type == "up")
Dw_GSE13507 <- DEG_GSE13507 %>% filter(Type == "down")
load(file = "GSE37815_DE_result_all.Rdata")
DEG_GSE37815 <- DE_result1 %>%
mutate(symbol = Gene_Symbol) %>%
column_to_rownames("symbol")
Up_GSE37815 <- DEG_GSE37815 %>% filter(Type == "up")
Dw_GSE37815 <- DEG_GSE37815 %>% filter(Type == "down")
Up_VeenDEGs <- intersect(Up_GSE13507$Gene_Symbol,Up_GSE37815$Gene_Symbol)
length(Up_VeenDEGs)
# [1] 141
Dw_VeenDEGs <- intersect(Dw_GSE13507$Gene_Symbol,Dw_GSE37815$Gene_Symbol)
length(Dw_VeenDEGs)
# [1] 409
# RAA
glist_up=list(Up_GSE13507$Gene_Symbol,
Up_GSE37815$Gene_Symbol)
ups=aggregateRanks(glist = glist_up,
N = length(unique(unlist(glist_up))))
tmp_up=as.data.frame(table(unlist(glist_up)))
ups$Freq=tmp_up[match(ups$Name,tmp_up[,1]),2]
head(ups)
save(ups,file = "ups.Rdata")
Up_RaaDEGs <- ups[ups$Score < 0.05,1]
length(Up_RaaDEGs)
# [1] 30
glist_dw=list(Dw_GSE13507$Gene_Symbol,
Dw_GSE37815$Gene_Symbol)
dws=aggregateRanks(glist = glist_dw,
N = length(unique(unlist(glist_dw))))
tmp_dw=as.data.frame(table(unlist(glist_dw)))
dws$Freq=tmp_dw[match(dws$Name,tmp_dw[,1]),2]
head(dws)
save(dws,file = "dws.Rdata")
Dw_RaaDEGs <- dws[dws$Score < 0.05,1]
length(Dw_RaaDEGs)
# [1] 91
# ComBat
load(file = "expr_batch_DE_result_all.Rdata")
Up_batchDEGs <- DE_result1 %>% filter(Type == "up")
Dw_batchDEGs <- DE_result1 %>% filter(Type == "down")
# 绘制Veen图
library(VennDiagram)
venn.plot <-venn.diagram(
x = list(Up_batchDEGs = Up_batchDEGs$Gene_Symbol,
Up_VeenDEGs = Up_VeenDEGs,
Up_RaaDEGs = Up_RaaDEGs),
filename ="Venn_UP3.tif",
lty ="dotted",
lwd =0.5,
col ="black", #"transparent"
fill =c("dodgerblue", "goldenrod1", "darkorange1"),
alpha =0.60,
cat.col =c("dodgerblue", "goldenrod1", "darkorange1"),
cat.cex =1,
cat.fontface="bold",
margin =0.05,
cex =1)
venn.plot <-venn.diagram(
x = list(Dw_batchDEGs = Dw_batchDEGs$Gene_Symbol,
Dw_VeenDEGs = Dw_VeenDEGs,
Dw_RaaDEGs = Dw_RaaDEGs),
filename ="Venn_DW3.tif",
lty ="dotted",
lwd =0.5,
col ="black", #"transparent"
fill =c("dodgerblue", "goldenrod1", "darkorange1"),
alpha =0.60,
cat.col =c("dodgerblue", "goldenrod1", "darkorange1"),
cat.cex =1,
cat.fontface="bold",
margin =0.05,
cex =1)
可见,以ComBat()整合数据集去除批次效应,然后再进行差异分析,获得表达差异基因数目是最多的,而各个数据集独立进行差异分析,随后取交集获得表达差异基因数目次之,RAA整合后获得表达差异基因数目最少。那么问题来了:实际操作中采用哪种方法更好呢?事实上,ComBat()适用于数据集来自同一检测平台的情况,Veen或RAA的具体情况具体分析。不过,如果一种方法不能获得我们需求的目标基因,不放换一种方法试试看。
好啦,以上就是今日推文的全部内容了,小伙伴们我们下期再见~!
生信分析全代码系列
Nucleic Acids Res 精选系列
LncRNA/CircRNA系列
小白实战课堂开课啦!手把手教你转录因子与靶基因预测操作~!
引药生变数据库系列传送门(完结)
甲基化数据库系列传送门(完结)
—
END—
撰文丨弘 毅
排版丨四金兄
主编丨小雪球
欢迎大家关注解螺旋生信频道-挑圈联靠公号~
最新评论
推荐文章
作者最新文章
你可能感兴趣的文章
Copyright Disclaimer: The copyright of contents (including texts, images, videos and audios) posted above belong to the User who shared or the third-party website which the User shared from. If you found your copyright have been infringed, please send a DMCA takedown notice to [email protected]. For more detail of the source, please click on the button "Read Original Post" below. For other communications, please send to [email protected].
版权声明:以上内容为用户推荐收藏至CareerEngine平台,其内容(含文字、图片、视频、音频等)及知识版权均属用户或用户转发自的第三方网站,如涉嫌侵权,请通知[email protected]进行信息删除。如需查看信息来源,请点击“查看原文”。如需洽谈其它事宜,请联系[email protected]。
版权声明:以上内容为用户推荐收藏至CareerEngine平台,其内容(含文字、图片、视频、音频等)及知识版权均属用户或用户转发自的第三方网站,如涉嫌侵权,请通知[email protected]进行信息删除。如需查看信息来源,请点击“查看原文”。如需洽谈其它事宜,请联系[email protected]。