决定了!把免疫做到新高度就用这个分析!|(附代码 )
平均阅读时长为 6分钟
WGCNA应用之免疫浸润
大家好,我是风风。今天我们要继续我们的WGCNA系列。在之前的推文中,我们已经一起做了对单个数据集进行了WGCNA分析并完整地走完了单个WGCNA的流程和针对多数据集联合分析的共识性分析,同时,在构建网络的过程中,我们也使用了“自动构建拓扑异构网络”法和“逐步法构建拓扑异构网络”两种方法,那么我们WGCNA的常用知识就算基本完结了,接下来无非就是利用WGCNA进行实战的内容。我们将通过三次不同角度的实战,利用我们自己的数据进行分析,看看如何将WGCNA应用到我们的文章之中。
其实关于WGCNA的应用,最关键的内容就是网络构建和联系网络和表型数据,这两步基本决定了分析结果的好坏。今天我们开始第一个内容——联合免疫浸润分析的结果和表达数据进行WGCNA分析:
WGCNA系列传送门
免疫浸润分析我们使用耳熟能详的CIBERSORT算法,使用的数据和相关代码放在公众号后台,回复关键词即可获取:
rm(list = ls())
"WGCNA") 安装R包 BiocManager::install(
"tidyverse") BiocManager::install(
"reshape2") BiocManager::install(
"plyr") BiocManager::install(
"tidyr") BiocManager::install(
"dplyr") BiocManager::install(
"ggplot2") BiocManager::install(
options(stringsAsFactors = FALSE)
library(tidyverse)
# -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
# v ggplot2 3.3.3 v purrr 0.3.4
# v tibble 3.1.2 v dplyr 1.0.6
# v tidyr 1.1.3 v stringr 1.4.0
# v readr 1.4.0 v forcats 0.5.1
# -- Conflicts ------------------------------------------ tidyverse_conflicts() --
# x dplyr::filter() masks stats::filter()
# x dplyr::lag() masks stats::lag()
library(reshape2)
#
# 载入程辑包:'reshape2'
# The following object is masked from 'package:tidyr':
#
# smiths
library(WGCNA)
# 载入需要的程辑包:dynamicTreeCut
# 载入需要的程辑包:fastcluster
#
# 载入程辑包:'fastcluster'
# The following object is masked from 'package:stats':
#
# hclust
#
#
# 载入程辑包:'WGCNA'
# The following object is masked from 'package:stats':
#
# cor
library(plyr)
# ------------------------------------------------------------------------------
# You have loaded plyr after dplyr - this is likely to cause problems.
# If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
# library(plyr); library(dplyr)
# ------------------------------------------------------------------------------
#
# 载入程辑包:'plyr'
# The following objects are masked from 'package:dplyr':
#
# arrange, count, desc, failwith, id, mutate, rename, summarise,
# summarize
# The following object is masked from 'package:purrr':
#
# compact
library(dplyr)
library(tidyr)
library(ggplot2)
免疫浸润分析
进行CIBERSORT分析
source("CIBERSORT.R")
tpms_data <- "tpms_data.txt"
LM22_file <- "LM22.txt"
cibersort_res_tpms <- CIBERSORT(LM22_file,
tpms_data,
perm = 10,
QN = TRUE)
cibersort_res_tpms1 <- as.data.frame(cibersort_res_tpms)
去除0值较多的结果
cibersort_res_tpms2 <- cibersort_res_tpms1 %>%
select(-c("P-value","Correlation","RMSE"))
筛选出0值太多的一些细胞
res_sample <- apply(cibersort_res_tpms2, 2, function(x)
sum(x == 0) < dim(cibersort_res_tpms2)[1]/2)
cibersort_res_tpms3 <- cibersort_res_tpms2[,res_sample] %>%
as.matrix() %>%
t()
dim(cibersort_res_tpms3)
# [1] 11 430
library(RColorBrewer)
mypalette <- colorRampPalette(brewer.pal(8,"Set3"))
cibersort_res_tpms4 <- cibersort_res_tpms3 %>%
as.data.frame() %>%
rownames_to_column("cell_type") %>%
gather(sample, fraction, - cell_type)
cibersort_res_tpms5 <- cibersort_res_tpms3 %>%
as.data.frame() %>%
rownames_to_column("cell_type")
ggplot(cibersort_res_tpms4,
aes(cell_type,
fraction,
fill = cell_type)) +
geom_boxplot(outlier.shape = 21,
coulour = "black") +
theme_bw() +
labs(x = "Cell Type",
y = "Estimated Proportion") +
theme(axis.text.x = element_blank()) +
theme(axis.ticks.x = element_blank()) +
scale_fill_manual(values = mypalette(22))
# Warning: Ignoring unknown parameters: coulour
# 导出免疫浸润分析结果作为表型数据
write_tsv(cibersort_res_tpms5, "cibersort_res.txt")
获得了免疫浸润的数据之后,我们只需要把结果视为表型数据的一种就可以了,接下来我们要做的就是利用WGCNA分析构建网络并获得相应模块:
# 联合免疫浸润和WGCNA分析
WGCNA_data <- read_tsv("tpms_data.txt") %>%
column_to_rownames("GeneSymbol")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_double(),
## GeneSymbol = col_character()
## )
## i<U+00A0>Use `spec()` for the full column specifications.
WGCNA_data[1:3, 1:3]
## TCGA-AA-3975-01A-01R-1022-07 TCGA-AA-A00R-01A-01R-A002-07
## A1BG 0.05639508 0.1112941
## A1CF 35.75448131 0.4451762
## A2M 292.91605034 228.2084637
## TCGA-AY-A69D-01A-11R-A37K-07
## A1BG 0.1540014
## A1CF 44.0957226
## A2M 206.3960444
WGCNA_data1 <- WGCNA_data %>%
t() %>%
as.data.frame()
dim(WGCNA_data1)
## [1] 430 19470
WGCNA_data1 <- log(WGCNA_data1 + 1)
# 筛选基因变化较大的前50%的基因
vars_res <- apply(WGCNA_data1, 2, var)
# 计算百分位数截止值
per_res <- quantile(vars_res, probs = seq(0, 1, 0.25))
upperGene <- WGCNA_data1[, which(vars_res > per_res[4])]
WGCNA_data2 <- data.matrix(upperGene)
# 对分析数据进行质量评估
gsg <- goodSamplesGenes(WGCNA_data2, verbose = 3);
## Flagging genes and samples with too many missing values...
## ..step 1
gsg[["allOK"]] # 全部合格
## [1] TRUE
datExpr0 <- WGCNA_data2
# 判断矩阵的样本是否都合格?
gsg <- goodSamplesGenes(datExpr0, verbose = 3);
## Flagging genes and samples with too many missing values...
## ..step 1
gsg$allOK # 显示TURE则为都合格,不需要进行额外处理,如果是FALSE,则运行下面这部分代码
## [1] TRUE
if (!gsg$allOK)
{
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
}
# 对样本进行聚类,dist为计算距离,hclust为聚类
sampleTree <- hclust(dist(datExpr0), method = "average")
# 聚类结果可视化
# pdf("1_sampleTree.pdf",width = 10, height = 5)
par(cex = 0.7);
par(mar <- c(0,4,2,0))
## NULL
plot(sampleTree,
main = "Sample clustering to detect outliers",
sub="",
xlab="",
cex.lab = 1.5,
cex.axis = 1.5,
cex.main = 2)
datExpr <- datExpr0
nGenes <- ncol(datExpr)
nSamples <- nrow(datExpr)
allTraits <- as.data.frame(t(cibersort_res_tpms3))
dim(allTraits)
names(allTraits)
traitRows <- match(rownames(datExpr), rownames(allTraits))
datTraits <- allTraits[traitRows, ]
sampleTree2 <- hclust(dist(datExpr), method = "average")
traitColors <- numbers2colors(datTraits, signed = FALSE)
plotDendroAndColors(sampleTree2,
traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")
dev.off()
保存清洗后的数据
save(datExpr, datTraits, file = "dataInput.RData")
构建网络并进行模块识别
设置多线程进行WGCNA分析
enableWGCNAThreads()
# Allowing parallel execution with up to 11 working processes.
加载前面清洗完成的数据
lnames <- load(file = "dataInput.RData")
lnames
# [1] "datExpr" "datTraits"
设置软阈值区间
powers = c(c(1:10),
seq(from = 12, to = 20, by = 2))
进行拓扑网络分析识别最佳软阈值
sft = pickSoftThreshold(datExpr,
powerVector = powers,
verbose = 5)
# pickSoftThreshold: will use block size 4868.
# pickSoftThreshold: calculating connectivity for given powers...
# ..working on genes 1 through 4868 of 4868
# Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
# 1 1 0.00659 0.123 0.841 781.000 7.30e+02 1490.0
# 2 2 0.69200 -1.120 0.794 231.000 1.68e+02 771.0
# 3 3 0.80700 -1.310 0.814 96.500 4.85e+01 487.0
# 4 4 0.81800 -1.290 0.822 49.800 1.61e+01 337.0
# 5 5 0.83900 -1.230 0.859 29.300 5.97e+00 246.0
# 6 6 0.85000 -1.210 0.883 18.700 2.49e+00 185.0
# 7 7 0.85100 -1.190 0.891 12.700 1.11e+00 143.0
# 8 8 0.86400 -1.170 0.909 8.920 5.28e-01 112.0
# 9 9 0.87700 -1.160 0.927 6.490 2.61e-01 89.4
# 10 10 0.88400 -1.150 0.936 4.840 1.32e-01 72.2
# 11 12 0.88500 -1.170 0.949 2.850 3.80e-02 49.9
# 12 14 0.89600 -1.180 0.958 1.780 1.19e-02 35.6
# 13 16 0.89400 -1.200 0.962 1.160 3.81e-03 26.1
# 14 18 0.91700 -1.190 0.976 0.787 1.29e-03 19.5
# 15 20 0.91600 -1.170 0.975 0.548 4.74e-04 14.9
powerEstimates <- sft$powerEstimate # 查看自动识别的最佳软阈值
powerEstimates
# [1] 6
如果上述没有自动识别最佳软阈值,则可以将结果进行可视化后尽心选取
"2_Threshold.pdf",width = 10, height = 5) pdf(
par(mfrow = c(1,2));
cex1 = 0.9;
$fitIndices[,3])*sft$fitIndices[,2], fitIndices[,1], -sign(sft
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
$fitIndices[,3])*sft$fitIndices[,2], fitIndices[,1], -sign(sft
labels=powers,cex=cex1,col="red");
abline(h=0.90,col="red")
$fitIndices[,5], fitIndices[,1], sft
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
$fitIndices[,5], labels=powers, cex=cex1,col="red") fitIndices[,1], sft
# dev.off()
# 构建网络(很关键)
softPower<- powerEstimates # 设置最佳软阈值
adjacency<- adjacency(datExpr,
softPower) =
# 将最邻近值转化为拓扑网络
TOM<- TOMsimilarity(adjacency)
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
dissTOM <- 1-TOM
# 调用层次聚类函数
geneTree<- hclust(as.dist(dissTOM),
"average") =
# 如果我们喜欢大的模块,那我们就可以把最小模块的数值设置得比较高一些:
minModuleSize = 50
# 模块识别使用动态切割:
dynamicMods<- cutreeDynamic(dendro = geneTree,
distM = dissTOM,
deepSplit = 2,
pamRespectsDendro = FALSE,
minClusterSize = minModuleSize)
## ..cutHeight not given, setting it to 0.998 ===> 99% of the (truncated) height range in dendro.
## ..done.
table(dynamicMods)
## dynamicMods
## 0 1 2 3 4 5 6 7 8
## 1124 893 770 765 464 378 244 144 86
# 可以看到,每个模块的基因数目都大于50
# 转换数字标签成颜色标签
dynamicColors<- labels2colors(dynamicMods)
table(dynamicColors)
## dynamicColors
## black blue brown green grey pink red turquoise
## 144 770 765 378 1124 86 244 893
## yellow
## 464
# 在下面画出树状图和颜色
plotDendroAndColors(geneTree,
"Dynamic Tree Cut",
dendroLabels = FALSE,
hang = 0.03,
addGuide = TRUE,
guideHang = 0.05,
main = "Gene dendrogram and module colors")
接下来,我们就可以计算Eigengenes了
MEList <- moduleEigengenes(datExpr,
colors = dynamicColors)
str(MEList)
# List of 12
# $ eigengenes :'data.frame': 430 obs. of 9 variables:
# ..$ MEblack : num [1:430] 0.00398 -0.01492 -0.0239 -0.01864 -0.02114 ...
# ..$ MEblue : num [1:430] 0.01021 0.000788 -0.028916 0.023806 -0.031946 ...
# ..$ MEbrown : num [1:430] -0.0173 0.1039 -0.0436 0.0921 -0.0788 ...
# ..$ MEgreen : num [1:430] 0.0111 0.1147 -0.0125 0.0747 -0.0782 ...
# ..$ MEgrey : num [1:430] -3.55e-02 1.98e-02 8.83e-05 5.94e-02 -9.34e-02 ...
# ..$ MEpink : num [1:430] 0.0641 0.0736 -0.052 0.0685 -0.0504 ...
# ..$ MEred : num [1:430] 0.09325 -0.01676 0.03156 0.00199 0.00716 ...
# ..$ MEturquoise: num [1:430] 0.00806 -0.01397 -0.00363 -0.01001 -0.01136 ...
# ..$ MEyellow : num [1:430] -0.0291 -0.0205 0.0416 -0.0316 0.0348 ...
# $ averageExpr :'data.frame': 430 obs. of 9 variables:
# ..$ AEblack : num [1:430] 0.111 -0.204 -0.299 -0.251 -0.289 ...
# ..$ AEblue : num [1:430] 0.1636 -0.0595 -0.337 0.2479 -0.3418 ...
# ..$ AEbrown : num [1:430] 0.2782 0.0865 -0.2335 0.4193 -0.3366 ...
# ..$ AEgreen : num [1:430] 0.185 1.242 -0.212 0.864 -0.875 ...
# ..$ AEgrey : num [1:430] 0.2614 -0.5079 -0.0675 -0.1852 0.0861 ...
# ..$ AEpink : num [1:430] 0.726 0.931 -0.622 0.745 -0.56 ...
# ..$ AEred : num [1:430] 0.9547 -0.1598 0.2893 0.016 0.0291 ...
# ..$ AEturquoise: num [1:430] 0.1578 -0.288 -0.0457 -0.1067 -0.0892 ...
# ..$ AEyellow : num [1:430] -0.212 -0.243 0.358 -0.32 0.372 ...
# $ varExplained:'data.frame': 1 obs. of 9 variables:
# ..$ X1: num 0.497
# ..$ X2: num 0.406
# ..$ X3: num 0.317
# ..$ X4: num 0.394
# ..$ X5: num 0.0866
# ..$ X6: num 0.426
# ..$ X7: num 0.289
# ..$ X8: num 0.304
# ..$ X9: num 0.333
# $ nPC : num 1
# $ validMEs : logi [1:9] TRUE TRUE TRUE TRUE TRUE TRUE ...
# $ validColors : chr [1:4868] "brown" "turquoise" "blue" "blue" ...
# $ allOK : logi TRUE
# $ allPC : logi TRUE
# $ isPC : logi [1:9] TRUE TRUE TRUE TRUE TRUE TRUE ...
# $ isHub : logi [1:9] FALSE FALSE FALSE FALSE FALSE FALSE ...
# $ validAEs : logi [1:9] TRUE TRUE TRUE TRUE TRUE TRUE ...
# $ allAEOK : logi TRUE
MEs <- MEList$eigengenes
计算模块特征间的不相似性
MEDiss = 1 - cor(MEs)
METree <- hclust(as.dist(MEDiss), method = "average")
合并相似模块
merge <- mergeCloseModules(datExpr,
dynamicColors,
cutHeight = 0.2,
verbose = 3)
# mergeCloseModules: Merging modules whose distance is less than 0.2
# multiSetMEs: Calculating module MEs.
# Working on set 1 ...
# moduleEigengenes: Calculating 9 module eigengenes in given set.
# multiSetMEs: Calculating module MEs.
# Working on set 1 ...
# moduleEigengenes: Calculating 6 module eigengenes in given set.
# Calculating new MEs...
# multiSetMEs: Calculating module MEs.
# Working on set 1 ...
# moduleEigengenes: Calculating 6 module eigengenes in given set.
mergedColors = merge$colors
新合并模块的特征基因:
mergedMEs = merge$newMEs
table(mergedColors)
# mergedColors
# black brown grey pink red yellow
# 1807 1143 1124 86 244 464
进行可视化
"4_Module.pdf", 10, 5) pdf(
plotDendroAndColors(geneTree,
cbind(dynamicColors,
mergedColors),
c("Dynamic Tree Cut",
"Merged dynamic"),
dendroLabels = FALSE,
hang = 0.03,
addGuide = TRUE,
guideHang = 0.05)
# dev.off()
# 重命名模块颜色名称
moduleColors = mergedColors
colorOrder = c("grey",
standardColors(50))
moduleLabels = match(moduleColors, colorOrder) - 1
MEs = mergedMEs
# 保存模块颜色和标签,以供后续部分使用
save(MEs,
moduleLabels,
moduleColors,
geneTree,
file = "networkConstruction-stepByStep.RData")
现在我们手上已经有了WGCNA的模块网络,也有了免疫浸润分析的结果作为表型数据,接下来就是关联网络和表型数据进行分析看相关性,从而找到关键网络了:
library(reshape2)
library(tidyverse)
dim(allTraits)
names(allTraits)
traitRows <- match(rownames(datExpr), rownames(allTraits))
datTraits <- allTraits[traitRows, ]
data2 <- datTraits
lnames = load(file = "networkConstruction-stepByStep.RData");
lnames
nGenes = ncol(datExpr);
nSamples = nrow(datExpr);
MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
cosample <- intersect(rownames(data2), rownames(MEs))
data3 <- data2[cosample,]
data3 <- rownames_to_column(data3, "ID")
write_tsv(data3, "WGCNA_pheno.txt")
data3 <- read_tsv("WGCNA_pheno.txt") %>%
column_to_rownames("ID")
moduleTraitCor = cor(MEs, data3, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
textMatrix = paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) = dim(moduleTraitCor)
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = colnames(data3),
yLabels = names(MEs),
cex.lab = 0.5,
ySymbols = colnames(MEs),
colorLabels = FALSE,
colors = colorRampPalette(c("blue", "white", "red"))(100),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
zlim = c(-1,1),
main = paste("Module trait relationships"))
# dev.off()
从热图结果上我们可以发现,Tregs细胞与黄色模块呈显著负相关,而M1型巨噬细胞与棕色模块的相关性最强,接下来我们就可以围绕这两种细胞进行进一步的分析了。
好了,今天的内容到此结束!今天我们主要整合了免疫浸润分析和WGCNA分析。
后台回复“feng 63”获取代码和数据,我们下期见!
单细胞分析专栏传送门
碎碎念专栏传送门(完结)
风之美图系列传送门(完结)
—
END—
撰文丨风 风
排版丨四金兄
主编丨小雪球
欢迎大家关注解螺旋生信频道-挑圈联靠公号~
关键词
数据
网络
模块
内容
结果
最新评论
推荐文章
作者最新文章
你可能感兴趣的文章
Copyright Disclaimer: The copyright of contents (including texts, images, videos and audios) posted above belong to the User who shared or the third-party website which the User shared from. If you found your copyright have been infringed, please send a DMCA takedown notice to [email protected]. For more detail of the source, please click on the button "Read Original Post" below. For other communications, please send to [email protected].
版权声明:以上内容为用户推荐收藏至CareerEngine平台,其内容(含文字、图片、视频、音频等)及知识版权均属用户或用户转发自的第三方网站,如涉嫌侵权,请通知[email protected]进行信息删除。如需查看信息来源,请点击“查看原文”。如需洽谈其它事宜,请联系[email protected]。
版权声明:以上内容为用户推荐收藏至CareerEngine平台,其内容(含文字、图片、视频、音频等)及知识版权均属用户或用户转发自的第三方网站,如涉嫌侵权,请通知[email protected]进行信息删除。如需查看信息来源,请点击“查看原文”。如需洽谈其它事宜,请联系[email protected]。