WGCNA-cytoscape-analysis

WGCNA-cytoscape-analysis

全套R代码复刻：

WGCNA install
source("https://bioconductor.org/biocLite.R")
biocLite(c("AnnotationDbi", "impute","GO.db", "preprocessCore"))

install.packages(c("matrixStats", "Hmisc", "splines", "foreach", "doParallel", "fastcluster", "dynamicTreeCut", "survival"))

site="https://mirrors.tuna.tsinghua.edu.cn/CRAN"
install.packages(c("WGCNA", "stringr", "reshape2"), repos=site)

biocLite("WGCNA")
library('WGCNA')
library('stringr')
library('reshape2')
library("DESeq2")
library('AnnotationDbi')
library('org.Rn.eg.db')#分析的大鼠的数据

sampleTable = data.frame(read.table(file = './Sample_Meta_Info.txt',header = T))
rownames(sampleTable) = sampleTable$samplenames
library("DESeq2")

####all gene list in contrast result
gene_list_8_kinds_of_situation <-

c(rownames(res_Liver_N_M_sig_lfc_2),

rownames(res_Liver_N_S_sig_lfc_2),

rownames(res_Brain_N_M_sig_lfc_2),

rownames(res_Brain_N_S_sig_lfc_2),

rownames(res_Spleen_N_M_sig_lfc_2),

rownames(res_Spleen_S_N_sig_lfc_2),

rownames(res_W_M_N_sig_lfc_2),

rownames(res_W_S_N_sig_lfc_2))

##all DE genes expression pattern counts
gene_list_8_condition_counts=countdata[match(gene_list_8_kinds_of_situation,rownames(countdata)),]
library('edgeR')
rpkm=rpkm(gene_list_8_condition_counts,gene.length = gene_number)
dim(rpkm)

对基因表达的rpkm值进行安列的筛选。
dataExpr = rpkm[rowSums(rpkm>1)>0.4*ncol(rpkm),]
dim(dataExpr)

##preprocess setting
options(stringsAsFactors = FALSE)
allowWGCNAThreads()
enableWGCNAThreads()
corType = "pearson"
corFnc = ifelse(corType=="pearson", cor, bicor)
maxPOutliers = ifelse(corType=="pearson",1,0.05)
robustY = ifelse(corType=="pearson",T,F)
#######
m.mad <- apply(dataExpr,1,mad)##mad的normalization的方法使用。
dataExprVar <- dataExpr[which(m.mad > max(quantile(m.mad, probs=seq(0, 1, 0.25))[2],0.01)),]##利用mad之后的分位值进行筛选。
dataExpr <- as.data.frame(t(dataExprVar))
gsg = goodSamplesGenes(dataExpr, verbose = 3)#利用WGCNA中的函数goodSamplesGenes，在基因表达水平的控制下进行样本质量的筛选

if (!gsg$allOK){
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:",
paste(names(dataExpr)[!gsg$goodGenes], collapse = ",")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:",
paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ",")));
# Remove the offending genes and samples from the data:
dataExpr = dataExpr[gsg$goodSamples, gsg$goodGenes]
}

dim(dataExpr)
nGenes= ncol(dataExpr)
nSamples = nrow(dataExpr)
sampleTree = hclust(dist(dataExpr), method = "average")##简单的层次聚类步骤
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="")
powers = c(c(1:10), seq(from = 12, to=30, by=2))

#######Constructing the soft threshold and plotting
type = "unsigned"
sft = pickSoftThreshold(t(dataExpr), powerVector=powers,networkType=type, verbose=5)
head(dataExpr)[,1:8]
cex1 = 0.9

plot(sft$fitIndices[,1], sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",
ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red")
abline(h=0.85,col="red")
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers,
cex=cex1, col="red")

一般地，power等于
power = sft$powerEstimate

经验power:无满足条件的power时选用

if (is.na(power)){
power = ifelse(nSamples<20, ifelse(type == "unsigned", 9, 18),
ifelse(nSamples<30, ifelse(type == "unsigned", 8, 16),
ifelse(nSamples<40, ifelse(type == "unsigned", 7, 14),
ifelse(type == "unsigned", 6, 12))
))
}

corType="bicor"

一步法网络构建：One-step network construction and module detection
net = blockwiseModules(dataExpr, power = power, maxBlockSize = nGenes,
TOMType = type, minModuleSize = 30,##type = "unsigned"
reassignThreshold = 0, mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs=F, corType = corType,
maxPOutliers=maxPOutliers, loadTOMs=TRUE,
#saveTOMFileBase = paste0('Sepsis_wgcna_analysis', ".tom"),
verbose = 3)
#####################

提取未plot out的网络信息

table(net$colors)
moduleLabels = net$colors
moduleColors = labels2colors(moduleLabels)
names(moduleColors)=colnames(dataExpr)
pdf("./plotDendroAndColors.pdf")
plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
dev.off()##
MEs = net$MEs
MEs_col = MEs

colnames(MEs_col) = paste0("ME", labels2colors(as.numeric(str_replace_all(colnames(MEs),"ME",""))))
###sometimes if you are using server in ubuntu or centos ， may encounter the lazy load problem, once that we use the next line command alternatively
colnames(MEs_col) = paste0("ME", labels2colors(c(7,9,1,8,10,6,2,4,3,5,0)))

MEs_col = orderMEs(MEs_col)
pdf("./plotEigengeneNetworks.pdf") #特征基因的网络构建
plotEigengeneNetworks(MEs_col, "Eigengene adjacency heatmap",
marDendro = c(3,3,2,4),
marHeatmap = c(3,4,2,2), plotDendrograms = T,
xLabelsAngle = 90)
dev.off()
###########TOM plot(topology map plot)
TOM = TOMsimilarityFromExpr(dataExpr, power=power, corType=corType, networkType=type)
TOM <- as.matrix(TOM)
dissTOM = 1-TOM
plotTOM = dissTOM^7
diag(plotTOM) = NA
pdf("./TOMplot.pdf")
TOMplot(plotTOM, net$dendrograms, moduleColors,
main = "Network heatmap plot, all genes",setLayout=TRUE)
dev.off()

plot out to observe the majority cluster to specifically plot in details

########for the brown cluster to cytoscape
module_brown='brown'
inModule_brown = is.finite(match(moduleColors, module_brown))
modTOM_brown = TOM[inModule_brown, inModule_brown]
modProbes_brown = names(moduleColors[moduleColors==module_brown])
modGenes_brown <- mapIds(org.Rn.eg.db, keys = modProbes_brown, keytype = 'ENSEMBL', column = 'SYMBOL')
dimnames(modTOM_brown) = list(modProbes_brown, modProbes_brown)
cyt = exportNetworkToCytoscape(modTOM_brown,
edgeFile = paste("sepsis_wgcna_brown_cluster", ".edges.txt", sep=""),
nodeFile = paste("sepsis_wgcna_brown_cluster", ".nodes.txt", sep=""),
weighted = TRUE, threshold = 0.3,
nodeNames = modGenes_brown, nodeAttr = moduleColors[inModule_brown])
##########for the blue cluster
module_blue='blue'
inModule_blue = is.finite(match(moduleColors, module_blue))
modTOM_blue = TOM[inModule_blue, inModule_blue]
modProbes_blue = names(moduleColors[moduleColors==module_blue])
modGenes_blue <- mapIds(org.Rn.eg.db, keys = modProbes_blue, keytype = 'ENSEMBL', column = 'SYMBOL')
TOM_blue=TOM[names(moduleColors[moduleColors=="blue"]),names(moduleColors[moduleColors=="blue"])]
moduleColors_blue=moduleColors[moduleColors=="blue"]
probes_blue=colnames(TOM_blue)
dimnames(TOM_blue) <- list(probes_blue, probes_blue)
cyt = exportNetworkToCytoscape(TOM_blue,
edgeFile = paste("sepsis_wgcna_blue_cluster", ".edges.txt", sep=""),
nodeFile = paste("sepsis_wgcna_blue_cluster", ".nodes.txt", sep=""),
weighted = TRUE, threshold = 0.3,
nodeNames = probes_blue, nodeAttr = moduleColors_blue)

##############the whole gene expression output into cytoscape database
probes = colnames(dataExpr)
dimnames(TOM) <- list(probes, probes)
# Export the network into edge and node list files Cytoscape can read
# threshold 默认为0.5, 可以根据自己的需要调整，也可以都导出后在
# cytoscape中再调整
cyt = exportNetworkToCytoscape(TOM,
edgeFile = paste("sepsis_wgcna", ".edges.txt", sep=""),
nodeFile = paste("sepsis_wgcna", ".nodes.txt", sep=""),
weighted = TRUE, threshold = 0,
nodeNames = probes, nodeAttr = moduleColors)
############
#关联表型数据，不同phenotype的大鼠的外周血激素数据：

hormone_data_Table = data.frame(read.table(file = './samples-hormones.txt',header = F))
rownames(hormone_data_Table)=hormone_data_Table$V1
hormone_data=hormone_data_Table[,c(-1,-3,-5,-7,-9,-11,-13,-15)]
colnames(hormone_data)=c("OT","T","ghrelin","T4","cortisol","NE","LEP")
dim(hormone_data)
sampleName = rownames(dataExpr)
rownames(MEs_col)=sampleName

hormone_data=apply(hormone_data, 2, function(X) rep(X, each = 4))
rownames(hormone_data)=paste(rownames(hormone_data),rep(c('L','B','S','W'),9),sep = '')

rownames(hormone_data)[c(14,18,22,26,30,34)]=paste("Re_seq_",c(rownames(hormone_data[c(14,18,22,26,30,34),])),sep = '')
dim(hormone_data)###[36:7]

大鼠外周血激素数据，分为四种器官liver、 brain、 spleen、 white cell（from PBMC）
hormone_data[match(rownames(dataExpr),rownames(hormone_data)),]
hormone_data_L=hormone_data[match(rownames(MEs_col_L),rownames(hormone_data)),]
hormone_data_B=hormone_data[match(rownames(MEs_col_B),rownames(hormone_data)),]
hormone_data_S=hormone_data[match(rownames(MEs_col_S),rownames(hormone_data)),]
hormone_data_W=hormone_data[match(rownames(MEs_col_W),rownames(hormone_data)),]

RNAseq gene expressionn data
dataExpr_W=dataExpr[match(rownames(MEs_col_W),rownames(dataExpr)),]
dataExpr_L=dataExpr[match(rownames(MEs_col_L),rownames(dataExpr)),]
dataExpr_S=dataExpr[match(rownames(MEs_col_S),rownames(dataExpr)),]
dataExpr_B=dataExpr[match(rownames(MEs_col_B),rownames(dataExpr)),]

WGCNA modules classified data
MEs_col_L=MEs_col[grep("L",rownames(MEs_col)),]
MEs_col_B=MEs_col[grep("B",rownames(MEs_col)),]
MEs_col_S=MEs_col[grep("S",rownames(MEs_col)),]
MEs_col_W=MEs_col[grep("W",rownames(MEs_col)),]

remove the module (not co-expression by observering， such as grey in whole histogram)
MEs_col = MEs_col[,grep('grey',colnames(MEs_col),invert = T)]

plotting the correlation-score between clinical data with every module

in white cell
if (corType=="pearson") {
modTraitCor_W= cor(MEs_col_W, hormone_data_W, use = "p")
modTraitP_W = corPvalueStudent(modTraitCor_W, 9)
} else {
modTraitCorP = bicorAndPvalue(MEs_col_W, hormone_data_W, robustY=robustY)
modTraitCor_W = modTraitCorP$bicor
modTraitP_W = modTraitCorP$p
}

in brain
if (corType=="pearson") {
modTraitCor_B= cor(MEs_col_B, hormone_data_B, use = "p")
modTraitP_B = corPvalueStudent(modTraitCor_B, 9)
} else {
modTraitCorP = bicorAndPvalue(MEs_col_B, hormone_data_B, robustY=robustY)
modTraitCor_B = modTraitCorP$bicor
modTraitP_B = modTraitCorP$p
}

in spleen
if (corType=="pearson") {
modTraitCor_S= cor(MEs_col_S, hormone_data_S, use = "p")
modTraitP_S = corPvalueStudent(modTraitCor_S, 9)
} else {
modTraitCorP = bicorAndPvalue(MEs_col_S, hormone_data_S, robustY=robustY)
modTraitCor_S = modTraitCorP$bicor
modTraitP_S = modTraitCorP$p
}

in liver
if (corType=="pearson") {
modTraitCor_L= cor(MEs_col_L, hormone_data_L, use = "p")
modTraitP_L = corPvalueStudent(modTraitCor_L, 9)
} else {
modTraitCorP = bicorAndPvalue(MEs_col_L, hormone_data_L, robustY=robustY)
modTraitCor_L = modTraitCorP$bicor
modTraitP_L = modTraitCorP$p
}

calculate pvalue qvalue（fdr、adjpvalue） t（t test score） z（z test score） sd（standard deviation） values of

matrix-values，r and n is two list or two column vectors
cor2pvalue = function(r, n) {
t <- (r*sqrt(n-2))/sqrt(1-r^2)
p <- 2*(1 - pt(abs(t),(n-2)))
se <- sqrt((1-r*r)/(n-2))
out <- list(r, n, t, p, se)
names(out) <- c("r", "n", "t", "p", "se")
return(out)
}
######
modTraitQ_W=p.adjust(modTraitP_W,method = "fdr",n=98)
dim(modTraitQ_W)=dim(modTraitP_W)

###########text preparation
textMatrix_W = paste(signif(modTraitCor_W, 2), " (", signif(modTraitP_W, 1), ")", sep = "")
textMatrix_B = paste(signif(modTraitCor_B, 2), " (", signif(modTraitP_B, 1), ")", sep = "")
textMatrix_S = paste(signif(modTraitCor_S, 2), " (", signif(modTraitP_S, 1), ")", sep = "")
textMatrix_L = paste(signif(modTraitCor_L, 2), " (", signif(modTraitP_L, 1), ")", sep = "")
###########dimension of matrix
dim(textMatrix_W) = dim(modTraitCor_W)
dim(textMatrix_B) = dim(modTraitCor_B)
dim(textMatrix_L) = dim(modTraitCor_L)
dim(textMatrix_S) = dim(modTraitCor_S)

###模块与表型之间的相关性以及pvalue of significance
pdf("./labeledHeatmap_plot_white_cell.pdf",height = 8,width = 10)
labeledHeatmap(Matrix = modTraitCor_W, xLabels = colnames(hormone_data_W),
yLabels = colnames(MEs_col_W),
cex.lab = 0.5,
ySymbols = colnames(MEs_col_W), colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix_W, setStdMargins = FALSE,
cex.text = 0.5, zlim = c(-1,1),
main = paste("Module-trait relationships of white cell"))
dev.off()
pdf("./labeledHeatmap_plot_brain.pdf",height = 8,width = 10)
labeledHeatmap(Matrix = modTraitCor_B, xLabels = colnames(hormone_data_B),
yLabels = colnames(MEs_col_B),
cex.lab = 0.5,
ySymbols = colnames(MEs_col_B), colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix_B, setStdMargins = FALSE,
cex.text = 0.5, zlim = c(-1,1),
main = paste("Module-trait relationships of brain"))
dev.off()
pdf("./labeledHeatmap_plot_liver.pdf",height = 8,width = 10)
labeledHeatmap(Matrix = modTraitCor_L, xLabels = colnames(hormone_data_L),
yLabels = colnames(MEs_col_L),
cex.lab = 0.5,
ySymbols = colnames(MEs_col_L), colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix_L, setStdMargins = FALSE,
cex.text = 0.5, zlim = c(-1,1),
main = paste("Module-trait relationships of liver"))
dev.off()
pdf("./labeledHeatmap_plot_spleen.pdf",height = 8,width = 10)
labeledHeatmap(Matrix = modTraitCor_S, xLabels = colnames(hormone_data_S),
yLabels = colnames(MEs_col_S),
cex.lab = 0.5,
ySymbols = colnames(MEs_col_S), colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix_S, setStdMargins = FALSE,
cex.text = 0.5, zlim = c(-1,1),
main = paste("Module-trait relationships of spleen"))
dev.off()

################
###############
#Gene relationship to trait and important modules: Gene with Module
#############
############
if (corType=="pearson") {
geneModuleMembership_W = as.data.frame(cor(dataExpr_W, MEs_col_W, use = "p"))
MMPvalue_W = as.data.frame(corPvalueStudent(
as.matrix(geneModuleMembership_W), 9))
} else {
geneModuleMembershipA = bicorAndPvalue(dataExpr_W, MEs_col_W, robustY=robustY)
geneModuleMembership_W = geneModuleMembershipA$bicor
MMPvalue_W = geneModuleMembershipA$p
}
############

###########
#gene with clinic data relationship
###########
###########
if (corType=="pearson") {
geneTraitCor_S = as.data.frame(cor(dataExpr_S, hormone_data_S, use = "p"))
geneTraitP_S = as.data.frame(corPvalueStudent(
as.matrix(geneTraitCor_S), 9))
} else {
geneTraitCorA = bicorAndPvalue(dataExpr_S, hormone_data_S, robustY=robustY)
geneTraitCor_S = as.data.frame(geneTraitCorA$bicor)
geneTraitP_S = as.data.frame(geneTraitCorA$p)
}

########find different color modules' GO pathway

module = "blue"
pheno = "T4"
modNames = substring(colnames(MEs_col), 3)
module_column = match(module, modNames)
pheno_column = match(pheno,colnames(hormone_data))
moduleGenes = moduleColors == module
sizeGrWindow(7, 7)
par(mfrow = c(1,1))
pdf("./verboseScatterplot.pdf",height = 8,width = 10)
verboseScatterplot(abs(geneModuleMembership_W[moduleGenes, module_column]),
abs(geneTraitCor_W[moduleGenes, pheno_column]),
xlab = paste("Module Membership in", module, "module"),
ylab = paste("Gene significance for", pheno),
main = paste("Module membership vs. gene significance "),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
dev.off()

########find different color modules' GO pathway
module_for_GO='yellow'
modProbes_yellow = names(moduleColors[moduleColors==module_for_GO])
modProbes_yellow =mapIds(org.Rn.eg.db,keys = modProbes_yellow,keytype = 'ENSEMBL',column = 'ENTREZID')
GO_yellow_bp <- simplify(enrichGO(gene = c(modProbes_yellow),
OrgDb = org.Rn.eg.db,
ont = "BP", pAdjustMethod = "BH",
pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
select_fun = min, measure = "Wang", semData = NULL)
GO_yellow_cc <- simplify(enrichGO(gene = c(modProbes_yellow),
OrgDb = org.Rn.eg.db,
ont = "CC", pAdjustMethod = "BH",
pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
select_fun = min, measure = "Wang", semData = NULL)
GO_yellow_mf <- simplify(enrichGO(gene = c(modProbes_yellow),
OrgDb = org.Rn.eg.db,
ont = "MF", pAdjustMethod = "BH",
pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
select_fun = min, measure = "Wang", semData = NULL)
GO_yellow_all_GO_terms_table=rbind(GO_yellow_bp@result,GO_yellow_cc@result,GO_yellow_mf@result)
GO_yellow_all_GO_terms_table_significant=GO_yellow_all_GO_terms_table[GO_yellow_all_GO_terms_table$p.adjust<0.05,]

module_for_GO='blue'
modProbes_blue = names(moduleColors[moduleColors==module_for_GO])
modProbes_blue =mapIds(org.Rn.eg.db,keys = modProbes_blue,keytype = 'ENSEMBL',column = 'ENTREZID')
GO_blue_bp <- simplify(enrichGO(gene = c(modProbes_blue),
OrgDb = org.Rn.eg.db,
ont = "BP", pAdjustMethod = "BH",
pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
select_fun = min, measure = "Wang", semData = NULL)
GO_blue_cc <- simplify(enrichGO(gene = c(modProbes_blue),
OrgDb = org.Rn.eg.db,
ont = "CC", pAdjustMethod = "BH",
pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
select_fun = min, measure = "Wang", semData = NULL)
GO_blue_mf <- simplify(enrichGO(gene = c(modProbes_blue),
OrgDb = org.Rn.eg.db,
ont = "MF", pAdjustMethod = "BH",
pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
select_fun = min, measure = "Wang", semData = NULL)
GO_blue_all_GO_terms_table=rbind(GO_blue_bp@result,GO_blue_cc@result,GO_blue_mf@result)
GO_blue_all_GO_terms_table_significant=GO_blue_all_GO_terms_table[GO_blue_all_GO_terms_table$p.adjust<0.05,]

module_for_GO='turquoise'
modProbes_turquoise = names(moduleColors[moduleColors==module_for_GO])
modProbes_turquoise =mapIds(org.Rn.eg.db,keys = modProbes_turquoise,keytype = 'ENSEMBL',column = 'ENTREZID')
GO_turquoise_bp <- simplify(enrichGO(gene = c(modProbes_turquoise),
OrgDb = org.Rn.eg.db,
ont = "BP", pAdjustMethod = "BH",
pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
select_fun = min, measure = "Wang", semData = NULL)
GO_turquoise_cc <- simplify(enrichGO(gene = c(modProbes_turquoise),
OrgDb = org.Rn.eg.db,
ont = "CC", pAdjustMethod = "BH",
pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
select_fun = min, measure = "Wang", semData = NULL)
GO_turquoise_mf <- simplify(enrichGO(gene = c(modProbes_turquoise),
OrgDb = org.Rn.eg.db,
ont = "MF", pAdjustMethod = "BH",
pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
select_fun = min, measure = "Wang", semData = NULL)
GO_turquoise_all_GO_terms_table=rbind(GO_turquoise_bp@result,GO_turquoise_cc@result,GO_turquoise_mf@result)
GO_turquoise_all_GO_terms_table_significant=GO_turquoise_all_GO_terms_table[GO_turquoise_all_GO_terms_table$p.adjust<0.05,]
write.csv(GO_turquoise_all_GO_terms_table_significant,file = './GO_turquoise.csv')
write.csv(GO_blue_all_GO_terms_table_significant,file = './GO_blue.csv')
write.csv(GO_yellow_all_GO_terms_table_significant,file = './GO_yellow.csv')

############## GOenrichmentAnalysis: The function takes a vector of module labels,
##and the Entrez (a.k.a. Locus Link) codes for the genes whose labels are given
library(Seurat)
library(DOSE)
library(GO.db)
library(topGO)
library(GSEABase)
library(clusterProfiler)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(scater)
library(scran)
moduleGene=colnames(dataExpr)
module.entrez <- mapIds(org.Rn.eg.db, keys = moduleGene, keytype = 'ENSEMBL', column = 'ENTREZID')
GOenr = GOenrichmentAnalysis(moduleColors, module.entrez, organism = "rat", nBestP = 10)
######################
#find GO terms coresponsed gene lists
###in liver
A=list(chemokine_activity=subset(GO_Liver,GO_Liver$Description_N_M=='chemokine activity')[,8],
cellular_response_to_metal_ion=subset(GO_Liver,GO_Liver$Description_N_M=='cellular response to metal ion')[,8],
neutrophil_homeostasis=subset(GO_Liver,GO_Liver$Description_N_M=='neutrophil homeostasis')[,8],
chronic_inflammatory_response=subset(GO_Liver,GO_Liver$Description_N_M=='chronic inflammatory response')[,8],
nitric_oxide_synthase_biosynthetic_process=GO_Liver$geneID.y[51],
monocarboxylic_acid_metabolic_process=GO_Liver$geneID.y[37])
A=unique(unlist(lapply(A, function(x) strsplit(x,"/"))))
Liver_term_gene = mapIds(org.Rn.eg.db,A,keytype="ENTREZID", column="ENSEMBL")
Liver_term_gene_lfc_SN=res_Liver_N_S_sig[rownames(res_Liver_N_S_sig) %in% Liver_term_gene,][,2]
Liver_term_gene_lfc_MN=res_Liver_N_M_sig[rownames(res_Liver_N_M_sig) %in% Liver_term_gene,][,2]

###in spleen
B=list(response_to_lipopolysaccharide=GO_Spleen$geneID.y[29],
positive_regulation_of_ERK1_and_ERK2_cascade=GO_Spleen$geneID.y[46],
positive_regulation_of_leukocyte_migration=GO_Spleen$geneID.y[3])
B=unique(unlist(lapply(B, function(x) strsplit(x,"/"))))
Spleen_term_gene = mapIds(org.Rn.eg.db,B,keytype="ENTREZID", column="ENSEMBL")
Spleen_term_gene_lfc_SN=res_Spleen_S_N_sig[rownames(res_Spleen_S_N_sig) %in% Spleen_term_gene,][,2]
Spleen_term_gene_lfc_MN=res_Spleen_N_M_sig[rownames(res_Spleen_N_M_sig) %in% Spleen_term_gene,][,2]

####in brain
C=list(cytokine_binding_b=GO_Brain$geneID.y[45]
,chemotaxis_inbrain=GO_Brain$geneID.y[28]
,response_to_reactive_oxygen_species_b=GO_Brain$geneID.y[1])
C=unique(unlist(lapply(C, function(x) strsplit(x,"/"))))
BRAIN_term_gene = mapIds(org.Rn.eg.db,C,keytype="ENTREZID", column="ENSEMBL")
BRAIN_term_gene_lfc_SN=res_Brain_N_S_sig[rownames(res_Brain_N_S_sig) %in% BRAIN_term_gene,][,2]
BRAIN_term_gene_lfc_MN=res_Brain_N_M_sig[rownames(res_Brain_N_M_sig) %in% BRAIN_term_gene,][,2]

#########in WC
D=list(reactive_oxygen_species_metabolic_process=GO_WC$geneID.y[137],
cytokine_binding_wc=GO_WC$geneID.y[59],
chemotaxis_inwc=GO_WC$geneID.y[30],
response_to_reactive_oxygen_species_wc=GO_WC$geneID.y[1],
cellular_response_to_fatty_acid=GO_WC$geneID.y[130],
response_to_temperature_stimulus=GO_WC$geneID.y[42])

D=unique(unlist(lapply(D, function(x) strsplit(x,"/"))))
WC_term_gene = mapIds(org.Rn.eg.db,D,keytype="ENTREZID", column="ENSEMBL")
WC_term_gene_lfc_SN=res_W_S_N_sig[rownames(res_W_S_N_sig) %in% WC_term_gene,][,2]
WC_term_gene_lfc_MN=res_W_M_N_sig[rownames(res_W_M_N_sig) %in% WC_term_gene,][,2]

##integration
GO_term_gene_list=c(A,B,C,D)
lapply(GO_term_gene_list, unlist)
GO_term_gene=unique(unlist(lapply(GO_term_gene_list, function(x) strsplit(x,"/"))))
GO_term_gene = mapIds(org.Rn.eg.db,GO_term_gene,keytype="ENTREZID", column="ENSEMBL")
SE_GO_term_gene_acoords_read_counts=se[match(GO_term_gene,rownames(countdata)),]
rpkm_GO_terms_counts=rpkm(assay(SE_GO_term_gene_acoords_read_counts),gene.length = 414)
SPLEEN_GO=SE_GO_term_gene_acoords_read_counts[,c(2,5,8,10,14,18,22,24,27)]
WC_GO = SE_GO_term_gene_acoords_read_counts[,c(3,6,9,11,15,19,23,25,28)]
BRAIN_GO = SE_GO_term_gene_acoords_read_counts[,c(12,16,20,29,30,31,32,34,36)]
#####
LIVER_GO = SE_GO_term_gene_acoords_read_counts[,c(1,4,7,13,17,21,26,33,35)]
dds_L_GO <- DESeqDataSet(LIVER_GO, design = ~ group)
dds_Liver_GO = DESeq(dds_L_GO)
res_Liver_N_M_GO = results(dds_Liver_GO,contrast=c("group","Normal","Mild"))
res_Liver_N_M_GO_sig = subset(res_Liver_N_M_GO,padj<0.05)
res_Liver_N_M_GO_sig_lfc_1 = subset(res_Liver_N_M_GO_sig,abs(res_Liver_N_M_GO_sig$log2FoldChange) >= 1)
res_Liver_N_M_GO_sig_lfc_1.5 = subset(res_Liver_N_M_GO_sig,abs(res_Liver_N_M_GO_sig$log2FoldChange) >= 1.5)
res_Liver_N_M_GO_sig_lfc_2 = subset(res_Liver_N_M_GO_sig,abs(res_Liver_N_M_GO_sig$log2FoldChange) >= 2)
res_Liver_N_M_GO_fc=res_Liver_N_M_GO$log2FoldChange

res_Liver_N_S_GO = results(dds_Liver_GO,contrast=c("group","Normal","Severe"))
res_Liver_N_S_GO_sig = subset(res_Liver_N_S_GO,padj<0.05)
res_Liver_N_S_GO_sig_lfc_1 = subset(res_Liver_N_S_GO_sig,abs(res_Liver_N_S_GO_sig$log2FoldChange) >= 1)
res_Liver_N_S_GO_sig_lfc_1.5 = subset(res_Liver_N_S_GO_sig,abs(res_Liver_N_S_GO_sig$log2FoldChange) >= 1.5)
res_Liver_N_S_GO_sig_lfc_2 = subset(res_Liver_N_S_GO_sig,abs(res_Liver_N_S_GO_sig$log2FoldChange) >= 2)
res_Liver_N_S_GO_fc=res_Liver_N_S_GO$log2FoldChange

######
BRAIN_GO = SE_GO_term_gene_acoords_read_counts[,c(12,16,20,29,30,31,32,34,36)]
dds_B_GO <- DESeqDataSet(BRAIN_GO, design = ~ group)
dds_BRAIN_GO = DESeq(dds_B_GO)
res_BRAIN_N_M_GO = results(dds_BRAIN_GO,contrast=c("group","Normal","Mild"))
res_BRAIN_N_M_GO_sig = subset(res_BRAIN_N_M_GO,padj<0.05)
res_BRAIN_N_M_GO_sig_lfc_1 = subset(res_BRAIN_N_M_GO_sig,abs(res_BRAIN_N_M_GO_sig$log2FoldChange) >= 1)
res_BRAIN_N_M_GO_sig_lfc_1.5 = subset(res_BRAIN_N_M_GO_sig,abs(res_BRAIN_N_M_GO_sig$log2FoldChange) >= 1.5)
res_BRAIN_N_M_GO_sig_lfc_2 = subset(res_BRAIN_N_M_GO_sig,abs(res_BRAIN_N_M_GO_sig$log2FoldChange) >= 2)
res_BRAIN_N_M_GO_fc=res_BRAIN_N_M_GO$log2FoldChange

res_BRAIN_N_S_GO = results(dds_BRAIN_GO,contrast=c("group","Normal","Severe"))
res_BRAIN_N_S_GO_sig = subset(res_BRAIN_N_S_GO,padj<0.05)
res_BRAIN_N_S_GO_sig_lfc_1 = subset(res_BRAIN_N_S_GO_sig,abs(res_BRAIN_N_S_GO_sig$log2FoldChange) >= 1)
res_BRAIN_N_S_GO_sig_lfc_1.5 = subset(res_BRAIN_N_S_GO_sig,abs(res_BRAIN_N_S_GO_sig$log2FoldChange) >= 1.5)
res_BRAIN_N_S_GO_sig_lfc_2 = subset(res_BRAIN_N_S_GO_sig,abs(res_BRAIN_N_S_GO_sig$log2FoldChange) >= 2)
res_BRAIN_N_S_GO_fc=res_BRAIN_N_S_GO$log2FoldChange
######white cell
WC_GO = SE_GO_term_gene_acoords_read_counts[,c(3,6,9,11,15,19,23,25,28)]
dds_W_GO <- DESeqDataSet(WC_GO, design = ~ group)
dds_WC_GO = DESeq(dds_W_GO)
res_WC_N_M_GO = results(dds_WC_GO,contrast=c("group","Normal","Mild"))
res_WC_N_M_GO_sig = subset(res_WC_N_M_GO,padj<0.05)
res_WC_N_M_GO_sig_lfc_1 = subset(res_WC_N_M_GO_sig,abs(res_WC_N_M_GO_sig$log2FoldChange) >= 1)
res_WC_N_M_GO_sig_lfc_1.5 = subset(res_WC_N_M_GO_sig,abs(res_WC_N_M_GO_sig$log2FoldChange) >= 1.5)
res_WC_N_M_GO_sig_lfc_2 = subset(res_WC_N_M_GO_sig,abs(res_WC_N_M_GO_sig$log2FoldChange) >= 2)
res_WC_N_M_GO_fc=res_WC_N_M_GO$log2FoldChange

res_WC_N_S_GO = results(dds_WC_GO,contrast=c("group","Normal","Severe"))
res_WC_N_S_GO_sig = subset(res_WC_N_S_GO,padj<0.05)
res_WC_N_S_GO_sig_lfc_1 = subset(res_WC_N_S_GO_sig,abs(res_WC_N_S_GO_sig$log2FoldChange) >= 1)
res_WC_N_S_GO_sig_lfc_1.5 = subset(res_WC_N_S_GO_sig,abs(res_WC_N_S_GO_sig$log2FoldChange) >= 1.5)
res_WC_N_S_GO_sig_lfc_2 = subset(res_WC_N_S_GO_sig,abs(res_WC_N_S_GO_sig$log2FoldChange) >= 2)
res_WC_N_S_GO_fc=res_WC_N_S_GO$log2FoldChange

#######spleen
SPLEEN_GO=SE_GO_term_gene_acoords_read_counts[,c(2,5,8,10,14,18,22,24,27)]
dds_S_GO <- DESeqDataSet(SPLEEN_GO, design = ~ group)
dds_Spleen_GO = DESeq(dds_S_GO)
res_Spleen_N_M_GO = results(dds_Spleen_GO,contrast=c("group","Normal","Mild"))
res_Spleen_N_M_GO_sig = subset(res_Spleen_N_M_GO,padj<0.05)
res_Spleen_N_M_GO_sig_lfc_1 = subset(res_Spleen_N_M_GO_sig,abs(res_Spleen_N_M_GO_sig$log2FoldChange) >= 1)
res_Spleen_N_M_GO_sig_lfc_1.5 = subset(res_Spleen_N_M_GO_sig,abs(res_Spleen_N_M_GO_sig$log2FoldChange) >= 1.5)
res_Spleen_N_M_GO_sig_lfc_2 = subset(res_Spleen_N_M_GO_sig,abs(res_Spleen_N_M_GO_sig$log2FoldChange) >= 2)
res_Spleen_N_M_GO_fc=res_Spleen_N_M_GO$log2FoldChange

res_Spleen_N_S_GO = results(dds_Spleen_GO,contrast=c("group","Normal","Severe"))
res_Spleen_N_S_GO_sig = subset(res_Spleen_N_S_GO,padj<0.05)
res_Spleen_N_S_GO_sig_lfc_1 = subset(res_Spleen_N_S_GO_sig,abs(res_Spleen_N_S_GO_sig$log2FoldChange) >= 1)
res_Spleen_N_S_GO_sig_lfc_1.5 = subset(res_Spleen_N_S_GO_sig,abs(res_Spleen_N_S_GO_sig$log2FoldChange) >= 1.5)
res_Spleen_N_S_GO_sig_lfc_2 = subset(res_Spleen_N_S_GO_sig,abs(res_Spleen_N_S_GO_sig$log2FoldChange) >= 2)
res_Spleen_N_S_GO_fc=res_Spleen_N_S_GO$log2FoldChange

#######
GO_term_gene_in_8_groups_fc_value_table=data.frame(res_Liver_N_M_GO_fc,res_Liver_N_S_GO_fc,res_BRAIN_N_M_GO_fc,res_BRAIN_N_S_GO_fc,res_WC_N_M_GO_fc,res_WC_N_S_GO_fc,res_Spleen_N_M_GO_fc,res_Spleen_N_S_GO_fc)
#414:8 FC matrix
rownames(GO_term_gene_in_8_groups_fc_value_table) <- mapIds(org.Rn.eg.db, keys = GO_term_gene, keytype = 'ENSEMBL', column = 'SYMBOL')
library('pheatmap')
GO_term_gene_in_8_groups_fc_value_table = t(apply(GO_term_gene_in_8_groups_fc_value_table,1,function(x){(x-mean(x))/sd(x)}))
#GO_term_gene_in_8_groups_fc_value_table<-GO_term_gene_in_8_groups_fc_value_table-rowMeans(GO_term_gene_in_8_groups_fc_value_table)
pheatmap(na.omit(GO_term_gene_in_8_groups_fc_value_table),fontsize = 10,fontsize_row=3,cellwidth =20,main = 'GO_terms_high_expression_genes_FC_value_in_8_group')
#######414:36 expression matrix
SE_GO_term_gene_acoords_read_counts=se[match(GO_term_gene,rownames(countdata)),]
rownames(rpkm_GO_terms_counts) <- mapIds(org.Rn.eg.db, keys = rownames(rpkm_GO_terms_counts), keytype = 'ENSEMBL', column = 'SYMBOL')
rpkm_GO_terms_counts=rpkm(assay(SE_GO_term_gene_acoords_read_counts),gene.length = 414)
rpkm_GO_terms_counts = t(apply(rpkm_GO_terms_counts,1,function(x){(x-mean(x))/sd(x)}))
#rpkm_GO_terms_counts<-rpkm_GO_terms_counts-rowMeans(rpkm_GO_terms_counts)
pheatmap(rpkm_GO_terms_counts,fontsize = 10,cellwidth =12,fontsize_row=3,main = 'GO_terms_high_expression_genes_normalized_counts_hp_in_36samples')
############
相关阅读:
Hive伪分布式下安装
 Hadoop单机和伪分布式安装
 Spark 键值对RDD操作
 Scala入门：从HelloWorld开始【源码及编译】
Spark RDD编程核心
 Scala 元组
 剑指offer（7）：斐波那契数列
 剑指offer（13）：调整数组顺序使奇数位于偶数前面
 剑指offer（21）：二维数组中的查找
 解决IEDA web项目控制台中文乱码
原文地址：https://www.cnblogs.com/beckygogogo/p/9849350.html