• WGCNA-cytoscape-analysis


    全套R代码复刻:

    WGCNA install
    source("https://bioconductor.org/biocLite.R")
    biocLite(c("AnnotationDbi", "impute","GO.db", "preprocessCore"))

    install.packages(c("matrixStats", "Hmisc", "splines", "foreach", "doParallel", "fastcluster", "dynamicTreeCut", "survival"))

    site="https://mirrors.tuna.tsinghua.edu.cn/CRAN"
    install.packages(c("WGCNA", "stringr", "reshape2"), repos=site)


    biocLite("WGCNA")
    library('WGCNA')
    library('stringr')
    library('reshape2')
    library("DESeq2")
    library('AnnotationDbi')
    library('org.Rn.eg.db')#分析的大鼠的数据

    sampleTable = data.frame(read.table(file = './Sample_Meta_Info.txt',header = T))
    rownames(sampleTable) = sampleTable$samplenames
    library("DESeq2")

    ####all gene list in contrast result
    gene_list_8_kinds_of_situation <-

    c(rownames(res_Liver_N_M_sig_lfc_2),

    rownames(res_Liver_N_S_sig_lfc_2),

    rownames(res_Brain_N_M_sig_lfc_2),

    rownames(res_Brain_N_S_sig_lfc_2),

    rownames(res_Spleen_N_M_sig_lfc_2),

    rownames(res_Spleen_S_N_sig_lfc_2),

    rownames(res_W_M_N_sig_lfc_2),

    rownames(res_W_S_N_sig_lfc_2))


    ##all DE genes expression pattern counts
    gene_list_8_condition_counts=countdata[match(gene_list_8_kinds_of_situation,rownames(countdata)),]
    library('edgeR')
    rpkm=rpkm(gene_list_8_condition_counts,gene.length = gene_number)
    dim(rpkm)

    对基因表达的rpkm值进行安列的筛选。
    dataExpr = rpkm[rowSums(rpkm>1)>0.4*ncol(rpkm),]
    dim(dataExpr)

    ##preprocess setting
    options(stringsAsFactors = FALSE)
    allowWGCNAThreads() 
    enableWGCNAThreads()
    corType = "pearson"
    corFnc = ifelse(corType=="pearson", cor, bicor)
    maxPOutliers = ifelse(corType=="pearson",1,0.05)
    robustY = ifelse(corType=="pearson",T,F)
    #######
    m.mad <- apply(dataExpr,1,mad)##mad的normalization的方法使用。
    dataExprVar <- dataExpr[which(m.mad > max(quantile(m.mad, probs=seq(0, 1, 0.25))[2],0.01)),]##利用mad之后的分位值进行筛选。
    dataExpr <- as.data.frame(t(dataExprVar))
    gsg = goodSamplesGenes(dataExpr, verbose = 3)#利用WGCNA中的函数goodSamplesGenes,在基因表达水平的控制下进行样本质量的筛选

    if (!gsg$allOK){
    # Optionally, print the gene and sample names that were removed:
    if (sum(!gsg$goodGenes)>0)
    printFlush(paste("Removing genes:",
    paste(names(dataExpr)[!gsg$goodGenes], collapse = ",")));
    if (sum(!gsg$goodSamples)>0)
    printFlush(paste("Removing samples:",
    paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ",")));
    # Remove the offending genes and samples from the data:
    dataExpr = dataExpr[gsg$goodSamples, gsg$goodGenes]
    }


    dim(dataExpr)
    nGenes= ncol(dataExpr)
    nSamples = nrow(dataExpr)
    sampleTree = hclust(dist(dataExpr), method = "average")##简单的层次聚类步骤
    plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="")
    powers = c(c(1:10), seq(from = 12, to=30, by=2))


    #######Constructing the soft threshold and plotting
    type = "unsigned"
    sft = pickSoftThreshold(t(dataExpr), powerVector=powers,networkType=type, verbose=5)
    head(dataExpr)[,1:8]
    cex1 = 0.9


    plot(sft$fitIndices[,1], sign(sft$fitIndices[,3])*sft$fitIndices[,2],
    xlab="Soft Threshold (power)",
    ylab="Scale Free Topology Model Fit,signed R^2",type="n",
    main = paste("Scale independence"))
    text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
    labels=powers,cex=cex1,col="red")
    abline(h=0.85,col="red")
    plot(sft$fitIndices[,1], sft$fitIndices[,5],
    xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
    main = paste("Mean connectivity"))
    text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers,
    cex=cex1, col="red")

    一般地,power等于
    power = sft$powerEstimate

    经验power:无满足条件的power时选用

    if (is.na(power)){
    power = ifelse(nSamples<20, ifelse(type == "unsigned", 9, 18),
    ifelse(nSamples<30, ifelse(type == "unsigned", 8, 16),
    ifelse(nSamples<40, ifelse(type == "unsigned", 7, 14),
    ifelse(type == "unsigned", 6, 12))
    ))
    }


    corType="bicor"


    一步法网络构建:One-step network construction and module detection
    net = blockwiseModules(dataExpr, power = power, maxBlockSize = nGenes,
    TOMType = type, minModuleSize = 30,##type = "unsigned"
    reassignThreshold = 0, mergeCutHeight = 0.25,
    numericLabels = TRUE, pamRespectsDendro = FALSE,
    saveTOMs=F, corType = corType,
    maxPOutliers=maxPOutliers, loadTOMs=TRUE,
    #saveTOMFileBase = paste0('Sepsis_wgcna_analysis', ".tom"),
    verbose = 3)
    #####################

    提取未plot out的网络信息

    table(net$colors)
    moduleLabels = net$colors
    moduleColors = labels2colors(moduleLabels)
    names(moduleColors)=colnames(dataExpr)
    pdf("./plotDendroAndColors.pdf")
    plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]],
    "Module colors",
    dendroLabels = FALSE, hang = 0.03,
    addGuide = TRUE, guideHang = 0.05)
    dev.off()##
    MEs = net$MEs
    MEs_col = MEs


    colnames(MEs_col) = paste0("ME", labels2colors(as.numeric(str_replace_all(colnames(MEs),"ME",""))))
    ###sometimes if you are using server  in  ubuntu or centos , may encounter the lazy load problem,  once that we use the next line command alternatively
    colnames(MEs_col) = paste0("ME", labels2colors(c(7,9,1,8,10,6,2,4,3,5,0)))


    MEs_col = orderMEs(MEs_col)
    pdf("./plotEigengeneNetworks.pdf") #特征基因的网络构建
    plotEigengeneNetworks(MEs_col, "Eigengene adjacency heatmap",
    marDendro = c(3,3,2,4),
    marHeatmap = c(3,4,2,2), plotDendrograms = T,
    xLabelsAngle = 90)
    dev.off()
    ###########TOM plot(topology map plot)
    TOM = TOMsimilarityFromExpr(dataExpr, power=power, corType=corType, networkType=type)
    TOM <- as.matrix(TOM)
    dissTOM = 1-TOM
    plotTOM = dissTOM^7
    diag(plotTOM) = NA
    pdf("./TOMplot.pdf")
    TOMplot(plotTOM, net$dendrograms, moduleColors,
    main = "Network heatmap plot, all genes",setLayout=TRUE)
    dev.off()

    plot out to observe the majority cluster to specifically plot in details


    ########
    for the brown cluster to cytoscape
    module_brown='brown'
    inModule_brown = is.finite(match(moduleColors, module_brown))
    modTOM_brown = TOM[inModule_brown, inModule_brown]
    modProbes_brown = names(moduleColors[moduleColors==module_brown])
    modGenes_brown <- mapIds(org.Rn.eg.db, keys = modProbes_brown, keytype = 'ENSEMBL', column = 'SYMBOL')
    dimnames(modTOM_brown) = list(modProbes_brown, modProbes_brown)
    cyt = exportNetworkToCytoscape(modTOM_brown,
    edgeFile = paste("sepsis_wgcna_brown_cluster", ".edges.txt", sep=""),
    nodeFile = paste("sepsis_wgcna_brown_cluster", ".nodes.txt", sep=""),
    weighted = TRUE, threshold = 0.3,
    nodeNames = modGenes_brown, nodeAttr = moduleColors[inModule_brown])
    ##########
    for the blue cluster
    module_blue='blue'
    inModule_blue = is.finite(match(moduleColors, module_blue))
    modTOM_blue = TOM[inModule_blue, inModule_blue]
    modProbes_blue = names(moduleColors[moduleColors==module_blue])
    modGenes_blue <- mapIds(org.Rn.eg.db, keys = modProbes_blue, keytype = 'ENSEMBL', column = 'SYMBOL')
    TOM_blue=TOM[names(moduleColors[moduleColors=="blue"]),names(moduleColors[moduleColors=="blue"])]
    moduleColors_blue=moduleColors[moduleColors=="blue"]
    probes_blue=colnames(TOM_blue)
    dimnames(TOM_blue) <- list(probes_blue, probes_blue)
    cyt = exportNetworkToCytoscape(TOM_blue,
    edgeFile = paste("sepsis_wgcna_blue_cluster", ".edges.txt", sep=""),
    nodeFile = paste("sepsis_wgcna_blue_cluster", ".nodes.txt", sep=""),
    weighted = TRUE, threshold = 0.3,
    nodeNames = probes_blue, nodeAttr = moduleColors_blue)

    ##############the whole gene expression output into cytoscape database
    probes = colnames(dataExpr)
    dimnames(TOM) <- list(probes, probes)
    # Export the network into edge and node list files Cytoscape can read
    # threshold 默认为0.5, 可以根据自己的需要调整,也可以都导出后在
    # cytoscape中再调整
    cyt = exportNetworkToCytoscape(TOM,
    edgeFile = paste("sepsis_wgcna", ".edges.txt", sep=""),
    nodeFile = paste("sepsis_wgcna", ".nodes.txt", sep=""),
    weighted = TRUE, threshold = 0,
    nodeNames = probes, nodeAttr = moduleColors)
    ############
    #关联表型数据,不同phenotype的大鼠的外周血激素数据:

    hormone_data_Table = data.frame(read.table(file = './samples-hormones.txt',header = F))
    rownames(hormone_data_Table)=hormone_data_Table$V1
    hormone_data=hormone_data_Table[,c(-1,-3,-5,-7,-9,-11,-13,-15)]
    colnames(hormone_data)=c("OT","T","ghrelin","T4","cortisol","NE","LEP")
    dim(hormone_data)
    sampleName = rownames(dataExpr)
    rownames(MEs_col)=sampleName


    hormone_data=apply(hormone_data, 2, function(X) rep(X, each = 4))
    rownames(hormone_data)=paste(rownames(hormone_data),rep(c('L','B','S','W'),9),sep = '')

    rownames(hormone_data)[c(14,18,22,26,30,34)]=paste("Re_seq_",c(rownames(hormone_data[c(14,18,22,26,30,34),])),sep = '')
    dim(hormone_data)###[36:7]

    大鼠外周血激素数据,分为四种器官liver、 brain、 spleen、 white cell(from PBMC)
    hormone_data[match(rownames(dataExpr),rownames(hormone_data)),]
    hormone_data_L=hormone_data[match(rownames(MEs_col_L),rownames(hormone_data)),]
    hormone_data_B=hormone_data[match(rownames(MEs_col_B),rownames(hormone_data)),]
    hormone_data_S=hormone_data[match(rownames(MEs_col_S),rownames(hormone_data)),]
    hormone_data_W=hormone_data[match(rownames(MEs_col_W),rownames(hormone_data)),]

    RNAseq gene expressionn data
    dataExpr_W=dataExpr[match(rownames(MEs_col_W),rownames(dataExpr)),]
    dataExpr_L=dataExpr[match(rownames(MEs_col_L),rownames(dataExpr)),]
    dataExpr_S=dataExpr[match(rownames(MEs_col_S),rownames(dataExpr)),]
    dataExpr_B=dataExpr[match(rownames(MEs_col_B),rownames(dataExpr)),]

    WGCNA modules classified data
    MEs_col_L=MEs_col[grep("L",rownames(MEs_col)),]
    MEs_col_B=MEs_col[grep("B",rownames(MEs_col)),]
    MEs_col_S=MEs_col[grep("S",rownames(MEs_col)),]
    MEs_col_W=MEs_col[grep("W",rownames(MEs_col)),]

    remove the module (not co-expression by observering, such as grey in whole histogram)
    MEs_col = MEs_col[,grep('grey',colnames(MEs_col),invert = T)]


    plotting the correlation-score between clinical data with every module

    in white cell
    if (corType=="pearson") {
    modTraitCor_W= cor(MEs_col_W, hormone_data_W, use = "p")
    modTraitP_W = corPvalueStudent(modTraitCor_W, 9)
    } else {
    modTraitCorP = bicorAndPvalue(MEs_col_W, hormone_data_W, robustY=robustY)
    modTraitCor_W = modTraitCorP$bicor
    modTraitP_W = modTraitCorP$p
    }

    in brain
    if (corType=="pearson") {
    modTraitCor_B= cor(MEs_col_B, hormone_data_B, use = "p")
    modTraitP_B = corPvalueStudent(modTraitCor_B, 9)
    } else {
    modTraitCorP = bicorAndPvalue(MEs_col_B, hormone_data_B, robustY=robustY)
    modTraitCor_B = modTraitCorP$bicor
    modTraitP_B = modTraitCorP$p
    }

    in spleen
    if (corType=="pearson") {
    modTraitCor_S= cor(MEs_col_S, hormone_data_S, use = "p")
    modTraitP_S = corPvalueStudent(modTraitCor_S, 9)
    } else {
    modTraitCorP = bicorAndPvalue(MEs_col_S, hormone_data_S, robustY=robustY)
    modTraitCor_S = modTraitCorP$bicor
    modTraitP_S = modTraitCorP$p
    }

    in liver
    if (corType=="pearson") {
    modTraitCor_L= cor(MEs_col_L, hormone_data_L, use = "p")
    modTraitP_L = corPvalueStudent(modTraitCor_L, 9)
    } else {
    modTraitCorP = bicorAndPvalue(MEs_col_L, hormone_data_L, robustY=robustY)
    modTraitCor_L = modTraitCorP$bicor
    modTraitP_L = modTraitCorP$p
    }


    calculate pvalue  qvalue(fdr、adjpvalue) t(t test score) z(z test score) sd(standard deviation) values of

    matrix-values,r and n is two list or two column vectors
    cor2pvalue = function(r, n) {
    t <- (r*sqrt(n-2))/sqrt(1-r^2)
    p <- 2*(1 - pt(abs(t),(n-2)))
    se <- sqrt((1-r*r)/(n-2))
    out <- list(r, n, t, p, se)
    names(out) <- c("r", "n", "t", "p", "se")
    return(out)
    }
    ######
    modTraitQ_W=p.adjust(modTraitP_W,method = "fdr",n=98)
    dim(modTraitQ_W)=dim(modTraitP_W)


    ###########text preparation
    textMatrix_W = paste(signif(modTraitCor_W, 2), " (", signif(modTraitP_W, 1), ")", sep = "")
    textMatrix_B = paste(signif(modTraitCor_B, 2), " (", signif(modTraitP_B, 1), ")", sep = "")
    textMatrix_S = paste(signif(modTraitCor_S, 2), " (", signif(modTraitP_S, 1), ")", sep = "")
    textMatrix_L = paste(signif(modTraitCor_L, 2), " (", signif(modTraitP_L, 1), ")", sep = "")
    ###########dimension of matrix 
    dim(textMatrix_W) = dim(modTraitCor_W)
    dim(textMatrix_B) = dim(modTraitCor_B)
    dim(textMatrix_L) = dim(modTraitCor_L)
    dim(textMatrix_S) = dim(modTraitCor_S)

    ###模块与表型之间的相关性以及pvalue of significance
    pdf("./labeledHeatmap_plot_white_cell.pdf",height = 8,width = 10)
    labeledHeatmap(Matrix = modTraitCor_W, xLabels = colnames(hormone_data_W),
    yLabels = colnames(MEs_col_W),
    cex.lab = 0.5,
    ySymbols = colnames(MEs_col_W), colorLabels = FALSE,
    colors = blueWhiteRed(50),
    textMatrix = textMatrix_W, setStdMargins = FALSE,
    cex.text = 0.5, zlim = c(-1,1),
    main = paste("Module-trait relationships of white cell"))
    dev.off()
    pdf("./labeledHeatmap_plot_brain.pdf",height = 8,width = 10)
    labeledHeatmap(Matrix = modTraitCor_B, xLabels = colnames(hormone_data_B),
    yLabels = colnames(MEs_col_B),
    cex.lab = 0.5,
    ySymbols = colnames(MEs_col_B), colorLabels = FALSE,
    colors = blueWhiteRed(50),
    textMatrix = textMatrix_B, setStdMargins = FALSE,
    cex.text = 0.5, zlim = c(-1,1),
    main = paste("Module-trait relationships of brain"))
    dev.off()
    pdf("./labeledHeatmap_plot_liver.pdf",height = 8,width = 10)
    labeledHeatmap(Matrix = modTraitCor_L, xLabels = colnames(hormone_data_L),
    yLabels = colnames(MEs_col_L),
    cex.lab = 0.5,
    ySymbols = colnames(MEs_col_L), colorLabels = FALSE,
    colors = blueWhiteRed(50),
    textMatrix = textMatrix_L, setStdMargins = FALSE,
    cex.text = 0.5, zlim = c(-1,1),
    main = paste("Module-trait relationships of liver"))
    dev.off()
    pdf("./labeledHeatmap_plot_spleen.pdf",height = 8,width = 10)
    labeledHeatmap(Matrix = modTraitCor_S, xLabels = colnames(hormone_data_S),
    yLabels = colnames(MEs_col_S),
    cex.lab = 0.5,
    ySymbols = colnames(MEs_col_S), colorLabels = FALSE,
    colors = blueWhiteRed(50),
    textMatrix = textMatrix_S, setStdMargins = FALSE,
    cex.text = 0.5, zlim = c(-1,1),
    main = paste("Module-trait relationships of spleen"))
    dev.off()


    ################
    ###############
    #Gene relationship to trait and important modules: Gene with Module
    #############
    ############
    if (corType=="pearson") {
    geneModuleMembership_W = as.data.frame(cor(dataExpr_W, MEs_col_W, use = "p"))
    MMPvalue_W = as.data.frame(corPvalueStudent(
    as.matrix(geneModuleMembership_W), 9))
    } else {
    geneModuleMembershipA = bicorAndPvalue(dataExpr_W, MEs_col_W, robustY=robustY)
    geneModuleMembership_W = geneModuleMembershipA$bicor
    MMPvalue_W = geneModuleMembershipA$p
    }
    ############

    ###########
    #gene with clinic data relationship
    ###########
    ###########
    if (corType=="pearson") {
    geneTraitCor_S = as.data.frame(cor(dataExpr_S, hormone_data_S, use = "p"))
    geneTraitP_S = as.data.frame(corPvalueStudent(
    as.matrix(geneTraitCor_S), 9))
    } else {
    geneTraitCorA = bicorAndPvalue(dataExpr_S, hormone_data_S, robustY=robustY)
    geneTraitCor_S = as.data.frame(geneTraitCorA$bicor)
    geneTraitP_S = as.data.frame(geneTraitCorA$p)
    }


    ########find different color modules' GO pathway

    module = "blue"
    pheno = "T4"
    modNames = substring(colnames(MEs_col), 3)
    module_column = match(module, modNames)
    pheno_column = match(pheno,colnames(hormone_data))
    moduleGenes = moduleColors == module
    sizeGrWindow(7, 7)
    par(mfrow = c(1,1))
    pdf("./verboseScatterplot.pdf",height = 8,width = 10)
    verboseScatterplot(abs(geneModuleMembership_W[moduleGenes, module_column]),
    abs(geneTraitCor_W[moduleGenes, pheno_column]),
    xlab = paste("Module Membership in", module, "module"),
    ylab = paste("Gene significance for", pheno),
    main = paste("Module membership vs. gene significance "),
    cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
    dev.off()


    ########find different color modules' GO pathway
    module_for_GO='yellow'
    modProbes_yellow = names(moduleColors[moduleColors==module_for_GO])
    modProbes_yellow =mapIds(org.Rn.eg.db,keys = modProbes_yellow,keytype = 'ENSEMBL',column = 'ENTREZID')
    GO_yellow_bp <- simplify(enrichGO(gene = c(modProbes_yellow),
    OrgDb = org.Rn.eg.db,
    ont = "BP", pAdjustMethod = "BH",
    pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
    select_fun = min, measure = "Wang", semData = NULL)
    GO_yellow_cc <- simplify(enrichGO(gene = c(modProbes_yellow),
    OrgDb = org.Rn.eg.db,
    ont = "CC", pAdjustMethod = "BH",
    pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
    select_fun = min, measure = "Wang", semData = NULL)
    GO_yellow_mf <- simplify(enrichGO(gene = c(modProbes_yellow),
    OrgDb = org.Rn.eg.db,
    ont = "MF", pAdjustMethod = "BH",
    pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
    select_fun = min, measure = "Wang", semData = NULL)
    GO_yellow_all_GO_terms_table=rbind(GO_yellow_bp@result,GO_yellow_cc@result,GO_yellow_mf@result)
    GO_yellow_all_GO_terms_table_significant=GO_yellow_all_GO_terms_table[GO_yellow_all_GO_terms_table$p.adjust<0.05,]

    module_for_GO='blue'
    modProbes_blue = names(moduleColors[moduleColors==module_for_GO])
    modProbes_blue =mapIds(org.Rn.eg.db,keys = modProbes_blue,keytype = 'ENSEMBL',column = 'ENTREZID')
    GO_blue_bp <- simplify(enrichGO(gene = c(modProbes_blue),
    OrgDb = org.Rn.eg.db,
    ont = "BP", pAdjustMethod = "BH",
    pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
    select_fun = min, measure = "Wang", semData = NULL)
    GO_blue_cc <- simplify(enrichGO(gene = c(modProbes_blue),
    OrgDb = org.Rn.eg.db,
    ont = "CC", pAdjustMethod = "BH",
    pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
    select_fun = min, measure = "Wang", semData = NULL)
    GO_blue_mf <- simplify(enrichGO(gene = c(modProbes_blue),
    OrgDb = org.Rn.eg.db,
    ont = "MF", pAdjustMethod = "BH",
    pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
    select_fun = min, measure = "Wang", semData = NULL)
    GO_blue_all_GO_terms_table=rbind(GO_blue_bp@result,GO_blue_cc@result,GO_blue_mf@result)
    GO_blue_all_GO_terms_table_significant=GO_blue_all_GO_terms_table[GO_blue_all_GO_terms_table$p.adjust<0.05,]


    module_for_GO='turquoise'
    modProbes_turquoise = names(moduleColors[moduleColors==module_for_GO])
    modProbes_turquoise =mapIds(org.Rn.eg.db,keys = modProbes_turquoise,keytype = 'ENSEMBL',column = 'ENTREZID')
    GO_turquoise_bp <- simplify(enrichGO(gene = c(modProbes_turquoise),
    OrgDb = org.Rn.eg.db,
    ont = "BP", pAdjustMethod = "BH",
    pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
    select_fun = min, measure = "Wang", semData = NULL)
    GO_turquoise_cc <- simplify(enrichGO(gene = c(modProbes_turquoise),
    OrgDb = org.Rn.eg.db,
    ont = "CC", pAdjustMethod = "BH",
    pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
    select_fun = min, measure = "Wang", semData = NULL)
    GO_turquoise_mf <- simplify(enrichGO(gene = c(modProbes_turquoise),
    OrgDb = org.Rn.eg.db,
    ont = "MF", pAdjustMethod = "BH",
    pvalueCutoff = 0.01,qvalueCutoff = 0.05,universe = c(all_expressed_genes)), cutoff = 0.7, by = "p.adjust",
    select_fun = min, measure = "Wang", semData = NULL)
    GO_turquoise_all_GO_terms_table=rbind(GO_turquoise_bp@result,GO_turquoise_cc@result,GO_turquoise_mf@result)
    GO_turquoise_all_GO_terms_table_significant=GO_turquoise_all_GO_terms_table[GO_turquoise_all_GO_terms_table$p.adjust<0.05,]
    write.csv(GO_turquoise_all_GO_terms_table_significant,file = './GO_turquoise.csv')
    write.csv(GO_blue_all_GO_terms_table_significant,file = './GO_blue.csv')
    write.csv(GO_yellow_all_GO_terms_table_significant,file = './GO_yellow.csv')

     

    ############## GOenrichmentAnalysis: The function takes a vector of module labels,
    ##and the Entrez (a.k.a. Locus Link) codes for the genes whose labels are given
    library(Seurat)
    library(DOSE)
    library(GO.db)
    library(topGO)
    library(GSEABase)
    library(clusterProfiler)
    library(AnnotationDbi)
    library(org.Hs.eg.db)
    library(scater)
    library(scran)
    moduleGene=colnames(dataExpr)
    module.entrez <- mapIds(org.Rn.eg.db, keys = moduleGene, keytype = 'ENSEMBL', column = 'ENTREZID')
    GOenr = GOenrichmentAnalysis(moduleColors, module.entrez, organism = "rat", nBestP = 10)
    ######################
    #find GO terms coresponsed gene lists
    ###in liver
    A=list(chemokine_activity=subset(GO_Liver,GO_Liver$Description_N_M=='chemokine activity')[,8],
    cellular_response_to_metal_ion=subset(GO_Liver,GO_Liver$Description_N_M=='cellular response to metal ion')[,8],
    neutrophil_homeostasis=subset(GO_Liver,GO_Liver$Description_N_M=='neutrophil homeostasis')[,8],
    chronic_inflammatory_response=subset(GO_Liver,GO_Liver$Description_N_M=='chronic inflammatory response')[,8],
    nitric_oxide_synthase_biosynthetic_process=GO_Liver$geneID.y[51],
    monocarboxylic_acid_metabolic_process=GO_Liver$geneID.y[37])
    A=unique(unlist(lapply(A, function(x) strsplit(x,"/"))))
    Liver_term_gene = mapIds(org.Rn.eg.db,A,keytype="ENTREZID", column="ENSEMBL")
    Liver_term_gene_lfc_SN=res_Liver_N_S_sig[rownames(res_Liver_N_S_sig) %in% Liver_term_gene,][,2]
    Liver_term_gene_lfc_MN=res_Liver_N_M_sig[rownames(res_Liver_N_M_sig) %in% Liver_term_gene,][,2]

    ###in spleen
    B=list(response_to_lipopolysaccharide=GO_Spleen$geneID.y[29],
    positive_regulation_of_ERK1_and_ERK2_cascade=GO_Spleen$geneID.y[46],
    positive_regulation_of_leukocyte_migration=GO_Spleen$geneID.y[3])
    B=unique(unlist(lapply(B, function(x) strsplit(x,"/"))))
    Spleen_term_gene = mapIds(org.Rn.eg.db,B,keytype="ENTREZID", column="ENSEMBL")
    Spleen_term_gene_lfc_SN=res_Spleen_S_N_sig[rownames(res_Spleen_S_N_sig) %in% Spleen_term_gene,][,2]
    Spleen_term_gene_lfc_MN=res_Spleen_N_M_sig[rownames(res_Spleen_N_M_sig) %in% Spleen_term_gene,][,2]

    ####in brain
    C=list(cytokine_binding_b=GO_Brain$geneID.y[45]
    ,chemotaxis_inbrain=GO_Brain$geneID.y[28]
    ,response_to_reactive_oxygen_species_b=GO_Brain$geneID.y[1])
    C=unique(unlist(lapply(C, function(x) strsplit(x,"/"))))
    BRAIN_term_gene = mapIds(org.Rn.eg.db,C,keytype="ENTREZID", column="ENSEMBL")
    BRAIN_term_gene_lfc_SN=res_Brain_N_S_sig[rownames(res_Brain_N_S_sig) %in% BRAIN_term_gene,][,2]
    BRAIN_term_gene_lfc_MN=res_Brain_N_M_sig[rownames(res_Brain_N_M_sig) %in% BRAIN_term_gene,][,2]

    #########in WC
    D=list(reactive_oxygen_species_metabolic_process=GO_WC$geneID.y[137],
    cytokine_binding_wc=GO_WC$geneID.y[59],
    chemotaxis_inwc=GO_WC$geneID.y[30],
    response_to_reactive_oxygen_species_wc=GO_WC$geneID.y[1],
    cellular_response_to_fatty_acid=GO_WC$geneID.y[130],
    response_to_temperature_stimulus=GO_WC$geneID.y[42])


    D=unique(unlist(lapply(D, function(x) strsplit(x,"/"))))
    WC_term_gene = mapIds(org.Rn.eg.db,D,keytype="ENTREZID", column="ENSEMBL")
    WC_term_gene_lfc_SN=res_W_S_N_sig[rownames(res_W_S_N_sig) %in% WC_term_gene,][,2]
    WC_term_gene_lfc_MN=res_W_M_N_sig[rownames(res_W_M_N_sig) %in% WC_term_gene,][,2]

    ##integration
    GO_term_gene_list=c(A,B,C,D)
    lapply(GO_term_gene_list, unlist)
    GO_term_gene=unique(unlist(lapply(GO_term_gene_list, function(x) strsplit(x,"/"))))
    GO_term_gene = mapIds(org.Rn.eg.db,GO_term_gene,keytype="ENTREZID", column="ENSEMBL")
    SE_GO_term_gene_acoords_read_counts=se[match(GO_term_gene,rownames(countdata)),]
    rpkm_GO_terms_counts=rpkm(assay(SE_GO_term_gene_acoords_read_counts),gene.length = 414)
    SPLEEN_GO=SE_GO_term_gene_acoords_read_counts[,c(2,5,8,10,14,18,22,24,27)]
    WC_GO = SE_GO_term_gene_acoords_read_counts[,c(3,6,9,11,15,19,23,25,28)]
    BRAIN_GO = SE_GO_term_gene_acoords_read_counts[,c(12,16,20,29,30,31,32,34,36)]
    #####
    LIVER_GO = SE_GO_term_gene_acoords_read_counts[,c(1,4,7,13,17,21,26,33,35)]
    dds_L_GO <- DESeqDataSet(LIVER_GO, design = ~ group)
    dds_Liver_GO = DESeq(dds_L_GO)
    res_Liver_N_M_GO = results(dds_Liver_GO,contrast=c("group","Normal","Mild"))
    res_Liver_N_M_GO_sig = subset(res_Liver_N_M_GO,padj<0.05)
    res_Liver_N_M_GO_sig_lfc_1 = subset(res_Liver_N_M_GO_sig,abs(res_Liver_N_M_GO_sig$log2FoldChange) >= 1)
    res_Liver_N_M_GO_sig_lfc_1.5 = subset(res_Liver_N_M_GO_sig,abs(res_Liver_N_M_GO_sig$log2FoldChange) >= 1.5)
    res_Liver_N_M_GO_sig_lfc_2 = subset(res_Liver_N_M_GO_sig,abs(res_Liver_N_M_GO_sig$log2FoldChange) >= 2)
    res_Liver_N_M_GO_fc=res_Liver_N_M_GO$log2FoldChange

    res_Liver_N_S_GO = results(dds_Liver_GO,contrast=c("group","Normal","Severe"))
    res_Liver_N_S_GO_sig = subset(res_Liver_N_S_GO,padj<0.05)
    res_Liver_N_S_GO_sig_lfc_1 = subset(res_Liver_N_S_GO_sig,abs(res_Liver_N_S_GO_sig$log2FoldChange) >= 1)
    res_Liver_N_S_GO_sig_lfc_1.5 = subset(res_Liver_N_S_GO_sig,abs(res_Liver_N_S_GO_sig$log2FoldChange) >= 1.5)
    res_Liver_N_S_GO_sig_lfc_2 = subset(res_Liver_N_S_GO_sig,abs(res_Liver_N_S_GO_sig$log2FoldChange) >= 2)
    res_Liver_N_S_GO_fc=res_Liver_N_S_GO$log2FoldChange

    ######
    BRAIN_GO = SE_GO_term_gene_acoords_read_counts[,c(12,16,20,29,30,31,32,34,36)]
    dds_B_GO <- DESeqDataSet(BRAIN_GO, design = ~ group)
    dds_BRAIN_GO = DESeq(dds_B_GO)
    res_BRAIN_N_M_GO = results(dds_BRAIN_GO,contrast=c("group","Normal","Mild"))
    res_BRAIN_N_M_GO_sig = subset(res_BRAIN_N_M_GO,padj<0.05)
    res_BRAIN_N_M_GO_sig_lfc_1 = subset(res_BRAIN_N_M_GO_sig,abs(res_BRAIN_N_M_GO_sig$log2FoldChange) >= 1)
    res_BRAIN_N_M_GO_sig_lfc_1.5 = subset(res_BRAIN_N_M_GO_sig,abs(res_BRAIN_N_M_GO_sig$log2FoldChange) >= 1.5)
    res_BRAIN_N_M_GO_sig_lfc_2 = subset(res_BRAIN_N_M_GO_sig,abs(res_BRAIN_N_M_GO_sig$log2FoldChange) >= 2)
    res_BRAIN_N_M_GO_fc=res_BRAIN_N_M_GO$log2FoldChange


    res_BRAIN_N_S_GO = results(dds_BRAIN_GO,contrast=c("group","Normal","Severe"))
    res_BRAIN_N_S_GO_sig = subset(res_BRAIN_N_S_GO,padj<0.05)
    res_BRAIN_N_S_GO_sig_lfc_1 = subset(res_BRAIN_N_S_GO_sig,abs(res_BRAIN_N_S_GO_sig$log2FoldChange) >= 1)
    res_BRAIN_N_S_GO_sig_lfc_1.5 = subset(res_BRAIN_N_S_GO_sig,abs(res_BRAIN_N_S_GO_sig$log2FoldChange) >= 1.5)
    res_BRAIN_N_S_GO_sig_lfc_2 = subset(res_BRAIN_N_S_GO_sig,abs(res_BRAIN_N_S_GO_sig$log2FoldChange) >= 2)
    res_BRAIN_N_S_GO_fc=res_BRAIN_N_S_GO$log2FoldChange
    ######white cell
    WC_GO = SE_GO_term_gene_acoords_read_counts[,c(3,6,9,11,15,19,23,25,28)]
    dds_W_GO <- DESeqDataSet(WC_GO, design = ~ group)
    dds_WC_GO = DESeq(dds_W_GO)
    res_WC_N_M_GO = results(dds_WC_GO,contrast=c("group","Normal","Mild"))
    res_WC_N_M_GO_sig = subset(res_WC_N_M_GO,padj<0.05)
    res_WC_N_M_GO_sig_lfc_1 = subset(res_WC_N_M_GO_sig,abs(res_WC_N_M_GO_sig$log2FoldChange) >= 1)
    res_WC_N_M_GO_sig_lfc_1.5 = subset(res_WC_N_M_GO_sig,abs(res_WC_N_M_GO_sig$log2FoldChange) >= 1.5)
    res_WC_N_M_GO_sig_lfc_2 = subset(res_WC_N_M_GO_sig,abs(res_WC_N_M_GO_sig$log2FoldChange) >= 2)
    res_WC_N_M_GO_fc=res_WC_N_M_GO$log2FoldChange


    res_WC_N_S_GO = results(dds_WC_GO,contrast=c("group","Normal","Severe"))
    res_WC_N_S_GO_sig = subset(res_WC_N_S_GO,padj<0.05)
    res_WC_N_S_GO_sig_lfc_1 = subset(res_WC_N_S_GO_sig,abs(res_WC_N_S_GO_sig$log2FoldChange) >= 1)
    res_WC_N_S_GO_sig_lfc_1.5 = subset(res_WC_N_S_GO_sig,abs(res_WC_N_S_GO_sig$log2FoldChange) >= 1.5)
    res_WC_N_S_GO_sig_lfc_2 = subset(res_WC_N_S_GO_sig,abs(res_WC_N_S_GO_sig$log2FoldChange) >= 2)
    res_WC_N_S_GO_fc=res_WC_N_S_GO$log2FoldChange

    #######spleen
    SPLEEN_GO=SE_GO_term_gene_acoords_read_counts[,c(2,5,8,10,14,18,22,24,27)]
    dds_S_GO <- DESeqDataSet(SPLEEN_GO, design = ~ group)
    dds_Spleen_GO = DESeq(dds_S_GO)
    res_Spleen_N_M_GO = results(dds_Spleen_GO,contrast=c("group","Normal","Mild"))
    res_Spleen_N_M_GO_sig = subset(res_Spleen_N_M_GO,padj<0.05)
    res_Spleen_N_M_GO_sig_lfc_1 = subset(res_Spleen_N_M_GO_sig,abs(res_Spleen_N_M_GO_sig$log2FoldChange) >= 1)
    res_Spleen_N_M_GO_sig_lfc_1.5 = subset(res_Spleen_N_M_GO_sig,abs(res_Spleen_N_M_GO_sig$log2FoldChange) >= 1.5)
    res_Spleen_N_M_GO_sig_lfc_2 = subset(res_Spleen_N_M_GO_sig,abs(res_Spleen_N_M_GO_sig$log2FoldChange) >= 2)
    res_Spleen_N_M_GO_fc=res_Spleen_N_M_GO$log2FoldChange

    res_Spleen_N_S_GO = results(dds_Spleen_GO,contrast=c("group","Normal","Severe"))
    res_Spleen_N_S_GO_sig = subset(res_Spleen_N_S_GO,padj<0.05)
    res_Spleen_N_S_GO_sig_lfc_1 = subset(res_Spleen_N_S_GO_sig,abs(res_Spleen_N_S_GO_sig$log2FoldChange) >= 1)
    res_Spleen_N_S_GO_sig_lfc_1.5 = subset(res_Spleen_N_S_GO_sig,abs(res_Spleen_N_S_GO_sig$log2FoldChange) >= 1.5)
    res_Spleen_N_S_GO_sig_lfc_2 = subset(res_Spleen_N_S_GO_sig,abs(res_Spleen_N_S_GO_sig$log2FoldChange) >= 2)
    res_Spleen_N_S_GO_fc=res_Spleen_N_S_GO$log2FoldChange

    #######
    GO_term_gene_in_8_groups_fc_value_table=data.frame(res_Liver_N_M_GO_fc,res_Liver_N_S_GO_fc,res_BRAIN_N_M_GO_fc,res_BRAIN_N_S_GO_fc,res_WC_N_M_GO_fc,res_WC_N_S_GO_fc,res_Spleen_N_M_GO_fc,res_Spleen_N_S_GO_fc)
    #414:8 FC matrix
    rownames(GO_term_gene_in_8_groups_fc_value_table) <- mapIds(org.Rn.eg.db, keys = GO_term_gene, keytype = 'ENSEMBL', column = 'SYMBOL')
    library('pheatmap')
    GO_term_gene_in_8_groups_fc_value_table = t(apply(GO_term_gene_in_8_groups_fc_value_table,1,function(x){(x-mean(x))/sd(x)}))
    #GO_term_gene_in_8_groups_fc_value_table<-GO_term_gene_in_8_groups_fc_value_table-rowMeans(GO_term_gene_in_8_groups_fc_value_table)
    pheatmap(na.omit(GO_term_gene_in_8_groups_fc_value_table),fontsize = 10,fontsize_row=3,cellwidth =20,main = 'GO_terms_high_expression_genes_FC_value_in_8_group')
    #######414:36 expression matrix
    SE_GO_term_gene_acoords_read_counts=se[match(GO_term_gene,rownames(countdata)),]
    rownames(rpkm_GO_terms_counts) <- mapIds(org.Rn.eg.db, keys = rownames(rpkm_GO_terms_counts), keytype = 'ENSEMBL', column = 'SYMBOL')
    rpkm_GO_terms_counts=rpkm(assay(SE_GO_term_gene_acoords_read_counts),gene.length = 414)
    rpkm_GO_terms_counts = t(apply(rpkm_GO_terms_counts,1,function(x){(x-mean(x))/sd(x)}))
    #rpkm_GO_terms_counts<-rpkm_GO_terms_counts-rowMeans(rpkm_GO_terms_counts)
    pheatmap(rpkm_GO_terms_counts,fontsize = 10,cellwidth =12,fontsize_row=3,main = 'GO_terms_high_expression_genes_normalized_counts_hp_in_36samples')
    ############

  • 相关阅读:
    Hive伪分布式下安装
    Hadoop单机和伪分布式安装
    Spark 键值对RDD操作
    Scala入门:从HelloWorld开始【源码及编译】
    Spark RDD编程核心
    Scala 元组
    剑指offer(7):斐波那契数列
    剑指offer(13):调整数组顺序使奇数位于偶数前面
    剑指offer(21):二维数组中的查找
    解决IEDA web项目控制台中文乱码
  • 原文地址:https://www.cnblogs.com/beckygogogo/p/9849350.html
Copyright © 2020-2023  润新知