• R绘图(6): 拯救初学者——发表级绘图全能包ggpubr


    今天花了很多时间整理这个包的绘图函数,不得不说这个基于ggplot2的包,是真的友好,很适合初学者。可能对于熟悉ggplot2的人来说,ggpubr的存在有些多余,但这并不妨碍它成为一个优秀的R包。

    接下来我主要依据变量类型,对这个包的十来种函数,近30种图形进行展示,几乎涵盖了平常看到的大多数图。这篇推文也很可能成为你见过的最详细的ggpubr中文教程。
    公众号后台回复20210330,获取今天的代码和图形示例pdf。

    # Install ggpubr only when it is missing, so re-running this script does not
    # reinstall the package every time.
    if (!requireNamespace("ggpubr", quietly = TRUE)) {
      install.packages("ggpubr")
    }
    library(ggpubr)
    

    1. 单变量——连续型

    1.1 密度图
    # Build the demo data frame: four groups of 200 normal draws each.
    set.seed(330)
    mydata <- data.frame(
      group = rep(c("A", "B", "C", "D"), each = 200),
      value = c(
        rnorm(200, mean = 2),
        rnorm(200, mean = 6),
        rnorm(200, mean = 2, sd = 4),
        rnorm(200, mean = 6, sd = 4)
      )
    )

    # Density of all values pooled together.
    ggdensity(
      data = mydata,
      x = "value",
      y = "..density..", # or "..count.."; "..density.." is the default
      fill = "lightgray",
      add = "mean", # or "median"
      rug = TRUE # add a marginal rug below the curve
    )
    ggsave(
      filename = "density1.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    分组绘制

    # One density curve per group, drawn in separate facets.
    ggdensity(
      mydata,
      x = "value",
      color = "group", fill = "group", # colour outline and fill by group
      # A palette name such as "Dark2", a ggsci-style name such as "aaas",
      # or explicit colour codes such as "#00AFBB".
      palette = "Dark2",
      add = "mean",
      rug = TRUE,
      alpha = 0.2, # fill transparency
      # FALSE (spelled out: `F` is an ordinary, reassignable variable) hides
      # the x-axis label.
      xlab = FALSE, ylab = "Density",
      facet.by = "group", # one panel per group
      panel.labs = list(group = c("1", "2", "3", "4")), # rename the panels
      title = "density plot",
      ggtheme = theme_bw() # any ggplot2 theme can be supplied
    )
    ggsave("density2.pdf", width = 13, height = 12, units = "cm")
    

    1.2 密度图叠加正态分布
    # Two groups of 200 draws; this replaces the earlier four-group `mydata`.
    set.seed(330)
    mydata <- data.frame(
      group = rep(c("A", "B"), each = 200),
      value = c(rnorm(200, mean = 2), rnorm(200, mean = 6, sd = 4))
    )

    # Pooled density with a dashed normal-density curve overlaid.
    ggdensity(data = mydata, x = "value", fill = "red") +
      stat_overlay_normal_density(linetype = "dashed", color = "red") +
      scale_x_continuous(limits = c(-5, 20))
    ggsave(
      filename = "density3.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    分组绘制

    # Per-group densities, each with its own dashed normal overlay.
    ggdensity(data = mydata, x = "value", fill = "group") +
      stat_overlay_normal_density(
        mapping = aes(color = group),
        linetype = "dashed"
      ) +
      scale_x_continuous(limits = c(-5, 20))
    ggsave(
      filename = "density4.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    分组+分面

    # Same as the grouped version, but with one facet per group.
    ggdensity(
      data = mydata, x = "value",
      fill = "group",
      facet.by = "group"
    ) +
      stat_overlay_normal_density(
        mapping = aes(color = group),
        linetype = "dashed"
      ) +
      scale_x_continuous(limits = c(-5, 20))
    ggsave(
      filename = "density5.pdf", plot = last_plot(),
      width = 16, height = 10, units = "cm"
    )
    

    1.3 直方图
    # Histogram of all values, with a mean line and a marginal rug.
    gghistogram(
      data = mydata,
      x = "value",
      fill = "lightgray",
      add = "mean",
      rug = TRUE
    )
    ggsave(
      filename = "hist1.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    分组绘制

    # Histogram filled by group, using two explicit colours.
    gghistogram(
      data = mydata,
      x = "value",
      fill = "group",
      palette = c("#00AFBB", "#E7B800"),
      add = "mean",
      rug = TRUE
    )
    ggsave(
      filename = "hist2.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    添加核密度图

    # Grouped histogram with a kernel density curve drawn on top.
    gghistogram(
      data = mydata,
      x = "value",
      fill = "group",
      palette = c("#00AFBB", "#E7B800"),
      rug = TRUE,
      add_density = TRUE
    )
    ggsave(
      filename = "hist3.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    2. 双变量——x离散,y连续

    2.1 箱型图
    library(patchwork)
    # Two outer groups (A/B), each split into two inner groups (g1/g2).
    set.seed(330)
    mydata <- data.frame(
      group = rep(c("A", "B"), each = 100),
      group2 = rep(c("g1", "g2", "g1", "g2"), each = 50),
      value = c(rnorm(100, mean = 2), rnorm(100, mean = 6, sd = 4))
    )

    # The `+` between complete plots joins them into one figure (patchwork).
    ggboxplot(data = mydata, x = "group", y = "value", width = 0.8) +
      ggboxplot(
        data = mydata, x = "group", y = "value", width = 0.8,
        orientation = "horizontal"
      ) +
      ggboxplot(
        data = mydata, x = "group", y = "value", width = 0.8,
        notch = TRUE, order = c("B", "A")
      ) +
      ggboxplot(
        data = mydata, x = "group", y = "value", width = 0.8,
        select = c("A")
      )
    ggsave(
      filename = "box1.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    orientation调整图形方向;notch添加缺口;order调整顺序;select选择特定的水平来画图

    # Box plots with the raw points overlaid: jittered points on the left panel,
    # a dot plot on the right panel.
    ggboxplot(
      data = mydata, x = "group", y = "value", width = 0.8,
      add = "jitter",
      add.params = list(color = "lightblue", size = 1, shape = 17)
    ) +
      ggboxplot(
        data = mydata, x = "group", y = "value", width = 0.8,
        add = "dotplot",
        add.params = list(color = "lightblue", size = 0.5)
      )
    ggsave(
      filename = "box2.pdf", plot = last_plot(),
      width = 16, height = 10, units = "cm"
    )
    

    上面的add添加额外图形
    add.params对附加图形的参数进行调整,shape表示点的形状,可以参见下图

    内部分组

    # Box plot with an inner grouping (`group2`) inside each x level.
    # Logical flags are written as TRUE/FALSE because `T`/`F` are ordinary
    # variables that can be reassigned.
    ggboxplot(
      mydata,
      x = "group", y = "value", width = 0.6,
      color = "black", fill = "group2",
      palette = c("#00AFBB", "#E7B800"),
      xlab = FALSE, # hide the x-axis label
      bxp.errorbar = TRUE, bxp.errorbar.width = 0.4, # add error bars
      size = 1, # thickness of the box outline
      outlier.shape = NA, # do not draw outliers
      legend = "right" # put the legend on the right
    )
    ggsave("box3.pdf", width = 10, height = 10, units = "cm")
    

    2.1.1 箱型图添加配对连线

    可以接受两种数据框

    # Long format: one row per observation, with the condition stored in `group`.
    mydata2 <- mydata
    mydata2$group2 <- NULL
    head(mydata2)
    # group    value
    # 1     A 3.551687
    # 2     A 3.664068
    # 3     A 2.194454
    # 4     A 2.569605
    # 5     A 2.579997
    # 6     A 1.837967
    ggpaired(mydata2, x = "group", y = "value",
             color = "group", line.color = "gray", line.size = 0.4,
             palette = "npg")

    # Wide format: one row per id, one column per condition.
    # value.var is given explicitly so dcast() does not have to guess which
    # column holds the measurements (guessing emits a message).
    mydata2$id <- rep(1:100, 2)
    mydata2 <- reshape2::dcast(mydata2, id ~ group, value.var = "value")
    head(mydata2)
    # id        A         B
    # 1  1 3.551687  4.720074
    # 2  2 3.664068  7.821049
    # 3  3 2.194454  8.956841
    # 4  4 2.569605 -4.450063
    # 5  5 2.579997  7.568216
    # 6  6 1.837967  5.133688
    ggpaired(mydata2, cond1 = "A", cond2 = "B",
             color = "condition", line.color = "gray", line.size = 0.4,
             palette = "npg")
    ggsave("box4.pdf", width = 10, height = 10, units = "cm")
    

    上面两种数据框得到的图是一模一样的

    2.1.2 添加p值

    一般用法

    # Box plot with a dot-plot overlay and a t-test p-value.
    ggboxplot(
      data = mydata,
      x = "group", y = "value",
      width = 0.8,
      add = "dotplot",
      add.params = list(color = "lightblue", size = 0.5)
    ) +
      stat_compare_means(method = "t.test")
    ggsave(
      filename = "box5.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    成对数据

    # Paired plot from the wide-format data, annotated with a paired test.
    ggpaired(
      data = mydata2,
      cond1 = "A", cond2 = "B",
      color = "condition",
      line.color = "gray", line.size = 0.4,
      palette = "npg"
    ) +
      stat_compare_means(paired = TRUE)
    ggsave(
      filename = "box6.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    多于两个组时,定义想检验的配对

    # Pairs of dose levels to test against each other.
    my_comparisons <- list(
      c("0.5", "1"),
      c("1", "2"),
      c("0.5", "2")
    )
    ggboxplot(
      data = ToothGrowth,
      x = "dose", y = "len",
      color = "dose", palette = "npg"
    ) +
      # p-values for the pairwise comparisons
      stat_compare_means(
        comparisons = my_comparisons,
        label.y = c(29, 35, 40)
      ) +
      # overall p-value across all groups
      stat_compare_means(label.y = 45)
    ggsave(
      filename = "box7.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    固定某一组,其他组与其比较

    # Compare every dose level against the reference level "0.5".
    ggboxplot(ToothGrowth, x = "dose", y = "len",
              color = "dose", palette = "npg") +
      # overall p-value (ANOVA)
      stat_compare_means(method = "anova", label.y = 40) +
      # Show significance symbols as the label. The `label` argument is used
      # instead of aes(label = ..p.signif..), because the dot-dot notation is
      # deprecated in current ggplot2.
      stat_compare_means(label = "p.signif",
                         method = "t.test", ref.group = "0.5")
    ggsave("box8.pdf", width = 10, height = 10, units = "cm")
    

    分组/分面之后再做比较

    # Compare the two supplements within each dose facet.
    ggboxplot(
      data = ToothGrowth,
      x = "supp", y = "len",
      color = "supp",
      palette = "npg",
      add = "jitter",
      facet.by = "dose"
    ) +
      # custom label that drops the test name and keeps only the p-value
      stat_compare_means(
        mapping = aes(label = paste0("p = ", ..p.format..))
      )
    ggsave(
      filename = "box9.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    2.2 小提琴图

    整体上参数选项和箱型图差不多

    # Left panel: violins filled by group with a white box plot inside.
    # Right panel: violins split by the inner grouping `group2`.
    ggviolin(
      data = mydata, x = "group", y = "value",
      fill = "group",
      palette = c("#00AFBB", "#E7B800"),
      add = "boxplot",
      add.params = list(fill = "white")
    ) +
      ggviolin(
        data = mydata, x = "group", y = "value",
        color = "group2", # inner grouping
        palette = c("#00AFBB", "#E7B800"),
        add = "boxplot"
      )
    ggsave(
      filename = "violin1.pdf", plot = last_plot(),
      width = 16, height = 10, units = "cm"
    )
    

    2.3 柱形图
    2.3.1 数据已经统计好
    # Pre-summarised data: one bar height per group.
    df1 <- data.frame(
      group = c("A", "B", "C"),
      len = c(6, 10, 14)
    )
    ggbarplot(
      data = df1, x = "group", y = "len",
      fill = "group",
      color = "group",
      palette = c("#00AFBB", "#E7B800", "#FC4E07"),
      label = TRUE,
      lab.pos = "in",
      lab.col = "white"
    )
    ggsave(
      filename = "bar1.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    2.3.2 数据没有统计好

    (需要函数帮你计算,比如组内求和,一般反映在纵坐标上,这是函数帮你算的)

    # Two values per group, one for each level of `group2`.
    df2 <- data.frame(
      group = rep(c("A", "B", "C"), 2),
      group2 = rep(c("1", "2"), each = 3),
      len = c(6, 15, 3, 4, 10, 5)
    )
    # group group2 len
    # 1     A      1   6
    # 2     B      1  15
    # 3     C      1   3
    # 4     A      2   4
    # 5     B      2  10
    # 6     C      2   5

    # Left panel: default bar position. Right panel: bars dodged side by side.
    ggbarplot(
      data = df2, x = "group", y = "len",
      fill = "group2", color = "group2",
      palette = "Paired",
      label = TRUE, lab.col = "white", lab.pos = "in"
    ) +
      ggbarplot(
        data = df2, x = "group", y = "len",
        fill = "group2", color = "group2",
        palette = "Paired",
        label = TRUE,
        # dodge width, range 0-1: how far apart the bars are shifted
        position = position_dodge(0.9)
      )
    ggsave(
      filename = "bar2.pdf", plot = last_plot(),
      width = 16, height = 10, units = "cm"
    )
    

    2.3.3 添加误差棒
    # Raw (unsummarised) observations are passed straight to ggbarplot().
    df3 <- mydata
    # without `add`, the values are summed up
    ggbarplot(data = df3, x = "group", y = "value") +
      # with add = "mean", the group mean is drawn
      ggbarplot(
        data = df3, x = "group", y = "value",
        add = "mean"
      ) +
      # add error bars; error.plot picks the display form (default shows both
      # the upper and the lower bar)
      ggbarplot(
        data = df3, x = "group", y = "value",
        add = "mean_se",
        error.plot = "upper_errorbar"
      ) +
      # inner grouping by `group2`
      ggbarplot(
        data = df3, x = "group", y = "value",
        color = "group2",
        palette = c("#00AFBB", "#E7B800"),
        add = "mean_se",
        position = position_dodge()
      )
    ggsave(
      filename = "bar3.pdf", plot = last_plot(),
      width = 16, height = 16, units = "cm"
    )
    

    2.4 连线图
    # data already summarised, so it can be plotted directly
    ggline(data = df1, x = "group", y = "len") +
      # inner grouping: line type, point shape and colour all vary by group2
      ggline(
        data = df2, x = "group", y = "len",
        linetype = "group2",
        shape = "group2", # point shape
        color = "group2",
        palette = c("#00AFBB", "#E7B800")
      ) +
      # add the individual points and error bars
      ggline(
        data = df3, x = "group", y = "value",
        add = c("mean_se", "dotplot"),
        add.params = list(size = 0.5),
        color = "steelblue"
      ) +
      # inner grouping: only the line colour differs
      ggline(
        data = df3, x = "group", y = "value",
        color = "group2",
        palette = c("#00AFBB", "#E7B800"),
        add = "mean_se"
      )
    ggsave(
      filename = "line1.pdf", plot = last_plot(),
      width = 16, height = 16, units = "cm"
    )
    

    2.5 饼图
    # Slice labels of the form "A(20%)": group name plus its share of the total.
    df1$ratio <- paste0(
      df1$group,
      "(", round(df1$len / sum(df1$len), 3) * 100, "%)"
    )
    # Left panel: default label position. Right panel: white labels inside.
    ggpie(
      data = df1, x = "len",
      label = "ratio",
      fill = "group", color = "white",
      palette = c("#00AFBB", "#E7B800", "#FC4E07")
    ) +
      ggpie(
        data = df1, x = "len",
        label = "ratio",
        lab.pos = "in", lab.font = "white",
        fill = "group", color = "white",
        palette = c("#00AFBB", "#E7B800", "#FC4E07")
      )
    ggsave(
      filename = "pie1.pdf", plot = last_plot(),
      width = 16, height = 10, units = "cm"
    )
    

    label添加文本注释;
    color是扇形的边线,lab.pos调整文本的位置,lab.font调整文本字体颜色

    2.6 圆环图
    # Donut chart with the same labels and colours as the pie chart.
    ggdonutchart(
      data = df1, x = "len",
      label = "ratio",
      lab.pos = "in",
      lab.font = "white",
      fill = "group",
      color = "white",
      palette = c("#00AFBB", "#E7B800", "#FC4E07")
    )
    ggsave(
      filename = "donut1.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    2.7 克利夫兰点图
    # Cleveland dot chart: points coloured by group2, with light gray segments.
    ggdotchart(
      data = df2,
      x = "group", y = "len",
      color = "group2",
      size = 3,
      add = "segment",
      add.params = list(color = "lightgray", size = 1.5),
      position = position_dodge(0.5),
      palette = "jco",
      ggtheme = theme_pubclean()
    )
    ggsave(
      filename = "Clevelands_Dot1.pdf", plot = last_plot(),
      width = 10, height = 10, units = "cm"
    )
    

    3. 双变量——x, y都连续

    3.1 散点图添加回归线,相关系数
    # mtcars with the cylinder count as a factor, so it can drive colour/shape.
    df4 <- mtcars
    df4$cyl <- as.factor(df4$cyl)
    ggscatter(df4, x = "wt", y = "mpg",
              color = "black", size = 3, # point colour and size
              add = "reg.line", # add a regression line
              add.params = list(color = "blue", fill = "lightgray"), # style of the line
              conf.int = TRUE, # confidence band around the regression line
              cor.coef = TRUE, # print the correlation coefficient
              # Options for the correlation label. label.sep is the escape "\n",
              # so R and p go on separate lines; the literal had been split
              # across two source lines, which added stray spaces to the separator.
              cor.coeff.args = list(method = "pearson", label.x = 3, label.sep = "\n")
    )
    
    3.2 分组计算相关系数
    # One regression line and one correlation label per cylinder group.
    ggscatter(
      data = df4,
      x = "wt", y = "mpg",
      color = "cyl",
      palette = "jco",
      add = "reg.line",
      conf.int = TRUE
    ) +
      stat_cor(mapping = aes(color = cyl), label.x = 3)
    
    3.3 局部回归
    # Local (loess) regression curve with its confidence band.
    ggscatter(
      data = df4,
      x = "wt", y = "mpg",
      add = "loess",
      conf.int = TRUE
    )
    

    3.4 添加分组椭圆,均值点,以及辐射线
    # Group ellipses, group mean points and star (radiating) segments.
    ggscatter(
      data = df4,
      x = "wt", y = "mpg",
      color = "cyl",
      shape = "cyl",
      palette = c("#00AFBB", "#E7B800", "#FC4E07"),
      ellipse = TRUE,
      mean.point = TRUE,
      star.plot = TRUE
    )
    
    3.5 添加文本注释
    # Label every point with its row name (the car model); repel = TRUE asks
    # ggpubr to use ggrepel so the text labels do not overplot each other.
    # The former `+ plot_layout(widths = c(1,2))` was dropped: it described a
    # two-column layout, but only a single plot is drawn here.
    df4$name <- rownames(df4)
    ggscatter(df4, x = "wt", y = "mpg",
              color = "cyl", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
              label = "name", repel = TRUE)
    

    3.6 散点图边缘添加密度图/箱型图
    # Scatter plot with marginal plots in the default style, filled by Species.
    ggscatterhist(
      data = iris,
      x = "Sepal.Length",
      y = "Sepal.Width",
      color = "Species",
      size = 3,
      alpha = 0.6,
      palette = c("#00AFBB", "#E7B800", "#FC4E07"),
      margin.params = list(fill = "Species", color = "black", size = 0.2)
    )
    

    # Same scatter plot, with box plots in the margins and the bw theme.
    ggscatterhist(
      data = iris,
      x = "Sepal.Length",
      y = "Sepal.Width",
      color = "Species",
      size = 3,
      alpha = 0.6,
      palette = c("#00AFBB", "#E7B800", "#FC4E07"),
      margin.plot = "boxplot",
      ggtheme = theme_bw()
    )
    


    感谢你能读到这里,有任何疑问欢迎后台留言。

    因水平有限,有错误的地方,欢迎批评指正!

  • 相关阅读:
    python开发函数进阶:内置函数
    学习笔记之机器学习(Machine Learning)
    学习笔记之Visual Studio Code (VSCode) & Clang
    学习笔记之Supervised Learning with scikit-learn | DataCamp
    学习笔记之1001 Inventions That Changed the World
    学习笔记之Machine Learning by Andrew Ng | Stanford University | Coursera
    学习笔记之Everything
    学习笔记之HTML
    学习笔记之Python全栈开发/人工智能公开课_腾讯课堂
    学习笔记之曾国藩家书
  • 原文地址:https://www.cnblogs.com/TOP-Bio/p/14599533.html
Copyright © 2020-2023  润新知