• R:ggplot2数据可视化——进阶(1)


    本教程分为三个部分,此篇为Part1,推荐学习一些基础知识后阅读~

    1 理解ggplot语法

    (1)对数据框类型数据进行可视化

    (2)可以叠加层来不断丰富图形信息

    让我们基于midwest数据集来初始化一个基本的图形:

    # Setup: print large numbers in full instead of scientific notation (1e+06)
    options(scipen = 999)
    library(ggplot2)
    # Load the midwest data frame that ships with ggplot2
    data("midwest", package = "ggplot2")
    # midwest <- read.csv("http://goo.gl/G1K41K")  # alternative source

    # Initialise the plot: only the data and the axis mappings are given,
    # no layer is added yet. area and poptotal are columns of 'midwest'.
    ggplot(data = midwest, mapping = aes(x = area, y = poptotal))
    

    aes()函数专门用来指定x轴和y轴所对应的变量;源数据框中用到的任何列都需要在这个函数中显式指定。

    2 线性模型拟合散点图

    library(ggplot2)
    # Scatter plot of area against total population with a linear-model
    # fit drawn on top; pass se = FALSE to geom_smooth() to turn off the
    # confidence bands.
    g <- ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point() +
      geom_smooth(method = "lm")
    plot(g)
    

     使用 ?geom_smooth 可以查询该函数的帮助文档

    3 调整x y轴范围

    # Method 1: restrict the axes with xlim() / ylim()
    library(ggplot2)
    g <- ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point() +
      geom_smooth(method = "lm")  # se = FALSE would turn off the confidence bands

    # The points outside the limits are deleted
    g +
      xlim(c(0, 0.1)) +
      ylim(c(0, 1000000))

    # Method 2: zoom in with coord_cartesian()
    library(ggplot2)
    g <- ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point() +
      geom_smooth(method = "lm")  # se = FALSE would turn off the confidence bands

    # Zooming keeps the points outside the limits, so the line of best fit
    # is the same as in the original plot.
    g1 <- g +
      coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000))
    plot(g1)
    

    4 改变标题

    # Variant 1: the full plot call, with every label set through labs()
    library(ggplot2)
    ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point() +
      geom_smooth(method = "lm") +
      coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
      labs(
        title = "Area Vs Population",
        subtitle = "From midwest dataset",
        x = "Area",
        y = "Population",
        caption = "Midwest Demographics"
      )

    # Variant 2: add the title and axis labels to the existing plot object
    # g1 with ggtitle(), xlab() and ylab()
    g1 +
      ggtitle("Area Vs Population", subtitle = "From midwest dataset") +
      xlab("Area") +
      ylab("Population")
    

     5 改变点的颜色和大小

    library(ggplot2)
    # Same plot, now with a fixed colour and size for every point and a
    # differently coloured fit line.
    ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point(col = "steelblue", size = 3) +
      geom_smooth(method = "lm", col = "firebrick") +
      coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
      labs(
        title = "Area Vs Population",
        subtitle = "From midwest dataset",
        x = "Area",
        y = "Population",
        caption = "Midwest Demographics"
      )
    

     改变颜色以反映另一列变量的类别

    library(ggplot2)
    # Point colour is mapped inside aes(), so it varies with the state
    # category instead of being one fixed value.
    gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point(aes(col = state), size = 3) +
      geom_smooth(method = "lm", col = "firebrick", size = 2) +
      coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
      labs(
        title = "Area Vs Population",
        subtitle = "From midwest dataset",
        x = "Area",
        y = "Population",
        caption = "Midwest Demographics"
      )
    plot(gg)
    

    除 color 外,size、shape、stroke(边界线粗细)和 fill(填充颜色)均可用同样的方式指定

    也可以改变调色板

    # Switch the colour scale of gg to the "Set1" brewer palette
    gg + scale_color_brewer(palette = "Set1")
    

    更多调色板可以在 RColorBrewer 包中找到

    library(RColorBrewer)
    # Show the first 10 rows of the palette information table
    head(brewer.pal.info, n = 10)
    #>          maxcolors category colorblind
    #> BrBG            11      div       TRUE
    #> PiYG            11      div       TRUE
    #> PRGn            11      div       TRUE
    #> PuOr            11      div       TRUE
    #> RdBu            11      div       TRUE
    #> RdGy            11      div      FALSE
    #> RdYlBu          11      div       TRUE
    #> RdYlGn          11      div      FALSE
    #> Spectral        11      div      FALSE
    #> Accent           8     qual      FALSE
    

     

     6 改变x轴文本和刻度位置

    breaks and labels

    Step 1: Set the breaks

     scale_x_continuous —— X 轴变量是连续变量

    scale_x_date ——日期变量

    library(ggplot2)

    # Base plot: points coloured by state plus a linear fit
    gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point(aes(col = state), size = 3) +
      geom_smooth(method = "lm", col = "firebrick", size = 2) +
      coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
      labs(
        title = "Area Vs Population",
        subtitle = "From midwest dataset",
        x = "Area",
        y = "Population",
        caption = "Midwest Demographics"
      )

    # Place the x-axis breaks at 0, 0.01, ..., 0.1
    gg + scale_x_continuous(breaks = seq(from = 0, to = 0.1, by = 0.01))
    

    Step 2: Change the labels 

    改变坐标轴刻度处的标签(labels)。labels 向量的长度需要与 breaks 向量保持一致

    # Fixed: the package is called ggplot2; library(ggplots) fails because
    # no package of that name is loaded anywhere else in this tutorial.
    library(ggplot2)

    # Base plot: points coloured by state plus a linear fit
    gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point(aes(col = state), size = 3) +
      geom_smooth(method = "lm", col = "firebrick", size = 2) +
      coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
      labs(
        title = "Area Vs Population",
        subtitle = "From midwest dataset",
        x = "Area",
        y = "Population",
        caption = "Midwest Demographics"
      )

    # Change breaks and labels together: seq(0, 0.1, 0.01) yields 11 breaks,
    # so exactly 11 labels (letters a to k) are supplied.
    gg + scale_x_continuous(breaks = seq(0, 0.1, 0.01), labels = letters[1:11])


    # Reverse the x-axis scale
    gg + scale_x_reverse()

     为轴标签自定义文本

    Method 1: Using sprintf(). (The labels are formatted with a % sign in the example below.)

    Method 2: Using a custom user defined function. (Formatted 1000’s to 1K scale)

    library(ggplot2)

    # Base plot: points coloured by state plus a linear fit
    gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point(aes(col = state), size = 3) +
      geom_smooth(method = "lm", col = "firebrick", size = 2) +
      coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
      labs(
        title = "Area Vs Population",
        subtitle = "From midwest dataset",
        x = "Area",
        y = "Population",
        caption = "Midwest Demographics"
      )

    # Change the axis texts:
    #   x axis - labels built up front with sprintf(), a "%" sign appended
    #   y axis - labels computed by a function that divides by 1000 and
    #            appends "K"
    gg +
      scale_x_continuous(
        breaks = seq(0, 0.1, 0.01),
        labels = sprintf("%1.2f%%", seq(0, 0.1, 0.01))
      ) +
      scale_y_continuous(
        breaks = seq(0, 1000000, 200000),
        labels = function(x) {
          paste0(x / 1000, "K")
        }
      )
    

    使用内置主题一次性自定义整个主题

    ?theme_bw 

    Method 1: Use theme_set() to set the theme before drawing the ggplot. Note that this setting will affect all future plots.

    Method 2: Draw the ggplot and then add the overall theme setting as a layer (e.g. theme_bw()).

    library(ggplot2)

    # Base plot: points coloured by state plus a linear fit
    gg <- ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point(aes(col = state), size = 3) +
      geom_smooth(method = "lm", col = "firebrick", size = 2) +
      coord_cartesian(xlim = c(0, 0.1), ylim = c(0, 1000000)) +
      labs(
        title = "Area Vs Population",
        subtitle = "From midwest dataset",
        x = "Area",
        y = "Population",
        caption = "Midwest Demographics"
      )

    gg <- gg +
      scale_x_continuous(breaks = seq(0, 0.1, 0.01))

    # Method 1: theme_set() before drawing.
    # This call is executed here and affects all future plots.
    theme_set(theme_classic())
    gg

    # Method 2: add the theme as a layer of the individual plot
    gg +
      theme_bw() +
      labs(subtitle = "BW Theme")
    gg +
      theme_classic() +
      labs(subtitle = "Classic Theme")
    

     更多主题可以参考 ggthemes 包和 ggthemr 包。

     参考:

    英文教程:http://r-statistics.co/Complete-Ggplot2-Tutorial-Part1-With-R-Code.html

  • 相关阅读:
    LinQ表达式的一点点总结(二)select中新建对象
    给自己的博客添加分享到功能
    职场日记2上班第一天
    清楚屏幕右侧变化的数据Application.Current.Host.Settings.EnableFrameRateCounter = true;
    基于委托的异步
    C#中的装箱与拆箱
    关于java的初始化问题
    StreamReader类以及其方法ReadLine,Read,ReadToEnd的分析
    WP7的控件开发入门(二)
    单元测试的阶段性总结
  • 原文地址:https://www.cnblogs.com/icydengyw/p/11492656.html
Copyright © 2020-2023  润新知