• R语言数据可视化部分


    数据挖掘

    R语言数据可视化部分

    散点图

    library(datasets)

    head(cars)

    plot(cars)

     

    plot(cars$dist,cars$speed,xlab = "dist", ylab = "speed",xlim = c(0,20))

     

    attach(cars)

    plot(dist,speed,xlab = "dist",ylab = "speed")

     

    plot(cars$dist,cars$speed,xlab = "dist",ylab = "speed",main = "cars 散点图",type="b",pch=4,lwd=2,col="red")

     

    a<-c(20,30,40,50,60)

    b<-c(11,22,33,44,66)

    c<-c(15,25,35,45,55)

    plot(a,b,type = "b")#有点,用线连接

    plot(a,b,type = "o")#有点,线连接,线过点

    plot(a,b,type = "p")#点图

    plot(a,b,type = "h")#用线表示值

    plot(a,b,type = "l")#直接线链接

    plot(a,b,type = "s")#梯形图

    plot(a,b,type = "S")#梯形图

    plot(a,b,type = "n")#不做图

     

    plot(a,b,pch=0)

    plot(a,b,pch=1)

    plot(a,b,pch=2)

    plot(a,b,pch=3)

    plot(a,b,lty=2,lwd=2,pch=15,cex=2)

    plot(a,b,type="b",lty=2,lwd=2,pch=15,cex=2)

     

    install.packages("RColorBrewer")

    library(RColorBrewer)

    plot(a,b,type="b",lty=2,lwd=2,pch=15,cex=2,col=2)

    # Snapshot only the settable graphical parameters. With no.readonly = FALSE
    # the snapshot also holds read-only entries, and the later par(opar) warns
    # for every one of them that it cannot set.
    opar <- par(no.readonly = TRUE)

    # Temporarily change the plot region size, line width, text scaling and
    # axis annotation font.
    par(pin = c(2, 3))
    par(lwd = 2, cex = 1.5)
    par(cex.axis = .75, font.axis = 3)

    plot(a, b, type = "b", pch = 19, lty = 2, col = 2)

    # pch = 23 is a fillable symbol, so bg sets its fill colour.
    plot(a, b, type = "b", pch = 23, lty = 5, col = 2, bg = "green")

    # Put back the parameters saved above.
    par(opar)

     

    par(mfrow = c(2, 2))

    plot(a,c,type = "o",col=3,lty=3,pch=3,lwd=4,

         main = "实验图形",xlab="a",ylab="c",

         xlim = c(0,100),ylim=c(0,100))

    x<-c(1:15)

    y<-x

    z<-10/x

    opar<-par(no.readonly = TRUE)

    par(mar=c(5,4,4,8)+0.1)

    plot(x,y,type = "b",pch=21,col=2,yaxt="n",lty=3,ann = FALSE)

    lines(x,z,type = "b",pch=3,col=4,lty=2)

    axis(side = 2,at=x,labels = x,col.axis=2,las=2)

    axis(side = 4,at=z,labels = round(z,digits = 2),

         cex.axis=2,las=2)

    par(opar)

     

    # Row selection with subset() and element insertion with append(),
    # demonstrated on the attenu data set.
    library(datasets)#load the package that provides the example data sets

    attenu#print the data set

    # NOTE(review): attach() puts the columns of attenu on the search path;
    # the subset() calls below resolve column names inside attenu on their own.
    attach(attenu)

    subset(attenu,dist>=40)#rows whose dist is greater than or equal to 40

    # The comparisons below use the strings "2" and "107"; R coerces the
    # column values to character for the comparison.
    df<-subset(attenu,event=="2" & dist=="107",select=c(dist));df#matching rows, dist column only

    s=subset(attenu,event=="2" & dist=="107",select=c(event,dist));s#same filter, keeping event and dist

    df1<-as.list(attenu);df1#convert the data frame to a list of columns

    append(df1$event,123)#returns the event vector with 123 added at the end

    # after = 55 asks for insertion after position 55; when the list is
    # shorter than that, append() places the value at the end.
    append(df1,1235,after = 55)#insert 1235 into the list after a given position

    # Join demo: two small data frames that share the key column "Id".
    data1 <- data.frame(Id = c(1:6), Product = c(rep("Toaster", 3), rep("Radio", 3)))
    data2 <- data.frame(Id = c(2, 4, 6, 7), State = c(rep("Alabama", 3), rep("Ohio", 1)))

    # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
    # reassigned, so they are not safe as logical constants.
    merge(data1, data2, all = TRUE)    # full outer join: every Id from either frame
    merge(data1, data2, all = FALSE)   # inner join: only Ids present in both
    merge(data1, data2, all = FALSE)   # same call again; all = FALSE is also the default
    merge(data1, data2, all.x = TRUE)  # left join: keep every row of data1

    # With no `by`, merge() joins on all columns the two frames have in common;
    # here that is only Id, so this equals merge(data1, data2, by = "Id").
    merge(data1, data2)

    # Explicit key. Any other column name present in both frames would be
    # output with .x / .y suffixes; these two frames have none.
    merge(data1, data2, by = "Id")

    #aggregate()函数

     

     

     

     

     

     

     

    #aggregate()函数

    # Grouped summaries with aggregate() and frequency counts with table().
    mtcars#print the built-in car data set

    # NOTE(review): the bare column names used below (cyl, gear, mpg, hp) are
    # found only because of this attach() call.
    attach(mtcars)

    str(mtcars)#show the structure of the data set (column names and types)

    aggregate(mtcars,by=list(cyl,gear),FUN = mean)#mean of every column for each cyl x gear combination

    #With a formula, aggregate() summarises only the columns named on the
    #left-hand side (mpg and hp), grouped by the right-hand side (cyl and gear).

    aggregate(cbind(mpg,hp) ~ cyl+gear, FUN=mean)

    #Frequency counts with table()

    table(hp)#how often each distinct hp value occurs

    #函数apply(),list,matrix,array三种形式均可以

    # apply(): run a function over the rows (MARGIN = 1) or the columns
    # (MARGIN = 2) of a matrix.
    rname <- c("r1", "r2", "r3")
    cname <- c("c1", "c2")
    b <- matrix(1:6, nrow = 3, dimnames = list(rname, cname))
    apply(b, 1, sum)  # row sums
    apply(b, 2, sum)  # column sums

    # lapply(): apply a function to each element and return a list; here it
    # lower-cases every title.
    m <- c("SPYDERMAN", "BATMAN", "VERTIGO", "CHINATOWN")
    m1 <- lapply(m, tolower)
    m1

    # sapply(): minimum of every mtcars column, simplified to a named vector.
    sapply(mtcars, min)

    # tapply(): mean mpg within each cylinder group. The original used mpg as
    # its own grouping index, which only echoes each distinct mpg value back.
    mpg_by_cyl <- tapply(mtcars$mpg, INDEX = mtcars$cyl, FUN = mean)
    mpg_by_cyl

     

     

     

    条形图

    #条形图

    # One-time setup: install the add-on packages used by the chart sections.
    # "plottrix" was a misspelling; the package loaded later for pie3D() and
    # fan.plot() is "plotrix".
    install.packages("vcd")
    install.packages("plotrix")
    install.packages("sm")
    install.packages("vioplot")

    library(vcd)

    a<-table(Arthritis$Treatment);a

    hist(a)

    barplot(a,main = "条形图",ylab="Frequency",

            xlab="Treatment",ylim=c(0,50))#条形图绘制

    barplot(a,main = "条形图",ylab="Treatment",

            xlab="Frequency",xlim=c(0,50),horiz=TRUE)#条形图绘制

    plot(Arthritis$Improved,main = "条形图",xlab="Improved",

         ylab="Frequency")

     

    counts <- table(Arthritis$Improved, Arthritis$Treatment)

    counts

    #堆叠

    barplot(counts, main = "条形图", xlab = "Treatment",

            ylab = "Frequency", col = c("red", "yellow", "green"),

            legend = rownames(counts))

     

    #分组

    barplot(counts, main = "条形图", xlab = "Treatment",

            ylab = "Frequency", col = c("red", "yellow", "green"),

            legend = rownames(counts),

            beside = TRUE)

     

    #均值条形图

    # Mean illiteracy rate per region, shown as a bar chart in ascending order.
    states <- data.frame(state.region, state.x77)

    # aggregate() returns one row per region: Group.1 is the region, x the mean.
    means <- aggregate(states$Illiteracy, by = list(state.region), FUN = mean)
    means

    # Reorder the rows from lowest to highest mean.
    ascending <- order(means$x)
    means <- means[ascending, ]
    means

    barplot(means$x, names.arg = means$Group.1)
    title("Mean Illiteracy Rate")

     

    #调条形图

    par(mar = c(5, 8, 4, 2))#增加Y的边界

    par(las = 2)#旋转条形图的标签

    counts <- table(Arthritis$Improved)

    barplot(counts, main = "Treatment Outcome", horiz = TRUE,

            cex.names = 0.8,

            names.arg = c("No Improvement",

                          "Some Improvement", "Marked Improvement"))

     

    #棘状图用spine(),看比例

    attach(Arthritis)

    counts <-table(Treatment, Improved)

    spine(counts, main = "棘状图")

    detach(Arthritis)

     

    饼图

    #饼图

    par(mfrow = c(2, 2))

    s<- c(10, 12, 4, 16, 8)

    lbls <- c("A", "B", "C", "D", "E")

    pie(s,labels = lbls, main = "饼图")

    pct <- round(s/sum(s) * 100)#求百分比

    lbls2 <- paste(lbls, " ", pct, "%", sep = "")

    pie(s, labels = lbls2, col = rainbow(length(lbls)),

        main = "饼图")

     

    install.packages("plotrix")

    library(plotrix)

    pie3D(s, labels = lbls, explode = 0.1, main = "3D 饼图 ")

    mydata<-table(state.region)

    lbls <-paste(names(mydata), " ", mydata, sep = "")

    pie3D(mydata, labels = lbls, main = "饼图1")

     

     

    #扇形图

    slices<-c(10, 12, 4, 16, 8)

    lbls<-c("US", "UK", "Australia", "Germany", "France")

    fan.plot(slices, labels = lbls, main = "扇形图")

     

    散点图

    #散点图

    attach(mtcars)                                                    

    par(mfrow=c(2,2))

    plot(wt, mpg,

         main="Basic Scatterplot of MPG vs. Weight",      

         xlab="Car Weight (lbs/1000)",

         ylab="Miles Per Gallon ", pch=19)

    abline(lm(mpg ~ wt), col="red", lwd=2, lty=1)           

    lines(lowess(wt, mpg), col="blue", lwd=2, lty=2) 

    #loess(),lowess()

    library(car)

    scatterplot(mpg ~ wt | cyl, at=cyl,data=mtcars, lwd=2,

                main="Scatter Plot of MPG vs. Weight by  Cylinders",

                xlab="Weight of Car (lbs/1000)",

                ylab="Miles Per Gallon", id.method="identify",

                legend.plot=TRUE, labels=row.names(mtcars),

                boxplots="xy")

     

     

    #散点图矩阵

    attach(mtcars)

    pairs(~mpg+disp+wt+drat,data=mtcars,main="汽车数据矩阵散点图")

    library(car)

    scatterplotMatrix(~mpg+disp+drat+wt,data=mtcars,spread=FALSE,

                      smoother.args=list(lty=2),

                      main="散点图矩阵")

    #spread=FALSE;选项表示不添加展示分散度和对称信息的直线

    #smoother.args=list(lty=2);设定loess()拟合曲线是用虚线而不是实线

     

    scatterplotMatrix(~mpg+disp+drat+wt,data=mtcars,spread=TRUE,

                      smoother.args=list(lty=1),

                      main="散点图矩阵")

    #不同风格

    # Other packages that offer scatter plot matrix functions.
    # "glus" was a misspelling of the gclus package.
    install.packages("gclus")
    library(gclus)
    install.packages("TeachingDemos")
    library(TeachingDemos)
    # pairs2() has no defaults for its x and y arguments, so the bare call
    # failed. Here two mtcars columns are plotted against two others.
    pairs2(mtcars[, c("mpg", "disp")], mtcars[, c("drat", "wt")])
    install.packages("HH")
    # installed.packages() only lists what is already installed (its first
    # argument is a library path); installing needs install.packages().
    install.packages("ResourceSelection")

     

    #高密度散点图

    # Simulate the data for the high-density scatter plots: two normal
    # clusters of n/2 points each, stacked into one data frame with columns
    # x and y. The fixed seed makes the draw reproducible.
    set.seed(1234)
    n <- 10000
    c1 <- matrix(rnorm(n, mean = 0, sd = 0.5), ncol = 2)
    c2 <- matrix(rnorm(n, mean = 3, sd = 2), ncol = 2)
    mydata <- setNames(as.data.frame(rbind(c1, c2)), c("x", "y"))

     

    with(mydata,

         plot(x, y, pch=19, main="高密度散点图"))

     

     

    with(mydata,

         smoothScatter(x, y, main="高密度散点图"))

     

    library(hexbin)

    with(mydata, {

      bin <- hexbin(x, y, xbins=50)

      plot(bin, main="Hexagonal Binning with 10,000 Observations")

    })

     

    library(IDPmisc)#做的散点图对大数据集的创建可读性比较好

    with(mydata,

         iplot(x, y, main="Image Scatter Plot with Color Indicating Density"))

    par(opar)

     

    #多维可视化散点图

    install.packages("scatterplot3d")

    library(scatterplot3d)

    attach(mtcars)

    scatterplot3d(wt,disp,mpg,main = "3d 散点图")

    scatterplot3d(wt, disp, mpg,

                  pch=16,

                  highlight.3d=TRUE,

                  type="h",

                  main="3D 散点图")

    #highlight.3d=TRUE;填充颜色

    s3d <-scatterplot3d(wt, disp, mpg,

                        pch=16,

                        highlight.3d=TRUE,

                        type="h",

                        main="3D 散点图")

     

     

    fit <- lm(mpg ~ wt+disp)

    s3d$plane3d(fit)

    detach(mtcars)

     

    #3d旋转散点图1

    install.packages("rgl")

    library(rgl)

    attach(mtcars)

    plot3d(wt,disp,mpg,col="blue",size=5)

     

    折线图

    #展示五种树的生长情况

    # Line chart with one growth curve per tree in the Orange data set.

    # as.numeric() on a factor returns its internal level codes, so Tree
    # becomes the integers 1..k, where k is the number of levels.
    Orange$Tree <- as.numeric(Orange$Tree)
    ntrees <- max(Orange$Tree); ntrees

    # range() returns c(min, max); these set the extent of the empty frame.
    xrange <- range(Orange$age); xrange
    yrange <- range(Orange$circumference); yrange

    # type = "n" draws axes and labels only; the curves are added below.
    plot(xrange, yrange,
         type = "n",
         xlab = "Age (days)",
         ylab = "Circumference (mm)")

    # One colour, line type and plotting symbol per tree.
    colors <- rainbow(ntrees)
    linetype <- seq_len(ntrees)
    # Exactly ntrees symbols starting at 18. The original seq(18, 18 + ntrees, 1)
    # produced one symbol too many, so the pch vector handed to legend() was
    # longer than the label vector.
    plotchar <- seq(18, 18 + ntrees - 1, 1); plotchar

    # seq_len() instead of 1:ntrees so the loop body is skipped when ntrees is 0.
    for (i in seq_len(ntrees)) {
      tree <- subset(Orange, Tree == i)
      lines(tree$age, tree$circumference,
            type = "b",
            lwd = 2,
            lty = linetype[i],
            col = colors[i],
            pch = plotchar[i])
    }

    # First argument is the main title, second the subtitle.
    title("树子的生长折线图", "绘图例子")

    # Legend anchored at the top-left corner of the data range.
    legend(xrange[1], yrange[2],
           seq_len(ntrees),
           cex = 0.8,
           col = colors,
           pch = plotchar,
           lty = linetype,
           title = "Tree")

    #  cex=0.8图形缩放0.8倍

     

    不同包作图比较

    #不同的程序包作图比较

    data<-read.csv("J:shuju/Facet_Data.csv",sep = ",",header = T)

    str(data)

    attach(data)

    # The same three charts (scatter, histogram, box plot) drawn with three
    # plotting systems. NOTE(review): the bare names SOD and tau rely on the
    # attach(data) call made before this section.

    # --- base graphics ---
    # graphics is attached by default; the call only marks the section. The
    # original loaded ggplot2 here and graphics in the ggplot2 section.
    library(graphics)
    plot(SOD, tau, main = "实验散点图")
    hist(SOD, breaks = 30, ylim = c(0, 40), main = "")
    boxplot(SOD ~ Class, data = data, xlab = "Class", ylab = "SOD")

    # --- lattice ---
    library(lattice)
    xyplot(SOD ~ tau, col = "black", main = "实验散点图")
    histogram(~SOD, data, type = "count", nint = 30, col = "white")
    bwplot(SOD ~ Class, data, xlab = "Class", par.settings = canonical.theme(color = FALSE))

    # --- ggplot2 ---
    library(ggplot2)
    # geom_point() has no `main` parameter, so the title was never drawn;
    # ggtitle() adds it to the plot instead.
    ggplot(data, aes(x = SOD, y = tau)) + geom_point(shape = 21) + ggtitle("实验散点图")
    ggplot(data, aes(SOD)) + geom_histogram(bins = 30, colour = "black", fill = "white")
    ggplot(data, aes(x = Class, y = SOD)) + geom_boxplot()

     

     

     

    核密度函数图

    #密度图

    mtcars

    attach(mtcars)

    par(mfrow = c(2, 1))

    d<-density(mpg)

    plot(d)

    d<-density(mpg)

    plot(d, main = "Kernel Density of Miles Per Gallon")

    polygon(d, col = "red", border = "blue")#填充

    rug(mpg, col = "brown")

     

    #多组密度图

    # Overlay the mpg density curves of the cylinder groups and add a legend.
    # NOTE(review): mpg and cyl are bare names; they rely on the attach(mtcars)
    # call made earlier in this section.
    library(sm)

    # Labelled factor version of cyl; used here only for the legend labels.
    a <- factor(cyl, levels = c(4, 6, 8),
                labels = c("4A", "6A", "8A"))
    sm.density.compare(mpg, cyl, xlab = "hfhhj")
    title(main = "hyhbhkajbkjhu")

    # One fill colour per factor level, starting at colour 2 -- presumably
    # chosen to match the curve colours sm.density.compare() draws; confirm.
    # The original stored this vector in a variable named `c` and then
    # referred to an undefined `colfill`.
    colfill <- c(2:(1 + length(levels(a))))

    # The original called length() here, which fails with these arguments;
    # the intended call is legend(). locator(1) waits for one mouse click and
    # uses that position, so this line needs an interactive graphics device.
    legend(locator(1), levels(a), fill = colfill)

     

    马赛克图

    #马赛克图
    ###############################################################
    #逻辑指定是否应该使用gp(参见gp)。如果TRUE和expected未指定, #
    #;则会拟合一个默认模型:如果指定了condvars(请参见strucplot), #
    #则会拟合一个相应的条件独立模型,否则将拟合一个完全独立模型 #
    ###############################################################
    ftable(Titanic)#创建“ 泰坦尼克数据”列联表。
    library(vcd)
    library(grid)
    mosaic(Titanic, shade=TRUE, legend=TRUE)

    箱线图

    #箱线图
    attach(mtcars)
    boxplot(mpg,main="箱线图",ylab="gh")
    #两个组之间的比较箱线图
    boxplot(mpg~cyl,data=mtcars,main="jjgjh",
    xlab = "hjklk",ylab = "ghjgkh")
    #两个组之间的比较箱线图
    boxplot(mpg ~ cyl, data = mtcars, notch = TRUE,
    varwidth = TRUE, col = "red",
    main = "Car Mileage Data",
    xlab = "Number of Cylinders",
    ylab = "Miles Per Gallon")
    #两个交叉因子的箱线图
    mtcars$cyl.f <- factor(mtcars$cyl,
    levels = c(4, 6, 8),
    labels = c("4", "6", "8"))

    mtcars$am.f <- factor(mtcars$am, levels = c(0, 1),
    labels = c("auto", "standard"))

    boxplot(mpg ~ am.f * cyl.f, data = mtcars,
    varwidth = TRUE, col = c("gold", "darkgreen"),
    main = "MPG Distribution by Auto Type",
    xlab = "Auto Type",notch = TRUE)

     

     

     

    相关图

    #相关图
    options(digits=5)#保留小数位数
    cor(mtcars)
    #
    library(corrgram)
    corrgram(mtcars, order=TRUE,lower.panel=panel.shade,
    upper.panel=panel.pie, text.panel=panel.txt,
    main="mtcars相互关系的相关图")
    #order=TRUE;确定是否排序;
    #panel,用于绘制每个面板内容的函数
    #####################################################################################
    #labels,用于对角面板的标签(而不是数据帧变量名)。 #
    #如果使用“order”选项,这个标签向量也会被函数适当地重新排序。 #
    ####################################################################################
    #lower.panel(upper.panel)在对角线下方/上方使用单独的面板功能。
    #diag.panel面板函数用于对角线。
    #text.panel,面板图加上标签
    #label.pos,水平和垂直的标签放置在对角板。
    #label.str,斜线标签的字符串旋转
    #cex.labels,对角线面板的图形参数。
    #font.labels,字体元素
    corrgram(mtcars, order=TRUE, lower.panel=panel.ellipse,
    upper.panel=panel.pts, text.panel=panel.txt,
    diag.panel=panel.minmax,
    main="使用散点图和椭圆的mtcar数据的相关图")

    corrgram(mtcars, lower.panel=panel.shade,
    upper.panel=NULL, text.panel=panel.txt,
    main="汽车里程数据(未排序)相关图")
    # Build a palette of `ncol` colours interpolated through dark goldenrod,
    # burlywood, dark khaki and dark green.
    #
    # ncol: number of colours to generate.
    # Returns a character vector of `ncol` colour codes.
    col.corrgram <- function(ncol) {
      anchor_colours <- c("darkgoldenrod4", "burlywood1",
                          "darkkhaki", "darkgreen")
      make_palette <- colorRampPalette(anchor_colours)
      make_palette(ncol)
    }
    #colorRampPalette,这些函数返回内插一组给定颜色的函数,以创建新的调色板(如topo.colors)和颜色坡道(将间隔[0,1]映射到颜色(如灰色)的函数)。
    # Corrgram drawn with the custom palette. The original call never passed
    # col.corrgram, so the plot came out in the default colours despite its
    # title; col.regions takes a function that maps a colour count to colours.
    corrgram(mtcars, order = TRUE, lower.panel = panel.shade,
             upper.panel = panel.pie, text.panel = panel.txt,
             col.regions = col.corrgram,
             main = "拥有不同颜色的相关图")

     

     

     

  • 相关阅读:
    有用工具
    Questions
    Verizon Wireless 4G LTE DROID
    测试的几点心得
    推荐书
    工作总结
    电脑故障排查检修积累
    Windows 8 下如何安装Framework 3.5
    空间域名的选择项
    自动化research
  • 原文地址:https://www.cnblogs.com/cxy-0902/p/12690580.html
Copyright © 2020-2023  润新知