• R语言实战


    1. 一个数据处理难题

    a. 确定一个单一的成绩衡量指标

    b. 将前20%的学生评定为A,接下来20%的学生评定为B,以此类推

    c. 按字母顺序对学生排序

    2. 数值和字符处理函数

    数值函数(数学、统计、概率)

    字符处理函数

    2.1 数学函数

    > abs(-4)
    [1] 4
    > 
    > sqrt(25)
    [1] 5
    > 
    > ceiling(3.475)
    [1] 4
    > 
    > floor(3.475)
    [1] 3
    > 
    > trunc(5.99)
    [1] 5
    > 
    > round(3.475, digits=2)
    [1] 3.48
    > 
    > signif(3.475, digits=2)
    [1] 3.5
    > 
    > cos(3.1415926)
    [1] -1
    > 
    > sin(3.1415926)
    [1] 5.358979e-08
    > 
    > acos(-0.416)
    [1] 1.999839
    > 
    > sinh(2)
    [1] 3.62686
    > 
    > asinh(3.627)
    [1] 2.000037
    > 
    > log(10)
    [1] 2.302585
    > 
    > log10(10)
    [1] 1
    > 
    > exp(2.3026)
    [1] 10.00015
    > 
    

    2.2 统计函数

    > mean(c(1, 2, 3, 4))
    [1] 2.5
    > 
    > median(c(1, 2, 3, 4))
    [1] 2.5
    > 
    > sd(c(1, 2, 3, 4))
    [1] 1.290994
    > 
    > var(c(1, 2, 3, 4))
    [1] 1.666667
    > 
    > mad(c(1, 2, 3, 4))
    [1] 1.4826
    > 
    > x <- c(1, 2, 3, 4)
    > y <- quantile(x, c(.3, .84))
    > y
     30%  84% 
    1.90 3.52 
    > 
    > range(x)
    [1] 1 4
    > 
    > diff(range(x))
    [1] 3
    > 
    > sum(x)
    [1] 10
    > 
    > x <- c(1, 5, 23, 29)
    > diff(x)
    [1]  4 18  6
    > 
    > min(x)
    [1] 1
    > 
    > max(x)
    [1] 29
    > 
    > scale(x, center=TRUE, scale=TRUE)
               [,1]
    [1,] -0.9925397
    [2,] -0.6984539
    [3,]  0.6249324
    [4,]  1.0660612
    attr(,"scaled:center")
    [1] 14.5
    attr(,"scaled:scale")
    [1] 13.60147
    > 
    
    > x <- c(1, 2, 3, 4, 5, 6, 7, 8)
    > mean(x)
    [1] 4.5
    > sd(x)
    [1] 2.44949
    > 
    > n <- length(x)
    > meanx <- sum(x)/n
    > css <- sum((x - meanx)^2)
    > sdx <- sqrt(css / (n-1))
    > meanx
    [1] 4.5
    > sdx
    [1] 2.44949
    > 
    

    2.3 概率函数

    > x <- pretty(c(-3, 3), 30)
    > x
     [1] -3.0 -2.8 -2.6 -2.4 -2.2 -2.0 -1.8 -1.6 -1.4 -1.2 -1.0 -0.8 -0.6 -0.4 -0.2
    [16]  0.0  0.2  0.4  0.6  0.8  1.0  1.2  1.4  1.6  1.8  2.0  2.2  2.4  2.6  2.8
    [31]  3.0
    > y <- dnorm(x)
    > y
     [1] 0.004431848 0.007915452 0.013582969 0.022394530 0.035474593 0.053990967
     [7] 0.078950158 0.110920835 0.149727466 0.194186055 0.241970725 0.289691553
    [13] 0.333224603 0.368270140 0.391042694 0.398942280 0.391042694 0.368270140
    [19] 0.333224603 0.289691553 0.241970725 0.194186055 0.149727466 0.110920835
    [25] 0.078950158 0.053990967 0.035474593 0.022394530 0.013582969 0.007915452
    [31] 0.004431848
    > plot(x, y, type="l", xlab="NormalDeviate", ylab="Density", yaxs="i")
    > 
    

    > pnorm(1.96)
    [1] 0.9750021
    > qnorm(.9, mean=500, sd=100)
    [1] 628.1552
    > rnorm(50, mean=50, sd=10)
     [1] 67.26521 54.63231 42.90968 48.38989 73.67308 49.74476 57.81742 67.75197
     [9] 66.51772 48.48707 39.37449 35.09612 59.43735 58.02651 40.43783 51.18190
    [17] 63.75237 39.67564 42.67555 50.88800 43.47265 58.69022 64.55702 34.35042
    [25] 63.23016 45.81644 43.31544 54.58287 50.46310 31.72297 40.34214 55.06260
    [33] 42.25432 45.63078 56.23651 53.27949 47.83063 53.69351 56.68358 46.04020
    [41] 57.20872 52.52052 49.20011 47.71317 55.79194 42.20664 48.22365 43.57350
    [49] 33.02280 36.45630
    > 
    

    2.3.1 设定随机数种子

    > runif(5)
    [1] 0.8650632 0.2548104 0.7736314 0.9595250 0.5731663
    > runif(5)
    [1] 0.3458971 0.8683841 0.7487097 0.1382841 0.1953431
    > set.seed(1234)
    > runif(5)
    [1] 0.1137034 0.6222994 0.6092747 0.6233794 0.8609154
    > set.seed(1234)
    > runif(5)
    [1] 0.1137034 0.6222994 0.6092747 0.6233794 0.8609154
    > 
    

    2.3.2 生成多元正态数据

    > library(MASS)
    > options(digits=3)
    # 设定随机数种子(保证结果可重现)
    > set.seed(1234)
    # 指定均值向量、协方差阵
    > mean <- c(230.7, 146.7, 3.6)
    > sigma <- matrix(c(15360.8, 6721.2, -47.1,
    +                    6721.2, 4700.9, -16.5,
    +                     -47.1,  -16.5,   0.3), nrow=3, ncol=3)
    # 生成500个伪随机观测数据
    > mydata <- mvrnorm(500, mean, sigma)
    # 为方便起见,将结果从矩阵转换为数据框
    > mydata <- as.data.frame(mydata)
    # 为变量指定名称
    > names(mydata) <- c("y", "x1", "x2")
    # 确认拥有500个观测和3个变量
    > dim(mydata)
    [1] 500   3
    # 输出前10个观测
    > head(mydata, n=10)
           y    x1   x2
    1   98.8  41.3 3.43
    2  244.5 205.2 3.80
    3  375.7 186.7 2.51
    4  -59.2  11.2 4.71
    5  313.0 111.0 3.45
    6  288.8 185.1 2.72
    7  134.8 165.0 4.39
    8  171.7  97.4 3.64
    9  167.2 101.0 3.50
    10 121.1  94.5 4.10
    > 
    

    2.4 字符处理函数

  • 相关阅读:
    利用@media screen实现网页布局的自适应
    js判断手机的左右滑动
    文档流
    对文本段落操作的一些细节
    简易菜单的制作
    jQuery Scroll Follow
    node 监听接口
    浏览器通知
    webSocket
    前端学习路线
  • 原文地址:https://www.cnblogs.com/wnzhong/p/7501165.html
Copyright © 2020-2023  润新知