• R语言学习笔记(十):重抽样与自助法


    #置换实验 Coin包
    install.packages(c("coin"))
    
    #lmPerm包
    install.packages("lmPerm")
    
    
    #独立两样本和K样本检验
    library(coin)
    score<-c(40,57,45,55,58,57,64,55,62,65)
    treatment<-factor(c(rep("A",5),rep("B",5)))
    mydata<-data.frame(treatment,score)
    t.test(score~treatment,data=mydata,var.equal=TRUE)


    Two Sample t-test

    data: score by treatment
    t = -2.345, df = 8, p-value = 0.04705
    alternative hypothesis: true difference in means is not equal to 0
    95 percent confidence interval:
    -19.0405455 -0.1594545
    sample estimates:
    mean in group A mean in group B
    51.0 60.6

    #置换T检验
    library(coin)
    oneway_test(score~treatment,data=mydata,distribution="exact")

    Exact Two-Sample Fisher-Pitman Permutation Test

    data: score by treatment (A, B)
    Z = -1.9147, p-value = 0.07143
    alternative hypothesis: true mu is not equal to 0

    结果显著:P<0.05

    结果不显著:P>0.05

    上面这两个测试中所得的P值相差比较大,并且样本容易比较小,所以置换T检验更有说服力,因此我们认为改组样本得到的结果不显著P>0.05

    #Wilcox.test
    library(MASS)
    UScrime<-transform(UScrime,So=factor(So))
    wilcox_test(Prob~So,data=UScrime,distribution="exact")


    Exact Wilcoxon-Mann-Whitney Test

    data: Prob by So (0, 1)
    Z = -3.7493, p-value = 8.488e-05
    alternative hypothesis: true mu is not equal to 0

    wilcox.test(Prob~So,data=UScrime,distribution="exact")

    Wilcoxon rank sum test

    data: Prob by So
    W = 81, p-value = 8.488e-05
    alternative hypothesis: true location shift is not equal to 0

    #chisq_Test
    library(coin)
    library(vcd)
    Arthritis<-transform(Arthritis,Improved=as.factor(as.numeric(Improved)))
    set.seed(1234)
    chisq_test(Treatment~Improved,data=Arthritis,distribution=approximate(B=9999))

    Approximative Pearson Chi-Squared Test

    data: Treatment by Improved (1, 2, 3)
    chi-squared = 13.055, p-value = 0.0018

    #spearman.test 数值变量间的独立性
    states<-as.data.frame(state.x77)
    set.seed(1234)
    spearman_test(Illiteracy~Murder,data=states,distribution=approximate(B=9999))

    Approximative Spearman Correlation Test

    data: Illiteracy by Murder
    Z = 4.7065, p-value < 2.2e-16
    alternative hypothesis: true rho is not equal to 0

    #两样本和K样本相关性检验
    library(coin)
    library(MASS)
    wilcoxsign_test(U1~U2,data=UScrime,distribution="exact")

    Exact Wilcoxon-Pratt Signed-Rank Test

    data: y by x (pos, neg)
    stratified by block
    Z = 5.9691, p-value = 1.421e-14
    alternative hypothesis: true mu is not equal to 0

    #lmPerm包的置换检验
    library(lmPerm)
    
    #简单线性回归置换
    set.seed(1234)
    fit<-lmp(weight~height,data=women,perm="Prob")
    summary(fit)

    Call:
    lmp(formula = weight ~ height, data = women, perm = "Prob")

    Residuals:
    Min 1Q Median 3Q Max
    -1.7333 -1.1333 -0.3833 0.7417 3.1167

    Coefficients:
    Estimate Iter Pr(Prob)
    height 3.45 5000 <2e-16 ***
    ---
    Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

    Residual standard error: 1.525 on 13 degrees of freedom
    Multiple R-Squared: 0.991, Adjusted R-squared: 0.9903
    F-statistic: 1433 on 1 and 13 DF, p-value: 1.091e-14

    #多项式回归的置换检验
    set.seed(1234)
    fit<-lmp(weight~height+I(height^2),data=women,perm="Prob")
    summary(fit)

    Call:
    lmp(formula = weight ~ height + I(height^2), data = women, perm = "Prob")

    Residuals:
    Min 1Q Median 3Q Max
    -0.509405 -0.296105 -0.009405 0.286151 0.597059

    Coefficients:
    Estimate Iter Pr(Prob)
    height -7.34832 5000 <2e-16 ***
    I(height^2) 0.08306 5000 <2e-16 ***
    ---
    Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

    Residual standard error: 0.3841 on 12 degrees of freedom
    Multiple R-Squared: 0.9995, Adjusted R-squared: 0.9994
    F-statistic: 1.139e+04 on 2 and 12 DF, p-value: < 2.2e-16

    #多元回归
    library(lmPerm)
    set.seed(1234)
    states<-as.data.frame(state.x77)
    fit<-lmp(Murder~Population+Illiteracy+Income+Frost,data=states,perm="Prob")
    summary(fit)

    Call:
    lmp(formula = Murder ~ Population + Illiteracy + Income + Frost,
    data = states, perm = "Prob")

    Residuals:
    Min 1Q Median 3Q Max
    -4.79597 -1.64946 -0.08112 1.48150 7.62104

    Coefficients:
    Estimate Iter Pr(Prob)
    Population 2.237e-04 51 1.0000
    Illiteracy 4.143e+00 5000 0.0004 ***
    Income 6.442e-05 51 1.0000
    Frost 5.813e-04 51 0.8627
    ---
    Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

    Residual standard error: 2.535 on 45 degrees of freedom
    Multiple R-Squared: 0.567, Adjusted R-squared: 0.5285
    F-statistic: 14.73 on 4 and 45 DF, p-value: 9.133e-08

    #单因素方差分析和协方差分析
    library(lmPerm)
    library(multcomp)
    set.seed(1234)
    fit<-aovp(response~trt,data=cholesterol,perm="Prob")
    anova(fit)

    Analysis of Variance Table

    Response: response
    Df R Sum Sq R Mean Sq Iter Pr(Prob)
    trt 4 1351.37 337.84 5000 < 2.2e-16 ***
    Residuals 45 468.75 10.42
    ---
    Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

    #双因素方差分析
    library(lmPerm)
    set.seed(1234)
    fit<-aovp(len~supp*dose,data=ToothGrowth,perm="Prob")
    anova(fit)

    Analysis of Variance Table

    Response: len
    Df R Sum Sq R Mean Sq Iter Pr(Prob)
    supp 1 205.35 205.35 5000 < 2e-16 ***
    dose 1 2224.30 2224.30 5000 < 2e-16 ***
    supp:dose 1 88.92 88.92 2032 0.04724 *
    Residuals 56 933.63 16.67
    ---
    Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

    #自助法
    library(boot)
    set.seed(1234)
    rsq<-function(formula,data,indices){
      d<-data[indices,]
      fit<-lm(formula,data=d)
      return(summary(fit)$r.square)
      
    }
    
    
    results<-boot(data=mtcars,statistic=rsq,R=1000,formula=mpg~wt+disp)
    print(results)
    
    boot.ci(results,type=c("perc","bca"))

    BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
    Based on 1000 bootstrap replicates

    CALL :
    boot.ci(boot.out = results, type = c("perc", "bca"))

    Intervals :
    Level Percentile BCa
    95% ( 0.6838, 0.8833 ) ( 0.6344, 0.8549 )
    Calculations and Intervals on Original Scale
    Some BCa intervals may be unstable

    #多个统计量的自助法
    bs<-function(formula,data,indices){
      d<-data[indices,]
      fit<-lm(formula,data=d)
      return(coef(fit))
      
    }
    
    library(boot)
    set.seed(1234)
    results<-boot(data=mtcars,statistic=bs,R=1000,formula=mpg~wt+disp)
    print(results)
    
    
    plot(results,index=1)

  • 相关阅读:
    python--多线程&多进程
    python--MyRequest请求模块封装
    python--面向对象
    python--异常处理
    python--sys.argv 运行python文件时获取参数
    python--搭建测试环境
    mac常用快捷键
    九、django通过forms对页面数据校验
    八、django后台管理
    七、django页面请求处理
  • 原文地址:https://www.cnblogs.com/GhostBear/p/7736915.html
Copyright © 2020-2023  润新知