8.2 某医院研究心电图指标对健康人(1),硬化病患者(2)和冠心病患者(3)的鉴别能力,先训练样本如下。试用距离判别(考虑方差相同与方差不同两种情况)、Bayes判别(考虑方差同和不同两种情况,且先验概率为11/23,7/23,5/23)对数据进行分析。
原始数据 data.txt如下, 3,4,5,6列为心电图指标,2列为类别
1 1 8.11 261.01 13.23 7.36 2 1 9.36 185.39 9.02 5.99 3 1 9.85 249.58 15.61 6.11 4 1 2.55 137.13 9.21 4.35 5 1 6.01 231.34 14.27 8.79 6 1 9.64 231.38 13.03 8.53 7 1 4.11 260.25 14.72 10.02 8 1 8.90 259.91 14.16 9.79 9 1 7.71 273.84 16.01 8.79 10 1 7.51 303.59 19.14 8.53 11 1 8.06 231.03 14.41 6.15 12 2 6.80 308.90 15.11 8.49 13 2 8.68 258.69 14.02 7.16 14 2 5.67 355.54 15.03 9.43 17 2 3.71 316.32 17.12 8.17 17 2 5.37 274.57 16.75 9.67 18 2 9.89 409.42 19.47 10.49 19 3 5.22 330.34 18.19 9.61 20 3 4.71 331.47 21.26 13.72 21 3 4.71 352.50 20.79 11.00 22 3 3.36 347.31 17.90 11.19 23 3 8.27 189.56 12.74 6.94
函数distinguish.distance.R
distinguish.distance<-function (TrnX, TrnG, TstX = NULL, var.equal = FALSE){ if ( is.factor(TrnG) == FALSE){ mx<-nrow(TrnX); mg<-nrow(TrnG) TrnX<-rbind(TrnX, TrnG) TrnG<-factor(rep(1:2, c(mx, mg))) } if (is.null(TstX) == TRUE) TstX<-TrnX if (is.vector(TstX) == TRUE) TstX<-t(as.matrix(TstX)) else if (is.matrix(TstX) != TRUE) TstX<-as.matrix(TstX) if (is.matrix(TrnX) != TRUE) TrnX<-as.matrix(TrnX) nx<-nrow(TstX) blong<-matrix(rep(0, nx), nrow=1, dimnames=list("blong", 1:nx)) g<-length(levels(TrnG)) mu<-matrix(0, nrow=g, ncol=ncol(TrnX)) for (i in 1:g) mu[i,]<-colMeans(TrnX[TrnG==i,]) D<-matrix(0, nrow=g, ncol=nx) if (var.equal == TRUE || var.equal == T){ for (i in 1:g) D[i,]<- mahalanobis(TstX, mu[i,], var(TrnX)) } else{ for (i in 1:g) D[i,]<- mahalanobis(TstX, mu[i,], var(TrnX[TrnG==i,])) } for (j in 1:nx){ dmin<-Inf for (i in 1:g) if (D[i,j]<dmin){ dmin<-D[i,j]; blong[j]<-i } } blong }
函数distinguish.bayes.R
distinguish.bayes<-function (TrnX, TrnG, p=rep(1, length(levels(TrnG))), TstX = NULL, var.equal = FALSE){ if ( is.factor(TrnG) == FALSE){ mx<-nrow(TrnX); mg<-nrow(TrnG) TrnX<-rbind(TrnX, TrnG) TrnG<-factor(rep(1:2, c(mx, mg))) } if (is.null(TstX) == TRUE) TstX<-TrnX if (is.vector(TstX) == TRUE) TstX<-t(as.matrix(TstX)) else if (is.matrix(TstX) != TRUE) TstX<-as.matrix(TstX) if (is.matrix(TrnX) != TRUE) TrnX<-as.matrix(TrnX) nx<-nrow(TstX) blong<-matrix(rep(0, nx), nrow=1, dimnames=list("blong", 1:nx)) g<-length(levels(TrnG)) mu<-matrix(0, nrow=g, ncol=ncol(TrnX)) for (i in 1:g) mu[i,]<-colMeans(TrnX[TrnG==i,]) D<-matrix(0, nrow=g, ncol=nx) if (var.equal == TRUE || var.equal == T){ for (i in 1:g){ d2 <- mahalanobis(TstX, mu[i,], var(TrnX)) D[i,] <- d2 - 2*log(p[i]) } } else{ for (i in 1:g){ S<-var(TrnX[TrnG==i,]) d2 <- mahalanobis(TstX, mu[i,], S) D[i,] <- d2 - 2*log(p[i])-log(det(S)) } } for (j in 1:nx){ dmin<-Inf for (i in 1:g) if (D[i,j]<dmin){ dmin<-D[i,j]; blong[j]<-i } } blong }
运行脚本
data <- read.table("data.txt"); X <- data[,3:6]; G <- factor(data[,2]); source("distinguish.distance.R"); distinguish.distance(X,G); # 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 #blong 2 1 1 1 1 1 1 1 1 1 1 2 1 2 2 1 2 2 3 3 3 3 distinguish.distance(X,G,var.equal=TRUE); # 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 #blong 1 1 1 1 1 1 3 1 1 3 1 2 1 2 2 3 2 2 3 3 3 1 source("distinguish.bayes.R"); distinguish.bayes(X,G, p=c(rep(11/23,11),rep(7/23,7),rep(5/23,5))); # 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 #blong 2 1 1 1 1 1 1 1 1 1 1 2 1 2 2 1 2 2 3 3 3 3 distinguish.bayes(X,G, p=c(rep(11/23,11),rep(7/23,7),rep(5/23,5)),var.equal=TRUE); # 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 #blong 1 1 1 1 1 1 3 1 1 3 1 2 1 2 2 3 2 2 3 3 3 1
博文源代码和习题均来自于教材《统计建模与R软件》(ISBN:9787302143666,作者:薛毅)。