R语言中subset函数同时依据行列进行数据筛选
1、测试数据
manager <- 1:5
date <- c("10/24/08","10/28/08","10/1/08","10/12/08","5/1/09")
country <- c("US","US","UK","UK","UK")
gender <- c("M","F","F","M","F")
age <- c(32,45,25,39,99)
q1 <- c(5,3,3,3,2)
q2 <- c(4,5,5,3,2)
q3 <- c(5,2,5,4,1)
q4 <- c(5,5,5,NA,2)
q5 <- c(5,5,2,NA,1)
leadership <- data.frame(manager,date,country,gender,age,q1,q2,q3,q4,q5,
stringsAsFactors = F)
leadership$age[leadership$age == 99] <- NA
leadership <- within(leadership,{
agecat = NA
agecat[age > 75] <- "Elder"
agecat[age >= 55 & age <= 75] <- "Middle age"
agecat[age < 55] <- "Young"
})
leadership
2、测试
new1 <- subset(leadership, country == "UK" & gender == "F",
select = c(date, age, q4))
new1
new2 <- subset(leadership, country == "US" | gender == "M",
select = 1:3)
new2
new3 <- subset(leadership, country == "US" | gender == "M",
select = date:q2)
new3
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
R语言中subset函数。用于同时选择变量和观测值。
a <- sample(1:20, 20)
b <- sample(letters[1:20],20)
c <- sample(LETTERS[1:20],20)
d <- data.frame(a,b,c)
d <- d[order(d$a),]
d
e <- subset(d, a > 5 & a < 9, select = c(a,b))
e