# split 根据因子或因子列表,将向量或其他对象分组
#通常与lapply一起使用
# 用法:split(x, f) —— x 为向量/列表/数据框,f 为因子或因子列表
> x <- c(rnorm(5),runif(5),rnorm(5,1))
> x
[1] 0.61008707 0.81746169 -1.09859969 -1.78134612 -1.94262725 0.99760581
[7] 0.37793960 0.05258653 0.38525197 0.46051864 -0.65455547 2.40130937
[13] 1.33670458 2.30777912 -1.34873009
> f <- gl(3,5)
> f
[1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
Levels: 1 2 3
> split(x,f)
$`1`
[1] 0.6100871 0.8174617 -1.0985997 -1.7813461 -1.9426272
$`2`
[1] 0.99760581 0.37793960 0.05258653 0.38525197 0.46051864
$`3`
[1] -0.6545555 2.4013094 1.3367046 2.3077791 -1.3487301
> lapply(split(x,f),mean)
$`1`
[1] -0.6790049
$`2`
[1] 0.4547805
$`3`
[1] 0.8085015
> head(airquality)
Ozone Solar.R Wind Temp Month Day
1 41 190 7.4 67 5 1
2 36 118 8.0 72 5 2
3 12 149 12.6 74 5 3
4 18 313 11.5 62 5 4
5 NA NA 14.3 56 5 5
6 28 NA 14.9 66 5 6
> s <- split(airquality,airquality$Month)
> table(airquality$Month)
5 6 7 8 9
31 30 31 31 30
> lapply(s,function(x) colMeans(x[,c("Ozone","Wind","Temp")]))
$`5`
Ozone Wind Temp
NA 11.62258 65.54839
$`6`
Ozone Wind Temp
NA 10.26667 79.10000
$`7`
Ozone Wind Temp
NA 8.941935 83.903226
$`8`
Ozone Wind Temp
NA 8.793548 83.967742
$`9`
Ozone Wind Temp
NA 10.18 76.90
> sapply(s,function(x) colMeans(x[,c("Ozone","Wind","Temp")]))
5 6 7 8 9
Ozone NA NA NA NA NA
Wind 11.62258 10.26667 8.941935 8.793548 10.18
Temp 65.54839 79.10000 83.903226 83.967742 76.90
# Ozone 列含有 NA,colMeans 默认返回 NA;加上 na.rm = TRUE 可在计算均值前忽略缺失值
> sapply(s,function(x) colMeans(x[,c("Ozone","Wind","Temp")],na.rm = TRUE))
5 6 7 8 9
Ozone 23.61538 29.44444 59.115385 59.961538 31.44828
Wind 11.62258 10.26667 8.941935 8.793548 10.18000
Temp 65.54839 79.10000 83.903226 83.967742 76.90000