我们主要讲解以下函数: 1.lapply 2.sapply 3.apply 4.tapply 5.aggregate 6.doBy::summaryBy 7.split

# 1.基础包

## 1.1-lapply(对列表、数据框进行循环)

### lapply 返回一个列表,并无视输入变量的类型
# Build a named list of numeric vectors; b, c and d are random draws, so
# their means differ between runs (no seed is set).
x <- list(a = 1:5, b = rnorm(10), c = rnorm(20, 1), d = rnorm(100, 5))

# lapply() applies mean() to every element and returns the results as a list.
lapply(x, mean)
## $a
## [1] 3
##
## $b
## [1] -0.00473966
##
## $c
## [1] 1.198488
##
## $d
## [1] 4.94543
knitr::opts_chunk$set(echo = TRUE)
### 1.若结果是一个list,且每个元素长度均为1,则会返回一个向量

### 2.若结果是一个list,且每个元素长度相同且大于1,则会返回一个矩阵

### 3.若是其他更复杂的结果(例如各元素长度不一致),则仍会返回一个列表
# Same kind of input as for lapply(): a named list of numeric vectors.
# b, c and d are random draws, so their means differ between runs.
x <- list(a = 1:5, b = rnorm(10), c = rnorm(20, 1), d = rnorm(100, 5))

# Each mean has length 1, so sapply() simplifies the result to a named
# numeric vector instead of a list.
sapply(x, mean)
##          a          b          c          d
##  3.0000000 -0.2180703  1.2204910  4.9628198
knitr::opts_chunk$set(echo = TRUE)
# 4 x 6 matrix of standard-normal draws (values differ between runs).
x <- matrix(rnorm(24), 4, 6)

# Row sums: the second positional argument is MARGIN, so this is MARGIN = 1.
apply(x, 1, sum)
## [1]  1.7468117 -0.4684263 -2.6264194 -2.2550619

# Column means: MARGIN = 2.
apply(x, 2, mean)
## [1]  0.4134592 -0.3323806 -0.2345993 -0.3044445 -0.2624670 -0.1803418
knitr::opts_chunk$set(echo = TRUE)
# 30 random values: 10 from N(0, 1), 10 from U(0, 1), 10 from N(1, 1).
x <- c(rnorm(10), runif(10), rnorm(10, 1))
# Group labels 1, 2, 3 -- ten consecutive observations per group.
group <- rep(1:3, each = 10)
a <- data.frame(x = x, group = group)
a
##              x group
## 1  -0.14409365     1
## 2  -0.18574981     1
## 3  -0.72858991     1
## 4   0.28562046     1
## 5  -0.98111509     1
## 6   0.40507215     1
## 7   1.73524405     1
## 8   1.30026779     1
## 9  -0.16269420     1
## 10 -0.55332117     1
## 11  0.86508506     2
## 12  0.48553627     2
## 13  0.06496029     2
## 14  0.48918472     2
## 15  0.26840462     2
## 16  0.13526884     2
## 17  0.39505141     2
## 18  0.27805844     2
## 19  0.37989480     2
## 20  0.22612703     2
## 21  1.22601935     3
## 22  1.58276549     3
## 23  1.14964324     3
## 24  3.05631522     3
## 25  1.95935367     3
## 26  0.19120070     3
## 27 -0.15075080     3
## 28  0.46590728     3
## 29  1.35432261     3
## 30  0.48539527     3

# tapply() splits a$x by the levels of a$group and applies mean() per group.
tapply(a$x, a$group, mean)
##          1          2          3
## 0.09706406 0.35875715 1.13201720
knitr::opts_chunk$set(echo = TRUE)
执行下列就会出错
# Passing several columns to tapply() in one call fails with the error below:
# tapply(mtcars[, c('mpg', 'hp', 'wt')], mtcars$am, summary)
# Error in tapply(mtcars[, c("mpg", "hp", "wt")], mtcars$am, summary) :
#   arguments must have same length
# knitr::opts_chunk$set(echo = TRUE)

# Workaround: run tapply() once per column. Iterating over the data frame's
# columns with sapply() avoids the data-frame-to-matrix coercion that apply()
# performs, and yields the same 2 x 4 matrix (rows = am levels, cols = vars).
sapply(
  mtcars[, c("am", "mpg", "hp", "wt")],
  function(col) tapply(col, mtcars$am, mean)
)
##   am      mpg       hp       wt
## 0  0 17.14737 160.2632 3.768895
## 1  1 24.39231 126.8462 2.411000
knitr::opts_chunk$set(echo = TRUE)
接下来我们讲一下怎么弥补这个缺陷:

## 方法一:利用aggregate函数
# Summary helper: returns a named numeric vector holding the mean and the
# standard deviation of x.
Myfun <- function(x) {
  c(mean = mean(x), sd = sd(x))
}

# aggregate() applies Myfun to mpg, hp and wt within every am x cyl group.
aggregate(
  mtcars[, c("mpg", "hp", "wt")],
  by = list(am = mtcars$am, cyl = mtcars$cyl),
  Myfun
)
##   am cyl   mpg.mean     mpg.sd   hp.mean     hp.sd   wt.mean     wt.sd
## 1  0   4 22.9000000  1.4525839  84.66667  19.65536 2.9350000 0.4075230
## 2  1   4 28.0750000  4.4838599  81.87500  22.65542 2.0422500 0.4093485
## 3  0   6 19.1250000  1.6317169 115.25000   9.17878 3.3887500 0.1162164
## 4  1   6 20.5666667  0.7505553 131.66667  37.52777 2.7550000 0.1281601
## 5  0   8 15.0500000  2.7743959 194.16667  33.35984 4.1040833 0.7683069
## 6  1   8 15.4000000  0.5656854 299.50000  50.20458 3.3700000 0.2828427
knitr::opts_chunk$set(echo = TRUE)
# Method two: doBy::summaryBy(). The formula lists the summarised variables
# on the left of `~` and the grouping variables on the right; FUN is the
# Myfun helper (mean and sd) defined earlier in this document.
library(doBy)
## Warning: package 'doBy' was built under R version 3.5.3
doBy::summaryBy(mpg+hp+wt~am+cyl,data = mtcars,FUN=Myfun)
##   am cyl mpg.mean    mpg.sd   hp.mean    hp.sd  wt.mean     wt.sd
## 1  0   4 22.90000 1.4525839  84.66667 19.65536 2.935000 0.4075230
## 2  0   6 19.12500 1.6317169 115.25000  9.17878 3.388750 0.1162164
## 3  0   8 15.05000 2.7743959 194.16667 33.35984 4.104083 0.7683069
## 4  1   4 28.07500 4.4838599  81.87500 22.65542 2.042250 0.4093485
## 5  1   6 20.56667 0.7505553 131.66667 37.52777 2.755000 0.1281601
## 6  1   8 15.40000 0.5656854 299.50000 50.20458 3.370000 0.2828427
knitr::opts_chunk$set(echo = TRUE)
# Per-group means with dplyr. Inside summarise() the columns must be referred
# to by their bare names (mpg, hp): writing mtcars$mpg bypasses the grouping
# and returns the overall mean (20.1 / 147) on every row. The call is nested
# instead of piped so it does not depend on `%>%` being attached.
dplyr::summarise(
  dplyr::group_by(mtcars, am, cyl),
  mpg.mean = mean(mpg, na.rm = TRUE),
  hp.mean = mean(hp, na.rm = TRUE)
)
## Expected per-group values (rounded; the console prints them as a tibble
## grouped by am, so the exact formatting may differ):
##   am cyl mpg.mean hp.mean
## 1  0   4   22.900  84.667
## 2  0   6   19.125 115.250
## 3  0   8   15.050 194.167
## 4  1   4   28.075  81.875
## 5  1   6   20.567 131.667
## 6  1   8   15.400 299.500
knitr::opts_chunk$set(echo = TRUE)
定数目的组由因子列表确定
# Show the signature of split().
str(split)
## function (x, f, drop = FALSE, ...)

# function (x, f, drop = FALSE, ...)
# x is a vector or data frame, f is a factor or a list of factors, and drop
# says whether empty factor levels are discarded.
accepts <- mtcars[, c("am", "mpg", "wt", "hp")]

# Split the rows by transmission type (am): one data frame per level.
s <- split(accepts, accepts$am)

# For each piece, take the mean of columns 2:3 (mpg and wt). The inner
# lapply() returns a list, so sapply() yields a list-matrix with one column
# per am level.
sapply(s, function(x) lapply(x[, 2:3], function(col) mean(col, na.rm = TRUE)))
##     0        1
## mpg 17.14737 24.39231
## wt  3.768895 2.411
knitr::opts_chunk$set(echo = TRUE)
联系客服