
do() is superseded as of dplyr 1.0.0, because its syntax never really felt like it belonged with the rest of dplyr. It's replaced by a combination of summarise() (which can now produce multiple rows and multiple columns), nest_by() (which creates a rowwise tibble of nested data), and across() (which allows you to access the data for the "current" group).

do(.data, ...)



.data: a tbl


...: Expressions to apply to each group. If named, results will be stored in a new column. If unnamed, should return a data frame. You can use . to refer to the current group. You cannot mix named and unnamed arguments.


# do() with unnamed arguments becomes summarise()
# . becomes across()
by_cyl <- mtcars %>% group_by(cyl)
by_cyl %>% do(head(., 2))
#> # A tibble: 6 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
# ->
by_cyl %>% summarise(head(across(), 2))
#> `summarise()` has grouped output by 'cyl'. You can override using the `.groups` argument.
#> # A tibble: 6 x 11 #> # Groups: cyl [3] #> cyl mpg disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 4 22.8 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 4 24.4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 6 21 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 6 21 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 8 18.7 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 8 14.3 360 245 3.21 3.57 15.8 0 0 3 4
by_cyl %>% slice_head(n = 2)
#> # A tibble: 6 x 11 #> # Groups: cyl [3] #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 2 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 3 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 4 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
# Can refer to variables directly
by_cyl %>% do(mean = mean(.$vs))
#> # A tibble: 3 x 2 #> # Rowwise: #> cyl mean #> <dbl> <list> #> 1 4 <dbl [1]> #> 2 6 <dbl [1]> #> 3 8 <dbl [1]>
# ->
by_cyl %>% summarise(mean = mean(vs))
#> # A tibble: 3 x 2 #> cyl mean #> <dbl> <dbl> #> 1 4 0.909 #> 2 6 0.571 #> 3 8 0
# do() with named arguments becomes nest_by() + mutate() & list()
models <- by_cyl %>% do(mod = lm(mpg ~ disp, data = .))
# ->
models <- mtcars %>%
  nest_by(cyl) %>%
  mutate(mod = list(lm(mpg ~ disp, data = data)))
models %>% summarise(rsq = summary(mod)$r.squared)
#> `summarise()` has grouped output by 'cyl'. You can override using the `.groups` argument.
#> # A tibble: 3 x 2 #> # Groups: cyl [3] #> cyl rsq #> <dbl> <dbl> #> 1 4 0.648 #> 2 6 0.0106 #> 3 8 0.270
# use broom to turn models into data
models %>% do(data.frame(
  var = names(coef(.$mod)),
  coef(summary(.$mod)))
)
#> # A tibble: 6 x 5 #> # Rowwise: #> var Estimate Std..Error t.value Pr...t.. #> <chr> <dbl> <dbl> <dbl> <dbl> #> 1 (Intercept) 40.9 3.59 11.4 0.00000120 #> 2 disp -0.135 0.0332 -4.07 0.00278 #> 3 (Intercept) 19.1 2.91 6.55 0.00124 #> 4 disp 0.00361 0.0156 0.232 0.826 #> 5 (Intercept) 22.0 3.35 6.59 0.0000259 #> 6 disp -0.0196 0.00932 -2.11 0.0568
# ->
if (requireNamespace("broom")) {
  models %>% summarise(broom::tidy(mod))
}
#> Loading required namespace: broom
#> `summarise()` has grouped output by 'cyl'. You can override using the `.groups` argument.
#> # A tibble: 6 x 6 #> # Groups: cyl [3] #> cyl term estimate std.error statistic p.value #> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> #> 1 4 (Intercept) 40.9 3.59 11.4 0.00000120 #> 2 4 disp -0.135 0.0332 -4.07 0.00278 #> 3 6 (Intercept) 19.1 2.91 6.55 0.00124 #> 4 6 disp 0.00361 0.0156 0.232 0.826 #> 5 8 (Intercept) 22.0 3.35 6.59 0.0000259 #> 6 8 disp -0.0196 0.00932 -2.11 0.0568