Count/tally observations by group

tally() is a convenient wrapper for summarise() that will either call n() or sum(n), depending on whether you're tallying for the first time or re-tallying. count() is similar, but calls group_by() before and ungroup() after. If the data is already grouped, count() adds an additional group that is removed afterwards.

add_tally() adds a column n to a table based on the number of items within each existing group, while add_count() is a shortcut that does the grouping as well. These functions are to tally() and count() as mutate() is to summarise(): they add an additional column rather than collapsing each group.

tally(x, wt = NULL, sort = FALSE, name = "n")

count(x, ..., wt = NULL, sort = FALSE, name = "n",
  .drop = group_by_drop_default(x))

add_tally(x, wt, sort = FALSE, name = "n")

add_count(x, ..., wt = NULL, sort = FALSE, name = "n")

Arguments

x	a `tbl()` to tally/count.
wt	(Optional) If omitted (and no variable named `n` exists in the data), will count the number of rows. If specified, will perform a "weighted" tally by summing the (non-missing) values of variable `wt`. A column named `n` (but not `nn` or `nnn`) will be used as weighting variable by default in `tally()`, but not in `count()`. This argument is automatically quoted and later evaluated in the context of the data frame. It supports unquoting. See `vignette("programming")` for an introduction to these concepts.
sort	If `TRUE`, will sort the output in descending order of `n`.
name	The output column name. If omitted, it will be `n`.
...	Variables to group by.
.drop	See `group_by()`. Defaults to `group_by_drop_default(x)`.

Value

A tbl, grouped the same way as x.

Note

The column name in the returned data is given by the name argument, set to "n" by default.

If the data already has a column by that name, the name of the output column will be prefixed by an extra "n" as many times as necessary (for example, "nn" or "nnn" with the default name).

Examples

# tally() is short-hand for summarise()
mtcars %>% tally()
#>    n
#> 1 32
mtcars %>% group_by(cyl) %>% tally()
#> # A tibble: 3 x 2
#>     cyl     n
#>   <dbl> <int>
#> 1     4    11
#> 2     6     7
#> 3     8    14
# count() is a short-hand for group_by() + tally()
mtcars %>% count(cyl)
#> # A tibble: 3 x 2
#>     cyl     n
#>   <dbl> <int>
#> 1     4    11
#> 2     6     7
#> 3     8    14
# Note that if the data is already grouped, count() adds
# an additional group that is removed afterwards
mtcars %>% group_by(gear) %>% count(carb)
#> # A tibble: 11 x 3
#> # Groups:   gear [3]
#>     gear  carb     n
#>    <dbl> <dbl> <int>
#>  1     3     1     3
#>  2     3     2     4
#>  3     3     3     3
#>  4     3     4     5
#>  5     4     1     4
#>  6     4     2     4
#>  7     4     4     4
#>  8     5     2     2
#>  9     5     4     1
#> 10     5     6     1
#> 11     5     8     1

# add_tally() is short-hand for mutate()
mtcars %>% add_tally()
#> # A tibble: 32 x 12
#>      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb     n
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
#>  1  21       6  160    110  3.9   2.62  16.5     0     1     4     4    32
#>  2  21       6  160    110  3.9   2.88  17.0     0     1     4     4    32
#>  3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1    32
#>  4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1    32
#>  5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2    32
#>  6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1    32
#>  7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4    32
#>  8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2    32
#>  9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2    32
#> 10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4    32
#> # … with 22 more rows
# add_count() is a short-hand for group_by() + add_tally()
mtcars %>% add_count(cyl)
#> # A tibble: 32 x 12
#>      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb     n
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
#>  1  21       6  160    110  3.9   2.62  16.5     0     1     4     4     7
#>  2  21       6  160    110  3.9   2.88  17.0     0     1     4     4     7
#>  3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1    11
#>  4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1     7
#>  5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2    14
#>  6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1     7
#>  7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4    14
#>  8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2    11
#>  9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2    11
#> 10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4     7
#> # … with 22 more rows

# count() and tally() are designed so that you can call
# them repeatedly, each time rolling up a level of detail
species <-
 starwars %>%
 count(species, homeworld, sort = TRUE)
species
#> # A tibble: 58 x 3
#>    species  homeworld     n
#>    <chr>    <chr>     <int>
#>  1 Human    Tatooine      8
#>  2 Human    Naboo         5
#>  3 Human    NA            5
#>  4 Gungan   Naboo         3
#>  5 Human    Alderaan      3
#>  6 Droid    Tatooine      2
#>  7 Droid    NA            2
#>  8 Human    Corellia      2
#>  9 Human    Coruscant     2
#> 10 Kaminoan Kamino        2
#> # … with 48 more rows
species %>% count(species, sort = TRUE)
#> # A tibble: 38 x 2
#>    species      n
#>    <chr>    <int>
#>  1 Human       16
#>  2 Droid        3
#>  3 NA           3
#>  4 Zabrak       2
#>  5 Aleena       1
#>  6 Besalisk     1
#>  7 Cerean       1
#>  8 Chagrian     1
#>  9 Clawdite     1
#> 10 Dug          1
#> # … with 28 more rows

# Change the name of the newly created column:
species <-
 starwars %>%
 count(species, homeworld, sort = TRUE, name = "n_species_by_homeworld")
species
#> # A tibble: 58 x 3
#>    species  homeworld n_species_by_homeworld
#>    <chr>    <chr>                      <int>
#>  1 Human    Tatooine                       8
#>  2 Human    Naboo                          5
#>  3 Human    NA                             5
#>  4 Gungan   Naboo                          3
#>  5 Human    Alderaan                       3
#>  6 Droid    Tatooine                       2
#>  7 Droid    NA                             2
#>  8 Human    Corellia                       2
#>  9 Human    Coruscant                      2
#> 10 Kaminoan Kamino                         2
#> # … with 48 more rows
species %>%
 count(species, sort = TRUE, name = "n_species")
#> # A tibble: 38 x 2
#>    species  n_species
#>    <chr>        <int>
#>  1 Human           16
#>  2 Droid            3
#>  3 NA               3
#>  4 Zabrak           2
#>  5 Aleena           1
#>  6 Besalisk         1
#>  7 Cerean           1
#>  8 Chagrian         1
#>  9 Clawdite         1
#> 10 Dug              1
#> # … with 28 more rows

# add_count() is useful for groupwise filtering
# e.g.: show details for species that have a single member
starwars %>%
  add_count(species) %>%
  filter(n == 1)
#> # A tibble: 29 x 14
#>    name  height  mass hair_color skin_color eye_color birth_year gender
#>    <chr>  <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
#>  1 Gree…    173    74 NA         green      black             44 male  
#>  2 Jabb…    175  1358 NA         green-tan… orange           600 herma…
#>  3 Yoda      66    17 white      green      brown            896 male  
#>  4 Bossk    190   113 none       green      red               53 male  
#>  5 Ackb…    180    83 none       brown mot… orange            41 male  
#>  6 Wick…     88    20 brown      brown      brown              8 male  
#>  7 Nien…    160    68 none       grey       black             NA male  
#>  8 Nute…    191    90 none       mottled g… red               NA male  
#>  9 Watto    137    NA black      blue, grey yellow            NA male  
#> 10 Sebu…    112    40 none       grey, red  orange            NA male  
#> # … with 19 more rows, and 6 more variables: homeworld <chr>, species <chr>,
#> #   films <list>, vehicles <list>, starships <list>, n <int>