Read xls and xlsx files
read_excel()
calls excel_format()
to determine if path
is xls or xlsx,
based on the file extension and the file itself, in that order. Use
read_xls()
and read_xlsx()
directly if you know better and want to
prevent such guessing.
read_excel(path, sheet = NULL, range = NULL, col_names = TRUE, col_types = NULL, na = "", trim_ws = TRUE, skip = 0, n_max = Inf, guess_max = min(1000, n_max), progress = readxl_progress(), .name_repair = "unique") read_xls(path, sheet = NULL, range = NULL, col_names = TRUE, col_types = NULL, na = "", trim_ws = TRUE, skip = 0, n_max = Inf, guess_max = min(1000, n_max), progress = readxl_progress(), .name_repair = "unique") read_xlsx(path, sheet = NULL, range = NULL, col_names = TRUE, col_types = NULL, na = "", trim_ws = TRUE, skip = 0, n_max = Inf, guess_max = min(1000, n_max), progress = readxl_progress(), .name_repair = "unique")
path | Path to the xls/xlsx file. |
---|---|
sheet | Sheet to read. Either a string (the name of a sheet), or an
integer (the position of the sheet). Ignored if the sheet is specified via
|
range | A cell range to read from, as described in cell-specification.
Includes typical Excel ranges like "B3:D87", possibly including the sheet
name like "Budget!B2:G14", and more. Interpreted strictly, even if the
range forces the inclusion of leading or trailing empty rows or columns.
Takes precedence over |
col_names |
|
col_types | Either |
na | Character vector of strings to interpret as missing values. By default, readxl treats blank cells as missing data. |
trim_ws | Should leading and trailing whitespace be trimmed? |
skip | Minimum number of rows to skip before reading anything, be it
column names or data. Leading empty rows are automatically skipped, so this
is a lower bound. Ignored if |
n_max | Maximum number of data rows to read. Trailing empty rows are
automatically skipped, so this is an upper bound on the number of rows in
the returned tibble. Ignored if |
guess_max | Maximum number of data rows to use for guessing column types. |
progress | Display a progress spinner? By default, the spinner appears
only in an interactive session, outside the context of knitting a document,
and when the call is likely to run for several seconds or more. See
|
.name_repair | Handling of column names. By default, readxl ensures
column names are not empty and are unique. If the tibble package version is
recent enough, there is full support for |
A tibble
cell-specification for more details on targetting cells with the
range
argument
#> # A tibble: 150 x 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> <dbl> <dbl> <dbl> <dbl> <chr> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # … with 140 more rows# Specify sheet either by position or by name read_excel(datasets, 2)#> # A tibble: 32 x 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # … with 22 more rowsread_excel(datasets, "mtcars")#> # A tibble: 32 x 11 #> mpg cyl disp hp drat wt qsec vs am gear carb #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # … with 22 more rows# Skip rows and use default column names read_excel(datasets, skip = 148, col_names = FALSE)#>#> #> #> #> #>#> # A tibble: 3 x 5 #> ...1 ...2 ...3 ...4 ...5 #> <dbl> <dbl> <dbl> <dbl> <chr> #> 1 6.5 3 5.2 2 virginica #> 2 6.2 3.4 5.4 2.3 virginica #> 3 5.9 3 5.1 1.8 virginica# Recycle a single column type read_excel(datasets, col_types = "text")#> # A tibble: 150 x 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> <chr> <chr> <chr> <chr> <chr> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # … with 140 more rows# Specify some col_types and guess others read_excel(datasets, col_types = c("text", "guess", "numeric", "guess", "guess"))#> # A tibble: 150 x 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> <chr> <dbl> <dbl> <dbl> <chr> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa #> 4 4.6 3.1 1.5 0.2 setosa #> 5 5 3.6 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa #> 7 4.6 3.4 1.4 0.3 setosa #> 8 5 3.4 1.5 0.2 setosa #> 9 4.4 2.9 1.4 0.2 setosa #> 10 4.9 3.1 1.5 0.1 setosa #> # … with 140 more rows# Accomodate a column with disparate types via col_type = "list" df <- read_excel(readxl_example("clippy.xlsx"), col_types = c("text", "list")) df#> # A tibble: 4 x 2 #> name value #> <chr> <list> #> 1 Name <chr [1]> #> 2 Species <chr [1]> #> 3 Approx date of death <dttm [1]> #> 4 Weight in grams <dbl [1]>df$value#> [[1]] #> [1] "Clippy" #> #> [[2]] #> [1] "paperclip" #> #> [[3]] #> [1] "2007-01-01 UTC" #> #> [[4]] #> [1] 0.9 #>#> [[1]] #> [1] "character" #> #> [[2]] #> [1] "character" #> #> [[3]] #> [1] "POSIXct" "POSIXt" #> #> [[4]] #> [1] "numeric" #># Limit the number of data rows read read_excel(datasets, n_max = 3)#> # A tibble: 3 x 5 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> <dbl> <dbl> <dbl> <dbl> <chr> #> 1 5.1 3.5 1.4 0.2 setosa #> 2 4.9 3 1.4 0.2 setosa #> 3 4.7 3.2 1.3 0.2 setosa# Read from an Excel range using A1 or R1C1 notation read_excel(datasets, range = "C1:E7")#> # A tibble: 6 x 3 #> Petal.Length Petal.Width Species #> <dbl> <dbl> <chr> #> 1 1.4 0.2 setosa #> 2 1.4 0.2 setosa #> 3 1.3 0.2 setosa #> 4 1.5 0.2 setosa #> 5 1.4 0.2 setosa #> 6 1.7 0.4 setosaread_excel(datasets, range = "R1C2:R2C5")#> # A tibble: 1 x 4 #> Sepal.Width Petal.Length Petal.Width Species #> <dbl> <dbl> <dbl> <chr> #> 1 3.5 1.4 0.2 setosa# Specify the sheet as part of the range read_excel(datasets, range = "mtcars!B1:D5")#> # A tibble: 4 x 3 #> cyl disp hp #> <dbl> <dbl> <dbl> #> 1 6 160 110 #> 2 6 160 110 #> 3 4 108 93 #> 4 6 258 110# Read only specific rows or columns read_excel(datasets, range = cell_rows(102:151), col_names = FALSE)#>#> #> #> #> #>#> # A tibble: 50 x 5 #> ...1 ...2 ...3 ...4 ...5 #> <dbl> <dbl> <dbl> <dbl> <chr> #> 1 6.3 3.3 6 2.5 virginica #> 2 5.8 2.7 5.1 1.9 virginica #> 3 7.1 3 5.9 2.1 virginica #> 4 6.3 2.9 5.6 1.8 virginica #> 5 6.5 3 5.8 2.2 virginica #> 6 7.6 3 6.6 2.1 virginica #> 7 4.9 2.5 4.5 1.7 virginica #> 8 7.3 2.9 6.3 1.8 virginica #> 9 6.7 2.5 5.8 1.8 virginica #> 10 7.2 3.6 6.1 2.5 virginica #> # … with 40 more rows#> # A tibble: 150 x 3 #> Sepal.Width Petal.Length Petal.Width #> <dbl> <dbl> <dbl> #> 1 3.5 1.4 0.2 #> 2 3 1.4 0.2 #> 3 3.2 1.3 0.2 #> 4 3.1 1.5 0.2 #> 5 3.6 1.4 0.2 #> 6 3.9 1.7 0.4 #> 7 3.4 1.4 0.3 #> 8 3.4 1.5 0.2 #> 9 2.9 1.4 0.2 #> 10 3.1 1.5 0.1 #> # … with 140 more rows#> [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"if (utils::packageVersion("tibble") > "1.4.2") { ## exploit full .name_repair flexibility from tibble ## "universal" names are unique and syntactic read_excel( readxl_example("deaths.xlsx"), range = "arts!A5:F15", .name_repair = "universal" ) ## specify name repair as a built-in function read_excel(readxl_example("clippy.xlsx"), .name_repair = toupper) ## specify name repair as a custom function my_custom_name_repair <- function(nms) tolower(gsub("[.]", "_", nms)) read_excel( readxl_example("datasets.xlsx"), .name_repair = my_custom_name_repair ) ## specify name repair as an anonymous function read_excel( readxl_example("datasets.xlsx"), sheet = "chickwts", .name_repair = ~ substr(.x, start = 1, stop = 3) ) }#>#> #> #>#> # A tibble: 71 x 2 #> wei fee #> <dbl> <chr> #> 1 179 horsebean #> 2 160 horsebean #> 3 136 horsebean #> 4 227 horsebean #> 5 217 horsebean #> 6 168 horsebean #> 7 108 horsebean #> 8 124 horsebean #> 9 143 horsebean #> 10 140 horsebean #> # … with 61 more rows