Extract attributes, text, tag name and children from HTML.

html_text(x, trim = FALSE)

html_name(x)

html_children(x)

html_attrs(x)

html_attr(x, name, default = NA_character_)

Arguments

x	A document, node, or node set.
trim	If `TRUE`, leading and trailing spaces are trimmed.
name	Name of attribute to retrieve.
default	A string used as the value for any node that does not have the attribute.

Value

html_attr, html_name and html_text, a character vector; html_attrs, a list; html_children, a node set.

Examples

movie <- read_html("https://en.wikipedia.org/wiki/The_Lego_Movie")
cast <- html_nodes(movie, "tr:nth-child(8) .plainlist a")
html_text(cast)
#> [1] "Chris Pratt"     "Will Ferrell"    "Elizabeth Banks" "Will Arnett"    
#> [5] "Nick Offerman"   "Alison Brie"     "Charlie Day"     "Liam Neeson"    
#> [9] "Morgan Freeman" 
html_name(cast)
#> [1] "a" "a" "a" "a" "a" "a" "a" "a" "a"
html_attrs(cast)
#> [[1]]
#>                href               title 
#> "/wiki/Chris_Pratt"       "Chris Pratt" 
#> 
#> [[2]]
#>                 href                title 
#> "/wiki/Will_Ferrell"       "Will Ferrell" 
#> 
#> [[3]]
#>                    href                   title 
#> "/wiki/Elizabeth_Banks"       "Elizabeth Banks" 
#> 
#> [[4]]
#>                href               title 
#> "/wiki/Will_Arnett"       "Will Arnett" 
#> 
#> [[5]]
#>                  href                 title 
#> "/wiki/Nick_Offerman"       "Nick Offerman" 
#> 
#> [[6]]
#>                href               title 
#> "/wiki/Alison_Brie"       "Alison Brie" 
#> 
#> [[7]]
#>                href               title 
#> "/wiki/Charlie_Day"       "Charlie Day" 
#> 
#> [[8]]
#>                href               title 
#> "/wiki/Liam_Neeson"       "Liam Neeson" 
#> 
#> [[9]]
#>                   href                  title 
#> "/wiki/Morgan_Freeman"       "Morgan Freeman" 
#> 
html_attr(cast, "href")
#> [1] "/wiki/Chris_Pratt"     "/wiki/Will_Ferrell"    "/wiki/Elizabeth_Banks"
#> [4] "/wiki/Will_Arnett"     "/wiki/Nick_Offerman"   "/wiki/Alison_Brie"    
#> [7] "/wiki/Charlie_Day"     "/wiki/Liam_Neeson"     "/wiki/Morgan_Freeman"