Xpathを使用する別のオプション。
library(RCurl)
library(XML)
theurl <- "http://en.wikipedia.org/wiki/Brazil_national_football_team"
webpage <- getURL(theurl)
webpage <- readLines(tc <- textConnection(webpage)); close(tc)
pagetree <- htmlTreeParse(webpage, error=function(...){}, useInternalNodes = TRUE)
# Extract table header and contents
tablehead <- xpathSApply(pagetree, "//*/table[@class='wikitable sortable']/tr/th", xmlValue)
results <- xpathSApply(pagetree, "//*/table[@class='wikitable sortable']/tr/td", xmlValue)
# Convert character vector to dataframe
content <- as.data.frame(matrix(results, ncol = 8, byrow = TRUE))
# Clean up the results
content[,1] <- gsub("Â ", "", content[,1])
tablehead <- gsub("Â ", "", tablehead)
names(content) <- tablehead
この結果を生成します
> head(content)
Opponent Played Won Drawn Lost Goals for Goals against % Won
1 Argentina 94 36 24 34 148 150 38.3%
2 Paraguay 72 44 17 11 160 61 61.1%
3 Uruguay 72 33 19 20 127 93 45.8%
4 Chile 64 45 12 7 147 53 70.3%
5 Peru 39 27 9 3 83 27 69.2%
6 Mexico 36 21 6 9 69 34 58.3%