这里有一种方法。表头的格式让事情变得复杂,但下面的代码可以正常工作。它提取的是整张表,而不仅仅是第一行。
suppressPackageStartupMessages({
library(rvest)
library(dplyr)
library(ggplot2)
})
# Download and parse the statistics page once; every later step queries
# this parsed document.
url_data <- "https://www.immd.gov.hk/eng/stat_20220901.html"
page <- read_html(url_data)
# Column labels: the text of every cell whose `headers` attribute is
# "Arrival" or "Departure", each prefixed with its group name so the two
# groups stay distinguishable (e.g. "Arrival Total" vs "Departure Total").
# Arrival labels come first, then Departure labels.
col_names <- c(
  paste("Arrival", html_text(html_elements(page, "[headers='Arrival']"))),
  paste("Departure", html_text(html_elements(page, "[headers='Departure']")))
)
# Row labels: the text of every cell whose `headers` attribute is
# "Control_Point".
row_names <- html_text(html_elements(page, "[headers='Control_Point']"))
# Build the numeric table from every cell whose class attribute is exactly
# "hRight". The cell texts are read in document order and laid out row by
# row (byrow = TRUE), one row per entry of `row_names`, then labelled with
# `col_names` and `row_names`.
final <- page %>%
  html_elements("[class='hRight']") %>%
  html_text() %>%
  # Strip ALL thousands separators: sub() would remove only the first comma,
  # so a value such as "1,234,567" would stay non-numeric and become NA.
  gsub(",", "", ., fixed = TRUE) %>%
  as.numeric() %>%
  matrix(nrow = length(row_names), byrow = TRUE) %>%
  as.data.frame() %>%
  setNames(col_names) %>%
  `row.names<-`(row_names)
final
#> Arrival Hong Kong Residents
#> Airport 4258
#> Express Rail Link West Kowloon 0
#> Hung Hom 0
#> Lo Wu 0
#> Lok Ma Chau Spur Line 0
#> Heung Yuen Wai 0
#> Hong Kong-Zhuhai-Macao Bridge 333
#> Lok Ma Chau 0
#> Man Kam To 0
#> Sha Tau Kok 0
#> Shenzhen Bay 3404
#> China Ferry Terminal 0
#> Harbour Control 0
#> Kai Tak Cruise Terminal 0
#> Macau Ferry Terminal 0
#> Total 7995
#> Arrival Mainland Visitors Arrival Other Visitors
#> Airport 1488 422
#> Express Rail Link West Kowloon 0 0
#> Hung Hom 0 0
#> Lo Wu 0 0
#> Lok Ma Chau Spur Line 0 0
#> Heung Yuen Wai 0 0
#> Hong Kong-Zhuhai-Macao Bridge 28 39
#> Lok Ma Chau 0 0
#> Man Kam To 0 0
#> Sha Tau Kok 0 0
#> Shenzhen Bay 348 37
#> China Ferry Terminal 0 0
#> Harbour Control 0 0
#> Kai Tak Cruise Terminal 0 0
#> Macau Ferry Terminal 0 0
#> Total 1864 498
#> Arrival Total Departure Hong Kong Residents
#> Airport 6168 3775
#> Express Rail Link West Kowloon 0 0
#> Hung Hom 0 0
#> Lo Wu 0 0
#> Lok Ma Chau Spur Line 0 0
#> Heung Yuen Wai 0 0
#> Hong Kong-Zhuhai-Macao Bridge 400 243
#> Lok Ma Chau 0 0
#> Man Kam To 0 0
#> Sha Tau Kok 0 0
#> Shenzhen Bay 3789 1301
#> China Ferry Terminal 0 0
#> Harbour Control 0 0
#> Kai Tak Cruise Terminal 0 0
#> Macau Ferry Terminal 0 0
#> Total 10357 5319
#> Departure Mainland Visitors
#> Airport 1154
#> Express Rail Link West Kowloon 0
#> Hung Hom 0
#> Lo Wu 0
#> Lok Ma Chau Spur Line 0
#> Heung Yuen Wai 0
#> Hong Kong-Zhuhai-Macao Bridge 194
#> Lok Ma Chau 0
#> Man Kam To 0
#> Sha Tau Kok 0
#> Shenzhen Bay 524
#> China Ferry Terminal 0
#> Harbour Control 0
#> Kai Tak Cruise Terminal 0
#> Macau Ferry Terminal 0
#> Total 1872
#> Departure Other Visitors Departure Total
#> Airport 315 5244
#> Express Rail Link West Kowloon 0 0
#> Hung Hom 0 0
#> Lo Wu 0 0
#> Lok Ma Chau Spur Line 0 0
#> Heung Yuen Wai 0 0
#> Hong Kong-Zhuhai-Macao Bridge 15 452
#> Lok Ma Chau 0 0
#> Man Kam To 0 0
#> Sha Tau Kok 0 0
#> Shenzhen Bay 28 1853
#> China Ferry Terminal 0 0
#> Harbour Control 0 0
#> Kai Tak Cruise Terminal 0 0
#> Macau Ferry Terminal 0 0
#> Total 358 7549
由 reprex 包(v2.0.2)创建于 2022-09-18
要用 ggplot 绘制饼图,请先绘制条形图,然后将其转换为极坐标。
# Pie chart of the first row of `final` (labelled "Airport" in the printed
# table above). ggplot2 has no pie geom, so draw a single stacked bar and
# wrap it into a circle with polar coordinates.
Airport <- final[1, ]
Airport %>%
  # Transpose so each original column becomes one row; the resulting value
  # column takes the name of the selected row.
  t() %>%
  as.data.frame() %>%
  # After transposing, the row names are the original column labels.
  mutate(`Arrival/Departure` = rownames(.)) %>%
  ggplot(aes(x = "", y = Airport, fill = `Arrival/Departure`)) +
  geom_col(width = 1) +
  scale_fill_manual(
    values = RColorBrewer::brewer.pal(n = 8, name = "Spectral")
  ) +
  coord_polar(theta = "y", start = 0) +
  theme_void()
由 reprex 包(v2.0.2)创建于 2022-09-18