我正在使用 Go 的 golang.org/x/net/html 库(属于 golang.org/x 扩展库,并非标准库内置)。
package main
import (
"fmt"
"log"
"net/http"
"golang.org/x/net/html"
)
// url is the page fetched and tokenized by main.
const url = "https://google.com"
// main downloads url, tokenizes the response body as HTML and, for every
// token whose attribute list is non-empty, prints the attribute count followed
// by the attribute list.
//
// NOTE(review): this program is the reproduction of the problem being asked
// about. It calls h.Token() twice for the same h.Next(): once to measure the
// length and once to print. The recorded output shows the second call yielding
// an empty list even though the first reported a non-zero length. Presumably
// Token() consumes the tokenizer's pending tag data, so only the first call
// after each Next() sees the attributes — confirm against the x/net/html
// Tokenizer documentation. The remedy is to call Token() once per Next() and
// reuse the returned value.
func main() {
resp, err := http.Get(url)
if err != nil {
log.Fatal(err)
}
defer resp.Body.Close()
// Any status other than 200 aborts the program.
if resp.StatusCode != 200 {
log.Fatalf("Status code error: %d %s", resp.StatusCode, resp.Status)
}
h := html.NewTokenizer(resp.Body)
for {
// ErrorToken ends the loop; the underlying error is not inspected here.
if h.Next() == html.ErrorToken {
break
}
// First Token() call for this Next(): its Attr length is what gets reported.
l := len(h.Token().Attr)
if l != 0 {
fmt.Println("=======")
fmt.Println("Length", l) // greater than 0
// Second Token() call for the same Next(): a fresh Token value, not the
// one measured above.
fmt.Println("Attr", h.Token().Attr) // prints [] every time in the recorded output
}
}
}
以下是输出结果
=======
Length 2
Attr []
typeof Attr []html.Attribute
=======
Length 8
Attr []
typeof Attr []html.Attribute
=======
Length 1
Attr []
typeof Attr []html.Attribute
=======
Length 1
Attr []
typeof Attr []html.Attribute
Go 版本(go version 的输出):
go version go1.17.7 linux/amd64
既然打印出来的 h.Token().Attr 是空的,为什么 Go 认为 h.Token().Attr 的长度不是零?
附注(P.S.):如果先把 h.Token().Attr 的结果保存到一个变量中,再用这个变量计算 len 并打印内容,一切就都正常了。
代码:
package main
import (
"fmt"
"log"
"net/http"
"golang.org/x/net/html"
)
// url is the page whose HTML is fetched and tokenized.
const url = "https://google.com"

// main downloads url, walks the response body token by token and, for each
// token that carries at least one attribute, prints a separator line, the
// attribute count and the attributes themselves.
func main() {
	resp, err := http.Get(url)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.Fatalf("Status code error: %d %s", resp.StatusCode, resp.Status)
	}

	tokenizer := html.NewTokenizer(resp.Body)
	// Keep advancing until the tokenizer reports ErrorToken.
	for tokenizer.Next() != html.ErrorToken {
		// Token() is called exactly once per Next(); the same slice is then
		// used for both the length and the printout.
		attrs := tokenizer.Token().Attr
		if len(attrs) == 0 {
			continue
		}
		fmt.Println("=======")
		fmt.Println("Length", len(attrs))
		fmt.Println("Attr", attrs)
	}
}
输出
Length 3
Attr [{ value AJiK0e8AAAAAYtZT7PXDBRBC2BJawIxezEfmIL6Aw5Uy} { name iflsig} { type hidden}]
=======
Length 4
Attr [{ class fl sblc} { align left} { nowrap } { width 25%}]
=======
Length 1
Attr [{ href /advanced_search?hl=en-IN&authuser=0}]
=======
Length 4
Attr [{ id gbv} { name gbv} { type hidden} { value 1}]