package main
import (
"fmt"
"github.com/antchfx/htmlquery"
"github.com/gocolly/colly"
"log"
"strings"
"time"
)
// main fetches the yeves.cn front page and, for every <li> under the <ul> of
// the second <section> inside the element with id "secondary", prints the
// href and inner text of its direct <a href> child.
func main() {
	c := colly.NewCollector(
		colly.AllowedDomains("yeves.cn"),
	)

	c.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting", r.URL.String())
	})

	// Apply a randomized wait (bounded by RandomDelay) before requests to any
	// domain. Limit reports invalid rules via its error return.
	if err := c.Limit(&colly.LimitRule{
		DomainGlob:  "*",
		RandomDelay: 1 * time.Second,
	}); err != nil {
		log.Fatal(err)
	}

	// Runs after a response has been received.
	c.OnResponse(func(r *colly.Response) {
		doc, err := htmlquery.Parse(strings.NewReader(string(r.Body)))
		if err != nil {
			log.Fatal(err)
		}

		items := htmlquery.Find(doc, `//*[@id="secondary"]/section[2]/ul//li`)
		for _, item := range items {
			a := htmlquery.FindOne(item, "./a[@href]")
			if a == nil {
				// No direct <a href> child in this <li>; skip it, since
				// InnerText dereferences the node and would panic on nil.
				continue
			}
			fmt.Println(htmlquery.SelectAttr(a, "href"), htmlquery.InnerText(a))
		}
	})

	// Visit returns the request error (network failure, disallowed domain,
	// bad status, ...); surface it instead of exiting silently.
	if err := c.Visit("https://yeves.cn/"); err != nil {
		log.Fatal(err)
	}
}