package main import ( "github.com/antchfx/htmlquery" "io" "net/http" "os" "strconv" ) func main() { base_url := "https://tieba.baidu.com/f?kw=%E7%BB%9D%E5%9C%B0%E6%B1%82%E7%94%9F&ie=utf-8&pn=" var start ,end int start = 1 end = 20 for i:=start;i<=end;i++{ url := base_url + strconv.Itoa((i-1)*50) result := download(url) list := parse(result) f,_ := os.Create( strconv.Itoa(i) +".html") for _,v := range list{ f.WriteString(v+" ") } f.Close() } } func download(url string) (result io.Reader) { resp,_ := http.Get(url) return resp.Body } func parse(resp io.Reader) (result[]string ) { doc,_ := htmlquery.Parse(resp) list := htmlquery.Find(doc, "//div[@class='t_con cleafix']") slice := make([]string, 5) for _,n := range list { //number := htmlquery.FindOne(n,".//span[contains(@class,'num')]/text()") title := htmlquery.FindOne(n,".//a/@title") //numstring := htmlquery.InnerText(number) titlestring := htmlquery.SelectAttr(title,"title") slice = append(slice, titlestring) } return slice }
效果