• go语言 goquery爬虫


     

      goquery 是 Go 语言的 HTML 解析与查询库,类似于 Ruby 的 gem nokogiri。

      goquery的选择器功能很强大,很好用。地址:https://github.com/PuerkitoBio/goquery

      下面是一个抓取糗事百科(糗百)热门页(/hot/)的爬虫程序:

    package main
    
    import (
        "fmt"
        "log"
        "net/http"
        "strings"
    
        "github.com/PuerkitoBio/goquery"
    )
    
    // qiubai_parse fetches the Qiushibaike "hot" listing page, builds one record
    // per article matched by "#content-left .article", prints each record as it
    // is built, and finally prints the whole slice of records.
    //
    // Each record may contain the keys:
    //   - "link":        absolute URL of the article detail page
    //   - "all_content": text of the ".content" elements on the detail page
    //   - "like_num":    text of ".likenum" with spaces removed
    //   - "comment":     text of ".main-text" with the like-count text removed
    //
    // A failure to load or parse the listing page terminates the program via
    // log.Fatal. A failure on a single detail page is logged and that record is
    // kept without "all_content", so one bad article does not abort the run.
    func qiubai_parse() {
        const baseURL = "https://www.qiushibaike.com"

        res, err := http.Get(baseURL + "/hot/")
        if err != nil {
            log.Fatal(err)
        }
        defer res.Body.Close()
        if res.StatusCode != http.StatusOK {
            log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
        }

        doc, err := goquery.NewDocumentFromReader(res.Body)
        if err != nil {
            log.Fatal(err)
        }

        articles := doc.Find("#content-left .article ")
        // Length 0 with a capacity hint: make([]T, 100) would instead put 100 nil
        // maps in front of every appended record.
        records := make([]map[string]string, 0, articles.Length())
        articles.Each(func(i int, s *goquery.Selection) {
            record := make(map[string]string)

            href, ok := s.Find("a[class]").Attr("href")
            if ok {
                record["link"] = baseURL + href
                content, err := fetchArticleContent(record["link"])
                if err != nil {
                    log.Printf("article %d: %v", i, err)
                } else {
                    record["all_content"] = content
                }
            } else {
                log.Printf("article %d: no link found, skipping detail page", i)
            }

            likeNum := s.Find(".likenum").Text()
            record["like_num"] = strings.Replace(likeNum, " ", "", -1)
            // Strip the raw (unstripped) like-count text wherever it occurs inside
            // the ".main-text" text.
            comment := s.Find(".main-text").Text()
            record["comment"] = strings.Replace(comment, likeNum, "", -1)

            fmt.Println(record)
            records = append(records, record)
        })
        fmt.Println(records)
    }

    // fetchArticleContent downloads the page at link and returns the text of its
    // ".content" elements. It returns an error if the request fails, the status
    // is not 200 OK, or the body cannot be parsed as HTML. The response body is
    // always closed before returning.
    func fetchArticleContent(link string) (string, error) {
        res, err := http.Get(link)
        if err != nil {
            return "", err
        }
        defer res.Body.Close()
        if res.StatusCode != http.StatusOK {
            return "", fmt.Errorf("fetching %s: status code error: %d %s", link, res.StatusCode, res.Status)
        }

        doc, err := goquery.NewDocumentFromReader(res.Body)
        if err != nil {
            return "", fmt.Errorf("parsing %s: %w", link, err)
        }
        return doc.Find(".content").Text(), nil
    }
    
    // main is the program entry point; it runs the scraper once and returns.
    func main() {
        qiubai_parse()
    }

      

  • 相关阅读:
    SSM简单实现文件上传和下载
    Web发送邮件
    scala写算法-快排
    scala写算法-从后缀表达式构造
    scalajs_初体验
    scala写算法-用小根堆解决topK
    scala-Future和Promise
    python基础之函数
    python基础知识(五)
    python基础知识(四)
  • 原文地址:https://www.cnblogs.com/wangyuyu/p/11358267.html
Copyright © 2020-2023  润新知