• goquery简单爬取ebay


    第三方包的安装

    • goquery

    goquery的GitHub地址:https://github.com/PuerkitoBio/goquery ,可通过命令 go get github.com/PuerkitoBio/goquery 安装。

                                package main
    

    import(
    "fmt"
    "net/http"
    "io/ioutil"
    "strings"
    "github.com/PuerkitoBio/goquery"
    )

    // handleError reports a non-nil err on standard output, prefixed with why
    // (a short label for the operation that failed).
    //
    // It only prints: it neither returns a value nor stops the program, so
    // callers must still decide for themselves whether it is safe to continue.
    func handleError(err error, why string) {
    	if err == nil {
    		return
    	}
    	// Separate the label from the error and end the line so that successive
    	// reports do not run together.
    	fmt.Printf("%s: %v\n", why, err)
    }

    // getPages performs an HTTP GET on url with browser-like request headers and
    // returns the response body as a string.
    //
    // Every failure (building the request, sending it, reading the body) is
    // reported through handleError and results in an empty string.
    func getPages(url string) (pageStr string) {
    	req, err := http.NewRequest(http.MethodGet, url, nil)
    	if err != nil {
    		handleError(err, "http.NewRequest")
    		return ""
    	}

    	// Headers must be set on the request before it is sent; adding them to the
    	// response afterwards has no effect on what the server receives.
    	// No explicit Host header is set: net/http derives it from the URL.
    	req.Header.Set("Connection", "keep-alive")
    	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
    	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2")
    	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox")
    	// NOTE(review): hard-coded cookie, presumably captured from a browser
    	// session; it may well have expired — confirm it is still needed.
    	req.Header.Set("Cookie", "nonsession=BAQAAAWqAI+j2AAaAADMABl60yOw1MTgwMDAAywABXNOcdDIAygAgZjmW7DlhN2ZhMDUzMTZhMGFhZGQyMTI0ZjkxNmZmZmE3MjRhixCPqGLKye5BuKLFvWdMzprH4kg; s=CgAD4ACBc1ObsOWE3ZmEwNTMxNmEwYWFkZDIxMjRmOTE2ZmZmYTcyNGGhAzuC; dp1=bu1p/QEBfX0BAX19AQA**5eb4c8ecbl/CN6095fc6c; ebay=%5Esbf%3D%23%5Ejs%3D1%5Epsi%3DAf6B2MwI%5E; ak_bmsc=68BEC4C2CCB9CBF4D24A609F3781E6AE17036813C12000006895D35CE3AC3162~plUQDUg9/LiE/57OsXMbVM1wcDfKqG/SApfWftxrhtgLduhxKfsBp6CMzXhGHW1LJXFP+AXDCH4QaZyT8gmIVVaARRCqhjEtNpbOFVBnKCg/1YaCBlTgXb7UKFL6+ydixzxZ4mmSbcU7NP2lBOegbyLe05KsV/OyYq3JmK9RVfuT4MiZUg+WXcqdQALXmiYOrb6ZzfTYGjBKSaO8lDGE3Ejn/SENnN/rrVzHMBBBTeiFs=; bm_sv=77E98EA7DB2BF215DFACDD76E331005C~8F3r1OwVRAmlwMYb8F7yPSMIiY5n6VeLD+6XrZSTYyjZ7If+e7XZeclQoUK40241+O9vp9XsERUvGzAv0HzEzJXx8oWKO/D2b/9cCTerVgXUS1UqoBodtIlvmVcskACUAp0dXB6wIfO8oebPY3dj1w==; ds2=sotr/b9Votzzzzzzz^")

    	resp, err := http.DefaultClient.Do(req)
    	if err != nil {
    		// resp must not be touched when the request failed.
    		handleError(err, "http.Get")
    		return ""
    	}
    	defer resp.Body.Close()

    	body, err := ioutil.ReadAll(resp.Body)
    	if err != nil {
    		handleError(err, "ioutil.ReadAll")
    		return ""
    	}
    	return string(body)
    }

    //ebay页面分析
    func spiderEbay(){
    pageStr := getPages("https://www.ebay.com/b/Apple-iPhone/9355/bn_319682")
    doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageStr))
    handleError(err,"goquery.NewDocumentFromReader")
    doc.Find(".s-item ").Each(func(i int, s *goquery.Selection){

        title := s.Find(".s-item__title").Text()
        image,_ := s.Find(".s-item__image-img").Attr("src")
    		fmt.Printf("	 	
    ",title,image)
    		
    	
    
    })
    

    }

    // main prints a start-up banner and then runs the eBay crawl.
    func main() {
    	// Println rather than Printf: the text contains no format verbs, and the
    	// trailing newline keeps the banner from running into the crawl output.
    	fmt.Println("this is crawler")
    	spiderEbay()
    }

    一个人光有知识是远远不够的,知识只是量的积累。可以在拥有知识的前提下,再掌握一门技术。
  • 相关阅读:
    操作系统第一天学习
    进制之间的转换
    git的使用
    Python 第二天学习(文件的处理)
    下载博客首页的博客列表
    获取所有的列表
    抓取指定博客的内容
    进程简介
    python 内置函数range和xrange
    关于read的例子和条件测试
  • 原文地址:https://www.cnblogs.com/ashton/p/10967355.html
Copyright © 2020-2023  润新知