• 微信爬取


    import re
    import urllib.request
    import time
    import urllib.error
    def wei(url,duan):
        """Fetch *url* through an HTTP proxy and return the raw response body.

        Args:
            url: Address of the page to download.
            duan: Proxy address as ``"host:port"``; it is registered for the
                ``http`` scheme only.

        Returns:
            The response body as ``bytes``, or ``None`` when the request
            failed. Failures are printed and followed by a pause (10 seconds
            for ``URLError``, 1 second for anything else) instead of being
            raised.
        """
        try:
            request = urllib.request.Request(url)
            # Present a mobile Chrome User-Agent instead of urllib's default one.
            request.add_header("User-Agent","Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Mobile Safari/537.36")
            proxy_handler = urllib.request.ProxyHandler({'http': duan})
            opener = urllib.request.build_opener(proxy_handler, urllib.request.HTTPHandler)
            # Installed globally (as before), so every later urlopen() call in
            # this process is routed through the same proxy.
            urllib.request.install_opener(opener)
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(request) as response:
                data = response.read()
            print(len(data))
            return data
        except urllib.error.URLError as e:
            # HTTPError carries a status code; a plain URLError only a reason.
            if hasattr(e, "code"):
                print(e.code)
            if hasattr(e, "reason"):
                print(e.reason)
            # Back off before the caller issues its next request.
            time.sleep(10)
        except Exception as t:
            print(str(t))
            time.sleep(1)
        # Reached only after a handled failure.
        return None

    duan="121.231.226.12:6666"

    # Free proxies are generally unreliable, so crawls through them are often incomplete.
    key="Python"
    # Loop-invariant work: URL-encode the keyword and compile the link pattern once.
    key1 = urllib.request.quote(key)
    # The closing quote is required; without it the lazy group matches only "".
    link_pattern = re.compile('<a href="(.*?)"')
    for i in range(0,10):
        try:
            # Sogou's WeChat article search; result pages are numbered from 1.
            url = "http://weixin.sogou.com/weixin?query="+key1+"&_sug_type_=&sut=10977&lkt=7%2C1527054607490%2C1527054613464&s_from=input&_sug_=y&type=2&sst0=1527054613567&page="+str(i+1)+"&ie=utf8&w=01019900&dr=1"
            page = wei(url, duan)
            if page is None:
                # wei() already printed the error; move on to the next page.
                print('此'+str(i)+'爬取未成功')
                continue
            print(page)
            # Decode the bytes before matching; str(bytes) would give the b'...' repr.
            links = link_pattern.findall(page.decode("utf-8", errors="ignore"))
            if not links:
                print('此'+str(i)+'爬取未成功')
                continue
            for j, link in enumerate(links):
                # Turn the HTML-escaped "&amp;" back into a plain "&".
                link = link.replace("amp;","")
                # Include the page number so later pages do not overwrite earlier files.
                path = "D:/html/"+str(i+1)+"_"+str(j)+".txt"
                # Download the linked page itself, not the search page again.
                content = wei(link, duan)
                if content is None:
                    continue
                try:
                    # wei() returns bytes, so the file must be opened in binary mode.
                    with open(path, "wb") as out:
                        out.write(content)
                except OSError as e:
                    print(str(e))
        except urllib.error.URLError as e:
            if hasattr(e, "code"):
                print(e.code)
            if hasattr(e, "reason"):
                print(e.reason)
        except Exception as t:
            print(str(t))

  • 相关阅读:
    jQuery初级篇
    DOM初级篇
    CSS基础篇
    javascript 初级篇
    HTML 基础篇
    Oracle文章中常用数据表的描述
    Oracle视图基础
    Oracle序列简单应用
    Oracle关联查询关于left/right join的那点事
    赋值和算术运算符
  • 原文地址:https://www.cnblogs.com/chunqing/p/9079153.html
Copyright © 2020-2023  润新知