• Python3 Crawler 04 (more examples: processing the content of fetched pages)


    #!/usr/bin/env python
    # -*- coding:utf-8 -*-

    import os
    import re
    import requests
    from bs4 import BeautifulSoup

    res=requests.get("https://www.qiushibaike.com/")
    qiushi=res.content
    soup=BeautifulSoup(qiushi,"html.parser")
    duanzis=soup.find_all(class_="content")
    for i in duanzis:
    duanzi=i.span.contents[0]
    # duanzi=i.span.string
    print(duanzi)
    # print(i.span.string)
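
In practice, qiushibaike (like many sites) may reject the default python-requests User-Agent. A minimal hardening sketch; the header value here is an illustrative example, not from the original post:

    headers = {"User-Agent": "Mozilla/5.0"}   # illustrative UA string; sites often block the requests default
    res = requests.get("https://www.qiushibaike.com/", headers=headers, timeout=10)
    res.raise_for_status()                    # fail fast on 4xx/5xx instead of parsing an error page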


    res=requests.get("http://699pic.com/sousuo-218808-13-1-0-0-0.html")
    image=res.content
    soup=BeautifulSoup(image,"html.parser")
    images=soup.find_all(class_="lazy")

    for i in images:
    original=i["data-original"]
    title=i["title"]
    # print(title)
    # print(original)
    # print("")
    try:
    with open(os.getcwd()+"\jpg\"+title+'.jpg','wb') as file:
    file.write(requests.get(original).content)
    except:
    pass
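
Rather than swallowing every failure, it is more robust to create the target folder up front and sanitize the title before using it as a file name. A minimal sketch; the sanitizing pattern is one common Windows-safe choice, not from the original post:

    save_dir = os.path.join(os.getcwd(), "jpg")
    os.makedirs(save_dir, exist_ok=True)               # create the folder once, before the loop
    safe_title = re.sub(r'[\\/:*?"<>|]', "_", title)   # replace characters Windows forbids in names
    with open(os.path.join(save_dir, safe_title + ".jpg"), "wb") as file:
        file.write(requests.get(original).content)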

    # Example 3: list image titles and URLs without downloading
    r = requests.get("http://699pic.com/sousuo-218808-13-1.html")
    fengjing = r.content
    soup = BeautifulSoup(fengjing, "html.parser")
    # find all tags with class "lazy"
    images = soup.find_all(class_="lazy")
    # print(images)   # find_all returns a list-like ResultSet

    for i in images:
        jpg_rl = i["data-original"]   # image URL
        title = i["title"]            # image title
        print(title)
        print(jpg_rl)
        print("")

    r = requests.get("https://www.qiushibaike.com/")
    r=requests.get("http://www.cnblogs.com/nicetime/")
    blog=r.content
    soup=BeautifulSoup(blog,"html.parser")
    soup=BeautifulSoup(blog,features="lxml")
    print(soup.contents[0].contents)


    # Three ways to locate the same menu element; each find() overwrites the previous one
    tag = soup.find('div')                             # first <div> on the page
    tag = soup.find(class_="menu-bar menu clearfix")   # by class attribute
    tag = soup.find(id="menu")                         # by id; this is the one printed
    print(list(tag))
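
The same element can also be found with CSS selectors, which often read more compactly than chained find() calls. A minimal sketch, assuming the page still has a <div id="menu"> with those classes:

    tag = soup.select_one("div#menu")             # CSS-selector equivalent of find(id="menu")
    tags = soup.select("div.menu-bar.clearfix")   # select() returns a list, like find_all()
    print(tag is not None, len(tags))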

    tag01 = soup.find(class_="c_b_p_desc")

    print(len(list(tag01.contents)))      # direct children, as a list
    print(len(list(tag01.children)))      # the same nodes, via a generator
    print(len(list(tag01.descendants)))   # all nodes in the subtree, recursively

    print(tag01.contents)
    print(tag01.children)   # a generator object, not the nodes themselves
    for i in tag01.children:
        print(i)

    print(len(tag01.contents))

    # iterating a tag directly iterates its children
    for i in tag01:
        print(i)

    print(tag01.contents[0].string)
    print(tag01.contents[1])
    print(tag01.contents[1].string)
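
To make the three properties concrete on a document that is fully under our control: contents and children both yield only direct children (as a list and a generator respectively), while descendants walks the whole subtree. A self-contained sketch:

    demo = BeautifulSoup("<div><p>hi <b>there</b></p></div>", "html.parser")
    d = demo.div
    print(len(d.contents))            # 1 -> the direct children, as a list: [<p>...</p>]
    print(len(list(d.children)))      # 1 -> the same nodes, via a generator
    print(len(list(d.descendants)))   # 4 -> <p>, 'hi ', <b>, 'there' (recursive)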


    url = "http://www.dygod.net/html/tv/oumeitv/109673.html"
    s = requests.get(url)
    print(s.text.encode("iso-8859-1").decode('gbk'))
    res = re.findall('href="(.*?)">ftp',s.text)
    for resi in res:
    a=resi.encode("iso-8859-1").decode('gbk')
    print(a)
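
The encode/decode round-trip works because requests guessed ISO-8859-1 for a page that is actually GBK. Telling requests the right encoding up front is cleaner; a minimal sketch of that alternative:

    s = requests.get(url)
    s.encoding = "gbk"   # or: s.encoding = s.apparent_encoding  (guess from the body)
    print(s.text)        # now decoded correctly; no manual encode/decode needed
    res = re.findall('href="(.*?)">ftp', s.text)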

  • Original post: https://www.cnblogs.com/NiceTime/p/10125289.html