• python bs4


    # -*- coding: UTF-8 -*-
    # Web crawler
    
    import urllib2
    #import bs4
    import re
    import sys
    from bs4 import BeautifulSoup
    # import time
    # Python 2 idiom: sys.setdefaultencoding is removed from the sys module at
    # interpreter startup, so sys is reloaded to get it back; the default codec
    # used for implicit str/unicode conversions is then switched to UTF-8.
    reload(sys)
    sys.setdefaultencoding("utf-8")
    
    
    #html=urllib2.urlopen("http://121.196.21.238/report.html")
    #html=urllib2.urlopen("http://image.baidu.com/")
    
    # Read the locally saved report page and parse it with the built-in
    # "html.parser" backend.
    with open("report.html") as report_file:
        content = report_file.read()

    bsObj = BeautifulSoup(content, features='html.parser')

    # Dump the re-indented document so its structure can be inspected.
    print(bsObj.prettify())
    
    # Print the title
    # print bsObj.title
    
    # #<a href="http://baijiahao.baidu.com/s?id=1665731690282269956" target="_blank" mon="p=1&amp;a=1&amp;pn=1">G15沈海高速轿车起火 现场火光冲天</a>
    # titlist  = bsObj.findAll("a", {"target":"_blank"})
    
    
    # # The following two calls are equivalent
    # bsObj.findAll(id="text")
    # bsObj.findAll("", {"id":"text"})
    
    # for x in titlist:
    #     print x.get_text()
    
    # #http://t8.baidu.com/it/u=3571592872,3353494284&fm=79&app=86&size=h300&n=0&g=4n&f=jpeg?sec=1589296136&t=e713d1fe058c0dcb1714f9bc0fd4ee92
    
    # titlist  = bsObj.find("table",{"id":"result_table"}).tr.next_siblings
    
    # for i in titlist:
    #     print i
    
    # Collect every element whose CSS class is "testcase". `class` is a
    # reserved word in Python, so BeautifulSoup takes the filter through the
    # `class_` keyword argument (writing `class=` is a SyntaxError).
    testcases = bsObj.find_all(class_="testcase")

    # Print each matching element's markup, one element per line.
    for testcase in testcases:
        print(testcase)
  • 相关阅读:
    定位及CSS常见属性
    浮动及清浮动的方法
    C语言II博客作业04
    C语言II—作业03
    C语言II—作业02
    C语言II博客作业01
    期末总结
    C语言I博客作业09
    C语言I博客作业08
    C语言I博客作业07
  • 原文地址:https://www.cnblogs.com/jkklearn/p/13040976.html
Copyright © 2020-2023  润新知