• python基础(xml,html,bs4)


    http://python-data.dr-chuck.net/comments_42.xml

    1 <commentinfo><note>This file contains the sample data for testing</note><comments><comment><name>Leven</name><count>100</count></comment><comment><name>Mahdiya</name><count>97</count></comment><comment><name>Ajayraj</name><count>87</count></comment><comment><name>Lillian</name><count>86</count></comment><comment><name>Aon</name><count>86</count></comment><comment><name>Ruaraidh</name><count>78</count></comment><comment><name>Gursees</name><count>75</count></comment><comment><name>Emmanuel</name><count>74</count></comment><comment><name>Christy</name><count>72</count></comment><comment><name>Annoushka</name><count>72</count></comment><comment><name>Inara</name><count>72</count></comment><comment><name>Caite</name><count>70</count></comment><comment><name>Rosangel</name><count>70</count></comment><comment><name>Iana</name><count>66</count></comment><comment><name>Anise</name><count>66</count></comment><comment><name>Jaosha</name><count>65</count></comment><comment><name>Cadyn</name><count>65</count></comment><comment><name>Edward</name><count>63</count></comment><comment><name>Charlotte</name><count>61</count></comment><comment><name>Sammy</name><count>60</count></comment><comment><name>Zarran</name><count>60</count></comment><comment><name>Rowen</name><count>59</count></comment><comment><name>Stanislaw</name><count>59</count></comment><comment><name>Maighdlin</name><count>57</count></comment><comment><name>Connan</name><count>56</count></comment><comment><name>Warrick</name><count>54</count></comment><comment><name>Diya</name><count>52</count></comment><comment><name>Lawson</name><count>52</count></comment><comment><name>Wu</name><count>51</count></comment><comment><name>Irmak</name><count>47</count></comment><comment><name>Emilija</name><count>47</count></comment><comment><name>Kayda</name><count>41</count></comment><comment><name>Ellenor</name><count>41</count></comment><comment><name>Kyra</name><count>41</count></comment><comment><name>Nikita</name><count>38
</count></comment><comment><name>Kaelah</name><count>35</count></comment><comment><name>Meko</name><count>32</count></comment><comment><name>Marissa</name><count>31</count></comment><comment><name>Ayat</name><count>24</count></comment><comment><name>Sali</name><count>19</count></comment><comment><name>Hashem</name><count>19</count></comment><comment><name>Tygan</name><count>18</count></comment><comment><name>Rioden</name><count>17</count></comment><comment><name>Cruiz</name><count>16</count></comment><comment><name>Caoilfinn</name><count>13</count></comment><comment><name>Ewen</name><count>8</count></comment><comment><name>Baighley</name><count>7</count></comment><comment><name>Ramone</name><count>1</count></comment><comment><name>Kyran</name><count>1</count></comment><comment><name>Noelani</name><count>1</count></comment></comments></commentinfo>
     import xml.etree.ElementTree as ET

     try:
         from urllib2 import urlopen  # Python 2
     except ImportError:
         from urllib.request import urlopen  # Python 3

     COMMENTS_URL = 'http://python-data.dr-chuck.net/comments_42.xml'


     def sum_comment_counts(xml_text):
         """Return the sum of every <count> found under comments/comment.

         xml_text is the serialized <commentinfo> document (str or bytes).
         A document with no <comment> element yields 0.  int() tolerates the
         whitespace that may surround a count value.
         """
         root = ET.fromstring(xml_text)
         return sum(int(comment.find('count').text)
                    for comment in root.findall('comments/comment'))


     if __name__ == '__main__':
         # Local names are chosen so that the builtins input() and sum() are
         # no longer rebound, as they were in the original script.
         response = urlopen(COMMENTS_URL)
         try:
             payload = response.read()
         finally:
             response.close()
         print(sum_comment_counts(payload))  # 2553
      1 from xml.dom import minidom
      2 import os
      3 import glob
      4 import cv2
      5 
      6 
      7 """
      8 <annotation>
      9 """
     def InitVoc(vocfile):
         """Write a PASCAL VOC style annotation file for one image.

         Creates a document whose root is <annotation>, lets addObject() fill
         it in, and serializes it to the image path with its extension
         replaced by ".xml".

         vocfile: path of the image file.
         """
         doc = minidom.Document()
         annotation = doc.createElement("annotation")
         doc.appendChild(annotation)
         addObject(doc, annotation, vocfile)
         # splitext() removes only the final extension, so "img.v2.jpg" maps to
         # "img.v2.xml" instead of being cut at the first dot.
         xml_path = os.path.splitext(vocfile)[0] + ".xml"
         # open() replaces the Python 2-only file() builtin, and `with` closes
         # the handle even when writexml() raises.
         with open(xml_path, "w") as f:
             doc.writexml(f)
     18 
     def _append_text_element(doc, parent, tag, text):
         """Append <tag>text</tag> to parent and return the new element."""
         element = doc.createElement(tag)
         element.appendChild(doc.createTextNode(text))
         parent.appendChild(element)
         return element


     def _read_printable_text(path):
         """Return the content of path with non-printable bytes removed.

         Keeps printable ASCII plus tab, LF and CR, i.e. the characters whose
         Python 2 repr() holds no "\\x" escape, which is what the original
         filter tested for.
         """
         with open(path, "rb") as f:
             raw = bytearray(f.read())
         kept = "".join(chr(b) for b in raw
                        if 0x20 <= b < 0x7f or b in (0x09, 0x0a, 0x0d))
         # The file is read in binary mode; fold CRLF to LF as a text-mode
         # read on Windows would have done.
         return kept.replace("\r\n", "\n")


     def addObject(doc, annotation, vocfile):
         """Fill a VOC <annotation> element for one image.

         Appends, in order: <folder>, <filename>, <source>, <owner>, <size>
         (measured from the image), <segmented>, and one <object> for every
         accepted entry of the image's companion ".txs" file.

         doc: minidom Document used to create the nodes.
         annotation: the root <annotation> element to fill in.
         vocfile: path of the image; the ".txs" file is looked up at the same
             path with the extension replaced.

         Raises IOError when the image cannot be read.  ValueError from int()
         propagates when a coordinate of a candidate entry is not an integer.
         """
         # <folder>VOC2007</folder>
         _append_text_element(doc, annotation, "folder", "VOC2007")
         # <filename>XXXXXX</filename>
         _append_text_element(doc, annotation, "filename", vocfile)

         # <source>
         #     <database>The VOC2007 Database</database>
         #     <annotation>PASCAL VOC2007</annotation>
         #     <image>flickr</image>
         #     <flickrid>wang</flickrid>
         # </source>
         source = doc.createElement("source")
         annotation.appendChild(source)
         _append_text_element(doc, source, "database", "The VOC2007 Database")
         _append_text_element(doc, source, "annotation", "PASCAL VOC2007")
         _append_text_element(doc, source, "image", "flickr")
         _append_text_element(doc, source, "flickrid", "wang")

         # <owner>
         #     <flickrid>wang</flickrid>
         #     <name>wang</name>
         # </owner>
         owner = doc.createElement("owner")
         annotation.appendChild(owner)
         _append_text_element(doc, owner, "flickrid", "wang")
         # Fixed: this element used to be created with the tag "image",
         # although the VOC layout (and the variable name) call for <name>.
         _append_text_element(doc, owner, "name", "wang")

         # <size><width/><height/><depth/></size>
         # Fixed: read the image named by the parameter rather than the
         # module-level loop variable `bmpfile`.
         img = cv2.imread(vocfile)
         if img is None:
             # cv2.imread() signals an unreadable file by returning None.
             raise IOError("cannot read image: %s" % vocfile)
         rows = len(img)
         cols = len(img[0])
         size = doc.createElement("size")
         annotation.appendChild(size)
         _append_text_element(doc, size, "width", str(cols))
         _append_text_element(doc, size, "height", str(rows))
         _append_text_element(doc, size, "depth", str(img[0][0].size))

         # <segmented>0</segmented>
         _append_text_element(doc, annotation, "segmented", "0")

         # <object>
         #     <name>..</name><pose>Left</pose>
         #     <truncated>1</truncated><difficult>0</difficult>
         #     <bndbox><xmin/><ymin/><xmax/><ymax/></bndbox>
         # </object>
         txs_path = os.path.splitext(vocfile)[0] + '.txs'
         for token in _read_printable_text(txs_path).split(' '):
             elem = token.split('{')
             if len(elem) < 2:
                 continue
             nums = elem[1].split(',')
             if len(nums) != 4:
                 continue
             # NOTE(review): fields 0 and 2 (written as xmin/xmax) are checked
             # against the row count and fields 1 and 3 (ymin/ymax) against the
             # column count.  Kept exactly as in the original; confirm the
             # field order of the .txs format.
             if not (len(elem[0]) == 1
                     and rows > int(nums[0]) and rows >= int(nums[2])
                     and cols >= int(nums[3]) and cols > int(nums[1])):
                 continue
             obj = doc.createElement("object")
             annotation.appendChild(obj)
             _append_text_element(doc, obj, "name", elem[0])
             _append_text_element(doc, obj, "pose", "Left")
             _append_text_element(doc, obj, "truncated", "1")
             _append_text_element(doc, obj, "difficult", "0")
             bndbox = doc.createElement("bndbox")
             obj.appendChild(bndbox)
             _append_text_element(doc, bndbox, "xmin", nums[0])
             _append_text_element(doc, bndbox, "ymin", nums[1])
             _append_text_element(doc, bndbox, "xmax", nums[2])
             _append_text_element(doc, bndbox, "ymax", nums[3])
    148 
    # Convert every JPEG of the source directory into a VOC annotation file.
    # Raw string: the Windows path holds backslashes ("\s", "\F") that are not
    # valid escape sequences and must reach chdir() unchanged.
    os.chdir(r"E:\shared\Format_Trans_20160328\src_txs")
    bmpfiles = glob.glob("*.jpg")
    for bmpfile in bmpfiles:
        InitVoc(bmpfile)
     1 from xml.dom.minidom import *
     2 import struct
     3 
     class PltHeader(object):
         """Plain record holding the header fields written to a .plt file.

         Every constructor argument is stored unchanged.  The third argument
         is spelled uTest in the signature but is kept as the uText attribute.
         """

         def __init__(self, uSize, uCharNum, uTest, uStrokeNum, reserve):
             (self.uSize,
              self.uCharNum,
              self.uText,
              self.uStrokeNum,
              self.reserve) = (uSize, uCharNum, uTest, uStrokeNum, reserve)
    11 
    INKML_PATH = "E:/ADAB_set/ADAB_set/set_2/inkml/1233225548643.inkml"
    UPX_PATH = "E:/ADAB_set/ADAB_set/set_2/upx/1233225548643.upx"
    PLT_PATH = "C:/Users/samsung/Desktop/1.plt"
    TEXT_SLOTS = 256    # character-code slots reserved in the header
    HEADER_BYTES = 520  # 2 + 2 + 258 * 2 bytes, matching the 'HH258H' layout
    END_MARK = 65535


    def label_codes(value):
        """Return the code point of every non-space character of value.

        Replaces the former repr(value).split('\\u') parsing, which only
        handled characters that repr() prints as a \\uXXXX escape.
        """
        return [ord(ch) for ch in value if ch != " "]


    def trace_values(trace_texts):
        """Flatten trace strings into the value list stored after the header.

        Each trace is a comma-separated list of "x y" pairs.  Every trace is
        followed by the pair (65535, 0) and the whole list is closed by
        (65535, 65535).

        Returns (values, point_count).
        """
        values = []
        point_count = 0
        for text in trace_texts:
            for coordinate in text.split(","):
                fields = coordinate.split()
                values.append(int(fields[0]))
                values.append(int(fields[1]))
                point_count += 1
            values.extend((END_MARK, 0))
        values.extend((END_MARK, END_MARK))
        return values, point_count


    def header_words(codes, stroke_count, reserve):
        """Return the 258 unsigned shorts that follow uSize and uCharNum.

        Layout: the character codes zero-padded to 256 slots, then the stroke
        count, then the reserve word.  The stroke count used to be left out,
        which made struct.pack('HH258H', ...) fail for lack of one argument.
        """
        words = list(codes) + [0] * (TEXT_SLOTS - len(codes))
        words.append(stroke_count)
        words.append(reserve)
        return words


    def main():
        """Convert the sample .inkml/.upx pair into PLT_PATH and print a summary."""
        my_plt = PltHeader(0, 0, [], 0, 0)
        traces = parse(INKML_PATH).documentElement.getElementsByTagName('trace')
        alternates = parse(UPX_PATH).documentElement.getElementsByTagName("alternate")

        codes = label_codes(alternates[0].getAttribute("value"))
        my_plt.uStrokeNum = len(traces)
        my_plt.uCharNum = len(codes)
        my_plt.uText = header_words(codes, my_plt.uStrokeNum, my_plt.reserve)

        nums, count = trace_values([trace.firstChild.data for trace in traces])
        # NOTE(review): uSize is packed as an unsigned short, so a sample whose
        # size exceeds 65535 bytes makes struct.pack() raise.
        my_plt.uSize = HEADER_BYTES + (my_plt.uStrokeNum + 1 + count) * 2 * 2

        with open(PLT_PATH, "w+b") as binfile:
            binfile.write(struct.pack('HH258H', my_plt.uSize, my_plt.uCharNum,
                                      *my_plt.uText))
            binfile.write(struct.pack('%dH' % len(nums), *nums))
        print("%s %s %s %s" % (my_plt.uStrokeNum, my_plt.uSize, my_plt.uText,
                               my_plt.uCharNum))
        print(len(nums))


    if __name__ == "__main__":
        main()
     1 from xml.dom.minidom import *
     2 import struct
     3 import os
     4 
     DATA_ROOT = "E:/ADAB_set/ADAB_set"
     OUTPUT_PATTERN = "C:/Users/samsung/Desktop/%s.plt"
     TEXT_SLOTS = 256    # character-code slots reserved in each record header
     HEADER_BYTES = 520  # 2 + 2 + 258 * 2 bytes, matching the 'HH258H' layout
     END_MARK = 65535
     reserve = 0


     def label_codes(value):
         """Return the code point of every non-space character of value.

         Replaces the former repr(value).split('\\u') parsing, which only
         handled characters that repr() prints as a \\uXXXX escape.
         """
         return [ord(ch) for ch in value if ch != " "]


     def pack_sample(codes, trace_texts, reserve=0):
         """Return the bytes of one .plt record.

         codes: character codes of the label (at most 256).
         trace_texts: one string per stroke, a comma-separated list of "x y"
             pairs.
         reserve: value of the last header word.

         The header is uSize, uCharNum, the codes zero-padded to 256 slots,
         the stroke count and the reserve word.  It is followed by the point
         values, with (65535, 0) after every stroke and (65535, 65535) at the
         end.
         """
         stroke_count = len(trace_texts)
         char_count = len(codes)
         words = list(codes) + [0] * (TEXT_SLOTS - char_count)
         words.append(stroke_count)
         words.append(reserve)

         values = []
         point_count = 0
         for text in trace_texts:
             for coordinate in text.split(","):
                 fields = coordinate.split()
                 values.append(int(fields[0]))
                 values.append(int(fields[1]))
                 point_count += 1
             values.extend((END_MARK, 0))
         values.extend((END_MARK, END_MARK))

         size = HEADER_BYTES + (stroke_count + 1 + point_count) * 2 * 2
         return (struct.pack('HH258H', size, char_count, *words)
                 + struct.pack('%dH' % len(values), *values))


     def top_level_files(directory):
         """Return the sorted names of the files directly inside directory.

         Yields an empty list when os.walk() finds nothing to visit.
         """
         for _root, _dirs, names in os.walk(directory):
             return sorted(names)
         return []


     def convert_set(set_dir, out_path):
         """Append one record per .inkml file of set_dir to out_path.

         Each file of set_dir/inkml is paired with the file of set_dir/upx
         sharing its stem, instead of relying on both directory listings
         coming back in the same order.  Raises IOError when no such file
         exists.
         """
         inkml_dir = os.path.join(set_dir, "inkml")
         upx_dir = os.path.join(set_dir, "upx")
         upx_by_stem = dict((os.path.splitext(name)[0], name)
                            for name in top_level_files(upx_dir))
         # `with` closes the output even when a sample fails to convert.
         with open(out_path, "ab") as binfile:
             for index, inkml_name in enumerate(top_level_files(inkml_dir)):
                 stem = os.path.splitext(inkml_name)[0]
                 if stem not in upx_by_stem:
                     raise IOError("no .upx file matches %s" % inkml_name)
                 inkml_root = parse(os.path.join(inkml_dir, inkml_name)).documentElement
                 upx_root = parse(os.path.join(upx_dir, upx_by_stem[stem])).documentElement
                 traces = inkml_root.getElementsByTagName('trace')
                 alternates = upx_root.getElementsByTagName("alternate")
                 codes = label_codes(alternates[0].getAttribute("value"))
                 print(index)
                 print(codes)
                 trace_texts = [trace.firstChild.data for trace in traces]
                 binfile.write(pack_sample(codes, trace_texts, reserve))


     def main():
         """Convert every first-level directory of DATA_ROOT into its own .plt file."""
         # Paths are built with os.path.join(), so the working directory is no
         # longer changed per directory and no backslash literal is needed.
         for _root, dirs, _files in os.walk(DATA_ROOT):
             for name in dirs:
                 convert_set(os.path.join(DATA_ROOT, name), OUTPUT_PATTERN % name)
             break


     if __name__ == "__main__":
         main()
      1 <html>
      2 <head>
      3 <title>People that Avah knows</title>
      4 <style>
      5 .overlay{
      6     opacity:0.99;
      7     background-color:#eee;
      8     position:fixed;
      9     width:100%;
     10     height:100%;
     11     top:0px;
     12     left:0px;
     13     z-index:1000;
     14 }
     15 </style>
     16 </head>
     17 <body>
     18 <h1>People that Avah knows</h1>
     19 <div class="overlay" id="overlay" style="display:none" >
     20 <center>
     21 <h2>
     22 This screen randomly changes the height between list items and vanishes 
     23 after a while to make sure that you retrieve and process the data
     24 in a Python program rather than simply counting down pressing links, and 
     25 and doing the assignment without writing a Python program :).
     26 The names are in the same order in the HTML even though they 
     27 shift around on the screen visually.
     28 Your Python program can look at the page as long as it likes.
     29 </h2>
     30 </center>
     31 </div>
     32 <ul>
     33 <li style="margin-top: 7px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Daniyal.html">Daniyal</a></li>
     34 <li style="margin-top: 4px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Fares.html">Fares</a></li>
     35 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kynan.html">Kynan</a></li>
     36 <li style="margin-top: 10px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Erika.html">Erika</a></li>
     37 <li style="margin-top: 29px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Milly.html">Milly</a></li>
     38 <li style="margin-top: 12px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ceara.html">Ceara</a></li>
     39 <li style="margin-top: 1px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rennie.html">Rennie</a></li>
     40 <li style="margin-top: 31px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Franco.html">Franco</a></li>
     41 <li style="margin-top: 19px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Roxie.html">Roxie</a></li>
     42 <li style="margin-top: 4px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Giyia.html">Giyia</a></li>
     43 <li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zuzanna.html">Zuzanna</a></li>
     44 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Calean.html">Calean</a></li>
     45 <li style="margin-top: 26px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Robyn.html">Robyn</a></li>
     46 <li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Sainabou.html">Sainabou</a></li>
     47 <li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Maximilian.html">Maximilian</a></li>
     48 <li style="margin-top: 36px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Katso.html">Katso</a></li>
     49 <li style="margin-top: 9px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Armaan.html">Armaan</a></li>
     50 <li style="margin-top: 23px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Keiva.html">Keiva</a></li>
     51 <li style="margin-top: 8px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Eiko.html">Eiko</a></li>
     52 <li style="margin-top: 28px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Salahudin.html">Salahudin</a></li>
     53 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marwa.html">Marwa</a></li>
     54 <li style="margin-top: 13px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Nodoka.html">Nodoka</a></li>
     55 <li style="margin-top: 46px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Dhyia.html">Dhyia</a></li>
     56 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lianne.html">Lianne</a></li>
     57 <li style="margin-top: 5px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Tyree.html">Tyree</a></li>
     58 <li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Diona.html">Diona</a></li>
     59 <li style="margin-top: 35px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lliam.html">Lliam</a></li>
     60 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Crystyn.html">Crystyn</a></li>
     61 <li style="margin-top: 34px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Maca.html">Maca</a></li>
     62 <li style="margin-top: 1px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marina.html">Marina</a></li>
     63 <li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Leah.html">Leah</a></li>
     64 <li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Avril.html">Avril</a></li>
     65 <li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Riagan.html">Riagan</a></li>
     66 <li style="margin-top: 13px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Jaying.html">Jaying</a></li>
     67 <li style="margin-top: 59px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Emaly.html">Emaly</a></li>
     68 <li style="margin-top: 26px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rheanne.html">Rheanne</a></li>
     69 <li style="margin-top: 46px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Owais.html">Owais</a></li>
     70 <li style="margin-top: 31px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aria.html">Aria</a></li>
     71 <li style="margin-top: 8px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kyie.html">Kyie</a></li>
     72 <li style="margin-top: 48px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Keryis.html">Keryis</a></li>
     73 <li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marcous.html">Marcous</a></li>
     74 <li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Catrin.html">Catrin</a></li>
     75 <li style="margin-top: 54px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marcelina.html">Marcelina</a></li>
     76 <li style="margin-top: 52px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Madeline.html">Madeline</a></li>
     77 <li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Patrikas.html">Patrikas</a></li>
     78 <li style="margin-top: 66px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lacey.html">Lacey</a></li>
     79 <li style="margin-top: 57px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Eason.html">Eason</a></li>
     80 <li style="margin-top: 9px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kyrillos.html">Kyrillos</a></li>
     81 <li style="margin-top: 16px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Benjamin.html">Benjamin</a></li>
     82 <li style="margin-top: 34px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ege.html">Ege</a></li>
     83 <li style="margin-top: 39px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marwan.html">Marwan</a></li>
     84 <li style="margin-top: 6px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Celik.html">Celik</a></li>
     85 <li style="margin-top: 42px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kendal.html">Kendal</a></li>
     86 <li style="margin-top: 18px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kasja.html">Kasja</a></li>
     87 <li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Asena.html">Asena</a></li>
     88 <li style="margin-top: 12px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Harris.html">Harris</a></li>
     89 <li style="margin-top: 79px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Si.html">Si</a></li>
     90 <li style="margin-top: 56px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lumi.html">Lumi</a></li>
     91 <li style="margin-top: 33px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Chevy.html">Chevy</a></li>
     92 <li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Famara.html">Famara</a></li>
     93 <li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Cara.html">Cara</a></li>
     94 <li style="margin-top: 80px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Elisa.html">Elisa</a></li>
     95 <li style="margin-top: 23px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Nihaal.html">Nihaal</a></li>
     96 <li style="margin-top: 76px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Vivian.html">Vivian</a></li>
     97 <li style="margin-top: 73px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Farrah.html">Farrah</a></li>
     98 <li style="margin-top: 69px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Shonagh.html">Shonagh</a></li>
     99 <li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Monty.html">Monty</a></li>
    100 <li style="margin-top: 80px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Turner.html">Turner</a></li>
    101 <li style="margin-top: 6px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Oliver.html">Oliver</a></li>
    102 <li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aayan.html">Aayan</a></li>
    103 <li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Atom.html">Atom</a></li>
    104 <li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Abby.html">Abby</a></li>
    105 <li style="margin-top: 68px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Makala.html">Makala</a></li>
    106 <li style="margin-top: 79px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rupert.html">Rupert</a></li>
    107 <li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aine.html">Aine</a></li>
    108 <li style="margin-top: 50px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Feden.html">Feden</a></li>
    109 <li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Annick.html">Annick</a></li>
    110 <li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Avah.html">Avah</a></li>
    111 <li style="margin-top: 72px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Emilie.html">Emilie</a></li>
    112 <li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Autumn.html">Autumn</a></li>
    113 <li style="margin-top: 25px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Edyn.html">Edyn</a></li>
    114 <li style="margin-top: 43px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Efe.html">Efe</a></li>
    115 <li style="margin-top: 29px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kie.html">Kie</a></li>
    116 <li style="margin-top: 105px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Dougal.html">Dougal</a></li>
    117 <li style="margin-top: 58px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Carolyn.html">Carolyn</a></li>
    118 <li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lana.html">Lana</a></li>
    119 <li style="margin-top: 54px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Miryn.html">Miryn</a></li>
    120 <li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Pearce.html">Pearce</a></li>
    121 <li style="margin-top: 73px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ash.html">Ash</a></li>
    122 <li style="margin-top: 49px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zahide.html">Zahide</a></li>
    123 <li style="margin-top: 76px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Mathias.html">Mathias</a></li>
    124 <li style="margin-top: 53px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Vaila.html">Vaila</a></li>
    125 <li style="margin-top: 104px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Abbey.html">Abbey</a></li>
    126 <li style="margin-top: 64px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ayooluwa.html">Ayooluwa</a></li>
    127 <li style="margin-top: 117px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Miriam.html">Miriam</a></li>
    128 <li style="margin-top: 83px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Levon.html">Levon</a></li>
    129 <li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Colin.html">Colin</a></li>
    130 <li style="margin-top: 65px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Noah.html">Noah</a></li>
    131 <li style="margin-top: 70px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Taegan.html">Taegan</a></li>
    132 <li style="margin-top: 122px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zennon.html">Zennon</a></li>
    133 </ul>
    134 <script>
    135 // http://stackoverflow.com/questions/20423322/simple-setting-off-display-none-block-with-javascript
    136 function showHide(id) {
    137     var el = document.getElementById(id);
    138     if( el && el.style.display == 'none')    
    139         el.style.display = 'block';
    140     else 
    141         el.style.display = 'none';
    142 }
    143 setTimeout('showHide("overlay");', 2500);
    144 
    145 </script>
    146 </body>
    147 </html>
     try:
         from urllib2 import urlopen  # Python 2
     except ImportError:
         from urllib.request import urlopen  # Python 3
     from bs4 import BeautifulSoup

     LINK_POSITION = 17  # zero-based index of the anchor followed on each page
     HOPS = 7            # number of pages visited


     def follow_links(url, hops=HOPS, position=LINK_POSITION):
         """Follow the anchor at `position` on each page, `hops` times.

         Prints the text of every followed anchor and returns the last href.
         Raises IndexError when a page holds too few anchors.
         """
         for _ in range(hops):
             html = urlopen(url).read()
             # Name the parser explicitly: without it bs4 picks whichever
             # parser is installed and emits a warning.
             soup = BeautifulSoup(html, "html.parser")
             tags = soup('a')
             if len(tags) <= position:
                 raise IndexError("%s has only %d links" % (url, len(tags)))
             link = tags[position]
             print(link.string)
             url = link.get('href', None)
         return url


     if __name__ == "__main__":
         try:
             read_line = raw_input  # Python 2
         except NameError:
             read_line = input  # Python 3
         follow_links(read_line())

     

     1 <html>
     2 <head>
     3 <title>Welcome to the comments assignment from www.pythonlearn.com</title>
     4 </head>
     5 <body>
     6 <h1>This file contains the sample data for testing</h1>
     7 
     8 <table border="2">
     9 <tr>
    10 <td>Name</td><td>Comments</td>
    11 </tr>
    12 <tr><td>Leven</td><td><span class="comments">100</span></td></tr>
    13 <tr><td>Mahdiya</td><td><span class="comments">97</span></td></tr>
    14 <tr><td>Ajayraj</td><td><span class="comments">87</span></td></tr>
    15 <tr><td>Lillian</td><td><span class="comments">86</span></td></tr>
    16 <tr><td>Aon</td><td><span class="comments">86</span></td></tr>
    17 <tr><td>Ruaraidh</td><td><span class="comments">78</span></td></tr>
    18 <tr><td>Gursees</td><td><span class="comments">75</span></td></tr>
    19 <tr><td>Emmanuel</td><td><span class="comments">74</span></td></tr>
    20 <tr><td>Christy</td><td><span class="comments">72</span></td></tr>
    21 <tr><td>Annoushka</td><td><span class="comments">72</span></td></tr>
    22 <tr><td>Inara</td><td><span class="comments">72</span></td></tr>
    23 <tr><td>Caite</td><td><span class="comments">70</span></td></tr>
    24 <tr><td>Rosangel</td><td><span class="comments">70</span></td></tr>
    25 <tr><td>Iana</td><td><span class="comments">66</span></td></tr>
    26 <tr><td>Anise</td><td><span class="comments">66</span></td></tr>
    27 <tr><td>Jaosha</td><td><span class="comments">65</span></td></tr>
    28 <tr><td>Cadyn</td><td><span class="comments">65</span></td></tr>
    29 <tr><td>Edward</td><td><span class="comments">63</span></td></tr>
    30 <tr><td>Charlotte</td><td><span class="comments">61</span></td></tr>
    31 <tr><td>Sammy</td><td><span class="comments">60</span></td></tr>
    32 <tr><td>Zarran</td><td><span class="comments">60</span></td></tr>
    33 <tr><td>Rowen</td><td><span class="comments">59</span></td></tr>
    34 <tr><td>Stanislaw</td><td><span class="comments">59</span></td></tr>
    35 <tr><td>Maighdlin</td><td><span class="comments">57</span></td></tr>
    36 <tr><td>Connan</td><td><span class="comments">56</span></td></tr>
    37 <tr><td>Warrick</td><td><span class="comments">54</span></td></tr>
    38 <tr><td>Diya</td><td><span class="comments">52</span></td></tr>
    39 <tr><td>Lawson</td><td><span class="comments">52</span></td></tr>
    40 <tr><td>Wu</td><td><span class="comments">51</span></td></tr>
    41 <tr><td>Irmak</td><td><span class="comments">47</span></td></tr>
    42 <tr><td>Emilija</td><td><span class="comments">47</span></td></tr>
    43 <tr><td>Kayda</td><td><span class="comments">41</span></td></tr>
    44 <tr><td>Ellenor</td><td><span class="comments">41</span></td></tr>
    45 <tr><td>Kyra</td><td><span class="comments">41</span></td></tr>
    46 <tr><td>Nikita</td><td><span class="comments">38</span></td></tr>
    47 <tr><td>Kaelah</td><td><span class="comments">35</span></td></tr>
    48 <tr><td>Meko</td><td><span class="comments">32</span></td></tr>
    49 <tr><td>Marissa</td><td><span class="comments">31</span></td></tr>
    50 <tr><td>Ayat</td><td><span class="comments">24</span></td></tr>
    51 <tr><td>Sali</td><td><span class="comments">19</span></td></tr>
    52 <tr><td>Hashem</td><td><span class="comments">19</span></td></tr>
    53 <tr><td>Tygan</td><td><span class="comments">18</span></td></tr>
    54 <tr><td>Rioden</td><td><span class="comments">17</span></td></tr>
    55 <tr><td>Cruiz</td><td><span class="comments">16</span></td></tr>
    56 <tr><td>Caoilfinn</td><td><span class="comments">13</span></td></tr>
    57 <tr><td>Ewen</td><td><span class="comments">8</span></td></tr>
    58 <tr><td>Baighley</td><td><span class="comments">7</span></td></tr>
    59 <tr><td>Ramone</td><td><span class="comments">1</span></td></tr>
    60 <tr><td>Kyran</td><td><span class="comments">1</span></td></tr>
    61 <tr><td>Noelani</td><td><span class="comments">1</span></td></tr>
    62 </table>
    63 </body>
    64 </html>
     try:
         from urllib2 import urlopen  # Python 2
     except ImportError:
         from urllib.request import urlopen  # Python 3
     from bs4 import BeautifulSoup


     def count_and_sum_spans(html):
         """Return (number of <span> tags, sum of their integer contents).

         ValueError or TypeError from int() propagates when a span does not
         hold a single integer string.
         """
         # Name the parser explicitly: without it bs4 picks whichever parser
         # is installed and emits a warning.
         soup = BeautifulSoup(html, "html.parser")
         values = [int(tag.string) for tag in soup('span')]
         return len(values), sum(values)


     if __name__ == "__main__":
         try:
             read_line = raw_input  # Python 2
         except NameError:
             read_line = input  # Python 3
         # Local names are chosen so that the builtin sum() is not rebound.
         span_count, total = count_and_sum_spans(urlopen(read_line()).read())
         print("%d %d" % (span_count, total))
  • 相关阅读:
    STM32固件库和自定义工程模板
    STM32存储器映射和寄存器映射
    VScode搭建OpenCV环境
    手写数字识别——基于LeNet-5卷积网络模型
    敏感信息泄露
    Google的高级搜索——Google hack
    session fixation攻击
    认证和会话管理漏洞
    SQLmap
    基于时间型SQL盲注
  • 原文地址:https://www.cnblogs.com/wanderingzj/p/5010535.html
Copyright © 2020-2023  润新知