我最近刚开始学 Python,看到有人用 Python 实现了下载糗百(糗事百科)图片的代码,一时手痒,于是也写了一个。
from sgmllib import SGMLParser
import urllib2

4 class sgm(SGMLParser):
5 def reset(self):
6 SGMLParser.reset(self)
7 self.srcs=[]
8 self.ISTRUE=True
9
10 def start_div(self,artts):
11 for k,v in artts:
12 if v=="author":
13 self.ISTRUE=False
14 def end_div(self):
15 self.ISTRUE=True
16 def start_img(self,artts):
17 for k,v in artts:
18 if k=="src" and self.ISTRUE==True:
19 self.srcs.append(v)
20
21 def download(self):
22 for src in self.srcs:
23 f=open(src[-12:],"wb")
24 print src
25 img=urllib2.urlopen(src)
26 f.write(img.read())
27 f.close()
# Walk the listing pages 1..499 and download the images found on each one.
parser = sgm()  # separate name so the instance does not shadow the class
for page in range(1, 500):
    url = "http://www.qiushibaike.com/late/page/%s?s=4622726" % page
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()
    # Start every page from a clean state; otherwise the URL list keeps
    # growing and each page would download all earlier images again.
    parser.reset()
    parser.feed(data)
    # Flush whatever the parser is still buffering for this page.
    parser.close()
    parser.download()
import urllib2

4 class sgm(SGMLParser):
5 def reset(self):
6 SGMLParser.reset(self)
7 self.srcs=[]
8 self.ISTRUE=True
9
10 def start_div(self,artts):
11 for k,v in artts:
12 if v=="author":
13 self.ISTRUE=False
14 def end_div(self):
15 self.ISTRUE=True
16 def start_img(self,artts):
17 for k,v in artts:
18 if k=="src" and self.ISTRUE==True:
19 self.srcs.append(v)
20
21 def download(self):
22 for src in self.srcs:
23 f=open(src[-12:],"wb")
24 print src
25 img=urllib2.urlopen(src)
26 f.write(img.read())
27 f.close()
# Walk the listing pages 1..499 and download the images found on each one.
parser = sgm()  # separate name so the instance does not shadow the class
for page in range(1, 500):
    url = "http://www.qiushibaike.com/late/page/%s?s=4622726" % page
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()
    # Start every page from a clean state; otherwise the URL list keeps
    # growing and each page would download all earlier images again.
    parser.reset()
    parser.feed(data)
    # Flush whatever the parser is still buffering for this page.
    parser.close()
    parser.download()
我最近刚开始学 Python,看到有人用 Python 实现了下载糗百(糗事百科)图片的代码,一时手痒,于是也写了一个。