# 下载所有xkcd漫画 (Download all xkcd comics)
# Downloads every single xkcd comic.
#
# Starts at the xkcd front page, saves the comic image found on each page into
# ./xkcd, then follows the page's "prev" link. The crawl stops when the "prev"
# link is missing or its href ends with '#' (presumably the oldest comic --
# this mirrors the original stop condition on the URL ending in '#').
import os
from urllib.parse import urljoin

import bs4
import requests

BASE_URL = 'http://xkcd.com'  # start url
OUTPUT_DIR = 'xkcd'           # store comics in ./xkcd
REQUEST_TIMEOUT = 30          # seconds; without it a stalled connection hangs forever
CHUNK_SIZE = 100000           # bytes written per chunk when saving an image

url = BASE_URL
os.makedirs(OUTPUT_DIR, exist_ok=True)

while True:
    # Download the page.
    print('downloading page %s...' % url)
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # Find the URL of the comic image.
    comic_elems = soup.select('#comic img')
    image_src = comic_elems[0].get('src') if comic_elems else None
    if not image_src:
        print('could not find comic image')
    else:
        # urljoin handles protocol-relative ('//host/path'), absolute and
        # page-relative src values; plain 'http:' + src only handled the first.
        comic_url = urljoin(url, image_src)

        # Download the image.
        print('downloading image %s .... ' % (comic_url))
        res = requests.get(comic_url, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()

        # Save the image to ./xkcd; the context manager closes the file even
        # if a write fails part-way through.
        image_path = os.path.join(OUTPUT_DIR, os.path.basename(comic_url))
        with open(image_path, 'wb') as image_file:
            for chunk in res.iter_content(CHUNK_SIZE):
                image_file.write(chunk)

    # Get the prev button's URL.
    prev_links = soup.select('a[rel="prev"]')
    prev_href = prev_links[0].get('href') if prev_links else None
    # Check for the '#' sentinel BEFORE joining: urljoin drops an empty
    # fragment, so a joined URL would never end with '#' and the loop would
    # never terminate.
    if not prev_href or prev_href.endswith('#'):
        break
    url = urljoin(BASE_URL, prev_href)