python小练——下载指定url中的图片

python小练——下载指定url中的图片

#coding=gbk
# download all pictures referenced by <img> tags of the page at the given url
# usage: python downpicture.py www.baidu.com

import os
import sys
from html.parser import HTMLParser
from urllib.request import urlopen
from urllib.parse import urlparse

def getpicname(path):
    """Return the file name component of an image URL.

    Only the path part of the URL is inspected, so dots in the host name,
    the query string or the fragment are never mistaken for an extension.

    Args:
        path: URL of the image (anything ``urlparse`` accepts).

    Returns:
        The last path segment (for example ``'logo.png'``), or ``None``
        when that segment is empty or has no file extension.
    """
    # URL paths always use '/', independent of the local OS separator.
    name = urlparse(path).path.rsplit('/', 1)[-1]
    if os.path.splitext(name)[1] == '':
        return None
    return name

def saveimgto(path, urls):
    """Download every URL in *urls* into the existing directory *path*.

    Each file is stored under the name returned by ``getpicname(url)``.
    URLs for which no file name can be derived are skipped.

    Args:
        path: Target directory; it must already exist.
        urls: Iterable of image URLs that ``urlopen`` can fetch.

    Raises:
        SystemExit: if *path* is not a directory ('path is invalid' is
            printed first).
    """
    if not os.path.isdir(path):
        print('path is invalid')
        sys.exit()

    for url in urls:
        name = getpicname(url)
        if not name:
            # No usable file name; joining None onto the directory would
            # raise TypeError.
            continue
        # Fetch before opening the output file so that a failed request
        # does not leave an empty file behind.
        with urlopen(url) as response:
            data = response.read()
        with open(os.path.join(path, name), 'wb') as of:
            of.write(data)

class myhtmlparser(HTMLParser):
    """HTML parser that collects the ``src`` of every ``<img>`` tag."""

    def __init__(self):
        super().__init__()
        # Non-empty src values in document order, exactly as written in
        # the HTML (they may still be relative).
        self.urls = []
        # Number of <img> start tags seen, with or without a usable src.
        self.num = 0

    def handle_starttag(self, tag, attr):
        """Record the src attribute(s) of an ``<img>`` start tag.

        Args:
            tag: Name of the tag being opened.
            attr: List of ``(name, value)`` pairs; ``value`` is ``None``
                for an attribute written without a value.
        """
        if tag.lower() != 'img':
            return
        # Skip valueless (`<img src>`) and empty src attributes: they name
        # no resource and a None entry would break later string handling.
        self.urls.extend(
            value for name, value in attr
            if name.lower() == 'src' and value
        )
        self.num += 1

if __name__ == '__main__':
    # Script entry point: fetch the page named on the command line, collect
    # the src of each <img> tag and download the images.
    # Local import: urljoin is only needed here.
    from urllib.parse import urljoin

    if len(sys.argv) < 2:
        print('usage: python downpicture.py <url> [target_dir]')
        sys.exit(1)

    url = sys.argv[1]
    # Accept bare host names, but leave explicit http/https URLs untouched.
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url
    # Optional second argument overrides the default download directory.
    target = sys.argv[2] if len(sys.argv) > 2 else r'D:\python\song'

    with urlopen(url) as q:
        content = q.read().decode('utf-8', 'ignore')
        # Resolve relative links against the final URL (after redirects).
        base = q.geturl()

    myparser = myhtmlparser()
    myparser.feed(content)

    # urljoin handles absolute, scheme-relative ('//host/x'), root-relative
    # ('/x') and document-relative ('x/y.png') references alike.
    myparser.urls = [urljoin(base, u) for u in myparser.urls if u]

    saveimgto(target, myparser.urls)
    print('num of download pictures is {}'.format(myparser.num))

　　result：

　　num of download pictures is 19

大龄程序猿，分享互联网开发相关知识！前端、后端，架构等内容，欢迎关注公众号 chengxuyuangangzi
相关阅读:
Object之总结（一）
Object之registerNatives
Object之finalize
阿里腾讯百度360
Object之getClass
Object之clone
Object之toString
Object之notify
Object之wait
Object之equals与hashCode
原文地址：https://www.cnblogs.com/lyroge/p/2145978.html

最新文章
md5
http与https区别
 GET与POST方法区别
 tcp与udp区别
 Integer类之6
Integer类之5
Integer类之4
Integer类之3
Integer类之2
Integer类之1