• python小练——下载指定url中的图片


    #coding=gbk
    #download pictures of the url
    #usage: python downpicture.py www.baidu.com

    import os
    import sys
    from html.parser import HTMLParser
    from urllib.request import urlopen
    from urllib.parse import urlparse

    def getpicname(path):
        """Return the file name of the picture that a URL points to.

        Only the path component of the URL is inspected, so a query string or
        fragment can neither hide a real file extension nor fake one.

        Args:
            path: URL of the picture (absolute, protocol-relative or relative).

        Returns:
            The last segment of the URL path (e.g. ``'logo.png'``), or ``None``
            when that segment has no file extension.
        """
        # Index 2 of the parse result is the path, without query or fragment.
        url_path = urlparse(path)[2]
        name = url_path.rsplit('/', 1)[-1]
        if os.path.splitext(name)[1] == '':
            return None
        return name

    def saveimgto(path, urls):
        """Download every picture in *urls* into the local directory *path*.

        Each picture is stored under the file name that ``getpicname`` derives
        from its URL. URLs for which ``getpicname`` returns ``None`` are
        skipped. Errors raised by ``urlopen`` or by writing the file are not
        caught here and propagate to the caller.

        Args:
            path: Existing local directory that receives the files. If it is
                not a directory, a message is printed and the program exits
                through ``sys.exit()``.
            urls: Iterable of picture URLs to download.
        """
        if not os.path.isdir(path):
            print('path is invalid')
            sys.exit()

        for url in urls:
            name = getpicname(url)
            if name is None:
                # No file name can be derived from this URL, so there is
                # nothing sensible to save it as.
                continue
            # Fetch before opening the output file so that a failed request
            # does not leave an empty file (or an unclosed handle) behind.
            with urlopen(url) as response:
                data = response.read()
            with open(os.path.join(path, name), 'wb') as outfile:
                outfile.write(data)

    class myhtmlparser(HTMLParser):
        """HTML parser that collects the ``src`` value of every ``<img>`` tag.

        Attributes:
            urls: List of non-empty ``src`` values, in document order.
            num: Number of ``<img>`` start tags seen, whether or not they
                carried a usable ``src``.
        """

        def __init__(self):
            super().__init__()
            self.urls = []
            self.num = 0

        def handle_starttag(self, tag, attr):
            """Record the ``src`` attribute(s) of an ``<img>`` tag.

            Args:
                tag: Name of the tag being opened.
                attr: List of ``(name, value)`` attribute pairs; ``value`` is
                    ``None`` for an attribute written without a value.
            """
            if tag.lower() != 'img':
                return
            # Drop missing/empty values: a bare ``<img src>`` yields None,
            # which is not a URL and cannot be downloaded.
            self.urls.extend(
                value for name, value in attr
                if name.lower() == 'src' and value
            )
            self.num += 1

    if __name__ == '__main__':
        # Script entry point: fetch the page given on the command line, collect
        # the <img> sources it contains and download them to a local directory.
        # Imported locally because only this script section needs it.
        from urllib.parse import urljoin

        if len(sys.argv) < 2:
            print('usage: python downpicture.py <url> [target_dir]')
            sys.exit(1)

        url = sys.argv[1]
        # Prepend a scheme only when none was given, so https:// URLs are not
        # turned into 'http://https://...'.
        if not url.lower().startswith(('http://', 'https://')):
            url = 'http://' + url
        # An optional second argument overrides the default target directory.
        target_dir = sys.argv[2] if len(sys.argv) > 2 else r'D:\python\song'

        with urlopen(url) as q:
            # Resolve links against the final URL in case of a redirect.
            base_url = q.geturl()
            content = q.read().decode('utf-8', 'ignore')

        myparser = myhtmlparser()
        myparser.feed(content)

        # urljoin handles absolute, protocol-relative ('//host/x'),
        # root-relative ('/x') and document-relative ('x/y') sources alike.
        # Empty or missing src values are skipped.
        picture_urls = [urljoin(base_url, u) for u in myparser.urls if u]

        saveimgto(target_dir, picture_urls)
        # myparser.num counts the <img> tags found on the page.
        print('num of download pictures is {}'.format(myparser.num))

      result:

      num of download pictures is 19

  • 相关阅读:
    Object之总结(一)
    Object之registerNatives
    Object之finalize
    阿里腾讯百度360
    Object之getClass
    Object之clone
    Object之toString
    Object之notify
    Object之wait
    Object之equals与hashCode
  • 原文地址:https://www.cnblogs.com/lyroge/p/2145978.html
Copyright © 2020-2023  润新知