• 用 Python 爬取某个网站的图片


    # _*_ coding: gbk _*_
    import urllib
    import urllib2
    import re
    class Spider:
        """Minimal Python 2 scraper that saves .jpg images linked from a page."""

        def getImage(self, html, limit=2):
            """Fetch a page and download the first ``limit`` .jpg images it references.

            Args:
                html: URL of the page to fetch. (The name is historical; it is
                    a URL, not markup.)
                limit: Maximum number of images to download. Defaults to 2,
                    the cap that was previously hard-coded. ``None`` downloads
                    every match.

            Each image URL is printed and then saved under the hard-coded
            Windows image directory as ``<index>.jpg``, with the index
            starting at 0. ``'the end'`` is printed when done.
            """
            page = urllib2.urlopen(html)
            try:
                content = page.read()
            finally:
                # Close explicitly so the connection is released even if
                # read() raises; try/finally keeps this valid on Python 2.
                page.close()

            # Quote-bounded, non-greedy match with an escaped dot. The old
            # pattern (src="http:.*.jpg) was greedy, so several <img> tags on
            # one line collapsed into a single bogus URL, and its bare "." let
            # any character stand in for the dot before "jpg".
            pattern = r'src="(http:[^"]*?\.jpg)'
            urls = re.findall(pattern, content)

            for index, url in enumerate(urls[:limit]):
                print(url)
                # Raw string: same bytes as before, without relying on "\i"
                # and "\%" happening not to be escape sequences.
                urllib.urlretrieve(url, r'E:\images\%s.jpg' % index)

            print('the end')
            
    
    if __name__ == "__main__":
        # Script entry point: print a greeting, then scrape one Baidu Baike page.
        print('hello')
        # Alternative page used earlier, kept for reference:
        # http://baike.baidu.com/link?url=pj6QaA2Zyrxx2WcD4f7vN50LWVIZjJUKYdnnLGMOWnmInlALGH4dXmU86hE3Ar-jmaiahjf2MiEZ3n_0WCOUlFuKwVfYZNKnBwxidD1cC3i
        page_url = r"http://baike.baidu.com/link?url=rHaKx7RPBWuR4MxzY0BPhwbLKH4DEdwKPN8EYH-78Zzm7IMUuFTYM0eUZw-j27lHxDxyyNiqkjUg4JG8FvyjNUsuqiTzLixsNSXUtTWiOpQqrtxbf4hkj-n6gF1Nyn4D"
        spider = Spider()
        spider.getImage(page_url)
    

      

    用 Python 从某个网站批量爬取图片的 URL:主要来自百度风云榜,包括男演员、女演员、男歌手、女歌手四个榜单,总共 200 张。

    # _*_ coding: gbk _*_
    import urllib
    import urllib2
    import re
    import os
    class Spider:
        """Minimal Python 2 scraper that records link URLs found on a page."""

        def getImage(self, html):
            """Fetch a page and write the links matched on it to a text file.

            Despite its name, this method downloads no images. It finds every
            span from ``href="http:`` up to the text ``简介``, trims the match
            to the URL part, prints it, and writes it as one line of
            ``e:\\images\\paths.txt``. It then prints the number of matches and
            ``'the end'``.

            Args:
                html: URL of the page to fetch. (The name is historical; it is
                    a URL, not markup.)
            """
            page = urllib2.urlopen(html)
            try:
                content = page.read()
            finally:
                # Close explicitly so the connection is released even if
                # read() raises; try/finally keeps this valid on Python 2.
                page.close()

            pattern = r'href="http:.*简介'
            matches = re.findall(pattern, content)

            # Drop the leading 'href="' (6 characters) and the last 6
            # characters of each match.
            # NOTE(review): the trailing 6 presumably covers '">' plus "简介"
            # as four GBK bytes, so this depends on the source file encoding;
            # confirm against the live page. The greedy ".*" also merges
            # several links that share a line -- verify the markup before
            # tightening the pattern.
            urls = [match[6:len(match) - 6] for match in matches]

            with open(r'e:\images\paths.txt', 'w+') as f:
                for url in urls:
                    print(url)
                    f.write(url)
                    # The literal here was split across two physical lines
                    # (a syntax error); it is a newline terminator.
                    f.write('\n')

            print(len(matches))
            print('the end')
    
    if __name__ == "__main__":
        # Script entry point: print a greeting, then scan the Baidu ranking page.
        print('hello')
        # Leftover page URL from the earlier image scraper, kept for reference:
        # http://baike.baidu.com/link?url=pj6QaA2Zyrxx2WcD4f7vN50LWVIZjJUKYdnnLGMOWnmInlALGH4dXmU86hE3Ar-jmaiahjf2MiEZ3n_0WCOUlFuKwVfYZNKnBwxidD1cC3i
        ranking_url = r'http://top.baidu.com/buzz?b=18&qq-pf-to=pcqq.group'
        spider = Spider()
        spider.getImage(ranking_url)
       
    

      

  • 相关阅读:
    Django~1
    Python 正则表达式入门(初级篇)
    Python占位符
    django之基于cookie和装饰器实现用户认证
    form error
    Python之路【第十七篇】:Django【进阶篇 】
    djangoform详解
    django自定义form验证error
    Django CRM客户关系管理系统
    ulimit
  • 原文地址:https://www.cnblogs.com/wuxiangli/p/6099334.html
Copyright © 2020-2023  润新知