• python批量下载色影无忌和蜂鸟的图片 爬虫小应用


    代码里有些冗余信息:之前用来测试正则表达式的代码没有移走,不过不影响使用。
    # -*- coding:utf-8 -*-
    import contextlib
    import os
    import re
    import sys
    import time
    import urllib

    try:  # Python 3
        from urllib.request import urlopen, urlretrieve
    except ImportError:  # Python 2
        from urllib import urlopen, urlretrieve
    
    
    # Patterns are compiled once at import time instead of on every call.
    # Anchor tags whose first attribute is href; captures the link target.
    _LINK_PATTERN = re.compile(r'<a href="(.+?)".+?>')
    # Absolute web links only; https is accepted alongside the original http.
    _ABSOLUTE_URL_PATTERN = re.compile(r'https?:.+?')
    # <img> tags whose first attribute is src and whose target ends in ".jpg".
    _JPG_SRC_PATTERN = re.compile(r'<img src="(.+?\.jpg)".+?/>')
    # File name made only of word characters, taken from an image URL.
    _JPG_NAME_PATTERN = re.compile(r'/(\w+?\.jpg)')

    _DEFAULT_DIRECTORY = "C:/Tools/source"
    _DEFAULT_SINGLE_PATH = "C:/Tools/a.jpg"


    def _fetch(url):
        """Return the body at *url* as raw bytes, always closing the connection."""
        with contextlib.closing(urlopen(url)) as connection:
            return connection.read()


    def _fetch_text(url):
        """Return the page at *url* as text so the str patterns can search it."""
        # Undecodable bytes are replaced instead of raising: only the URLs inside
        # the markup are used, not the surrounding page text.
        return _fetch(url).decode("utf-8", "replace")


    def _local_name(image_url):
        """Return the file name under which *image_url* is stored locally."""
        found = _JPG_NAME_PATTERN.findall(image_url)
        if found:
            return found[0]
        # Names with characters outside \w (e.g. "my-pic.jpg") never match the
        # pattern above; fall back to the last path segment and replace anything
        # that is unsafe in a file name.
        tail = image_url.rsplit("/", 1)[-1]
        return re.sub(r'[^\w.\-]', "_", tail)


    def getAllUrl(entry=None):
        """Print every absolute (http/https) link found on a page, then ``done``.

        Args:
            entry: URL of the page to scan. Defaults to the first command-line
                argument (``sys.argv[1]``).
        """
        if entry is None:
            entry = sys.argv[1]
        page = _fetch_text(entry)
        for link in _LINK_PATTERN.findall(page):
            if _ABSOLUTE_URL_PATTERN.match(link):
                print(link)
        print("done")


    def download_pic(url=None, directory=_DEFAULT_DIRECTORY, page_delay=3,
                     image_delay=1):
        """Download every ``.jpg`` referenced by an ``<img src=...>`` tag on a page.

        Args:
            url: URL of the page to scan. Defaults to ``sys.argv[1]``.
            directory: Folder the images are written to; created when missing.
            page_delay: Seconds to pause after the page has been fetched.
            image_delay: Seconds to pause after each image has been fetched.

        An image that cannot be fetched is reported and skipped so that one
        broken link does not abort the rest of the batch. Errors while writing
        to disk are not caught.
        """
        if url is None:
            url = sys.argv[1]
        page = _fetch_text(url)
        print("Waiting to get data")
        time.sleep(page_delay)

        if not os.path.isdir(directory):
            os.makedirs(directory)

        for image_url in _JPG_SRC_PATTERN.findall(page):
            print(image_url)
            name = _local_name(image_url)
            print(name)
            try:
                jpeg = _fetch(image_url)
            except (EnvironmentError, ValueError) as error:
                # EnvironmentError covers network and file failures on both
                # Python 2 and 3; ValueError is raised for sources that are not
                # complete URLs (e.g. a relative path).
                print("skipped %s: %s" % (image_url, error))
                continue
            time.sleep(image_delay)
            print("waiting")
            with open(os.path.join(directory, name), "wb") as output:
                output.write(jpeg)
        print("Done")


    def download_pic_2(url=None, local_path=_DEFAULT_SINGLE_PATH):
        """Save the single resource at *url* to *local_path*.

        Args:
            url: Resource to download. Defaults to ``sys.argv[1]``.
            local_path: Destination file.
        """
        if url is None:
            url = sys.argv[1]
        urlretrieve(url, local_path)
        print("Done")


    def regulation():
        """Print what a handful of sample patterns match (regex scratchpad)."""
        # NOTE(review): the `d` in these samples may originally have been `\d`;
        # backslashes were lost from other patterns in this file. Left exactly as
        # found -- confirm against the author's original before changing.
        str1 = "abc123*GBK1024abc*defb1kc12*addd"
        samples = (
            r'abc',
            r'a.c',
            r'abc*',
            r'[abc]12',
            r'd*',
            r'a[^d]',
            r'a[^d]*',
            r'[a-zA-Z]+(d+)',
        )
        for sample in samples:
            print(re.compile(sample).findall(str1))

        # Single-quoted so the double quotes of the HTML attributes need no escapes.
        str2 = 'dadfae ef <img atl="500" src="www.qq.com/1.jpg" width="700"> asdfe aa<ima'
        urls = re.compile(r'<img .+ src="(.+)" .+>').findall(str2)
        print(urls)
        for url in urls:
            print(url)


    if __name__ == "__main__":
        if len(sys.argv) < 2:
            sys.exit("usage: python %s <page-url>" % sys.argv[0])
        # Alternative entry points: download_pic_2(), regulation(), getAllUrl()
        download_pic()

    
    


    #######后续

    后面改动了代码,使用beautifulsoup,能够更大范围的下载图片

    代码如下: http://www.30daydo.com/article/56


  • 相关阅读:
    Android两级嵌套ListView滑动问题的解决
    关于Android适配华为等带有底部虚拟按键的解决方案
    更加成熟的领导力素质和学习敏感度
    完美解决ScollView内嵌ListView的问题
    Gson序列化问题导致的内存溢出,tip:Background sticky concurrent mark sweep GC freed
    从功能实现到实现设计
    iOS面试题汇总
    2017计算机专业大学排名
    Buffer.from()
    Buffer.isBuffer()详解
  • 原文地址:https://www.cnblogs.com/llguanli/p/7027114.html
Copyright © 2020-2023  润新知