• 爬取必应首页大图


    不废话,直接上代码

    # -*- coding: utf-8 -*-
    # @Author: Wang Hongbin
    # @Email:   wanghongbin@ngoos.org
    # @Date:   2018-03-16 14:19:27
    # @Last Modified by:   Wang Hongbin
    # @Last Modified time: 2018-03-28 16:26:07
    import requests 
    import re 
    import os
    import time #时间模块
    
    # Date prefix for saved file names (formatted like "2018-03-28_"); it is
    # computed once, when the module is loaded.
    local = time.strftime("%Y-%m-%d_")

    # Site root that the scraped, site-relative image path is appended to.
    baseUrl = "https://cn.bing.com"

    # Request headers carrying a desktop Chrome User-Agent string.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/64.0.3282.186 Safari/537.36'
        ),
    }
    
    def getImgUrl(url):
        """Fetch a page and return the site-relative path of its wallpaper.

        Args:
            url: Address of the page to download (the script passes
                ``baseUrl``).

        Returns:
            The first substring of the page text that looks like
            ``/az/hprichbg/rb/<name>.jpg``. The path is relative to the site
            root, so the caller prepends ``baseUrl`` before downloading it.

        Raises:
            IndexError: If the page contains no such path.
            requests.RequestException: If the request fails or times out.
        """
        # The dot before "jpg" is escaped so only a literal ".jpg" suffix ends
        # the match; unescaped, it accepted any character followed by "jpg".
        pattern = r"(/az/hprichbg/rb/.*?\.jpg)"
        # Send the module-level browser User-Agent and bound the wait so a
        # stalled connection cannot hang the script forever.
        response = requests.get(url, headers=headers, timeout=30)
        matches = re.findall(pattern, response.text, re.S)
        if not matches:
            # Same exception type as indexing an empty list, but with a
            # message that says what was actually missing.
            raise IndexError("no wallpaper path found in page: %s" % url)
        return matches[0]
    
    def getFilePath(filePath='C:/Users/Administrator/Pictures/MyDesktop/'):
        """Return the directory wallpapers are saved into, creating it if needed.

        Args:
            filePath: Target directory. Defaults to the original hard-coded
                Windows folder; pass another path (for example a folder under
                ``/var/www/html/biYinPic/images/`` on Linux) to store the
                images elsewhere.

        Returns:
            ``filePath`` unchanged, so the caller can append a file name to it.

        Raises:
            OSError: If the directory cannot be created.
        """
        # makedirs also creates missing parent folders, and exist_ok=True makes
        # the call safe when the directory is already there. The previous
        # exists()/mkdir() pair failed on a missing parent and could race with
        # another process creating the same folder.
        os.makedirs(filePath, exist_ok=True)
        return filePath
    
    def getImgName(url):
        """Build the local file name for a wallpaper from its site-relative path.

        The text between ``/az/hprichbg/rb/`` and the first underscore after it
        is taken as the picture's name. The result is that name prefixed with
        the module-level ``local`` date string and suffixed with ``.jpg``.

        Raises:
            IndexError: If ``url`` does not contain the expected pattern.
        """
        stems = re.findall(r"/az/hprichbg/rb/(.*?)_", url, flags=re.S)
        stem = stems[0]
        return "".join((local, stem, '.jpg'))
    
    
    def downloadByPic(url):
        """Download the wallpaper referenced by a page and save it as a JPEG.

        The wallpaper path is scraped from ``url`` with ``getImgUrl``, the
        local file name is derived with ``getImgName``, and the image is
        written into the directory returned by ``getFilePath``.

        Args:
            url: Page to scan for the wallpaper path (the script passes
                ``baseUrl``).

        Raises:
            IndexError: If no wallpaper path or name can be extracted.
            requests.HTTPError: If the image request returns an error status.
            requests.RequestException: If the image request fails or times out.
            OSError: If the target directory or file cannot be written.
        """
        imgUrl = getImgUrl(url)
        imgName = getImgName(imgUrl)
        filePath = getFilePath()
        fileName = os.path.join(filePath, imgName)

        # The scraped path is site-relative, so prepend the site root.
        picUrl = baseUrl + imgUrl
        response = requests.get(picUrl, headers=headers, timeout=30)
        # Fail before touching the disk: without this check an HTML error page
        # would be saved under a .jpg name.
        response.raise_for_status()

        # The context manager closes the file even if the write raises.
        with open(fileName, 'wb') as f:
            f.write(response.content)
    
    # reg3 = r'<div class="hplaCata"><div class="hplatt">(.*)</div><div class="hplats">(.*)</div><div id="hplaSnippet">(.*)</div><div class="hplaPvd">(.*)</div>'
    
    # Script entry point: download the wallpaper found on the Bing home page
    # and save it to the local picture folder.
    downloadByPic(baseUrl)
    # Reached only when the download above raised no exception.
    print('is ok!')
    

    爬取结果

    下图是七月份至今爬取到的图片。因为脚本是在 Windows 上执行的,电脑不开机的时候不会执行;代码放在 Linux 上执行也没问题,使用 crontab 配置一个定时任务就行了。

    https://cdn.jsdelivr.net/gh/WHBLeer/Gallery/img/20201124112720.png

  • 相关阅读:
    Mybatis分页插件PageHelper简单使用
    UUID
    JavaWeb初学者session的使用
    轻松理解AOP思想(面向切面编程)
    DOM操作中,getElementByXXXX 和 querySelector 的区别
    DOM操作中,遍历动态集合的注意事项。ex: elem.children
    微信官方团队放出了UI库,看来以后前端还要学WeChatUI了,哈哈
    jQuery中使用$.each()遍历后台响应的json字符串问题
    平衡树
    后缀数组原理浅析(转载自tqx)
  • 原文地址:https://www.cnblogs.com/sanlilin/p/14145163.html
Copyright © 2020-2023  润新知