• Python 代码段: 爬取网页图片地址


    import requests
    import re
    import pymysql
    
    # Open the MySQL connection and the single cursor shared by the whole script.
    _connection_settings = dict(
        host='127.0.0.1',
        port=3306,
        db='db',
        user='root',
        passwd='root',
        charset='utf8',
    )
    db = pymysql.connect(**_connection_settings)
    cursor = db.cursor()
    # Manual sanity check, kept disabled:
    # cursor.execute('select * from table1')
    # print(cursor.fetchall())
    
    # Matches one image tag's lazy-load URL and its alt text.
    # re.S lets "." match newlines, so the two attributes may sit on
    # different lines of the markup.
    _IMAGE_PATTERN = re.compile(r'data-original="(.*?)".*?alt="(.*?)"', re.S)

    # Parameterized statement: the driver escapes the values, so titles that
    # contain quotes neither break the query nor inject SQL.
    _INSERT_IMAGE_SQL = "INSERT INTO tablea(`name`,`url`) VALUES (%s, %s)"


    def _parse_image_entries(html):
        """Return a list of ``(url, title)`` tuples found in *html*."""
        return _IMAGE_PATTERN.findall(html)


    def getImagesList(page=1):
        """Scrape one listing page and store each image's title and URL.

        Downloads ``http://www.abc.com/photo/list/?page=<page>``, extracts every
        ``data-original`` / ``alt`` attribute pair and inserts one row per image
        into ``tablea`` using the module-level ``cursor``; each row is committed
        on the module-level ``db`` connection right after it is inserted.

        Args:
            page: Page number substituted into the listing URL.

        Raises:
            requests.exceptions.Timeout: if the server does not answer within
                the timeout passed to ``requests.get``.
        """
        url = "http://www.abc.com/photo/list/?page={}".format(page)
        # Without a timeout a single stalled connection blocks the crawl forever.
        html = requests.get(url, timeout=10).text

        for image_url, image_title in _parse_image_entries(html):
            cursor.execute(_INSERT_IMAGE_SQL, (image_title, image_url))
            print('saving')
            db.commit()
    
    # Crawl listing pages 1 through 100, then release the database resources
    # even if one of the pages raises.
    try:
        for page_number in range(1, 101):
            getImagesList(page_number)
    finally:
        cursor.close()
        db.close()
  • 相关阅读:
    Tree MapByFold
    Tree DepthByFold
    Tree SizeByFold
    Tree MaximumByFold
    Tree Fold
    Tree Map
    Tree Depth
    Tree Maximum
    Tree Size
    Tree
  • 原文地址:https://www.cnblogs.com/freeliver54/p/12323792.html
Copyright © 2020-2023  润新知