  • Python获取网页数据并创建文件夹保存(基于Python 3.6)


    from urllib.parse import urljoin
    import urllib.request
    from bs4 import BeautifulSoup
    import os
    import datetime
    import re
    import errno
    
    def mkdir_p(path):
        """Create directory ``path`` together with any missing parent directories.

        Calling this for a directory that already exists is a no-op.

        Args:
            path: Directory path to create.

        Raises:
            OSError: If the directory cannot be created, including the case
                where ``path`` already exists but is not a directory.
        """
        # exist_ok=True tolerates an existing directory, which is what the
        # manual errno.EEXIST / os.path.isdir check used to implement by hand.
        os.makedirs(path, exist_ok=True)
    
    def get_link(page):
        """Collect the ``href`` of every ``<a>`` found inside the ``<td>`` cells of ``page``.

        Args:
            page: Parsed document exposing ``find_all`` (the caller in this file
                passes a BeautifulSoup object).

        Returns:
            A list with one entry per anchor, in the order the anchors are
            encountered. Each entry is whatever ``anchor.get('href')`` returns;
            no filtering is applied.
        """
        return [
            anchor.get('href')
            for cell in page.find_all('td')
            for anchor in cell.select("a")
        ]
    
    def gain(url):
        """Download the page at ``url`` and return the link targets in its table cells.

        Args:
            url: Address of the page to fetch.

        Returns:
            The list produced by ``get_link`` for the parsed page.
        """
        # The context manager closes the HTTP response as soon as the body has
        # been read, instead of leaving the connection open until garbage collection.
        with urllib.request.urlopen(url) as response:
            html = response.read()
        soup = BeautifulSoup(html, 'lxml')  # parse the page with the lxml backend
        return get_link(soup)  # href values of the <a> tags inside <td> cells
    def main():
        """Download one ``.js`` data file per link found on the China country page.

        For every href returned by ``gain`` the third ``/``-separated piece
        (index 2) is used as the file stem. The file is fetched from
        ``https://www.tide-forecast.com/tides/<stem>.js`` and stored under
        ``D:\\TideData\\China``. Files that already exist locally are not
        downloaded again. The elapsed time of each download is printed.
        """
        url = 'https://www.tide-forecast.com/countries/China'
        url_tide = 'https://www.tide-forecast.com/tides/'
        # Raw string: the plain literal depended on the invalid escape
        # sequences "\T" and "\C". The resulting value is unchanged.
        out_dir = r'D:\TideData\China'
        mkdir_p(out_dir)  # loop-invariant, so create the target folder once

        for href in gain(url):
            # Anchors without an href, or hrefs with fewer than three pieces,
            # cannot name a data file; skip them instead of aborting the run.
            pieces = href.split("/") if href else []
            if len(pieces) < 3 or not pieces[2]:
                continue

            file_name = pieces[2] + '.js'
            connet = urljoin(url_tide, file_name)  # full download URL
            target = os.path.join(out_dir, file_name)  # absolute output path
            print(connet)

            # Check the file itself; testing the directory path with isfile()
            # could never succeed, so nothing was ever skipped.
            if os.path.isfile(target):
                print('文件已存在')
                continue

            start = datetime.datetime.now().replace(microsecond=0)  # timing start
            with urllib.request.urlopen(connet) as wp:  # fetch the data file
                content = wp.read()
            with open(target, "wb") as fp:  # closed even if the write fails
                fp.write(content)
            end = datetime.datetime.now().replace(microsecond=0)
            print("用时: ", end='')
            print(end - start)


    if __name__ == '__main__':
        main()

    来源于:https://www.cnblogs.com/setname/p/7453778.html

  • 相关阅读:
    【实验吧】CTF_Web_登录一下好吗?
    各种常用数字格式化
    .Net 4.0 (2)
    springBoot+springSecurity 数据库动态管理用户、角色、权限
    springboot+mybatis+SpringSecurity 实现用户角色数据库管理
    Spring boot中Redis的使用
    spring data jpa的使用
    如何优雅的使用mybatis
    WebJars
    mvn打包的POm文件
  • 原文地址:https://www.cnblogs.com/hankleo/p/10649952.html
Copyright © 2020-2023  润新知