• Basic asyncio usage


    import asyncio
    
    
    async def func(i, s):
        print(f'Coroutine {i} started')
        await asyncio.sleep(s)
    
    
    async def main():
        tasks = []
        for i in range(100):
            # Wrap each coroutine in a Task; asyncio.wait rejects bare coroutines on Python 3.11+
            tasks.append(asyncio.create_task(func(i, 2)))

        await asyncio.wait(tasks)
    
    
    if __name__ == '__main__':
        # event_loop = asyncio.get_event_loop()
        # event_loop.run_until_complete(main())
        # The two lines above are equivalent to the single line below
        asyncio.run(main())
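
A close relative of asyncio.wait is asyncio.gather, which also runs the coroutines concurrently but additionally collects their return values in submission order. A minimal sketch (the square_after function, its return value, and the 0.1 s delay are illustrative, not part of the original example):

    import asyncio


    async def square_after(i, s):
        await asyncio.sleep(s)
        return i * i  # gather preserves each coroutine's return value


    async def main():
        # gather accepts bare coroutines and returns results in submission order
        results = await asyncio.gather(*(square_after(i, 0.1) for i in range(10)))
        print(results)  # [0, 1, 4, 9, ...]


    if __name__ == '__main__':
        asyncio.run(main())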

• asyncio crawler

    import os
    
    import asyncio
    import aiohttp
    import aiofiles
    
    from lxml import etree
    
    
    async def details(url, path):
        async with aiohttp.ClientSession() as request:
            async with request.get(url, verify_ssl=False) as response:
                detail = await response.text(encoding='utf8')
                tree = etree.HTML(detail)
                # Extract the chapter title (strip '/' so it is a valid filename)
                chaptertitle = tree.xpath('.//div[contains(@class,"chaptertitle")]/h1/text()')[0].replace('/', '')
                # Join the paragraph text and strip the full-width indentation spaces (U+3000)
                BookText = '\n'.join(tree.xpath('.//div[@id="BookText"]/text()')).replace('\u3000\u3000', '')
                txt_path = os.path.join(path, f'{chaptertitle}.txt')
                async with aiofiles.open(txt_path, 'w', encoding='utf8') as f:
                    await f.write(BookText)
                print(chaptertitle, url, 'downloaded')
    
    
    async def home():
        """
        Fetch the home page and schedule a download task for every chapter link
        :return:
        """
        url = "https://www.zanghaihua.org/guichuideng/"
        task_lst = []
        async with aiohttp.ClientSession() as request:
            async with request.get(url, verify_ssl=False) as response:
                html = await response.text(encoding='utf8')
                tree = etree.HTML(html)
                booklist = tree.xpath('.//div[contains(@class,"booklist")]/span')
                dir_path = None
                for book in booklist:
                    if 'v' in book.xpath('@class'):
                        # class "v" marks a volume title
                        title = book.xpath('./a/text()')[0]
                        dir_path = os.path.join(os.path.abspath('.'), 'data', title)
                        os.makedirs(dir_path, exist_ok=True)
                    else:
                        # otherwise it is just a chapter link
                        href = book.xpath('./a/@href')[0]
                        if dir_path:
                            # Wrap in a Task; asyncio.wait rejects bare coroutines on Python 3.11+
                            task_lst.append(asyncio.create_task(details(href, dir_path)))
                await asyncio.wait(task_lst)
    
    
    async def main():
        await home()
    
    
    if __name__ == '__main__':
        import time
    
        start = time.time()
        asyncio.run(main())
        print(time.time() - start)
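
Firing one request per chapter all at once can overwhelm the target site or exhaust connection limits. A common refinement (not in the original post) is to cap concurrency with an asyncio.Semaphore; the wrapper below is a hedged sketch, with limited_details and sem as hypothetical names and details referring to the coroutine defined above:

    import asyncio


    async def limited_details(sem, url, path):
        # Only a fixed number of downloads run at once; the rest wait here
        async with sem:
            await details(url, path)


    # Inside home(), one would create the semaphore and wrap each task, e.g.:
    #     sem = asyncio.Semaphore(10)
    #     task_lst.append(asyncio.create_task(limited_details(sem, href, dir_path)))
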
  • Original post: https://www.cnblogs.com/wtil/p/15023262.html