• Python 简单的动漫排名爬虫


    前两天刚看了两部动漫，想找找动漫排名，却发现网上的排名有点老了，于是自己简单写了一点儿代码，分别抓取 MyAnimeList、Bangumi 和 Anikore 三个网站的排名。代码非常简单，没有用多线程或多进程。

    
    import json
    
    from bs4 import BeautifulSoup
    import requests
    
    # Browser-like User-Agent sent with the scraping requests.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/99.0.4844.82 Safari/537.36'
        ),
    }
    # Local SOCKS5 proxy, identical for both URL schemes.
    # NOTE(review): nothing in the code shown here passes this mapping to a
    # request -- confirm whether it is still needed.
    proxies = dict.fromkeys(('http', 'https'), 'socks5://127.0.0.1:10808')
    
    
    def MAL(links: list[str], limit: int = 100) -> dict[str, str]:
        """Scrape anime titles and scores from MyAnimeList ranking pages.

        Args:
            links: Ranking page URLs, visited in the given order.
            limit: Maximum number of ranking entries to read (default 100).

        Returns:
            A dict mapping each title to its score text. Scraping stops as
            soon as ``limit`` entries have been read. The result is always a
            dict, even when the pages hold fewer than ``limit`` entries.
        """
        values = {}
        if limit <= 0:
            return values
        num = 0
        for link in links:
            # Send the shared browser User-Agent, as BGM and ANK do, and bound
            # the wait so a stalled connection cannot hang the script forever.
            res = requests.get(link, headers=headers, timeout=30)
            soup = BeautifulSoup(res.content, 'lxml')
            # find_all is the current name of the deprecated findAll alias.
            for item in soup.find_all(class_='ranking-list'):
                name = item.find(class_="anime_ranking_h3").text
                score = item.find(class_="score-label").text
                print(name, score)
                values[name] = score
                num += 1
                if num >= limit:  # only the first `limit` entries are wanted
                    return values
        # Fewer than `limit` entries were found: return what was collected
        # instead of implicitly returning None.
        return values
    
    
    def BGM(links: list[str], limit: int = 100) -> dict[str, str]:
        """Scrape anime titles and scores from Bangumi ranking pages.

        Args:
            links: Ranking page URLs, visited in the given order.
            limit: Maximum number of ranking entries to read (default 100).

        Returns:
            A dict mapping each title to its score text. Scraping stops as
            soon as ``limit`` entries have been read. The result is always a
            dict, even when the pages hold fewer than ``limit`` entries.
        """
        values = {}
        if limit <= 0:
            return values
        num = 0
        for link in links:
            # Bound the wait so a stalled connection cannot hang the script.
            res = requests.get(link, headers=headers, timeout=30)
            soup = BeautifulSoup(res.content, 'lxml')
            container = soup.find(class_='browserFull')
            if container is None:
                # No ranking list on this page (e.g. an error page): treat it
                # as empty rather than failing with AttributeError on None.
                continue
            # find_all is the current name of the deprecated findAll alias.
            for item in container.find_all('li'):
                name = item.find(class_="l").text
                score = item.find(class_="fade").text
                print(name, score)
                values[name] = score
                num += 1
                if num >= limit:  # only the first `limit` entries are wanted
                    return values
        # Fewer than `limit` entries were found: return what was collected
        # instead of implicitly returning None.
        return values
    
    
    def ANK(links: list[str], limit: int = 100) -> dict[str, str]:
        """Scrape anime titles and scores from Anikore ranking pages.

        Anikore only shows its ranking to logged-in users, so a session is
        used to log in first and keep the login for the page requests.

        Args:
            links: Ranking page URLs, visited in the given order.
            limit: Maximum number of ranking entries to read (default 100).

        Returns:
            A dict mapping each title to its score text. Scraping stops as
            soon as ``limit`` entries have been read. The result is always a
            dict, even when the pages hold fewer than ``limit`` entries.
        """
        values = {}
        if not links or limit <= 0:
            # Nothing to fetch, so do not bother logging in.
            return values
        num = 0
        # The context manager closes the session's connections on every exit
        # path, including the early return below.
        with requests.session() as session:
            session.post(url='https://www.anikore.jp/users/login/',
                         # Keys are the `name` attributes of the login form's
                         # user-name and password inputs; replace the values
                         # with your own account and password.
                         data={'data[User][email]': "your_username@qq.com",
                               'data[User][original_password]': 'your_password'},
                         headers=headers,
                         timeout=30)
            for link in links:
                # Bound the wait so a stalled connection cannot hang the script.
                res = session.get(link, headers=headers, timeout=30)
                soup = BeautifulSoup(res.content, 'lxml')
                # find_all is the current name of the deprecated findAll alias.
                for unit in soup.find_all(class_='l-searchPageRanking_unit'):
                    # Title and score are both looked up inside the unit's <h2>.
                    heading = unit.find('h2')
                    name = heading.find(class_="l-searchPageRanking_unit_title").text
                    score = heading.find(class_="l-searchPageRanking_unit_score").text
                    print(name, score)
                    values[name] = score
                    num += 1
                    if num >= limit:  # only the first `limit` entries are wanted
                        return values
        # Fewer than `limit` entries were found: return what was collected
        # instead of implicitly returning None.
        return values
    
    
    if __name__ == "__main__":
        # Build the page URLs for each site, then scrape the three rankings
        # (MyAnimeList, Bangumi, Anikore) in that order.
        mal_pages = ["https://myanimelist.net/topanime.php?limit=%s" % offset
                     for offset in range(0, 500, 50)]
        mal_ranking = MAL(mal_pages)

        bgm_pages = ['http://bangumi.tv/anime/browser?sort=rank&page=%s' % page
                     for page in range(1, 10)]
        bgm_ranking = BGM(bgm_pages)

        ank_pages = ['https://www.anikore.jp/pop_ranking/page:%s' % page
                     for page in range(1, 10)]
        ank_ranking = ANK(ank_pages)

        # Write each ranking to its own JSON file once all scraping is done.
        outputs = (
            ('mal.json', mal_ranking),
            ('bgm.json', bgm_ranking),
            ('ank.json', ank_ranking),
        )
        for filename, ranking in outputs:
            with open(filename, 'w', encoding='utf8') as out:
                # ensure_ascii=False writes non-ASCII text (titles in languages
                # other than English) as-is instead of as \u escapes.
                json.dump(ranking, out, indent=4, ensure_ascii=False)
    
    
  • 相关阅读:
    jQuery 简单滑动轮播图效果
    西工大:同学你好,回来挂科!
    【入门】产品经理零基础怎么入门?
    【考点】 HashMap,HashTable,CurrentHashMap,LinkedHashMap,TreeMap简述
    P图鬼才们集体上线!高校毕业照P图哪家强?
    【实战】怎样实现前端裁剪上传图片功能
    校招选产品经理岗?给你浇盆水
    战胜70%对手的校招开发岗简历是这个样子的
    两个人遇到熊,装死的和转身跑的,哪个能活下来
    第一份实习工作,我应该学到什么?
  • 原文地址:https://www.cnblogs.com/wztshine/p/16109909.html
Copyright © 2020-2023  润新知