• Day32


    1、爬虫

    import re
    from urllib.request import urlopen
    
    def getPage(url):
        """Fetch ``url`` and return the response body decoded as UTF-8 text.

        Args:
            url: Address to open; passed unchanged to ``urlopen``.

        Returns:
            The complete response body as a ``str``.

        Raises:
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        # The context manager closes the response even when read/decode raises,
        # so the underlying connection is never leaked.
        with urlopen(url) as response:
            return response.read().decode('utf-8')
    
    def parsePage(s):
        """Yield one dict per movie entry matched in the HTML text ``s``.

        An entry starts at ``<div class="item">`` and must contain, in order,
        a ``<div class="pic">`` with an ``<em ...>`` holding a number, a
        ``<span class="title">``, a ``<span class="rating_num" ...>`` and a
        bare ``<span>`` whose text ends with "评价".

        Args:
            s: HTML source of a listing page.

        Yields:
            Dicts with the string-valued keys ``"id"``, ``"title"``,
            ``"rating_num"`` and ``"comment_num"``. ``comment_num`` is whatever
            text precedes "评价" inside the span; it is not converted to a number.
        """
        # Raw strings keep ``\d`` intact: the id group must match digits, not
        # the literal letter "d". re.S lets ``.`` span the newlines between tags.
        pattern = re.compile(
            r'<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>\d+).*?'
            r'<span class="title">(?P<title>.*?)</span>'
            r'.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>'
            r'.*?<span>(?P<comment_num>.*?)评价</span>',
            re.S,
        )

        for match in pattern.finditer(s):
            yield {
                "id": match.group("id"),
                "title": match.group("title"),
                "rating_num": match.group("rating_num"),
                "comment_num": match.group("comment_num"),
            }
    
    def main(num):
        """Fetch one listing page and append each parsed record to a file.

        Each record yielded by ``parsePage`` is printed and then written as
        its ``str()`` form on its own line, appended to the file
        ``move_info7`` in the current working directory.

        Args:
            num: Value substituted into the ``start`` query parameter of the
                listing URL.
        """
        url = 'https://movie.douban.com/top250?start=%s&filter=' % num
        response_html = getPage(url)
        # ``with`` guarantees the file is closed even if parsing or writing
        # raises part-way through the page.
        with open("move_info7", "a", encoding="utf8") as f:
            for obj in parsePage(response_html):
                print(obj)
                f.write(str(obj) + "\n")
    
    if __name__ == '__main__':
        # Call main with the start offsets 0, 25, ..., 225 (ten calls in total).
        for offset in range(0, 250, 25):
            main(offset)
    View Code
  • 相关阅读:
    char类型到底是有符号还是无符号
    GNU C编译器的gnu11和c11
    kotlin之包
    mysql 各个版本驱动jar包
    网络优化
    Android 布局优化
    Android之MVVM开发模式
    Android蓝牙开发技术学习总结
    Android 电量优化
    Android中图片优化
  • 原文地址:https://www.cnblogs.com/a352735549/p/8981721.html
Copyright © 2020-2023  润新知