• 爬取豆瓣电影 Top 250,并写入 MySQL 数据库


    import pymysql
    import requests
    from bs4 import BeautifulSoup
    baseUrl = "https://movie.douban.com/top250?start=%d&filter="

    # Headers sent with every page request.
    # NOTE(review): Douban reportedly rejects the default python-requests
    # User-Agent; confirm and adjust this value if requests are refused.
    _REQUEST_HEADERS = {
        "User-Agent": (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
        ),
    }


    def get_movies(start):
        """Fetch one page of the Douban Top 250 list and parse its entries.

        Args:
            start: Offset substituted into ``baseUrl`` as the ``start`` query
                parameter (formatted with ``%d``).

        Returns:
            A list of dicts, one per ``<li>`` inside ``ol.grid_view``, each with
            the string keys ``rank``, ``link``, ``poster``, ``name``, ``score``
            and ``quote``. ``quote`` is ``""`` when the entry has no
            ``span.inq`` element.

        Raises:
            requests.RequestException: If the request fails, times out, or the
                server answers with an HTTP error status.
            ValueError: If the page does not contain the ``ol.grid_view`` list.
        """
        url = baseUrl % start
        # A timeout keeps the scraper from hanging forever on a stalled socket.
        response = requests.get(url, headers=_REQUEST_HEADERS, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        grid = soup.find("ol", "grid_view")
        if grid is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("movie list not found in page: " + url)

        movies = []
        for item in grid.find_all("li"):
            # The poster anchor carries both the detail link and the image.
            pic_anchor = item.find("div", "pic").find("a")
            quote_tag = item.find("span", "inq")
            movies.append({
                "rank": item.find("em").text,
                "link": pic_anchor.get("href"),
                "poster": pic_anchor.find("img").get("src"),
                "name": item.find("span", "title").text,
                "score": item.find("span", "rating_num").text,
                "quote": quote_tag.text if quote_tag else "",
            })
        return movies
    
    if __name__ == "__main__":
        # Scrape every page of the Top 250 list (offsets 0, 25, ..., 225) and
        # store each movie as one row of a freshly re-created `movies` table.
        #
        # NOTE(review): the credentials are hard-coded and the target schema is
        # "mysql"; consider moving both to configuration.
        db = pymysql.connect(host="192.168.1.210",port=3306,user="root",password="ubuntu",db="mysql",charset="utf8mb4")
        try:
            with db.cursor() as cursor:
                cursor.execute("DROP TABLE IF EXISTS movies")
                # `rank` is back-quoted because RANK is a reserved word in
                # MySQL 8.0.2+; unquoted it makes this statement fail there.
                createTab = """CREATE TABLE movies(
                    id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
                    name VARCHAR(20) NOT NULL,
                    `rank` VARCHAR(4) NOT NULL,
                    link VARCHAR(50) NOT NULL,
                    poster VARCHAR(100) NOT NULL,
                    score VARCHAR(4) NOT NULL,
                    quote VARCHAR(50)
                ) character set = utf8"""
                cursor.execute(createTab)

                # Parameterized insert; the statement is the same for every row.
                sql = "INSERT INTO movies(name,`rank`,link,poster,score,quote) VALUES(%s,%s,%s,%s,%s,%s)"
                for start in range(0, 250, 25):
                    for movie in get_movies(start):
                        try:
                            cursor.execute(sql, (movie["name"], movie["rank"], movie["link"], movie["poster"], movie["score"], movie["quote"]))
                            db.commit()
                        except pymysql.MySQLError as err:
                            # Best effort: undo the failed row, report it, and
                            # carry on with the remaining movies.
                            db.rollback()
                            print(movie["name"] + " failed: " + str(err))
                        else:
                            print(movie["name"]+" is success")
        finally:
            # Release the connection even if scraping or a statement raised.
            db.close()
    

      

  • 相关阅读:
    火狐黑客插件
    使用POI对EXCEL 读入写出
    使用spring quartz实现定时任务
    toad for oracle 快捷键总结
    Oracle查询性能优化
    2.C语言中文网学习Python
    1.编程基础(C语言中文网)
    一键打开ASP.NET WEB网站项目
    解决VS2010无法添加Sql Server数据库的问题
    VS2010 的一个小Bug(已报告给Microsoft Connect并得到确认)
  • 原文地址:https://www.cnblogs.com/peterinblog/p/7182466.html
Copyright © 2020-2023  润新知