• 爬取豆瓣,写入数据库


    import pymysql
    import requests
    from bs4 import BeautifulSoup
    baseUrl = "https://movie.douban.com/top250?start=%d&filter="

    def get_movies(start):
        """Fetch one page (25 entries) of the Douban Top-250 movie list.

        start: offset into the list (0, 25, 50, ... 225).
        Returns a list of dicts with keys: rank, link, poster, name,
        score, quote (quote is "" when the entry has no one-liner).
        """
        url = baseUrl % start
        lists = []
        html = requests.get(url)
        soup = BeautifulSoup(html.content, "html.parser")
        items = soup.find("ol", "grid_view").find_all("li")
        # BUG FIX: in the original, this loop and the return statement were
        # dedented out of the function body, which is a SyntaxError
        # ("return" outside function). They belong inside get_movies.
        for i in items:
            movie = {}
            movie["rank"] = i.find("em").text
            movie["link"] = i.find("div", "pic").find("a").get("href")
            movie["poster"] = i.find("div", "pic").find("a").find("img").get("src")
            movie["name"] = i.find("span", "title").text
            movie["score"] = i.find("span", "rating_num").text
            # Not every movie has a quote; avoid calling find() twice.
            quote_tag = i.find("span", "inq")
            movie["quote"] = quote_tag.text if quote_tag else ""
            lists.append(movie)
        return lists
    
    if __name__ == "__main__":
        # Scrape all 250 movies (10 pages of 25) and persist them to MySQL.
        # NOTE(review): host/credentials are hard-coded — move to env/config.
        db = pymysql.connect(host="192.168.1.210", port=3306, user="root",
                             password="ubuntu", db="mysql", charset="utf8mb4")
        cursor = db.cursor()
        cursor.execute("DROP TABLE IF EXISTS movies")
        # BUG FIX: `rank` is a reserved word in MySQL 8.0+, so the unquoted
        # identifier broke both CREATE TABLE and INSERT there — backtick-quote
        # the column names. Also use utf8mb4 to match the connection charset
        # (legacy "utf8" is 3-byte and truncates on emoji/rare CJK), and widen
        # the columns so real Douban titles/links/quotes are not rejected.
        createTab = """CREATE TABLE movies(
            id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
            `name` VARCHAR(100) NOT NULL,
            `rank` VARCHAR(4) NOT NULL,
            `link` VARCHAR(100) NOT NULL,
            `poster` VARCHAR(255) NOT NULL,
            `score` VARCHAR(4) NOT NULL,
            `quote` VARCHAR(255)
        ) character set = utf8mb4"""
        cursor.execute(createTab)
        # Build the (parameterized) INSERT once instead of per row.
        sql = ("INSERT INTO movies(`name`,`rank`,`link`,`poster`,`score`,`quote`) "
               "VALUES(%s,%s,%s,%s,%s,%s)")
        start = 0
        while start < 250:
            lists = get_movies(start)
            # BUG FIX: in the original, this for loop, the success print and
            # the except clause were dedented out of their blocks, which made
            # the script a SyntaxError.
            for i in lists:
                try:
                    cursor.execute(sql, (i["name"], i["rank"], i["link"],
                                         i["poster"], i["score"], i["quote"]))
                    db.commit()
                    print(i["name"] + " is success")
                except Exception as e:
                    # Roll back only the failed row; report the error instead
                    # of silently swallowing it with a bare except.
                    db.rollback()
                    print("insert failed: %s" % e)
            start += 25
        db.close()
    

      

  • 相关阅读:
    LeetCode 39. Combination Sum
    LeetCode 37. Sudoku Solver
    LeetCode 36. Valid Sudoku
    LeetCode 34. Search for a Range
    LeetCode 33. Search in Rotated Sorted Array
    VS2010出现灾难性错误的解决办法
    双系统下利用MbrFix.exe卸载LINUX系统
    VS 与 SQLite数据库 连接
    人月神话阅读笔记2
    关于疫情数据分析web开发2-网页爬取实现
  • 原文地址:https://www.cnblogs.com/peterinblog/p/7182466.html
Copyright © 2020-2023  润新知