• 爬取豆瓣电影 Top250,并写入 MySQL 数据库


    import pymysql
    import requests
    from bs4 import BeautifulSoup
    # URL template for one page of the Douban Top250 list; %d is the zero-based
    # offset of the first movie on the page.
    baseUrl = "https://movie.douban.com/top250?start=%d&filter="


    def get_movies(start):
        """Fetch one page of the Douban Top250 list and parse its movies.

        Args:
            start: Offset substituted into ``baseUrl`` (the caller steps it
                by 25 per page).

        Returns:
            A list of dicts, one per ``<li>`` in the page's
            ``<ol class="grid_view">``, with the string keys ``rank``,
            ``link``, ``poster``, ``name``, ``score`` and ``quote``
            (``quote`` is ``""`` when the movie has no ``span.inq``).

        Raises:
            requests.RequestException: On network failure, timeout, or a
                non-2xx HTTP status.
            ValueError: If the page does not contain the movie list.
        """
        url = baseUrl % start
        # NOTE(review): Douban is reported to reject the default
        # python-requests User-Agent (HTTP 418) -- confirm; a browser-like
        # value is sent so the request is not refused outright.
        headers = {"User-Agent": "Mozilla/5.0"}
        # A timeout keeps a stalled connection from hanging the whole crawl.
        html = requests.get(url, headers=headers, timeout=10)
        # Fail loudly on an error page instead of trying to parse it.
        html.raise_for_status()
        soup = BeautifulSoup(html.content, "html.parser")
        grid = soup.find("ol", "grid_view")
        if grid is None:
            raise ValueError("movie list <ol class='grid_view'> not found in " + url)

        lists = []
        for i in grid.find_all("li"):
            # The poster <a> carries both the detail link and the <img>.
            anchor = i.find("div", "pic").find("a")
            inq = i.find("span", "inq")
            movie = {}
            movie["rank"] = i.find("em").text
            movie["link"] = anchor.get("href")
            movie["poster"] = anchor.find('img').get("src")
            movie["name"] = i.find("span", "title").text
            movie["score"] = i.find("span", "rating_num").text
            movie["quote"] = inq.text if inq else ""
            lists.append(movie)
        return lists
    
    if __name__ == "__main__":
        # Script entry point: recreate the `movies` table, then crawl the ten
        # Top250 pages (offsets 0, 25, ..., 225) and insert one row per movie.
        #
        # NOTE(review): host and root credentials are hard-coded, and the table
        # is created inside the `mysql` system schema -- consider moving both
        # to configuration / a dedicated database.
        db = pymysql.connect(host="192.168.1.210",port=3306,user="root",password="ubuntu",db="mysql",charset="utf8mb4")
        try:
            with db.cursor() as cursor:
                cursor.execute("DROP TABLE IF EXISTS movies")
                # `rank` is backtick-quoted because RANK is a reserved word in
                # MySQL 8.0+. The table charset matches the connection's
                # utf8mb4 so 4-byte characters are not rejected on insert.
                createTab = """CREATE TABLE movies(
                    id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
                    name VARCHAR(20) NOT NULL,
                    `rank` VARCHAR(4) NOT NULL,
                    link VARCHAR(50) NOT NULL,
                    poster VARCHAR(100) NOT NULL,
                    score VARCHAR(4) NOT NULL,
                    quote VARCHAR(50)
                ) character set = utf8mb4"""
                cursor.execute(createTab)
                # Parameterized statement; the driver escapes the values.
                sql = "INSERT INTO movies(name,`rank`,link,poster,score,quote) VALUES(%s,%s,%s,%s,%s,%s)"
                for start in range(0, 250, 25):
                    for i in get_movies(start):
                        try:
                            cursor.execute(sql, (i["name"], i["rank"], i["link"], i["poster"], i["score"], i["quote"]))
                            db.commit()
                        except pymysql.MySQLError as exc:
                            # Best effort: skip the failing row but report why,
                            # instead of swallowing every exception silently.
                            db.rollback()
                            print(i["name"] + " failed: " + str(exc))
                        else:
                            print(i["name"]+" is success")
        finally:
            # Release the connection even if crawling or the DDL raised.
            db.close()
    

      

  • 相关阅读:
    模糊查询(排除%等通配符并支持不连续关键字查询)
    ideal中运行manven常用操作
    ideal项目启动及问题
    FastJSON 转换List<T> ,Map<T,T>泛型失败 处理方法
    MySQL的SELECT ...for update
    CouchDB客户端开发—Java版
    Spring Data JPA 实例查询
    第一章 计算机网络概述
    第二章 物理层(二)
    Java常考面试题(一)
  • 原文地址:https://www.cnblogs.com/peterinblog/p/7182466.html
Copyright © 2020-2023  润新知