• 3.15学习总结(Python爬取网站数据并存入数据库)


    在官网上下载了Python和PyCharm,并在网上简单地学习了爬虫的相关知识。

    结对开发的第一阶段要求:

    1. 从网上爬取最新疫情数据,并存入到MySQL数据库中
    2. 以可视化方式显示数据的详细信息

    项目代码

    import requests
    from bs4 import BeautifulSoup
    import json
    import time
    from pymysql import *
    
    def mes():
        """Scrape the DXY epidemic page and store one snapshot in the ``info`` table.

        Downloads the page, pulls the JSON array assigned to
        ``window.getAreaStat`` out of the ``<script id="getAreaStat">`` tag,
        turns every province and every city into an 8-field row and passes both
        row lists to ``insert``.

        Row layout: ``(id, timestamp, province, city, confirmed, suspected,
        cured, dead)``; province rows carry ``None`` in the city slot.

        Raises:
            ValueError: if the ``getAreaStat`` script tag is missing or does not
                contain a JSON array.
            RuntimeError: if ``total()`` could not report the current row count.

        Network and HTTP errors raised by ``requests`` propagate to the caller.
        """
        url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'  # request URL
        headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36 SLBrowser/6.0.1.6181'}  # browser-like request headers
        # A timeout keeps the script from hanging forever on a stalled connection.
        resp = requests.get(url, headers=headers, timeout=30)
        resp.raise_for_status()
        content = resp.content.decode('utf-8')
        soup = BeautifulSoup(content, 'html.parser')

        script = soup.find(name='script', attrs={"id": "getAreaStat"})
        script_text = script.string if script is not None else None
        if not script_text:
            raise ValueError('getAreaStat script tag not found in page')
        # The tag body looks like "try { window.getAreaStat = [...]}catch(e){}";
        # the payload is everything from the first "[" to the last "]".
        start = script_text.find('[')
        end = script_text.rfind(']')
        if start == -1 or end < start:
            raise ValueError('getAreaStat script does not contain a JSON array')
        messages_json = json.loads(script_text[start:end + 1])
        print(messages_json)

        times = time.strftime('%Y-%m-%d %H:%M:%S')
        print(times)

        lenth = total()
        if lenth is None:
            # total() reports failure by returning None; fail with a clear message
            # instead of a TypeError in the arithmetic below.
            raise RuntimeError('could not read the current row count of table info')
        # City ids start after the rows already stored plus today's province rows.
        con = len(messages_json) + lenth

        provinceList = []
        cityList = []
        for item in messages_json:
            lenth += 1
            provinceName = item['provinceName']
            provinceList.append((
                lenth, times, provinceName, None,
                item['confirmedCount'], item['suspectedCount'],
                item['curedCount'], item['deadCount'],
            ))
            for i in item['cities']:
                con += 1
                cityList.append((
                    con, times, provinceName, i['cityName'],
                    i['confirmedCount'],
                    # Use the city's own figure; fall back to the province value
                    # only when the city entry does not carry one.
                    i.get('suspectedCount', item['suspectedCount']),
                    i['curedCount'], i['deadCount'],
                ))
        insert(provinceList, cityList)
    
    
    def insert(provinceList, cityList):
        """Bulk-insert province rows and city rows into the ``info`` table.

        Each batch runs as its own transaction on the module-level ``db``
        connection: it is committed on success, and on failure the error is
        printed and the transaction rolled back. A failure in one batch does not
        prevent the other batch from being attempted.

        Args:
            provinceList: iterable of 8-field row tuples for provinces.
            cityList: iterable of 8-field row tuples for cities.
        """
        batches = (tuple(provinceList), tuple(cityList))
        sql = "insert into info values (%s,%s,%s,%s,%s,%s,%s,%s) "
        cursor = db.cursor()
        try:
            for rows in batches:
                try:
                    cursor.executemany(sql, rows)
                    db.commit()
                    # Report success only after the commit has gone through.
                    print("插入成功")
                except Exception as e:
                    print(e)
                    db.rollback()
        finally:
            # Release the cursor even if rollback itself raises.
            cursor.close()
    def total():
        """Return the number of rows currently stored in the ``info`` table.

        Uses the module-level ``db`` connection.

        Returns:
            The row count as an int, or ``None`` if the query failed (the failure
            is printed and the transaction is rolled back).
        """
        # Let the server count instead of transferring every row to the client.
        sql = "select count(*) from info"
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            # Assumes a tuple-returning cursor, which is what a connection made
            # without a custom cursor class provides.
            lenth = cursor.fetchone()[0]
            db.commit()
            return lenth
        except Exception as e:
            print('执行失败,进入回调1')
            print(e)
            db.rollback()
            return None
        finally:
            cursor.close()
    
    # 连接数据库的方法
    def connectDB(host='localhost', port=3306, user='root', password='156132',
                  db='world', charset='utf8'):
        """Open a MySQL connection and return it.

        Args:
            host: database server host name.
            port: database server TCP port.
            user: account name used to log in.
            password: password for ``user``.
            db: name of the database (schema) to select.
            charset: connection character set.

        Returns:
            The open connection, or ``None`` if connecting failed (the error is
            printed).
        """
        # NOTE(review): the credentials are hard-coded defaults; consider loading
        # them from configuration or the environment instead.
        try:
            conn = connect(host=host, port=port, user=user, password=password,
                           db=db, charset=charset)
        except Exception as e:
            print(e)
            # Python's "no value" is None; NULL is not a Python builtin.
            return None
        print("数据库连接成功")
        return conn
    if __name__ == '__main__':
        # `db` must stay a module-level name: mes(), insert() and total() read it.
        db = connectDB()
        try:
            mes()
        finally:
            # Close the connection when the run ends, whether or not it succeeded.
            # connectDB() may hand back a non-connection value on failure, so only
            # call close() when the object actually provides it.
            _close = getattr(db, 'close', None)
            if callable(_close):
                _close()

    效果展示

    想询问代码的同学,请访问博客园地址:https://www.cnblogs.com/Arisf/

    两位学长有详细讲解:https://www.cnblogs.com/studya/p/13062641.html    https://www.cnblogs.com/dazhi151/p/12461830.html

  • 相关阅读:
    根据 谷歌官网写一个 该浏览器上的扩展
    深度学习
    华为在线学习
    pandas 常规操作大全
    猎奇有些意思的
    LINUX 必知必会检测表--通读无关语言
    自动化部署三剑客 gitlab + ansible + jenkins
    python 框架
    delphi 多线程之System.TMonitor
    函数和对象 及 prototype和__proto__
  • 原文地址:https://www.cnblogs.com/1305536110-dym/p/14540746.html
Copyright © 2020-2023  润新知