• 360电影主页和详情页爬取并存入MySQL库、连表读取--lowbiprogrammer


    import requests,os,json
    from lxml import etree
    from pymysql import *
    class Movie(object):
        """Scrape 360kan movie listing and detail pages into MySQL.

        For every entry on a listing page the crawler saves the cover image
        to disk, inserts one row into table ``movie`` (title, ``zhuyan``,
        image file name) and then one row into table ``movies`` per detail
        block found on the entry's detail page, linked through ``t_id``.

        NOTE(review): the pinyin keys look like zhuyan = lead actors,
        daoyan = director, diqu = region -- confirm against the table schema.
        """

        def __init__(self):
            # Listing URL template; ``{}`` is filled with the page number.
            self.url = "https://www.360kan.com/dianying/list.php?year=all&area=all&act=all&cat=106&pageno={}"
            self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'}
            # Next page number to request; advanced by run().
            self.start = 0

        @staticmethod
        def _first(values, default=""):
            """Return the first element of *values*, or *default* when empty.

            XPath queries return lists; indexing ``[0]`` directly raises
            IndexError as soon as a page lacks the expected node.
            """
            return values[0] if values else default

        def get_data(self, url, timeout=10):
            """Fetch *url* with the crawler's headers and return the raw body.

            Args:
                url: Absolute URL to request.
                timeout: Seconds passed to ``requests.get`` so a stalled
                    server cannot block the crawl forever.

            Returns:
                The response body as bytes (``response.content``).
            """
            response = requests.get(url, headers=self.headers, timeout=timeout)
            return response.content

        def _parse_list_item(self, item):
            """Extract the basic record, detail URL and cover URLs of one entry.

            Returns:
                ``(record, info_url, poster_urls)``, or ``None`` when the
                element has no title or no link and therefore cannot be stored.
            """
            link = "./a[@class='js-tongjic']"
            title = self._first(
                item.xpath(link + "/div[@class='detail']/p/span/text()"), None)
            href = self._first(item.xpath(link + "/@href"), None)
            if title is None or href is None:
                return None
            record = {
                'title': title,
                'zhuyan': self._first(
                    item.xpath(link + "/div[@class='detail']/p[last()]/text()")),
            }
            poster_urls = item.xpath(
                link + "/div[@class='cover g-playicon']/img/@src")
            return record, "https://www.360kan.com" + href, poster_urls

        def _parse_detail(self, block, movie_id):
            """Build the ``movies`` row for one detail block of a movie page."""
            wrap = "./div[@class='g-clear item-wrap']"
            return {
                'daoyan': self._first(block.xpath(wrap + "/p[5]/a/text()")),
                'diqu': self._first(block.xpath(wrap + "/p[3]/text()")),
                'year': self._first(block.xpath(wrap + "/p[2]/text()")),
                'info': self._first(block.xpath(
                    "./div[@class='item-desc-wrap g-clear js-open-wrap']/p/text()")),
                't_id': str(movie_id),
            }

        def xml_data(self, data):
            """Parse one listing page and persist every movie found on it.

            Args:
                data: Raw HTML bytes of a listing page.

            Entries without a title or link are skipped, and detail rows are
            written only when the parent ``movie`` row was inserted.
            """
            if not data:
                return
            page = etree.HTML(data)
            # The HTML parser can yield None for an empty document.
            if page is None:
                return
            for item in page.xpath("//ul[@class='list g-clear']/*"):
                parsed = self._parse_list_item(item)
                if parsed is None:
                    continue
                record, info_url, poster_urls = parsed
                record['poto'] = self.write_poto(poster_urls)
                detail_bytes = self.get_data(info_url)
                movie_id = self.write_database(record)
                if movie_id is None:
                    # Parent insert failed; do not create orphan detail rows.
                    continue
                detail_page = etree.HTML(detail_bytes) if detail_bytes else None
                if detail_page is None:
                    continue
                for block in detail_page.xpath(
                        "//div[@class='top-info-detail g-clear']"):
                    self.info_write_database(
                        self._parse_detail(block, movie_id))

        def write_poto(self, data, path="d:/img/"):
            """Download every image URL in *data* into directory *path*.

            Args:
                data: Iterable of image URLs.
                path: Target directory, created when missing.

            Returns:
                File name of the last image written, or ``None`` when *data*
                is empty.
            """
            os.makedirs(path, exist_ok=True)
            potoname = None
            for image_url in data:
                content = self.get_data(image_url)
                # File name is the last path segment of the URL.
                potoname = image_url.split("/")[-1]
                with open(os.path.join(path, potoname), "wb") as f:
                    f.write(content)
            return potoname

        def database(self):
            """Open a new MySQL connection and return ``(cursor, connection)``.

            The caller owns both objects and is responsible for closing them.
            """
            conn = connect(host="127.0.0.1", port=3306, user="root",
                           password="mysql", database="lianxi",
                           charset="utf8mb4")
            cur = conn.cursor()
            return cur, conn

        def write_database(self, data):
            """Insert one row into table ``movie``.

            Args:
                data: Mapping with keys ``title``, ``zhuyan`` and ``poto``.

            Returns:
                The id of the new row (``cursor.lastrowid``), or ``None`` when
                the insert failed and was rolled back.
            """
            cur, conn = self.database()
            try:
                # Parameterized so quotes in scraped text cannot break or
                # inject into the statement.
                # NOTE(review): the leading 0 presumably targets an
                # auto-increment id column -- confirm against the schema.
                cur.execute(
                    "insert into movie values(0,%s,%s,%s)",
                    (data["title"], data["zhuyan"], data["poto"]),
                )
                new_id = cur.lastrowid
                conn.commit()
            except MySQLError as exc:
                conn.rollback()
                print("insert into movie failed: {}".format(exc))
                return None
            finally:
                cur.close()
                conn.close()
            return new_id

        def read_database(self,):
            """Print the first 10 joined ``movie``/``movies`` rows as JSON."""
            cur, conn = self.database()
            try:
                cur.execute("select * from movie inner join movies on movie.id=movies.t_id limit 10;")
                rows = cur.fetchall()
            finally:
                cur.close()
                conn.close()
            print(json.dumps(rows, ensure_ascii=False))

        def info_write_database(self, data1):
            """Insert one row into table ``movies``.

            Args:
                data1: Mapping with keys ``daoyan``, ``diqu``, ``year``,
                    ``info`` and ``t_id`` (id of the parent ``movie`` row).

            Database errors propagate to the caller; the connection is closed
            either way.
            """
            cur, conn = self.database()
            try:
                cur.execute(
                    "insert into movies values(0,%s,%s,%s,%s,%s)",
                    (data1['daoyan'], data1['diqu'], data1['year'],
                     data1['info'], data1['t_id']),
                )
                conn.commit()
            finally:
                cur.close()
                conn.close()

        def run(self, max_pages=2):
            """Crawl listing pages from ``self.start`` up to *max_pages*.

            Args:
                max_pages: Page number at which crawling stops (exclusive).

            Stops early when a page comes back with an empty body.
            ``self.start`` is advanced before each page is parsed.
            """
            while self.start < max_pages:
                data = self.get_data(self.url.format(self.start))
                if not data:
                    break
                self.start += 1
                self.xml_data(data)


    if __name__ == '__main__':
        # Crawl the listing pages first, then print the stored rows.
        crawler = Movie()
        crawler.run()
        crawler.read_database()
  • 相关阅读:
    C#在window服务配置Log4Net.dll
    致于即将逝去的2108年,2019年您好
    关于:未能加载文件或程序集“ICSharpCode.SharpZipLib”或它的某一个依赖项异常的解决方案
    Vs 中关于项目中的某 NuGet 程序包还原失败:找不到“xxx”版本的程序包“xxx”
    Git分布式版本控制器常用命令和使用
    微信公众平台网页登录授权多次重定向跳转,导致code使用多次问题
    Visual Studio高效实用的扩展工具、插件
    关于微信企业付款到零钱X509Certificate2读取证书信息,发布到服务器访问不到的解决方案
    关于ASP.NET MVC 项目在本地vs运行响应时间过长无法访问时,解决方法!
    彻底关闭windows10自动更新解决方案
  • 原文地址:https://www.cnblogs.com/xcsg/p/10138706.html
Copyright © 2020-2023  润新知