• Python爬取明星关系并写入csv文件


      今天用Python爬取了明星关系,数据不多,一共1386条数据,代码如下:

      

    import requests
    from bs4 import BeautifulSoup
    import bs4
    import csv
    
    def getHTMLText(url):
        """Fetch ``url`` over HTTP and return the response body as text.

        Args:
            url: Address of the page to download.

        Returns:
            The decoded page text, or the sentinel string "产生异常" when the
            request fails (connection error, timeout, or an HTTP error status).
        """
        # Request header announcing a browser-like client; it must actually be
        # passed to requests.get() to have any effect.
        kv = {'user-agent': 'Mozilla/5.0'}
        try:
            # A timeout keeps one stalled connection from hanging the whole crawl.
            r = requests.get(url, headers=kv, timeout=10)
            r.raise_for_status()  # raises HTTPError on 4xx/5xx responses
            # Use the encoding guessed from the body rather than the header default.
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            # Only network/HTTP failures are turned into the sentinel; programming
            # errors and KeyboardInterrupt are left to propagate.
            return "产生异常"
    
    def fillUnivList(ulist,html):
        """Extract rows from ``html``, add them to ``ulist`` and append to star.csv.

        Every ``<li>`` element whose CSS class is ``liodd`` or ``lieven`` yields
        one row made of the stripped text of its first three ``<span>`` children.

        Args:
            ulist: List that receives the extracted rows (mutated in place). Its
                full contents, including any rows it already held, are appended
                to ``star.csv``.
            html: HTML text of one list page.
        """
        soup = BeautifulSoup(html, "lxml")
        # A non-dict second argument is treated by BeautifulSoup as a CSS class
        # filter, so this matches <li class="liodd"> and <li class="lieven">.
        for item in soup.find_all('li', {'liodd', 'lieven'}):
            spans = item('span')
            if len(spans) < 3:
                # Skip malformed entries instead of raising IndexError.
                continue
            ulist.append([span.text.strip() for span in spans[:3]])

        # Explicit UTF-8 so the Chinese text is written the same way on every
        # platform; the with-block closes the file, so no manual close is needed.
        with open('star.csv', 'a', newline='', encoding='utf-8') as f:
            csv.writer(f).writerows(ulist)
    
    if __name__ == '__main__':
        print("爬虫开始")
        # Visit list pages 1 through 99; each page gets a fresh row list that
        # fillUnivList fills and appends to the CSV file.
        for page_no in range(1, 100):
            page_url = "https://www.1905.com/mdb/relation/list/s0t0p{}.html".format(page_no)
            page_rows = []
            fillUnivList(page_rows, getHTMLText(page_url))
        print("爬虫结束")
  • 相关阅读:
    ANT安装
    MAVEN配置教程
    闲笔
    js系列
    微信小程序系列_require
    c++复习系列
    codeblocks系列
    mysql系列
    Google Developer Tools
    数学建模算法(三):神经网络
  • 原文地址:https://www.cnblogs.com/qianmo123/p/14626460.html
Copyright © 2020-2023  润新知