• 天猫淘宝评论数据抓取


    import requests
    import re,json
    import pandas
    
    
    class base():
        """Scraper for a paginated product-review endpoint (Taobao or Tmall).

        ``url`` is the review-list URL up to and including the page-number
        query parameter (for example ending in ``currentPage=``); the page
        number is appended to it by :meth:`all_url`.

        The ``*_comment`` methods append their rows to the module-level list
        ``info_list``, which the caller must define before invoking them.
        """

        # Seconds to wait on the server before a request is abandoned, so a
        # stalled connection cannot hang the whole crawl.
        timeout = 10

        def __init__(self, url):
            self.url = url

        def all_url(self, last_page=99):
            """Return the URLs of pages 1 through ``last_page`` (inclusive).

            The default of 99 matches the range that used to be hard-coded.
            """
            return [self.url + str(page) for page in range(1, last_page + 1)]

        def loads_jsonp(self, _jsonp):
            """Decode the JSON object embedded in a JSONP-style response.

            Everything from the first ``{`` to the last ``}`` of ``_jsonp`` is
            parsed as JSON and returned.

            Raises:
                ValueError: if ``_jsonp`` is not a string, contains no
                    brace-delimited span, or that span is not valid JSON.
                    The underlying error is attached as ``__cause__``.
            """
            try:
                payload = re.match(r".*?({.*}).*", _jsonp, re.S).group(1)
                return json.loads(payload)
            except (AttributeError, TypeError, ValueError) as err:
                # AttributeError: no match (None.group); TypeError: non-string
                # input; ValueError: malformed JSON (JSONDecodeError).
                raise ValueError('Invalid Input') from err

        def url_req(self, url):
            """Fetch ``url`` over HTTP and return its decoded JSON payload."""
            content = requests.get(url, timeout=self.timeout).text
            return self.loads_jsonp(content)

        def taobao_comment(self, data):
            """Append one row per entry of ``data['comments']`` to ``info_list``.

            Each entry is read as ``entry['user']['nick']`` and
            ``entry['content']``. A ``None``/empty ``comments`` value is
            treated as "no reviews on this page"; a missing key still raises
            ``KeyError`` so an unexpected response shape is not hidden.
            """
            for entry in data['comments'] or []:
                info_list.append({
                    '昵称': entry['user']['nick'],
                    '评论': entry['content'],
                })

        def tianmao_comment(self, data):
            """Append one row per entry of ``data['rateList']`` to ``info_list``.

            Each entry is read as ``entry['displayUserNick']`` and
            ``entry['rateContent']``. A ``None``/empty ``rateList`` value is
            treated as "no reviews on this page"; a missing key still raises
            ``KeyError`` so an unexpected response shape is not hidden.
            """
            for entry in data['rateList'] or []:
                info_list.append({
                    '昵称': entry['displayUserNick'],
                    '评论': entry['rateContent'],
                })

        def comment(self, url):
            """Download one review page and store its rows in ``info_list``.

            URLs containing ``'tmall'`` are parsed with
            :meth:`tianmao_comment`; all others with :meth:`taobao_comment`.
            """
            data = self.url_req(url)
            if 'tmall' in url:
                self.tianmao_comment(data)
            else:
                self.taobao_comment(data)
                
    
    def main(url):
        """Crawl every review page derived from ``url``.

        After each page is processed, the running number of rows held in the
        module-level ``info_list`` is printed.
        """
        scraper = base(url)
        page_urls = scraper.all_url()
        for page_url in page_urls:
            scraper.comment(page_url)
            print(len(info_list))
    
    
    if __name__ == "__main__":
        # Shared accumulator that the scraper methods append rows to.
        info_list = []

        # Review-list endpoint; the page number is appended after
        # ``currentPage=`` for each request.
        url = 'https://rate.tmall.com/list_detail_rate.htm?itemId=39258348512&spuId=250685252&sellerId=2106913388&order=3&currentPage='
        main(url)

        # Dump everything collected into a spreadsheet, without the index
        # column.
        df = pandas.DataFrame(info_list)
        df.to_excel('comments.xlsx', index=False)
  • 相关阅读:
    python基础学习(2)
    python基础语法
    面试常见问题(2)——数据库
    面试常见问题(1)——TCP协议
    python基础回顾(一)
    安装Beautiful Soup
    MongoDB的安装、配置和可视化
    神奇的斐波那契---解决兔子繁衍问题
    正规式、正规文法与自动机
    词法分析程序的设计与实现
  • 原文地址:https://www.cnblogs.com/Erick-L/p/8000637.html
Copyright © 2020-2023  润新知