• Python — web crawler (definitely worth a look)


    #!/usr/bin/env python3.5
    # -*- coding: utf-8 -*-
    # @Time   : 2018/1/26
    # @Author : Lyrichu
    # @Email  : 919987476@qq.com
    # @File   : NetCloudAnalyse.py
    '''
    @Description:
    Simple analysis for NetCloud music, including song comments, user info, etc.
    We use pyecharts for visual analysis.
    '''
    try:
        from NetCloudCrawler import NetCloudCrawl
    except ImportError:
        from .NetCloudCrawler import NetCloudCrawl
    from pyecharts import Bar,Geo # pyecharts 0.x API; 1.x moved these to pyecharts.charts
    import requests 
    import re 
    import time 
    import json 
    import pandas as pd 
    import jieba 
    from wordcloud import WordCloud
    import os 
    from threading import Thread 
    from scipy.misc import imread # note: removed in scipy >= 1.2; use imageio.imread in newer environments
    from collections import Counter
    from operator import itemgetter
    
    class NetCloudAnalyse(NetCloudCrawl):
        """
        analysis of NetCloud song comments, user info, etc.
        """
        def __init__(self,song_name,singer_name,song_id = 1,singer_id = 1):
            super(NetCloudAnalyse, self).__init__(song_name = song_name,song_id = song_id,
                                                singer_name = singer_name,singer_id = singer_id)
            self.threading_count = 0 # counter shared across worker threads
            self.unknown = "" # blank str for unknown info
    
        def load_comments_csv(self):
            '''
            load the crawled comments csv file
            '''
            comments_df = pd.read_csv(self.comments_file_path,engine = 'python',encoding = 'utf-8') # read csv file as dataframe
            return comments_df
    
        def save_users_info_to_file(self):
            with open(self.users_info_file_path,"w",encoding = "utf-8") as fout:
                fout.write("用户ID,抓取时间,动态总数,关注人数,粉丝人数,用户所在地区,用户简介,年龄,累计听歌数量
    ")
                users_url = self.load_users_url()
                num = len(users_url)
                
                # iterate the users url list
                for index,user_url in enumerate(users_url,1):
                    try:
                        user_id = re.search(r'.*id=(\d+)',user_url).group(1) # user id
                        # time to crawl this info
                        crawler_time = self.from_timestamp_to_date(time_stamp = time.time())
                        html = requests.get(user_url,headers = self.headers).text
                        # personal events counts
                        event_count_pattern = re.compile(r'<strong id="event_count">(\d+?)</strong>')
                        event_count = re.search(event_count_pattern,html)
                        if event_count:
                            event_count = event_count.group(1) 
                        else:
                            event_count = self.unknown
                        # how many people the user follow
                        follow_count_pattern = re.compile(r'<strong id="follow_count">(\d+?)</strong>')
                        follow_count = re.search(follow_count_pattern,html)
                        if follow_count:
                            follow_count = follow_count.group(1) 
                        else:
                            follow_count = self.unknown
                        # how many fans the user has
                        fan_count_pattern = re.compile(r'<strong id="fan_count">(\d+?)</strong>')
                        fan_count = re.search(fan_count_pattern,html)
                        if fan_count:
                            fan_count = fan_count.group(1)
                        else:
                            fan_count = self.unknown
                        # the location the user is in
                        location_pattern = re.compile('<span>所在地区:(.+?)</span>')
                        location = re.search(location_pattern,html)
                        if location:
                            location = location.group(1)
                        else:
                            location = self.unknown # unknown location
                        description_pattern = re.compile('<div class="inf s-fc3 f-brk">个人介绍:(.*?)</div>')
                        description = re.search(description_pattern,html)
                        if description:   # if user has a description
                            description = description.group(1)
                            description = description.replace(","," ")
                        else:
                            description = self.unknown
                        age_pattern = re.compile(r'<span.*?data-age="(\d+)">')
                        age = re.search(age_pattern,html) # if user age info exists
                        if age:
                            age = age.group(1) # note that this age is formatted as timestamp
                            # we should convert it into real age
                            current_year = int(self.from_timestamp_to_date(time_stamp = time.time(),format = "%Y"))
                            age = (current_year-1970) - int(age)//(1000*365*24*3600) # real age
                        else:
                            age = self.unknown
                        listening_songs_num_pattern = re.compile(r'<h4>累积听歌(\d+?)首</h4>')
                        # total listening songs count
                        listening_songs_num = re.search(listening_songs_num_pattern,html)
                        if listening_songs_num:
                            listening_songs_num = listening_songs_num.group(1) 
                        else:
                            listening_songs_num = self.unknown
                        # write user info to the file
                        fout.write("{user_id},{crawler_time},{event_count},{follow_count},{fan_count},{location},{description},{age},{listening_songs_num}
    "
                                    .format(
                                        user_id = user_id,crawler_time = crawler_time,event_count = event_count,
                                        follow_count = follow_count,fan_count = fan_count,location = location,
                                        description = description,age = age,listening_songs_num = listening_songs_num
                                        ))
                        print("Write {current}/{total} user info to file successfully!".format(current = index,total = num))
                    except Exception as e:
                        print("Fail to get No.{index} comment user's info:{error}"
                              .format(index = index,error = e))
    
        def threading_save_users_info_to_file(self,threads = 10):
            '''
            use multiple threads to save users info to file
            :param threads: number of threads
            '''
            start_time = time.time()
            with open(self.users_info_file_path,"w",encoding = "utf-8") as fout:
                fout.write("用户ID,抓取时间,动态总数,关注人数,粉丝人数,用户所在地区,用户简介,年龄,累计听歌数量
    ")
            users_url = self.load_users_url()
            num = len(users_url)
            pack = num//threads # urls per thread; e.g. num=95,threads=10 gives pack=9, and the last thread takes the remaining 14
            threads_list = []
            for i in range(threads):
                if i < threads-1:
                    urls = users_url[i*pack:(i+1)*pack]
                else:
                    urls = users_url[i*pack:]
                t = Thread(target = self.save_users_info,args=(urls,num))
                threads_list.append(t)
            for i in range(threads):
                threads_list[i].start()
            for i in range(threads):
                threads_list[i].join()
            end_time = time.time()
            print("Using {threads} threads to save users info done,costs {cost_time} seconds"
                    .format(threads = threads,cost_time = (end_time - start_time)))
    
        def save_users_info(self,users_url,total):
            '''
            append users info to the file; this function is called from worker threads
            :param users_url: the list of user urls for this thread to process
            :param total: total user url count
            '''
            users_info_list = []
            # note that we open the file in append mode
            with open(self.users_info_file_path,"a",encoding = "utf-8") as fout:
                for user_url in users_url:
                        try:
                            user_id = re.search(r'.*id=(\d+)',user_url).group(1) # user id
                            # time to crawl this info
                            crawler_time = self.from_timestamp_to_date(time_stamp = time.time())
                            html = requests.get(user_url,headers = self.headers).text
                            # personal events counts
                            event_count_pattern = re.compile(r'<strong id="event_count">(\d+?)</strong>')
                            event_count = re.search(event_count_pattern,html)
                            if event_count:
                                event_count = event_count.group(1) 
                            else:
                                event_count = self.unknown
                            # how many people the user follow
                            follow_count_pattern = re.compile(r'<strong id="follow_count">(\d+?)</strong>')
                            follow_count = re.search(follow_count_pattern,html)
                            if follow_count:
                                follow_count = follow_count.group(1) 
                            else:
                                follow_count = self.unknown
                            # how many fans the user has
                            fan_count_pattern = re.compile(r'<strong id="fan_count">(\d+?)</strong>')
                            fan_count = re.search(fan_count_pattern,html)
                            if fan_count:
                                fan_count = fan_count.group(1)
                            else:
                                fan_count = self.unknown
                            # the location the user is in
                            location_pattern = re.compile('<span>所在地区:(.+?)</span>')
                            location = re.search(location_pattern,html)
                            if location:
                                location = location.group(1)
                            else:
                                location = self.unknown # unknown location
                            description_pattern = re.compile('<div class="inf s-fc3 f-brk">个人介绍:(.*?)</div>')
                            description = re.search(description_pattern,html)
                            if description:   # if user has a description
                                description = description.group(1)
                                description = description.replace(","," ")
                            else:
                                description = self.unknown
                            age_pattern = re.compile(r'<span.*?data-age="(\d+)">')
                            age = re.search(age_pattern,html) # if user age info exists
                            if age:
                                age = age.group(1) # note that this age is formatted as timestamp
                                # we should convert it into real age
                                current_year = int(self.from_timestamp_to_date(time_stamp = time.time(),format = "%Y"))
                                age = (current_year-1970) - int(age)//(1000*365*24*3600) # real age
                            else:
                                age = self.unknown
                            listening_songs_num_pattern = re.compile(r'<h4>累积听歌(\d+?)首</h4>')
                            # total listening songs count
                            listening_songs_num = re.search(listening_songs_num_pattern,html)
                            if listening_songs_num:
                                listening_songs_num = listening_songs_num.group(1) 
                            else:
                                listening_songs_num = self.unknown
                            # write user info to the file
                            user_info = "{user_id},{crawler_time},{event_count},{follow_count},{fan_count},{location},{description},{age},{listening_songs_num}
    ".format(
                                            user_id = user_id,crawler_time = crawler_time,event_count = event_count,
                                            follow_count = follow_count,fan_count = fan_count,location = location,
                                            description = description,age = age,listening_songs_num = listening_songs_num
                                            )
                            users_info_list.append(user_info)
                            print("Get {current}/{total} user info to file successfully!".format(current = self.threading_count,total = total))
                        except Exception as e:
                            print("Fail to get No.{index} comment user's info:{error}"
                                  .format(index = self.threading_count,error = e))
                        self.threading_count += 1 # note: += is not atomic across threads; see the lock sketch below
                fout.writelines(users_info_list)
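        # Note: the shared self.threading_count above is updated without a lock,
        # so the printed progress numbers are only approximate under contention.
        # A minimal lock-protected sketch (hypothetical, not used by this code):
        #
        #     from threading import Lock
        #     self._count_lock = Lock()   # would be created in __init__
        #     ...
        #     with self._count_lock:
        #         self.threading_count += 1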
    
    
    
    
        def count_comments_lines(self):
            '''
            count total comments lines
            '''
            total = 0 # default for an empty file
            with open(self.comments_file_path,"r",encoding = "utf-8") as fin:
                for total,_ in enumerate(fin,1):
                    pass
            return total
    
        
        def from_timestamp_to_date(self,time_stamp,format = "%Y-%m-%d %H:%M:%S"):
            '''
            convert from timestamp to real date formatted in Year-Month-Day etc. 
            :param time_stamp: the time stamp
            :param format: the date format we want to convert
            '''
            real_date = time.strftime(format,time.localtime(time_stamp))
            return real_date
    
        def load_users_url(self):
            '''
            return the list of all users' home page urls
            '''
            comments_df = self.load_comments_csv()
            users_id = comments_df['用户ID'].dropna() # user id
            ids_num = len(users_id) # all ids num
            # user ids must be strings of digits
            users_id = [users_id.iloc[i] for i in range(ids_num) if re.match(r'\d+',str(users_id.iloc[i]))]
            users_url = []
            for user_id in users_id:
                users_url.append('http://music.163.com/user/home?id={user_id}'.format(user_id = user_id))
            return list(set(users_url)) # remove duplicate users' urls
    
                        
        def load_users_info_csv(self):
            '''
            load users info from file,
            return users info dataframe
            '''
            users_info_df = pd.read_csv(self.users_info_file_path,engine = 'python',encoding = 'utf-8')
            return users_info_df
    
    
        def draw_wordcloud(self,full_comments = True,background_path = "source/JayChou.jpg",font_path = "source/simsun.ttc"):
            '''
            draw wordcloud of the full comments of one song or the hot comments of a singer
            :param full_comments: True means full comments, False means hot comments
            :param background_path: background image path
            :param font_path: font path
            '''
            abs_path = os.path.split(os.path.realpath(__file__))[0]
            background_path = os.path.join(abs_path,background_path)
            font_path = os.path.join(abs_path,font_path)
            if full_comments:
                file_path = self.comments_file_path
                save_path = os.path.join(self.song_path,self.song_name+".jpg")
            else:
                file_path = os.path.join(self.singer_path,"hot_comments.csv")
                save_path = os.path.join(self.singer_path,self.singer_name+".jpg")
            comments_df = pd.read_csv(file_path,engine = 'python',encoding = 'utf-8')["评论内容"]
            comments_text = ""
            for i in range(len(comments_df)):
                comments_text += str(comments_df.iloc[i]) 
            cut_text = " ".join(jieba.cut(comments_text)) # join the segmented words with spaces
            color_mask = imread(background_path) # read the background image
            cloud = WordCloud(font_path=font_path,background_color='white',mask=color_mask,max_words=2000,max_font_size=40)
            word_cloud = cloud.generate(cut_text) # generate the word cloud
            word_cloud.to_file(save_path)
            print("Successfully generate {save_path}".format(save_path =save_path))
    
        def core_visual_analyse(self):
            '''
            core visual analyse for comments and users info,including:
            1. The distribution of comments time,both for months,days(bar to show)
            2. The distribution of comments agree count(bar to show)
            3. The distribution of comment keywords,excluded stopwords(bar to show)
            4. The distribution of users location,using geo to show(geo to show)
            5. The distribution of users location,using bar to show(bar to show)
            6. The distribution of events count(bar to show)
            7. The distribution of follow people count(bar to show)
            8. The distribution of fans count(bar to show)
            9. The distribution of description keywords(excluded stopwords)(bar to show)
            10. The distribution of users age(bar to show)
            11. The distribution of listening songs total count(bar to show)
            '''
            plot_save_path = os.path.join(self.song_path,"plots")
            if not os.path.exists(plot_save_path):
                os.mkdir(plot_save_path)
            comments_df = self.load_comments_csv()
            users_info_df = self.load_users_info_csv()
            # 1. The distribution of comments time,both for months,days and for hours(bar to show)
            comments_time = list(comments_df['评论时间'].dropna())
            # date formatted by year-month
            comments_date_year_month = []
            # date formatted by year-month-day
            comments_date_year_month_day = []
            for comment_time in comments_time:
                # note that the timestamp is in milliseconds, so divide by 1000 first
                year_month = self.from_timestamp_to_date(comment_time*0.001,format = "%Y-%m")
                year_month_day = self.from_timestamp_to_date(comment_time*0.001,format = "%Y-%m-%d")
                comments_date_year_month.append(year_month)
                comments_date_year_month_day.append(year_month_day)
            
            comments_date_year_month_x,comments_date_year_month_y = zip(*(sorted(Counter(comments_date_year_month).items(),key = itemgetter(0))))
            comments_date_year_month_day_x,comments_date_year_month_day_y = zip(*(sorted(Counter(comments_date_year_month_day).items(),key = itemgetter(0))))
            # year-month bar plot
            comments_date_year_month_bar = Bar(title = "歌曲<{song_name}>评论时间(年-月)数量分布".format(song_name = self.song_name))
            comments_date_year_month_bar.add("年-月",comments_date_year_month_x,comments_date_year_month_y)
            comments_date_year_month_save_path = os.path.join(plot_save_path,"comments_year_month_bar.html")
            comments_date_year_month_bar.render(comments_date_year_month_save_path)
            # year-month-day bar plot
            comments_date_year_month_day_bar = Bar(title = "歌曲<{song_name}>评论时间(年-月-日)数量分布".format(song_name = self.song_name))
            comments_date_year_month_day_bar.add("年-月-日",comments_date_year_month_day_x,comments_date_year_month_day_y)
            comments_date_year_month_day_save_path = os.path.join(plot_save_path,"comments_year_month_day_bar.html")
            comments_date_year_month_day_bar.render(comments_date_year_month_day_save_path)
            # 2. The distribution of comments agree count(bar to show)
            agree_count = list(comments_df['点赞总数'].dropna())
            agree_count_x,agree_count_y = zip(*(sorted(Counter(agree_count).items(),key = itemgetter(0))))
            agree_count_bar = Bar(title = "歌曲<{song_name}>评论点赞数量分布".format(song_name = self.song_name))
            agree_count_bar.add("点赞数量",agree_count_x,agree_count_y)
            agree_count_save_path = os.path.join(plot_save_path,"agree_count_bar.html")
            agree_count_bar.render(agree_count_save_path)
            # 3. The distribution of comment keywords,excluded stopwords(bar to show)
            comments_text = "".join(list(comments_df['评论内容'].dropna()))
            comments_keywords = jieba.cut(comments_text)
            # remove stopwords and words shorter than 2 characters
            stopwords = self.load_stopwords()
            comments_keywords = [keyword for keyword in comments_keywords if keyword not in stopwords and len(keyword) > 1]
            comments_keywords_x,comments_keywords_y = zip(*(sorted(Counter(comments_keywords).items(),key = itemgetter(1),reverse = True)))
            comments_keywords_bar = Bar(title = "歌曲<{song_name}>评论关键词数量分布(已去除停用词)".format(song_name = self.song_name))
            comments_keywords_bar.add("关键词",comments_keywords_x,comments_keywords_y)
            comments_keywords_save_path = os.path.join(plot_save_path,"comments_keywords_bar.html")
            comments_keywords_bar.render(comments_keywords_save_path)
            # 4. The distribution of users location,using geo to show(geo to show)
            users_location = list(users_info_df['用户所在地区'].dropna())
            users_city = [] # cities the users are in
            all_cities = self.load_all_cities()
            for location in users_location:
                for city in all_cities:
                    if city in location:
                        users_city.append(city.replace("市",""))
            users_city_data = list(Counter(users_city).items()) 
            users_city_geo = Geo("歌曲<{song_name}>评论用户所在地区分布".format(song_name = self.song_name),title_color="#fff", title_pos="left",
                                    width=1200, height=600, background_color='#404a59')
            attr, value = users_city_geo.cast(users_city_data)
            users_city_geo.add("", attr, value, visual_range=[0, 200], visual_text_color="#fff", symbol_size=15, is_visualmap=True)
            users_city_save_path = os.path.join(plot_save_path,"users_city_geo.html")
            users_city_geo.render(users_city_save_path)
    
            # 5. The distribution of users location,using bar to show(bar to show)
            users_location_x,users_location_y = zip(*(sorted(Counter(users_location).items(),key = itemgetter(1),reverse = True)))
            users_location_bar = Bar(title = "歌曲<{song_name}>评论用户所在地区分布".format(song_name = self.song_name))
            users_location_bar.add("用户所在地区",users_location_x,users_location_y)
            users_location_save_path = os.path.join(plot_save_path,"users_location_bar.html")
            users_location_bar.render(users_location_save_path)
            # 6. The distribution of events count(bar to show)
            events_count = list(users_info_df['动态总数'].dropna())
            events_count_x,events_count_y = zip(*(sorted(Counter(events_count).items(),key = itemgetter(0))))
            events_count_bar = Bar(title = "歌曲<{song_name}>评论用户动态总数分布".format(song_name = self.song_name))
            events_count_bar.add("用户动态总数",events_count_x,events_count_y)
            events_count_save_path = os.path.join(plot_save_path,"events_count_bar.html")
            events_count_bar.render(events_count_save_path)
            # 7. The distribution of follow people count(bar to show)
            follow_count = list(users_info_df['关注人数'].dropna())
            follow_count_x,follow_count_y = zip(*(sorted(Counter(follow_count).items(),key = itemgetter(0))))
            follow_count_bar = Bar(title = "歌曲<{song_name}>评论用户关注人数分布".format(song_name = self.song_name))
            follow_count_bar.add("用户关注人数",follow_count_x,follow_count_y)
            follow_count_save_path = os.path.join(plot_save_path,"follow_count_bar.html")
            follow_count_bar.render(follow_count_save_path)
            # 8. The distribution of fans count(bar to show)
            fans_count = list(users_info_df['粉丝人数'].dropna())
            fans_count_x,fans_count_y = zip(*(sorted(Counter(fans_count).items(),key = itemgetter(0))))
            fans_count_bar = Bar(title = "歌曲<{song_name}>评论用户粉丝人数分布".format(song_name = self.song_name))
            fans_count_bar.add("用户粉丝人数",fans_count_x,fans_count_y)
            fans_count_save_path = os.path.join(plot_save_path,"fans_count_bar.html")
            fans_count_bar.render(fans_count_save_path)
            # 9. The distribution of description keywords(excluded stopwords)(bar to show)
            description_text = "".join(list(users_info_df['用户简介'].dropna()))
            description_keywords = jieba.cut(description_text)
            description_keywords = [keyword for keyword in description_keywords if keyword not in stopwords and len(keyword) > 1]
            description_keywords_x,description_keywords_y = zip(*(sorted(Counter(description_keywords).items(),key = itemgetter(1),reverse = True)))
            description_keywords_bar = Bar(title = "歌曲<{song_name}>评论用户简介关键词数量分布(已去除停用词)".format(song_name = self.song_name))
            description_keywords_bar.add("用户简介关键词",description_keywords_x,description_keywords_y)
            description_keywords_save_path = os.path.join(plot_save_path,"description_keywords_bar.html")
            description_keywords_bar.render(description_keywords_save_path)
            # 10. The distribution of users age(bar to show)
            age_count = list(users_info_df['年龄'].dropna())
            age_count = [age for age in age_count if age >= 0] # keep only valid (non-negative) ages
            age_count_x,age_count_y = zip(*(sorted(Counter(age_count).items(),key = itemgetter(0))))
            age_count_bar = Bar(title = "歌曲<{song_name}>评论用户年龄分布".format(song_name = self.song_name))
            age_count_bar.add("年龄",age_count_x,age_count_y)
            age_count_save_path = os.path.join(plot_save_path,"age_count_bar.html")
            age_count_bar.render(age_count_save_path)
            # 11. The distribution of listening songs total count(bar to show)
            listening_songs_count = list(users_info_df['累计听歌数量'].dropna())
            listening_songs = {'0-100':0,'100-1000':0,'1000-10000':0,'>10000':0}
            for c in listening_songs_count:
                if c < 100:
                    listening_songs['0-100'] += 1
                elif c < 1000:
                    listening_songs['100-1000'] += 1
                elif c < 10000:
                    listening_songs['1000-10000'] += 1
                else:
                    listening_songs['>10000'] += 1
            listening_songs_count_x,listening_songs_count_y = zip(*sorted(Counter(listening_songs).items(),key = itemgetter(1),reverse = True))
            listening_songs_count_bar = Bar(title = "歌曲<{song_name}>评论用户听歌总数分布".format(song_name = self.song_name))
            listening_songs_count_bar.add("听歌总数",listening_songs_count_x,listening_songs_count_y)
            listening_songs_count_save_path = os.path.join(plot_save_path,"listening_songs_count_bar.html")
            listening_songs_count_bar.render(listening_songs_count_save_path)
    
    
    
    
        def load_stopwords(self):
            '''
            load stopwords list
            '''
            abs_path = os.path.split(os.path.realpath(__file__))[0]
            stopwords_path = os.path.join(abs_path,"source","stopwords.txt")
            with open(stopwords_path,"r",encoding = "utf-8") as f:
                stopwords = f.readlines()
            stopwords = [word.strip() for word in stopwords]
            return list(set(stopwords))
    
        def load_all_cities(self):
            '''
            load all cities from the province_cities.json file,
            used to match cities in the location text
            '''
            abs_path = os.path.split(os.path.realpath(__file__))[0]
            province_cities_file = os.path.join(abs_path,"source","province_cities.json")
            all_cities = []
            with open(province_cities_file,"r",encoding = "utf-8") as fin:
                content = fin.read()
                d = json.loads(content)
                for province in d:
                    for city in province['city']:
                        all_cities.append(city['name'])
            return all_cities
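        # The structure the loop above assumes for province_cities.json is a
        # list of province objects, each with a "city" list of objects that
        # carry a "name" key. A hypothetical excerpt:
        # [
        #   {"name": "浙江省", "city": [{"name": "杭州市"}, {"name": "宁波市"}]},
        #   ...
        # ]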
    
        def generate_all_analyse_files(self,threads = 10):
            '''
            generate all analysis files, including:
            1. generate users info file
            2. generate wordcloud picture
            3. generate core visual analysis files
            '''
            self.threading_save_users_info_to_file(threads)
            self.draw_wordcloud()
            self.core_visual_analyse()
    
        def _test_load_all_cities(self):
            all_cities = self.load_all_cities()
            print("There are %d cities." % len(all_cities))
            print(all_cities)
    
        def _test_load_stopwords(self):
            stopwords = self.load_stopwords()
            print('There are %d stopwords.' % len(stopwords))
            # print first 100 stopwords
            print(stopwords[:100])
            
    
        def _test_load_comments_csv(self):
            df = self.load_comments_csv()
            print(df.head())
    
        def _test_count_comments_lines(self):
            total = self.count_comments_lines()
            print("{file} has {total} comments.".format(file = self.comments_file_path,total = total))
    
        def _test_from_timestamp_to_date(self):
            comments_df = self.load_comments_csv()
            comments_timestamp = comments_df['评论时间'].dropna() # drop na value
            show_num = 10 # lines to show
            print(self.song_name)
            print("timestamp           real_date")
            for i in range(show_num):
                time_stamp = comments_timestamp.iloc[i]
                if time_stamp:
                    real_date = self.from_timestamp_to_date(time_stamp)
                    print("%s       %s" %(time_stamp,real_date))
    
        def _test_load_users_url(self):
            users_url = self.load_users_url()
            print("There are %d users ulr." % len(users_url))
            num = 10
            print("Top %d users ulr are:" % num)
            for i in range(num):
                print("{index}:{url}".format(index = i+1,url = users_url[i]))
    
        def _test_load_users_info_csv(self):
            users_info_df = self.load_users_info_csv()
            print(users_info_df.head())
    
        def _test_save_users_info_to_file(self):
            self.save_users_info_to_file()
    
        def _test_draw_wordcloud(self):
            full_comments = False
            self.draw_wordcloud(full_comments = full_comments)
    
        def _test_core_visual_analyse(self):
            self.core_visual_analyse()
    
        def _test_threading_save_users_info_to_file(self,threads = 10):
            self.threading_save_users_info_to_file(threads)
    
        def _test_netcloudanalyse_all(self):
            self._test_save_users_info_to_file()
            self._test_threading_save_users_info_to_file(20)
            self._test_load_comments_csv()
            self._test_count_comments_lines()
            self._test_from_timestamp_to_date()
            self._test_load_users_url()
            self._test_load_users_info_csv()
            self._test_draw_wordcloud()
            self._test_core_visual_analyse()
            self._test_load_stopwords()
            self._test_load_all_cities()
            
    
    # if __name__ == '__main__':
    #     song_name = '晴天'
    #     song_id = 186016
    #     singer_name = '周杰伦'
    #     singer_id = 6452
    #     netcloud_analyse = NetCloudAnalyse(song_name = song_name,song_id = song_id,singer_name = singer_name,
    #                                         singer_id = singer_id)
    #     #netcloud_analyse._test_netcloudanalyse_all()
    #     netcloud_analyse.generate_all_analyse_files(100)
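
A note on the age field parsed above: the `data-age` attribute on a user's home page is a birth-date timestamp in milliseconds, which is why the code divides it by `1000*365*24*3600` to get the number of years between 1970 and the birth date, then subtracts that from the current year. A minimal standalone sketch of the same conversion (the sample `data-age` value below is hypothetical):

    import time

    def age_from_data_age(data_age_ms):
        # data-age is milliseconds from the epoch to the user's birth date
        seconds_per_year = 365 * 24 * 3600
        birth_years_after_1970 = int(data_age_ms) // (1000 * seconds_per_year)
        current_year = int(time.strftime("%Y"))
        return (current_year - 1970) - birth_years_after_1970

    # e.g. 631152000000 ms is roughly 1990-01-01, which gives 28 in 2018
    print(age_from_data_age(631152000000))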
    

      
