• 数据分析


      1 #!/usr/bin/python
      2 #-*- coding: utf-8 -*-
      3 
      4 
      5 
      6 import os
      7 import re
      8 import csv
      9 import time
     10 import json
     11 import jieba
     12 from jieba import analyse
     13 import pandas as pd
     14 import itchat
     15 import base64
     16 from snownlp import SnowNLP
     17 import requests
     18 import sys
     19 from collections import Counter
     20 import matplotlib.pyplot as plt
     21 from pylab import *
     22 from faceApi import FaceAPI
     23 mpl.rcParams['font.sans-serif'] = ['SimHei']
     24 from PIL import Image
     25 import numpy as np
     26 from wordcloud import WordCloud
     27 from pyecharts import Pie, Map, Style, Page, Bar
     28 
     def analyseSex(firends):
         """Show a pie chart of the gender split of the WeChat friend list.

         Parameters
         ----------
         firends : list
             Friend records (dict-like, with ``'Sex'`` and ``'NickName'`` keys).
             Element 0 is only used for the chart title and is not counted.
             The parameter name is misspelled; it is kept unchanged so that
             existing keyword callers do not break.

         Returns
         -------
         None
             The chart is displayed with ``plt.show()``.
         """
         # Work on the argument; the previous body silently read a module-level
         # ``friends`` global instead of the value passed in.
         friends = firends
         tally = Counter(friend['Sex'] for friend in friends[1:])

         # (sex code, label, colour) in ascending code order.
         # NOTE(review): assumes the codes are 0 = unknown, 1 = male, 2 = female,
         # which is what the original label order implies -- confirm with itchat.
         categories = [
             (0, '不明', 'red'),
             (1, '男性', 'yellow'),
             (2, '女性', 'blue'),
         ]
         # Pair every count with its own label so a missing category can no
         # longer shift the labels or cause a length mismatch in plt.pie.
         present = [entry for entry in categories if tally.get(entry[0], 0) > 0]
         if not present:
             # Nothing to plot (no friends besides the account owner).
             return

         counts = [tally[code] for code, _, _ in present]
         labels = [label for _, label, _ in present]
         colors = [color for _, _, color in present]

         plt.figure(figsize=(8, 5), dpi=80)
         plt.axes(aspect=1)  # equal aspect ratio keeps the pie circular
         plt.pie(
             counts,
             labels=labels,
             colors=colors,
             labeldistance=1.1,
             autopct='%3.1f%%',
             shadow=False,
             startangle=90,
             pctdistance=0.6,
         )
         plt.legend(loc='upper right')
         plt.title(u'%s的微信好友性别组成' % friends[0]['NickName'])
         plt.show()
     50 
     def analyseLocation(friends):
         """Export friends' locations to ``location.csv`` and count them by province.

         Parameters
         ----------
         friends : list
             Friend records (dict-like, with ``'NickName'``, ``'Province'`` and
             ``'City'`` keys). Element 0 is skipped.

         Returns
         -------
         dict
             Mapping of province name to the number of friends in it. Friends
             whose province is ``None`` are written to the CSV but not counted.
             (Earlier versions returned ``None`` and discarded the counts.)
         """
         headers = ['NickName', 'Province', 'City']
         freqs = Counter()
         # newline='' lets the csv module control line endings itself.
         with open('location.csv', 'w', encoding='utf-8', newline='') as csvFile:
             writer = csv.DictWriter(csvFile, headers)
             writer.writeheader()
             for friend in friends[1:]:
                 province = friend['Province']
                 writer.writerow({
                     'NickName': friend['NickName'],
                     'Province': province,
                     'City': friend['City'],
                 })
                 if province is not None:
                     # The old else-branch reset the count to 1 instead of
                     # incrementing it, so every province ended up with 1.
                     freqs[province] += 1
         return dict(freqs)
     68     
     69 
     70 
     def analyseHeadImage(frineds):
         """Analyse friends' avatars: face usage pie chart and image-tag word cloud.

         For every friend except element 0, the avatar is saved under
         ``./HeadImages/Image<index>.jpg`` (downloaded only when the file does
         not exist yet), passed to ``FaceAPI.detectFace`` and
         ``FaceAPI.extractTags``, and the results are plotted.

         Parameters
         ----------
         frineds : list
             Friend records (dict-like, with ``'UserName'`` and ``'NickName'``
             keys). The parameter name is misspelled; it is kept unchanged so
             that existing keyword callers do not break.

         Returns
         -------
         None
             Charts are displayed with ``plt.show()``.
         """
         # Work on the argument; the previous body silently read a module-level
         # ``friends`` global instead of the value passed in.
         friends = frineds

         # Build paths with os.path.join instead of hand-written backslashes,
         # which were both invalid string literals and Windows-only.
         baseFolder = os.path.join(os.path.abspath('.'), 'HeadImages')
         if not os.path.exists(baseFolder):
             os.makedirs(baseFolder)

         faceApi = FaceAPI()
         use_face = 0
         not_use_face = 0
         tag_names = []
         for index, friend in enumerate(friends[1:], start=1):
             # Save the avatar; skip the download when it is already cached.
             imgFile = os.path.join(baseFolder, 'Image%s.jpg' % index)
             if not os.path.exists(imgFile):
                 imgData = itchat.get_head_img(userName=friend['UserName'])
                 with open(imgFile, 'wb') as file:
                     file.write(imgData)

             # NOTE(review): the pause presumably throttles calls to the face
             # API -- confirm the actual rate limit.
             time.sleep(1)
             # Kept as an explicit comparison: only a result equal to True
             # counts as "uses a face", exactly as before.
             if faceApi.detectFace(imgFile) == True:
                 use_face += 1
             else:
                 not_use_face += 1

             # Collect tags per image; joining once at the end keeps a
             # separator between the tags of consecutive friends.
             tag_names.extend(tag['tag_name'] for tag in faceApi.extractTags(imgFile))

         if use_face + not_use_face == 0:
             # No friends besides the account owner: nothing to plot.
             return

         labels = [u'使用人脸头像', u'不使用人脸头像']
         counts = [use_face, not_use_face]
         colors = ['red', 'yellow']
         plt.figure(figsize=(8, 5), dpi=80)
         plt.axes(aspect=1)  # equal aspect ratio keeps the pie circular
         plt.pie(
             counts,                # face / no-face totals
             labels=labels,         # wedge labels
             colors=colors,         # wedge colours
             labeldistance=1.1,     # label distance from the centre
             autopct='%3.1f%%',     # percentage text format
             shadow=False,          # no drop shadow
             startangle=90,         # angle of the first wedge
             pctdistance=0.5,       # percentage text distance from the centre
         )
         plt.legend(loc='upper right')
         plt.title(u'%s的微信好友使用人脸头像情况' % friends[0]['NickName'])
         plt.show()

         image_tags = ','.join(tag_names)
         if not image_tags:
             # WordCloud cannot be generated from empty text.
             return

         # NOTE(review): this round-trip presumably repairs UTF-8 tag names that
         # the API client decoded as ISO-8859-1 -- confirm against FaceAPI.
         image_tags = image_tags.encode('iso8859-1').decode('utf-8')
         back_coloring = np.array(Image.open('face.jpg'))
         wordcloud = WordCloud(
             font_path='simfang.ttf',
             background_color="white",
             max_words=1200,
             mask=back_coloring,
             max_font_size=85,
             random_state=75,
             width=800,
             height=480,
             margin=15,
         )

         wordcloud.generate(image_tags)
         plt.imshow(wordcloud)
         plt.axis("off")
         plt.show()
    140 
    def analyseSignature(friends):
        """Analyse friends' signatures: keyword word cloud and sentiment bar chart.

        Each non-empty signature is cleaned of emoji markup, scored with
        ``SnowNLP(...).sentiments`` and reduced to at most five keywords with
        ``jieba.analyse.extract_tags``. The keywords are written to
        ``signatures.txt``, rendered as a word cloud (also saved to
        ``signatures.jpg``), and the sentiment scores are bucketed into a
        three-bar chart.

        Parameters
        ----------
        friends : list
            Friend records (dict-like, with ``'Signature'`` and ``'NickName'``
            keys). All elements, including element 0, are analysed.

        Returns
        -------
        None
            Charts are displayed with ``plt.show()``.
        """
        keywords = []
        emotions = []
        for friend in friends:
            signature = friend['Signature']
            if signature is None:
                continue
            # Strip the remnants of emoji <span class="emoji ..."> markup.
            signature = signature.strip().replace('span', '').replace('class', '').replace('emoji', '')
            # Drop everything from the emoji code point (e.g. "1f60a") onwards.
            # The pattern previously read '1f(d.+)', which only matched a
            # literal "1fd"; the backslash of \d had been lost.
            signature = re.sub(r'1f\d.+', '', signature)
            if not signature:
                continue
            emotions.append(SnowNLP(signature).sentiments)
            keywords.extend(jieba.analyse.extract_tags(signature, 5))

        # Join once so keywords of consecutive friends stay separated.
        signatures = ' '.join(keywords)
        with open('signatures.txt', 'wt', encoding='utf-8') as file:
            file.write(signatures)

        if not emotions:
            # No usable signature: nothing to draw, and the percentages below
            # would divide by zero.
            return

        # Signature word cloud (WordCloud rejects empty text).
        if signatures:
            back_coloring = np.array(Image.open('flower.jpg'))
            wordcloud = WordCloud(
                font_path='simfang.ttf',
                background_color="white",
                max_words=1200,
                mask=back_coloring,
                max_font_size=75,
                random_state=45,
                width=960,
                height=720,
                margin=15,
            )

            wordcloud.generate(signatures)
            plt.imshow(wordcloud)
            plt.axis("off")
            plt.show()
            wordcloud.to_file('signatures.jpg')

        # Sentiment buckets: > 0.66 positive, 0.33..0.66 neutral, < 0.33 negative.
        count_good = sum(1 for score in emotions if score > 0.66)
        count_normal = sum(1 for score in emotions if 0.33 <= score <= 0.66)
        count_bad = sum(1 for score in emotions if score < 0.33)
        total = len(emotions)
        print(count_good * 100 / total)
        print(count_normal * 100 / total)
        print(count_bad * 100 / total)
        print(count_good)
        print(count_normal)
        print(count_bad)

        labels = [u'负面消极', u'中性', u'正面积极']
        values = (count_bad, count_normal, count_good)
        plt.rcParams['font.sans-serif'] = ['simHei']
        plt.rcParams['axes.unicode_minus'] = False
        plt.xlabel(u'情感判断')
        plt.ylabel(u'频数')
        plt.xticks(range(3), labels)
        # One colour per bar; the compact 'rgb' string form is not accepted by
        # current matplotlib. The former plt.legend() call was dropped because
        # no artist carries a label, so it only produced a warning.
        plt.bar(range(3), values, color=['r', 'g', 'b'])
        plt.title(u'%s的微信好友签名信息情感分析' % friends[0]['NickName'])
        plt.show()
    200 
    def create_charts():
        """Render a map chart of how many WeChat friends live in each province.

        Fetches the friend list with ``itchat.get_friends()``, aggregates it with
        ``prov_stats`` and renders a pyecharts ``Map`` inside a ``Page`` via
        ``page.render()``.

        Returns
        -------
        None
        """
        users = itchat.get_friends()
        # attr: province names, value: matching friend counts.
        attr, value = prov_stats(users)

        # The unused 900x500 "style_middle" object was removed; only the
        # full-size style is applied to the chart.
        style = Style(width=1100, height=600)
        chart = Map('中国地图', **style.init_style)
        chart.add('', attr, value, is_label_show=True, is_visualmap=True, visual_text_color='#000')

        page = Page()
        page.add(chart)
        page.render()
    212 
    213 
    def prov_stats(users):
        """Count users per province.

        Parameters
        ----------
        users : list
            Records (dict-like) that each carry a ``'Province'`` key.

        Returns
        -------
        tuple
            ``(names, counts)``: province names ordered by ascending count, with
            the empty string replaced by ``'未知'``, and the matching counts.
        """
        frame = pd.DataFrame(users)
        per_province = (
            frame.groupby('Province', as_index=True)['Province']
            .count()
            .sort_values()
        )
        # An empty province string means the friend did not set a location.
        names = ['未知' if name == '' else name for name in per_province.index]
        return names, per_province.tolist()
    219 
    # Log in to WeChat (hotReload=True is passed so itchat can reuse a saved
    # session) and fetch a refreshed friend list. ``friends`` stays a
    # module-level name.
    itchat.auto_login(hotReload=True)
    friends = itchat.get_friends(update=True)

    # Province map first, then each analysis in its original order.
    create_charts()
    for run_analysis in (analyseSex, analyseSignature, analyseHeadImage, analyseLocation):
        run_analysis(friends)
  • 相关阅读:
    安装配置ssh免密码登录
    大数据学习之Linux环境搭建(导航)
    Linux下搭建sqli-labs环境
    SpringMVC freemarker 中 Could not resolve view with name 'XXX.ftl' in servlet with name 'SpringMVC'
    配置FreeMarker时IDEA提示cannot resolve property 'templateLoaderPath'
    MySQL在指定字段后添加一个新字段
    META-INF/MANIFEST.MF file not found in unnamed.war
    Java获取音频播放时长
    JS实现阿拉伯数字转韩文
    微信公众号开发-素材管理-调用接口返回结果一览表
  • 原文地址:https://www.cnblogs.com/mmzq/p/10678353.html
Copyright © 2020-2023  润新知