• Python编程系统下的大数据处理(微信公众号的预测评价)


    import sqlite3
    import xlrd
    
    def read_excel():
        c = []
        # 打开文件
        workbook = xlrd.open_workbook(r'F:/期中作业/dataWx_org.xlsx')
        # 获取所有sheet
        sheet_name = workbook.sheet_names()[0]
        sheet = workbook.sheet_by_name(sheet_name)
        
        #获取一行的内容
        for i in range(1,sheet.nrows):
            a = []
            for j in range(0,sheet.ncols):
                a.append(sheet.cell(i,j).value)
            c.append(a[0:13])
        #返回整个表单
        return c
                
                
    def table_create(c):  
        '''创建表并导入数据'''
        #创建表
        conn = sqlite3.connect('./exp.db')
        curs = conn.cursor()
        try:
            curs.execute('''CREATE TABLE IF NOT EXISTS TB_CHECK
                   (item FLOAT,
                    name TEXT,
                    wx_name TEXT,
                    title TEXT,
                    top FLOAT,
                    posttime TEXT,
                    day INT,
                    readnum_pm FLOAT,
                    likenum_pm FLOAT,
                    get_time_pm TEXT,
                    status FLOAT,
                    url TEXT,
                    content TEXT);''')
        except sqlite3.OperationalError as e:
            print(e)
    
        #导入数据
    #    for each in c:
    #        curs.execute("INSERT INTO TB_CHECK VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)"
    #                     , each[0:13])
        conn.commit()
        return conn, curs
     
    def final_number(conn, curs):
        '''算法A估计超过10万的阅读量'''
        #设置阅读总量、关键字、时间,三个列表
        key = [i for i in range(1,31)]
        allnumber = []
        DAY = [i for i in range(41883,41913)]
        #计算每天的阅读总量并记录在列表中
        for i in range(0,30):
            onenumber = 0
            for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.day=%d"%DAY[i]):
                onenumber += row[7]
            allnumber.append(onenumber)
        #设置关键字key,排除无效数据
        for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.readnum_pm=100001"):
            for i in range(0,30):
                if DAY[i]==row[6]:
                    key[i]=0
        #计算平均每天的阅读总量sumnumber                              
        daytime = 30
        sumnumber = 0                
        for i in range(0,30):
            if key[i]==0:
                daytime -= 1
            else:
                sumnumber += allnumber[i]
        sumnumber /= daytime
        print(sumnumber)       
        #利用算法计算阅读量超过100001的实际阅读量,存入finalnum
        finalnum = []
        for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.readnum_pm=100001"):
            for i in range(0,30):
                if DAY[i]==row[6]:      
                    finalnumber=sumnumber-allnumber[i]
                    if finalnumber<0:
                        finalnumber *= (-1)
                    finalnumber += 100001
                    finalnum.append(finalnumber)
                    print(finalnumber)
        #将新数据重新导入db文件
    #    for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.readnum_pm=100001"):
    #        for i in range(0,30):
    #            curs.execute('UPDATE TB_CHECK SET TB_CHECK.readnum_pm=? WHERE TB_CHECK.readnum_pm=?',(finalnum[i], 100001))        
        
        conn.commit()
    
    def final_Rank(conn, curs): 
        '''算法B评价微信影响力'''
        #设置三个等级参数、总等级参数、条目参数
        allRank1 = 0
        allRank2 = 0
        allRank3 = 0
        Rank = []
        Name = []
        ITEM = [i for i in range(1,33)]
        #计算三个总参数,分别是总头条数,总阅读量,总点赞数
        for row in curs.execute("SELECT * FROM TB_CHECK"):
            allRank1 += row[4]
            allRank2 += row[7]
            allRank3 += row[8]
        #求每个微信号的影响力等级,存入Rank中    
        for i in range(0,32):
            Rank1 = 0
            Rank2 = 0
            Rank3 = 0
            for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.item=%f"%ITEM[i]):
                Rank1 += row[4]
                Rank2 += row[7]
                Rank3 += row[8]
            Rank.append(Rank1/allRank1*0.3 + Rank2/allRank2*0.4 +Rank3/allRank3*0.3)
            Name.append(row[1])
        #冒泡排序
        for i in range(0,32):
            for j in range(0,32):
                if Rank[i]>=Rank[j]:
                    k = Rank[i]
                    Rank[i] = Rank[j]
                    Rank[j] = k
                        
                    k = ITEM[i]
                    ITEM[i] = ITEM[j]
                    ITEM[j] = k
                    
                    k = str(k)
                    k = Name[i]
                    Name[i] = Name[j]
                    Name[j] = k
        #显示计算结果,分别是排名,微信号序列,微信号名称,微信号评分
        for i in range(0,32):
            print(i+1,ITEM[i],Name[i],int(Rank[i]*1000))
            
        conn.commit()
    
    def forecast(conn, curs):
        '''算法C预计微信影响力变化'''
        #设置三个参数,日期,条目,等级
        DAY = [i for i in range(41883,41913)]
        ITEM = [i for i in range(1,33)]
        Rank_DAY = []
        Rank = []
        Name = []
        NAME = []
        IT = []
        #计算每天每个微信号的排名变化,存入IT,算法过程类似于算法B部分
        for i in range(0,30):
            allRank1 = 0
            allRank2 = 0
            allRank3 = 0
            for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.day=%d"%DAY[i]):
                allRank1 += row[4]
                allRank2 += row[7]
                allRank3 += row[8]
            for Item in ITEM:
                Rank1 = 0
                Rank2 = 0
                Rank3 = 0
                for row in curs.execute("SELECT * FROM TB_CHECK WHERE TB_CHECK.day=%f AND TB_CHECK.item=%f"%(DAY[i],Item)):
                    Rank1 += row[4]
                    Rank2 += row[7]
                    Rank3 += row[8]
                Rank_DAY.append(Rank1/allRank1*0.3 + Rank2/allRank2*0.4 +Rank3/allRank3*0.3)
                Name.append(row[1])
            IT_DAY = [i for i in range(1,33)]
            for j in range(0,32):
                for k in range(0,32):
                    if Rank_DAY[j]>=Rank_DAY[k]:
                        k = Rank_DAY[i]
                        Rank_DAY[i] = Rank_DAY[j]
                        Rank_DAY[j] = k
                            
                        k = IT_DAY[i]
                        IT_DAY[i] = IT_DAY[j]
                        IT_DAY[j] = k 
                             
                        k = str(k)
                        k = Name[i]
                        Name[i] = Name[j]
                        Name[j] = k
            IT.append(IT_DAY)
            NAME.append(Name)
            Rank.append(Rank_DAY)
    #        print(IT)
        #输入变化后的日期,输出变化结果
        K = 1
        while K == 1:
            inputday = int(input("输入未来天数"))
            for i in range(0,30):
                if inputday == i:
                    for j in range(0,32):
                        print(IT[i][j],NAME[i][j],int(Rank[i][j]*1000))
        
        conn.commit()             
                          
                        
    def main():
        c = read_excel()
        conn, curs = table_create(c)
        i = 1
        while i == 1:
            K = int(input("输入你要的操作:1,算法A   2,算法B   3,算法C    :"))
            if K == 1: 
                final_number(conn, curs)
            elif K == 2:         
                final_Rank(conn, curs)
            elif K == 3:      
                forecast(conn, curs)  
        conn.close()
    
    main()
  • 相关阅读:
    Day 22 初识面向对象
    Day 21 内存处理与正则
    Day 20 常用模块(三)
    Day 18 常用模块(二)
    url解析
    jQuery---扩展事件
    jQuery---文档操作
    jQuery---属性操作
    jQuery---基本语法
    CSS---常用属性总结
  • 原文地址:https://www.cnblogs.com/wangluoyouling/p/6907640.html
Copyright © 2020-2023  润新知