• Coursera课程笔记----P4E.Capstone----Week 6&7


    Visualizing Email Data (Week 6&7)

    code segment

    gword.py

    import sqlite3
    import time
    import zlib
    import string
    
    # gword.py: count the words used in message subjects stored in
    # index.sqlite and write the 100 most frequent ones to gword.js as a
    # JavaScript array of {text, size} objects.
    conn = sqlite3.connect('index.sqlite')
    cur = conn.cursor()

    # Map subject id -> subject text.
    cur.execute('SELECT id, subject FROM Subjects')
    subjects = dict()
    for message_row in cur:
        subjects[message_row[0]] = message_row[1]

    # One translation table that deletes ASCII punctuation and digits; built
    # once instead of twice per message.
    strip_table = str.maketrans('', '', string.punctuation + '1234567890')

    # Count every word of four or more characters, once per message that
    # uses the subject.
    cur.execute('SELECT subject_id FROM Messages')
    counts = dict()
    for message_row in cur:
        text = subjects[message_row[0]]
        words = text.translate(strip_table).strip().lower().split()
        for word in words:
            if len(word) < 4:
                continue
            counts[word] = counts.get(word, 0) + 1

    # All rows have been read; release the database handle.
    conn.close()

    # Words ordered by descending frequency; only the first 100 are emitted.
    x = sorted(counts, key=counts.get, reverse=True)
    top_words = x[:100]
    highest = max((counts[k] for k in top_words), default=None)
    lowest = min((counts[k] for k in top_words), default=None)
    print('Range of counts:', highest, lowest)

    # Spread the font sizes across 20-100 based on the count:
    # the lowest count maps to smallsize, the highest to smallsize + bigsize.
    bigsize = 80
    smallsize = 20

    entries = []
    for k in top_words:
        spread = highest - lowest
        # When every emitted word has the same count the spread is zero;
        # fall back to the smallest size instead of dividing by zero.
        ratio = (counts[k] - lowest) / float(spread) if spread else 0.0
        size = int((ratio * bigsize) + smallsize)
        entries.append("{text: '" + k + "', size: " + str(size) + "}")

    # The context manager guarantees the file is flushed and closed even if
    # a write fails.
    with open('gword.js', 'w') as fhand:
        fhand.write("gword = [")
        fhand.write(",\n".join(entries))
        fhand.write("\n];\n")

    print("Output written to gword.js")
    print("Open gword.htm in a browser to see the vizualization")
    

    gline.py

    import sqlite3
    import time
    import zlib
    
    # gline.py: count messages per month for the ten most active sender
    # domains in index.sqlite and write the table to gline.js as a JavaScript
    # array of rows (header row first, then one row per month).
    conn = sqlite3.connect('index.sqlite')
    cur = conn.cursor()

    # Map sender id -> sender address.
    cur.execute('SELECT id, sender FROM Senders')
    senders = dict()
    for message_row in cur:
        senders[message_row[0]] = message_row[1]

    # Map message id -> (guid, sender_id, subject_id, sent_at).
    cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages')
    messages = dict()
    for message_row in cur:
        messages[message_row[0]] = (message_row[1], message_row[2], message_row[3], message_row[4])

    # All rows have been read; release the database handle.
    conn.close()

    print("Loaded messages=",len(messages),"senders=",len(senders))

    # Resolve each message to its sender's domain once. Addresses that do not
    # contain exactly one "@" are skipped, both here and in the monthly counts.
    message_orgs = dict()
    for message_id, message in messages.items():
        pieces = senders[message[1]].split("@")
        if len(pieces) != 2:
            continue
        message_orgs[message_id] = pieces[1]

    # Total number of messages per domain.
    sendorgs = dict()
    for dns in message_orgs.values():
        sendorgs[dns] = sendorgs.get(dns, 0) + 1

    # pick the top schools
    orgs = sorted(sendorgs, key=sendorgs.get, reverse=True)
    orgs = orgs[:10]
    print("Top 10 Organizations")
    print(orgs)

    # Count messages per (month, domain) for the selected domains only.
    top_orgs = set(orgs)
    counts = dict()
    seen_months = set()
    for message_id, dns in message_orgs.items():
        if dns not in top_orgs:
            continue
        # First seven characters of sent_at; presumably "YYYY-MM" -- confirm
        # against the format stored in the Messages table.
        month = messages[message_id][3][:7]
        seen_months.add(month)
        key = (month, dns)
        counts[key] = counts.get(key, 0) + 1

    months = sorted(seen_months)

    # The context manager guarantees the file is flushed and closed even if
    # a write fails.
    with open('gline.js', 'w') as fhand:
        # Header row: 'Month' followed by one column per domain.
        fhand.write("gline = [ ['Month'")
        for org in orgs:
            fhand.write(",'"+org+"'")
        fhand.write("]")

        # One row per month; a domain with no messages that month gets 0.
        for month in months:
            fhand.write(",\n['"+month+"'")
            for org in orgs:
                fhand.write(","+str(counts.get((month, org), 0)))
            fhand.write("]")

        fhand.write("\n];\n")

    print("Output written to gline.js")
    print("Open gline.htm to visualize the data")
    
  • 相关阅读:
    Java 反射机制
    Hibernate学习
    js学习
    如何在jsp中引入bootstrap
    bootstrap学习一
    第二章、初级篇
    定风波
    Java反射机制
    数据库的优化方法
    MySQL常用的查询语句回顾
  • 原文地址:https://www.cnblogs.com/maimai-d/p/12775931.html
Copyright © 2020-2023  润新知