• Coursera课程笔记----P4E.Capstone----Week 6&7


    Visualizing Email Data (Week 6 & 7)

    code segment

    gword.py

    import sqlite3
    import time
    import zlib
    import string
    
    # gword.py: count the words used in message subjects stored in
    # index.sqlite and write the 100 most frequent ones, each with a font
    # size, to gword.js as a JavaScript array named `gword`.
    conn = sqlite3.connect('index.sqlite')
    cur = conn.cursor()

    # Map subject id -> subject text so each message row can be resolved.
    cur.execute('SELECT id, subject FROM Subjects')
    subjects = {row[0]: row[1] for row in cur}

    # One translation table removes punctuation and digits in a single pass.
    strip_table = str.maketrans('', '', string.punctuation + '1234567890')

    # Every message contributes the words of its subject once, so a subject
    # shared by many messages is counted once per message.
    cur.execute('SELECT subject_id FROM Messages')
    counts = dict()
    for message_row in cur:
        text = subjects[message_row[0]].translate(strip_table).lower()
        for word in text.split():
            # Words shorter than four characters are ignored.
            if len(word) < 4:
                continue
            counts[word] = counts.get(word, 0) + 1
    conn.close()

    # Words ordered by descending count; only the first 100 are emitted.
    x = sorted(counts, key=counts.get, reverse=True)
    top_words = x[:100]
    highest = max((counts[k] for k in top_words), default=None)
    lowest = min((counts[k] for k in top_words), default=None)
    print('Range of counts:',highest,lowest)

    # Spread the font sizes across 20-100 based on the count
    bigsize = 80
    smallsize = 20

    # When every emitted word has the same count (including the single-word
    # case) the range is zero; treat the scaled value as 0 instead of
    # dividing by zero, which gives every word the smallest font size.
    spread = (highest - lowest) if top_words else 0
    entries = []
    for k in top_words:
        scaled = (counts[k] - lowest) / spread if spread else 0.0
        size = int((scaled * bigsize) + smallsize)
        entries.append("{text: '"+k+"', size: "+str(size)+"}")

    # The context manager closes the file even if a write fails.
    with open('gword.js','w') as fhand:
        fhand.write("gword = [")
        fhand.write(",\n".join(entries))
        fhand.write("\n];\n")

    print("Output written to gword.js")
    print("Open gword.htm in a browser to see the vizualization")
    

    gline.py

    import sqlite3
    import time
    import zlib
    
    # gline.py: count messages per month for the ten most frequent sender
    # domains in index.sqlite and write the table to gline.js as a
    # JavaScript array named `gline` (header row first, then one row per
    # month).
    conn = sqlite3.connect('index.sqlite')
    cur = conn.cursor()

    # Map sender id -> sender address.
    cur.execute('SELECT id, sender FROM Senders')
    senders = {row[0]: row[1] for row in cur}

    # Map message id -> (guid, sender_id, subject_id, sent_at).
    cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages')
    messages = {row[0]: (row[1], row[2], row[3], row[4]) for row in cur}
    conn.close()

    print("Loaded messages=",len(messages),"senders=",len(senders))

    # Count messages per sender domain. Addresses that do not split into
    # exactly two pieces around "@" are skipped. The (domain, sent_at) pair
    # of every counted message is remembered so the address does not have
    # to be parsed a second time below.
    sendorgs = dict()
    dated_domains = []
    for message in messages.values():
        pieces = senders[message[1]].split("@")
        if len(pieces) != 2:
            continue
        dns = pieces[1]
        sendorgs[dns] = sendorgs.get(dns, 0) + 1
        dated_domains.append((dns, message[3]))

    # Keep the ten domains with the most messages.
    orgs = sorted(sendorgs, key=sendorgs.get, reverse=True)
    orgs = orgs[:10]
    print("Top 10 Organizations")
    print(orgs)

    # Count messages per (month, domain) for the selected domains only.
    top_orgs = set(orgs)
    counts = dict()
    for dns, sent_at in dated_domains:
        if dns not in top_orgs:
            continue
        # The month key is the first seven characters of sent_at
        # (presumably 'YYYY-MM' -- confirm against the stored date format).
        month = sent_at[:7]
        key = (month, dns)
        counts[key] = counts.get(key, 0) + 1

    # Every month that received at least one counted message, ascending.
    months = sorted({month for (month, _) in counts})

    # The context manager closes the file even if a write fails.
    with open('gline.js','w') as fhand:
        # Header row: 'Month' followed by one column per domain.
        fhand.write("gline = [ ['Month'")
        for org in orgs:
            fhand.write(",'"+org+"'")
        fhand.write("]")

        # One row per month; a domain with no messages that month gets 0.
        for month in months:
            fhand.write(",\n['"+month+"'")
            for org in orgs:
                fhand.write(","+str(counts.get((month, org), 0)))
            fhand.write("]")

        fhand.write("\n];\n")

    print("Output written to gline.js")
    print("Open gline.htm to visualize the data")
    
  • 相关阅读:
    51nod乘积之和
    Dell服务器安装OpenManage(OMSA)
    Nginx反向代理PHP
    搭建haproxy
    108. Convert Sorted Array to Binary Search Tree
    60. Permutation Sequence
    142. Linked List Cycle II
    129. Sum Root to Leaf Numbers
    118. Pascal's Triangle
    26. Remove Duplicates from Sorted Array
  • 原文地址:https://www.cnblogs.com/maimai-d/p/12775931.html
Copyright © 2020-2023  润新知