PowerShell implementation: http://www.cnblogs.com/IvanChen/p/4488246.html
Today, here is the same thing done in Python:
# coding=utf-8
import re
import requests
import csv
import sys
from lxml import etree

# Python 2: force UTF-8 so the CSV writer does not choke on non-ASCII text.
reload(sys)
sys.setdefaultencoding('utf8')

# Bulletin summary page for the month; every MSxx-xxx bulletin ID is linked from here.
summaryurl = 'https://technet.microsoft.com/en-us/library/security/mt637763.aspx'
summarycontent = requests.get(summaryurl).content
selector = etree.HTML(summarycontent)
mslist = selector.xpath('//*[@id="mainBody"]/table/tr/td[2]/p/a/text()')

# Regexes that pull the interesting fragments out of each bulletin page.
pattern_published_date = re.compile(r"[a-zA-Z]*?\s[0-9]*?,\s[0-9]*")
pattern_severity = re.compile(r"[a-zA-Z]*$")
pattern_kbnumber = re.compile(r"\d+")
pattern_vultype = re.compile(
    r"Information Disclosure|Remote Code Execution|Elevation of Privilege|"
    r"Security Feature Bypass|Cumulative Security Update|Denial of Service|"
    r"Tampering|Spoofing", re.I)

csvfile = file('eggs.csv', 'wb')
writer = csv.writer(csvfile, dialect="excel")
writer.writerow(['Date', 'MSRC', 'KB', 'Severity', 'Version', 'Summary', 'Type'])

for eachmsrc in mslist:
    # Fetch the individual bulletin page (e.g. MS16-001) and parse it.
    msrcurl = "https://technet.microsoft.com/en-us/library/security/" + eachmsrc + ".aspx"
    msrc_content = requests.get(msrcurl).content
    msrc_selector = etree.HTML(msrc_content)

    # Raw field candidates extracted by XPath.
    published_date = msrc_selector.xpath('//*[@id="pubInfo"]/p[1]/text()')
    kbnumber = msrc_selector.xpath('//*[@id="mainBody"]/h2/text()')
    severity = msrc_selector.xpath('//*[@id="content"]/div[2]/h1/text()')
    version = msrc_selector.xpath('//*[@id="pubInfo"]/p[2]/text()')
    summary = msrc_selector.xpath('//*[@id="mainBody"]/div[3]/div/p[1]/text()')
    vultype = msrc_selector.xpath('string(//*[@id="mainBody"]/div[3]/div)')

    # Clean each field up with the regexes defined above.
    ft_published_date = re.search(pattern_published_date, published_date[0]).group()
    ft_kbnumber = re.search(pattern_kbnumber, kbnumber[0]).group()
    ft_severity = re.search(pattern_severity, severity[0].strip(' ')).group()
    ft_version = version[1]
    ft_summary = summary[0]
    ft_vultype = re.search(pattern_vultype, vultype)

    if ft_vultype:
        writer.writerow([ft_published_date, eachmsrc, ft_kbnumber, ft_severity,
                         ft_version, ft_summary, ft_vultype.group()])
    else:
        # Some bulletins keep the vulnerability type in a table further down the page.
        vultype = msrc_selector.xpath('string(//*[@id="mainBody"]/div[position()>3]/div/table)')
        ft_vultype = re.search(pattern_vultype, vultype)
        writer.writerow([ft_published_date, eachmsrc, ft_kbnumber, ft_severity,
                         ft_version, ft_summary, ft_vultype.group()])

csvfile.close()
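The script above is Python 2 (reload(sys), file()). As a minimal sketch, if you wanted to run the same approach on Python 3, the encoding hack disappears and the CSV file is opened in text mode with newline=''; the URLs, XPath expressions, and per-bulletin loop are assumed to stay exactly as above (and note that the old TechNet bulletin pages may no longer be served, so the requests can fail today):

# coding=utf-8
# Hypothetical Python 3 adaptation (not from the original post): same URLs and
# XPath expressions as the Python 2 script, only the I/O and encoding handling change.
import csv
import requests
from lxml import etree

summaryurl = 'https://technet.microsoft.com/en-us/library/security/mt637763.aspx'
selector = etree.HTML(requests.get(summaryurl).content)
mslist = selector.xpath('//*[@id="mainBody"]/table/tr/td[2]/p/a/text()')

# Open the CSV in text mode; newline='' is what the csv module expects on Python 3.
with open('eggs.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile, dialect='excel')
    writer.writerow(['Date', 'MSRC', 'KB', 'Severity', 'Version', 'Summary', 'Type'])
    # ... per-bulletin loop identical to the Python 2 version above ...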