• python 爬取乌云所有厂商名字,url,漏洞总数 并存入数据库


    需要:MySQLdb 
    下面是数据表结构:

     
    /*
    Navicat MySQL Data Transfer
     
    Source Server         : 127.0.0.1
    Source Server Version : 50509
    Source Host           : 127.0.0.1:3306
    Source Database       : wooyun
     
    Target Server Type    : MYSQL
    Target Server Version : 50509
    File Encoding         : 65001
     
    Date: 2015-09-24 17:38:14
    */
     
    SET FOREIGN_KEY_CHECKS=0;
     
    -- ----------------------------
    -- Table structure for wooyun_vul
    -- One row per vendor: name, URL and vulnerability count.
    -- The table uses utf8 so that it matches the "set names 'utf8'" the
    -- loader script issues on its connection; with latin1, non-ASCII
    -- vendor names cannot be stored faithfully.
    -- ----------------------------
    DROP TABLE IF EXISTS `wooyun_vul`;
    CREATE TABLE `wooyun_vul` (
      `id` int(8) NOT NULL AUTO_INCREMENT,
      `corpsname` varchar(255) DEFAULT NULL,
      `corpsurl` varchar(255) DEFAULT NULL,
      `vulcount` int(255) DEFAULT NULL,
      PRIMARY KEY (`id`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
    

    python 脚本:

    # coding=utf-8
    import urllib2
    import urllib
    import re
    import MySQLdb
     
    # Base URL of the paginated vendor list; the page number is appended to it.
    url = "http://wooyun.org/corps/page/"
    def getWooyuncorps(url):
        """Fetch one vendor-list page and extract vendor names and vendor URLs.

        Args:
            url: Address of the vendor-list page to download.

        Returns:
            A ``(corps, corpsUrl)`` tuple of lists. ``corps`` holds the text
            captured after ``/corps/`` in each vendor link, and ``corpsUrl``
            holds the ``href`` of each ``rel="nofollow"`` link. The two lists
            are extracted independently of each other, so they are only
            index-aligned when the page carries exactly one nofollow link per
            vendor row.

        Raises:
            urllib2.URLError: If the page cannot be retrieved.
        """
        request = urllib2.Request(url)
        # Send a browser-like User-Agent instead of urllib2's default one.
        request.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36')
        response = urllib2.urlopen(request)
        try:
            content = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
        pattern1 = re.compile(r'<td width="370"><a href="/corps/(.*?)">.*?</a></td>')
        pattern2 = re.compile(r'<a rel="nofollow" href="(.*?)" target=')
        corps = pattern1.findall(content)
        corpsUrl = pattern2.findall(content)
        return corps,corpsUrl
     
    def getcorpscount(url):
        """Fetch a vendor page and extract the number shown in its pager line.

        The script stores this number as the vendor's vulnerability count.

        Args:
            url: Address of the vendor page to download.

        Returns:
            A list of digit strings: for every line containing
            ``<p class="page">``, the first run of digits that follows the
            tag. The list is empty when the page has no such line.

        Raises:
            urllib2.URLError: If the page cannot be retrieved.
        """
        request = urllib2.Request(url)
        # Send a browser-like User-Agent instead of urllib2's default one.
        request.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36')
        response = urllib2.urlopen(request)
        try:
            content = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
        # "\d+" captures the digits; without the backslash the group would
        # match literal "d" characters and never yield a number.
        pattern = re.compile(r'<p class="page">.*?(\d+).*')
        return pattern.findall(content)
     
    # Accumulators used by the rest of the script: vendor names, vendor URLs
    # and (filled in later) the per-vendor counts.
    corpslist, corpsurllist, countlist = [], [], []
    # Walk vendor-list pages 1 through 36 and pool what each page yields.
    for page in range(1, 37):
        names, links = getWooyuncorps(url + "%d" % page)
        corpslist.extend(names)
        corpsurllist.extend(links)
    # Report how many names and URLs were collected.
    print("%d %d" % (len(corpslist), len(corpsurllist)))
     
    # Look up the count of every vendor and keep name, URL and count together
    # as one row. Pairing them here (instead of by list index afterwards)
    # means a vendor page that yields no count, or more than one, cannot shift
    # later counts onto the wrong vendor.
    rows = []
    for corp, corpurl in zip(corpslist, corpsurllist):
        newurl = "http://www.wooyun.org/corps/"+urllib.quote(corp)
        count = getcorpscount(newurl)
        if not count:
            # No count could be extracted for this vendor; leave it out
            # rather than storing a made-up value.
            continue
        countlist.append(count[0])
        rows.append((corp, corpurl, int(count[0])))

    conn = MySQLdb.connect('localhost','root','','wooyun')
    try:
        cur = conn.cursor()
        cur.execute("set names 'utf8'")
        conn.commit()

        # Placeholders let the driver quote the scraped values, so quotes or
        # other special characters in a vendor name or URL cannot break or
        # alter the statement.
        sql = 'insert into wooyun_vul(corpsname,corpsurl,vulcount) values(%s,%s,%s)'
        for row in rows:
            print("%s %s %d" % row)
            cur.execute(sql, row)
            # Commit per row so vendors already stored survive a later failure.
            conn.commit()
    finally:
        # Close the connection even when an insert raises.
        conn.close()
    print("success")
    

      

  • 相关阅读:
    Windows下不能启动mysql服务错误总结
    使用NSOperationQueue简化多线程开发(转)
    “四人帮”的设计模式经得起时间的考验么?(转)
    ObjectiveC category
    svn add 输出 A (bin) (转)
    NSNotification学习笔记
    浅析UITableViewCell的工作机制
    关于git分支的使用
    delegate使用方法之assign
    ARC(Automatic Reference Counting )技术概述(转)
  • 原文地址:https://www.cnblogs.com/depycode/p/5190061.html
Copyright © 2020-2023  润新知