爬取乌云上所有人民币和乌云符号的漏洞（python脚本）

  1 import httplib
  2 from HTMLParser import HTMLParser
  3 import urlparse
  4 import urllib
  5 from bs4 import BeautifulSoup
  6 import re
  7 from time import sleep
  8 '''
  9 usage: 
 10 
 11 
 12 Set the `cookies` variable below to your logged-in wooyun.org cookie string, then run this script to start crawling.
 13 
 14 author: Elliott
 15 
 16 
 17 '''
 18 
 19 
 20 
 21 
 22 domain = 'wooyun.org'  # host that every HTTPConnection in this script is opened against
 23 cookies = ''  # REQUIRED: paste your wooyun cookie string here; it is sent as the Cookie header
 24 user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0'  # sent as the User-Agent header
 25 
 26 
 27 
 28 def countnumber():  # the function to get page num
 29     global domain
 30     global cookies
 31     global user_agent
 32     conn = httplib.HTTPConnection(domain)
 33     conn.request('GET','/user.php?action=openbugs','',{'Cookie':cookies,'User-Agent':user_agent,'Referer':'http://wooyun.org/index.php','Host':'wooyun.org'})
 34     content = conn.getresponse()
 35     content = content.read()
 36     soup = BeautifulSoup(content)
 37     tag = soup.find_all('p',attrs={'class':'page'})
 38     if len(tag) == 0:
 39         tag = 'None'
 40     else:
 41         tag = str(tag[0])
 42     pattern = re.compile('>.*<a class="current">')
 43     result = pattern.findall(tag)
 44     if len(result) == 0:
 45         result = 'None'
 46     else:
 47         result = str(result[0])
 48     number = filter(str.isdigit, result)
 49     num = number[5:]  #get then total page number
 50     return int(num)
 51 
 52 
 53 def dealthepage(content):
 54     global domain
 55     global cookies
 56     global user_agent
 57     conn = httplib.HTTPConnection(domain)
 58     soup = BeautifulSoup(content)
 59     k = soup.find_all('a')
 60     item = k[27:47]
 61     pattern = re.compile('href="(.+?)"')
 62     hreaf = []
 63     for i in range(len(item)):
 64     ss = pattern.findall(str(item[i]))
 65         if len(ss) == 0:
 66             break
 67     hreaf.append(str(ss[0]))
 68     for i in hreaf:
 69         #sleep(0.5)
 70     conn.request('GET',i,'',{'Cookie':cookies,'User-Agent':user_agent,'Referer':'http://wooyun.org/index.php','Host':'wooyun.org'})
 71     content2 = conn.getresponse()
 72     content2 = content2.read()
 73     soup2 = BeautifulSoup(content2)
 74     imgtag = soup2.find_all(class_='credit')
 75     '''may be $ or cloud'''
 76     if len(imgtag) != 0:
 77         findcloud = re.compile('src="/images/credit.png"')
 78         findmoney = re.compile('src="/images/m(.?).png"')
 79         cloudnum = findcloud.findall(content2)
 80         moneylevel = findmoney.findall(content2)
 81         cloud = 0
 82         money = 0
 83         if len(cloudnum) != 0:
 84             if len(cloudnum) == 1:
 85                 cloud = 1
 86             if len(cloudnum) == 2:
 87                 cloud = 2
 88             if len(cloudnum) == 3:
 89                 cloud = 3
 90         if len(moneylevel) != 0:
 91             if len(moneylevel) == 1:
 92                 money = 1
 93             if len(moneylevel) == 2:
 94                 money = 2
 95             if len(moneylevel) == 3:
 96                 money = 3
 97         title = soup2.findAll(attrs={"class":"wybug_title"})
 98         if len(title) == 0:
 99             title = 'No Title'
100         else:
101             title = str(title[0])
102         deltag = re.compile('r<[^>]+>')
103         title = deltag.sub('',title)
104         author = soup2.findAll(attrs={"class":"wybug_author"})
105         if len(author) == 0:
106             author = 'No name'
107         else:
108             author = str(author[0])
109         author = deltag.sub('',author)
110         date = soup2.findAll(attrs={"class":"wybug_date"})
111         if len(date) == 0:
112             date = 'No time'
113         else:
114             date = str(date[0])
115         date = deltag.sub('',date)
116         link = "http://www.wooyun.org"+i
117         link = str(link)
118         f = open("test.html","a+")
119         s = "<tr><td>level:cloud="+str(cloud)+"money="+str(money)+"</td><th>"+date+"</th><td><a href='"+link+"'>"+title+"</a></td><th>"+author+"</th></tr><br>"
120         f.write(s)
121         f.close
122 
123 
124 
125 
126 
127 
if __name__ == '__main__':
    # Fetch every listing page (pNO=1 up to the page count reported by
    # countnumber()) and hand its HTML to dealthepage().
    page_headers = {
        'Cookie': cookies,
        'User-Agent': user_agent,
        'Referer': 'http://wooyun.org/index.php',
        'Host': 'wooyun.org',
    }
    for page in range(1, countnumber() + 1):
        conn = httplib.HTTPConnection(domain)
        try:
            conn.request('GET', '/user.php?action=openbugs&pNO=' + str(page), '', page_headers)
            listing = conn.getresponse().read()
        finally:
            # One connection is opened per page; close it so sockets do not pile up.
            conn.close()
        dealthepage(listing)
136 
137 
138 
139 
140 
141

附张效果图：

相关阅读:
Educational Codeforces Round 51 (Rated for Div. 2)
Kruskal重构树入门
 编译原理词法分析
 java.lang.String内部结构的变化
 android 世界各国英文简写代码资源文件
 openCV python 安装
 解读30个提高Web程序执行效率的好经验
 从认知盈余说起，也谈分享精神
 STL set multiset map multimap unordered_set unordered_map example
[置顶] 【Git入门之一】Git是神马？
原文地址：https://www.cnblogs.com/elliottc/p/4992700.html