主要通过爬取站长工具（ip.tool.chinaz.com）查询 IP 信息，仅用于学术研究，请勿用于商业场景。
import re
import requests
import time
# Lookup page of the Chinaz webmaster tools; the address to query is appended
# directly to this base URL.
url = "http://ip.tool.chinaz.com/"

# Markers removed from a matched fragment so that only the inner text remains.
burn = [
    r'<em>',
    r'</em>',
]

# An <em>...</em> element whose text consists solely of CJK ideographs
# (U+4E00-U+9FA5), ASCII letters, digits, and the characters - * _ & and space.
rg = r'<em>[-*\u4E00-\u9FA5A-Za-z0-9_& ]*?<\/em>'
def getIP(ipstr, timeout=10, retry_delay=1, max_retries=None):
    """Query the lookup page for ``ipstr`` and return the first ``rg`` match.

    The page at ``url + ipstr`` is downloaded and searched with the
    module-level pattern ``rg``.  The first match is returned after every
    marker listed in ``burn`` has been removed from it.  If the response
    contains no match, the request is repeated after ``retry_delay`` seconds.

    NOTE(review): the returned text is presumably the location the site
    reports for the address -- confirm against the live page markup.

    Args:
        ipstr: IP address as a string; appended verbatim to ``url``.
        timeout: Seconds handed to ``requests.get``.  Without a timeout a
            stalled connection would block the caller forever.
        retry_delay: Seconds to sleep between two attempts.
        max_retries: Upper bound on the number of requests.  ``None`` (the
            default) retries without limit.  At least one request is always
            made.

    Returns:
        The cleaned text of the first match, or ``None`` when ``max_retries``
        requests were made without finding one.

    Raises:
        requests.RequestException: For request failures other than a timeout.
    """
    attempts = 0
    while True:
        attempts += 1
        try:
            res = requests.get(url + ipstr, timeout=timeout)
        except requests.exceptions.Timeout:
            # A timed-out request is handled like a page without a match, so
            # adding the timeout does not expose callers to a new exception.
            found = None
        else:
            found = re.search(rg, res.text)

        if found is not None:
            text = found.group(0)
            for marker in burn:
                text = text.replace(marker, '')
            return text

        if max_retries is not None and attempts >= max_retries:
            return None
        time.sleep(retry_delay)
利用 Flask 服务器日志，统计访问后台的各个 IP 的地址及其访问次数。
# 运行参考
# PS F:\Output\IPco> python .\fresh.py .\1.txt
import re
import pprint
import sys
# Path of the access log to analyse, taken from the first command-line argument.
if len(sys.argv) < 2:
    sys.exit(f"usage: python {sys.argv[0]} <access-log>")
name = sys.argv[1]

# A request line is recognised by a dotted-quad address at the very start of a
# line, followed by " - - [".  Anchoring with ^ under re.MULTILINE (instead of
# requiring a preceding newline) also counts an address on the first line of
# the file.  The capture group makes findall() return the bare address.
rg = r'^((?:[0-9]{1,3}\.){3}[0-9]{1,3}) \- \- \['

# One entry per matching log line; duplicates are kept so they can be counted.
with open(name, 'r', encoding='utf8') as f:
    allIP = re.findall(rg, f.read(), flags=re.MULTILINE)
import collections
# Collapse the per-line addresses into (address, hit count) pairs ordered from
# most to least frequent.  Addresses with the same count stay in the order in
# which they first appeared in the log.
allIP = collections.Counter(allIP).most_common()
import requests
import time
# Lookup page of the Chinaz webmaster tools; the address to query is appended
# directly to this base URL.
url = "http://ip.tool.chinaz.com/"

# Markers removed from a matched fragment so that only the inner text remains.
burn = [
    r'<em>',
    r'</em>',
]

# An <em>...</em> element whose text consists solely of CJK ideographs
# (U+4E00-U+9FA5), ASCII letters, digits, and the characters - * _ & and space.
rg = r'<em>[-*\u4E00-\u9FA5A-Za-z0-9_& ]*?<\/em>'
def getIP(ipstr, timeout=10, retry_delay=1, max_retries=None):
    """Query the lookup page for ``ipstr`` and return the first ``rg`` match.

    The page at ``url + ipstr`` is downloaded and searched with the
    module-level pattern ``rg``.  The first match is returned after every
    marker listed in ``burn`` has been removed from it.  If the response
    contains no match, the request is repeated after ``retry_delay`` seconds.

    NOTE(review): the returned text is presumably the location the site
    reports for the address -- confirm against the live page markup.

    Args:
        ipstr: IP address as a string; appended verbatim to ``url``.
        timeout: Seconds handed to ``requests.get``.  Without a timeout a
            stalled connection would block the caller forever.
        retry_delay: Seconds to sleep between two attempts.
        max_retries: Upper bound on the number of requests.  ``None`` (the
            default) retries without limit.  At least one request is always
            made.

    Returns:
        The cleaned text of the first match, or ``None`` when ``max_retries``
        requests were made without finding one.

    Raises:
        requests.RequestException: For request failures other than a timeout.
    """
    attempts = 0
    while True:
        attempts += 1
        try:
            res = requests.get(url + ipstr, timeout=timeout)
        except requests.exceptions.Timeout:
            # A timed-out request is handled like a page without a match, so
            # adding the timeout does not expose callers to a new exception.
            found = None
        else:
            found = re.search(rg, res.text)

        if found is not None:
            text = found.group(0)
            for marker in burn:
                text = text.replace(marker, '')
            return text

        if max_retries is not None and attempts >= max_retries:
            return None
        time.sleep(retry_delay)
# print(getIP("119.90.42.91"))
# Emit one tab-separated row per address, in the order of allIP:
# hit count, address, and whatever getIP() returns for that address.
for entry in allIP:
    ipS, cou = entry
    print(f"{cou}\t{ipS}\t{getIP(ipS)}")