Python之爬虫-段子网
https://ishuo.cn
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
import requests
ISHUO_URL = 'https://ishuo.cn'

# Compiled once at import time. '.' does not match newlines here, so only
# content divs whose text sits on a single line are captured.
ISHUO_CONTENT_RE = re.compile(r'<div class="content">(.*?)</div>')


def extract_ishuo_contents(html):
    """Return the inner text of every ``<div class="content">`` in *html*.

    Args:
        html: Page markup as a text string.

    Returns:
        A list of the captured strings in document order; empty when
        nothing matches.
    """
    return ISHUO_CONTENT_RE.findall(html)


def scrape_ishuo(url=ISHUO_URL, timeout=10):
    """Download *url*, print the raw page, then print each content entry.

    Args:
        url: Page to fetch.
        timeout: Seconds to wait for the server before giving up; without
            it ``requests.get`` may block forever on a stalled connection.

    Raises:
        requests.RequestException: On connection problems, timeouts, or
            (via ``raise_for_status``) a 4xx/5xx response.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    response.raise_for_status()
    data = response.text
    # The original script dumps the full page before the extracted entries.
    print(data)
    for content in extract_ishuo_contents(data):
        print(content)


if __name__ == '__main__':
    scrape_ishuo()
http://duanziwang.com/
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
import requests
DUANZIWANG_URL = 'http://duanziwang.com/'

# Dots are escaped so they match only a literal '.', and the href value is
# restricted to non-quote characters so a match cannot run past the closing
# quote of the attribute into unrelated markup.
DUANZIWANG_LINK_RE = re.compile(
    r'<a href="http://duanziwang\.com/[^"]*\.html">(.*?)</a>'
)


def extract_duanziwang_titles(html):
    """Return the link text of every duanziwang.com ``*.html`` anchor in *html*.

    Args:
        html: Page markup as a text string.

    Returns:
        A list of anchor texts in document order; empty when nothing matches.
    """
    return DUANZIWANG_LINK_RE.findall(html)


def scrape_duanziwang(url=DUANZIWANG_URL, timeout=10):
    """Download *url* and print the text of each matching link.

    Args:
        url: Page to fetch.
        timeout: Seconds to wait for the server before giving up; without
            it ``requests.get`` may block forever on a stalled connection.

    Raises:
        requests.RequestException: On connection problems, timeouts, or
            (via ``raise_for_status``) a 4xx/5xx response.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    response.raise_for_status()
    for title in extract_duanziwang_titles(response.text):
        print(title)


if __name__ == '__main__':
    scrape_duanziwang()