import requests
import json
# ************** Crawler disguise / response headers **************
# Send a browser-like User-Agent header instead of the library default, then
# print the status code, reason phrase, every response header and the final URL.
# requests applies no timeout by default, so set one explicitly to keep the
# script from blocking forever on an unresponsive server.
respon = requests.get(
    'https://foofish.net',
    headers={'user-agent': 'Mozilla/5.0'},
    timeout=10,
)
print(respon.status_code)
print(respon.reason)
for key, value in respon.headers.items():
    print('%s : %s' % (key, value))
print(respon.url)
# ***************** File read/write *************************
# Download an image and write the raw response bytes to a.jpg.
r = requests.get(
    'https://pic1.zhimg.com/v2-2e92ebadb4a967829dcd7d05908ccab0_b.jpg',
    timeout=10,  # requests never times out unless told to
)
# Fail loudly on a 4xx/5xx reply rather than saving an error body as a.jpg.
r.raise_for_status()
with open('a.jpg', 'wb') as f:
    f.write(r.content)
# ***************** Web page encoding handling **********************
# Explicit timeout: requests would otherwise wait indefinitely.
r = requests.get("https://foofish.net/understand-http.html", timeout=10)
# Force UTF-8 decoding; without this r.text comes out garbled for this page.
r.encoding = 'utf-8'
print(r.text)
# ******************* JSON parsing ***********************
# Explicit timeout: requests would otherwise wait indefinitely.
r = requests.get('https://www.v2ex.com/api/topics/hot.json', timeout=10)
# Decode the JSON response body into Python objects and print the result.
print(r.json())
# ******************** Proxy configuration *********************
# Map each URL scheme to the proxy server that should carry it.
proxies = {
    'http': 'http://10.10.1.10:3128',
    'https': 'http://10.10.1.10:1080',
}
# The timeout matters most here: if the proxy is unreachable the request
# would otherwise stall until the OS gives up on the connection.
r = requests.get(
    'https://www.google.com.hk/?gws_rd=ssl',
    proxies=proxies,
    timeout=10,
)
r.encoding = 'utf-8'
print(r.text)
# ********************* Login session **************************