# Basic GET request. A timeout is essential: without one, requests will wait
# forever on a dead or slow server.
response = requests.get("http://www.baidu.com", timeout=10)
response.content.decode("utf-8")  # content 返回 bytes 类型，用 decode() 解码为 str
response.text  # 返回 str 类型；可先 response.encoding = "gbk" 修改解码所用编码
# Download an image and save it to disk.
# coding=utf-8
import requests

url = "http://wap.jiapai.net.cn/images/1.jpg"
# timeout keeps the script from hanging indefinitely on a dead server
response = requests.get(url, timeout=10)
# "wb": image bytes must be written in binary mode
with open("baidu.png", "wb") as f:
    f.write(response.content)
---
# HTTP status code of the response (e.g. 200)
response.status_code
# Headers the server sent back with the response
response.headers
# Headers that were actually sent with the request
response.request.headers
200
{'Content-Length': '20851', 'Content-Type': 'image/jpeg', 'Last-Modified': 'Sun, 28 Jul 2019 04:29:48 GMT', 'Accept-Ranges': 'bytes', 'ETag': '"1f3f6d17fd44d51:0"', 'Set-Cookie': 'sdwaf-test-item=1ed57f5405075208510954035156575b5c5754065406040d015701515e520c; path=/; HttpOnly', 'X-Powered-By': 'SDWAF', 'Date': 'Tue, 05 May 2020 01:56:48 GMT'} {'User-Agent': 'python-requests/2.23.0', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}
---
# Send a request with custom headers: a browser-like User-Agent makes the
# request look like it came from a real browser instead of a script.
# coding=utf-8
import requests

url = "http://wap.jiapai.net.cn/images/1.jpg"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36"}
# timeout prevents the call from blocking forever on a dead server
response = requests.get(url, headers=headers, timeout=10)
print(response.status_code)      # status code returned by the server
print(response.headers)          # server response headers
print(response.request.headers)  # headers that were sent (incl. our User-Agent)
---
# Send a request with query-string parameters.
# `params` is URL-encoded and appended to the URL by requests.
params = {"": ""}  # placeholder: use real key/value pairs, e.g. {"wd": "python"}
# The scheme (http:// or https://) is required — a bare "www.baidu.com/s?"
# makes requests raise MissingSchemaError.
url_temp = "https://www.baidu.com/s?"
requests.get(url_temp, params=params)
---
# Placeholder formatting — prefer str.format (or an f-string) over %-style.
input_string = input("")
url = "http://www.baidu.com/s?wd={}".format(input_string)
# Legacy %-style equivalent (the original fused both onto one line with "||",
# which is not valid Python):
# url = "https://www.baidu.com/s?wd=%s" % input_string
---
列表推导式
# Integers 0 through 9
[i for i in range(10)]
# Each i modulo 2 (alternating 0/1)
[i%2 for i in range(10)]
# i modulo 2, keeping only even i — so every element is 0
[i%2 for i in range(10) if i%2==0]
---
## 面向对象
- 对象
  - 生活中的事物
- 类
  - 对事物的抽象，在代码中用 class 实现
- 实例
  - 对类实例化之后得到的结果
---
# GET-crawl a Tieba forum and save each list page's HTML to a local file.
# coding=utf-8
import requests


class TiebaSpider:
    """Download the first 10 list pages of a Tieba forum and save them locally."""

    def __init__(self, tieba_name):
        # Forum name; also used as the prefix of the saved file names.
        self.tieba_name = tieba_name
        # {} is filled with the page offset (pn); Tieba lists 50 threads per page.
        self.url_temp = "https://tieba.baidu.com/f?kw="+tieba_name+"&ie=utf-8&pn={}"
        # Browser-like User-Agent so the server does not reject us as a bot.
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36"}

    def get_url_list(self):
        """Return URLs of the first 10 pages (offsets 0, 50, ..., 450)."""
        return [self.url_temp.format(i * 50) for i in range(10)]

    def parse_url(self, url):
        """Fetch *url* and return the decoded HTML as a str."""
        # timeout keeps the crawl from hanging on a dead connection
        response = requests.get(url, headers=self.headers, timeout=10)
        return response.content.decode("utf-8")

    def save_html_str(self, html_str, page_num):
        """Write one page's HTML to a file named "<forum>-第<page>页"."""
        file_path = "{}-第{}页".format(self.tieba_name, page_num)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(html_str)

    def run(self):
        # 1. Build the URL list.
        url_list = self.get_url_list()
        # 2. Fetch each page. enumerate replaces the original
        #    url_list.index(url), which was an O(n) scan per iteration and
        #    would return the wrong page number for duplicate URLs.
        for page_num, url in enumerate(url_list, start=1):
            html_str = self.parse_url(url)
            # 3. Save the page to disk.
            self.save_html_str(html_str, page_num)


if __name__ == "__main__":
    tieba_spider = TiebaSpider("李毅")
    tieba_spider.run()
---
# POST request — form data travels in the request body (not the URL), so it is
# better suited to credentials and large payloads.
data = {"":""} # placeholder dict: form fields to send in the request body
# NOTE(review): `headers` must already be defined (it comes from the earlier
# header example in these notes) — confirm before running this snippet alone.
requests.post("https://www.baidu.com",data = data,headers=headers)