I. requests
1. GET requests
# 1. GET without parameters

import requests

ret = requests.get('https://github.com/timeline.json')

print(ret.url)
print(ret.text)


# 2. GET with parameters

import requests

payload = {'key1': 'value1', 'key2': 'value2'}
ret = requests.get("http://httpbin.org/get", params=payload)

print(ret.url)
print(ret.text)
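Beyond url and text, the Response object exposes a few other attributes worth knowing early on. A minimal sketch against httpbin.org (any echo service behaves the same way):

import requests

ret = requests.get("http://httpbin.org/get", params={'key1': 'value1'})

print(ret.status_code)  # HTTP status code, e.g. 200
print(ret.encoding)     # encoding guessed from the response headers
print(ret.headers)      # response headers, a dict-like object
print(ret.json())       # decode the body as JSON (raises ValueError if it is not JSON)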
2. POST requests
# 1. Basic POST example

import requests

payload = {'key1': 'value1', 'key2': 'value2'}
ret = requests.post("http://httpbin.org/post", data=payload)

print(ret.text)


# 2. Sending request headers and data

import requests
import json

url = 'https://api.github.com/some/endpoint'
payload = {'some': 'data'}
headers = {'content-type': 'application/json'}

ret = requests.post(url, data=json.dumps(payload), headers=headers)

print(ret.text)
print(ret.cookies)
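For JSON bodies, requests (2.4.2+) can also do the serialization itself: passing json=payload instead of data=json.dumps(payload) encodes the dict and sets the Content-Type header automatically. A minimal sketch of the same GitHub call:

import requests

url = 'https://api.github.com/some/endpoint'
payload = {'some': 'data'}

# requests serializes the dict and sets Content-Type: application/json for you
ret = requests.post(url, json=payload)

print(ret.status_code)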
3. Other request methods
requests.get(url, params=None, **kwargs)
requests.post(url, data=None, json=None, **kwargs)
requests.put(url, data=None, **kwargs)
requests.head(url, **kwargs)
requests.delete(url, **kwargs)
requests.patch(url, data=None, **kwargs)
requests.options(url, **kwargs)

# all of the methods above are built on top of this one
requests.request(method, url, **kwargs)
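Because every helper above is a thin wrapper over requests.request, a verb helper and the equivalent requests.request call behave identically, and the common keyword arguments (headers, cookies, timeout, and so on) are accepted by all of them. A quick sketch against httpbin.org:

import requests

# the two calls below are equivalent
ret1 = requests.get("http://httpbin.org/get", params={'k': 'v'})
ret2 = requests.request('GET', "http://httpbin.org/get", params={'k': 'v'})
print(ret1.url == ret2.url)  # True

# common **kwargs work with any of the helpers
ret3 = requests.delete("http://httpbin.org/delete",
                       headers={'User-Agent': 'demo'},
                       timeout=5)
print(ret3.status_code)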
4. Autohome news example
import requests
from bs4 import BeautifulSoup

response = requests.get("http://www.autohome.com.cn/news/")
# response.text is a str; response.content is raw bytes
response.encoding = 'gbk'  # the site uses gbk

soup = BeautifulSoup(response.text, 'html.parser')  # parse the returned HTML with bs4
tag = soup.find(name='div', attrs={'id': 'auto-channel-lazyload-article'})  # find the div with that id
li_list = tag.find_all('li')  # grab all the li tags inside it

for li in li_list:
    h3 = li.find('h3')  # the h3 tag holds the title
    if not h3:
        continue
    p = li.find('p')  # the p tag holds the summary
    img = li.find('img')  # the img tag holds the thumbnail
    src = img.attrs.get('src')  # pull the src attribute off the img tag
    print(src, h3.text, p.text)
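A natural follow-up is saving the thumbnails to disk: response.content gives the raw bytes to write out. A hedged sketch, assuming (as the page did at the time) that src is scheme-relative and needs an http: prefix; the filename logic here is purely illustrative:

import os
import requests
from bs4 import BeautifulSoup

response = requests.get("http://www.autohome.com.cn/news/")
response.encoding = 'gbk'

soup = BeautifulSoup(response.text, 'html.parser')
tag = soup.find(name='div', attrs={'id': 'auto-channel-lazyload-article'})

for li in tag.find_all('li'):
    img = li.find('img')
    if not img:
        continue
    src = img.attrs.get('src')
    if src.startswith('//'):  # scheme-relative URL, e.g. //www3.autoimg.cn/...
        src = 'http:' + src
    img_bytes = requests.get(src).content  # .content is the raw bytes of the image
    filename = os.path.basename(src)  # illustrative naming: just the last path segment
    with open(filename, 'wb') as f:
        f.write(img_bytes)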