This is an old chestnut among scraping examples.
The whole job is done with requests and BeautifulSoup.
import os

import requests
from bs4 import BeautifulSoup

ret = requests.get(url="https://www.autohome.com.cn/news/")
# The news page is GB2312-encoded; without this, requests falls back to
# ISO-8859-1 and the Chinese text turns into mojibake.
ret.encoding = ret.apparent_encoding

soup = BeautifulSoup(ret.text, 'html.parser')

# The news list lives inside this container div.
div = soup.find(name='div', id='auto-channel-lazyload-article')
li_list = div.find_all(name='li')

os.makedirs('./image', exist_ok=True)  # make sure the download dir exists

for it in li_list:
    h3 = it.find(name='h3')
    if not h3:  # ad <li> items have no <h3>; skip them
        continue
    p = it.find(name='p')
    a = it.find(name='a')
    img = it.find(name='img')
    src = img.get('src')

    # Image URLs on this page contain '__'; take the part after it
    # as the local file name.
    file_name = './image/' + src.rsplit('__', maxsplit=1)[1]

    # src is protocol-relative (starts with '//'), so prepend the scheme.
    ret_img = requests.get(url='https:' + src)

    with open(file_name, 'wb') as fw:
        fw.write(ret_img.content)

    print(h3.text, a.get('href'))
    print(p.text)
    print('=' * 15)
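For what it's worth, the same traversal can be written more compactly with CSS selectors, which BeautifulSoup exposes through select() and select_one(). This is just an alternative sketch under the same page-structure assumptions (the container id and the li/h3/a layout); the selector strings are mine, not from the original snippet.

import requests
from bs4 import BeautifulSoup

resp = requests.get("https://www.autohome.com.cn/news/")
resp.encoding = resp.apparent_encoding
soup = BeautifulSoup(resp.text, "html.parser")

# '#auto-channel-lazyload-article li' matches every <li> inside the container.
for li in soup.select("#auto-channel-lazyload-article li"):
    h3 = li.select_one("h3")
    if h3 is None:  # skip ad slots that have no headline
        continue
    a = li.select_one("a")
    print(h3.get_text(strip=True), a.get("href"))

Because select() takes standard CSS selectors, the query reads like the markup itself instead of a chain of find() calls.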