import requests
from bs4 import BeautifulSoup

# Page to scrape, served from a local web server.
# NOTE(review): the `_ijt` query token looks session-specific — confirm it is
# still valid before running.
url = 'http://localhost:63342/new/news.html?_ijt=55294hg253a9s359i3e3f9kdku'

# Fetch the page. A timeout keeps the script from hanging forever if the
# server is not running, and raise_for_status() surfaces HTTP errors here
# instead of as confusing lookup failures further down.
res = requests.get(url, timeout=10)
res.raise_for_status()

# Force UTF-8 decoding of the body before reading `res.text`.
res.encoding = 'utf-8'

# Parse the HTML with the standard-library-backed parser.
soup = BeautifulSoup(res.text, 'html.parser')
# Extract the text of the h1 tag
# select() returns a list of every matching element; take the first <h1>
# and print its text. Raises IndexError if the page has no <h1>.
soups = soup.select('h1')[0].text
print(soups)
# Extract the link (href) of the a tag
# `soup.a` is the first <a> element in the document; `.attrs` is its
# attribute dictionary. Raises KeyError if that anchor has no href.
soupa = soup.a.attrs
print(soupa['href'])
# Extract the full contents of every li tag
# Print the list of child nodes (tags and strings) of each <li> element.
for li in soup.find_all('li'):
    print(li.contents)
# Extract the title, link, publication time, and source of one news item
# Text of the first <div class="article-info"> element.
# NOTE(review): presumably this holds the publication time and source —
# confirm against the page markup.
print(soup.select('div.article-info')[0].text)

# 'div .text-title' (with a space) is a descendant selector: the first
# element with class "text-title" nested inside any <div>. Print the text
# of the <h1> found within it.
print(soup.select('div .text-title')[0].find('h1').text)