取出h5标签的文本
取出a标签的链接
取出所有li标签的所有内容
取出一条新闻的标题、链接、发布时间、来源
# Scrape the GZCC home page and one campus-news article, printing:
#   * the text of the first <h5> on the home page,
#   * the `src` of every <img> on the home page,
#   * the article title (.show-title) and its info line (.show-info),
#   * the `src` of every <img> inside the article body (.show-content).
import requests
from bs4 import BeautifulSoup

HOME_URL = 'http://www.gzcc.cn/'
NEWS_URL = 'http://news.gzcc.cn/html/2018/xiaoyuanxinwen_0328/9113.html'
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10


def fetch_page(url, timeout=REQUEST_TIMEOUT):
    """Download *url* and return it parsed with the built-in HTML parser.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``BeautifulSoup`` document built from the response body,
        decoded as UTF-8.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of scraping an error page.
    response.raise_for_status()
    # The script has always forced UTF-8 rather than trusting the headers.
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')


def print_first_text(node, selector):
    """Print the text of the first element under *node* matching *selector*.

    Prints nothing when no element matches, instead of raising.
    """
    match = node.select_one(selector)
    if match is not None:
        print(match.text)


def print_image_sources(node):
    """Print the ``src`` attribute of every <img> found under *node*.

    Images that carry no ``src`` attribute are skipped.
    """
    for image in node.select('img'):
        src = image.attrs.get('src')
        if src is not None:
            print(src)


def main():
    """Run both scrapes and print their results to stdout."""
    home_page = fetch_page(HOME_URL)
    print_first_text(home_page, 'h5')
    # Print the address of every image on the home page.
    print_image_sources(home_page)

    news_page = fetch_page(NEWS_URL)
    # Print the article title.
    print_first_text(news_page, '.show-title')
    # Print the article info line (.show-info).
    print_first_text(news_page, '.show-info')
    # Print the address of every image in the first article body element.
    content = news_page.select_one('.show-content')
    if content is not None:
        print_image_sources(content)


if __name__ == '__main__':
    main()