"""Scrape the GZCC campus-news index page and print every listed article.

For each news entry on the index page the script prints the title and
description taken from the index, then fetches the article page itself and
prints its publication date/time, author, reviewer, source and body text.
"""

import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

INDEX_URL = "http://news.gzcc.cn/html/xiaoyuanxinwen/"
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10
# Number of whitespace-separated fields read from the article's info line:
# date, time of day, author, reviewer, source.
INFO_FIELD_COUNT = 5
SEPARATOR = '------------------------------------------------------------------------------'


def _fetch_soup(page_url):
    """Download *page_url* and return it parsed with the built-in HTML parser.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # The script always decodes pages as UTF-8 rather than trusting the
    # encoding guessed from the response headers.
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')


def _first_text(node, selector):
    """Return the text of the first element matching *selector*, or ''."""
    match = node.select_one(selector)
    return match.text if match is not None else ''


def _split_info(info_text):
    """Split the article info line into exactly INFO_FIELD_COUNT fields.

    The line is split on whitespace; missing trailing fields are returned as
    empty strings so that a short info line cannot raise IndexError.
    """
    fields = info_text.split()[:INFO_FIELD_COUNT]
    fields += [''] * (INFO_FIELD_COUNT - len(fields))
    return fields


def _print_article(title, description, info_fields, link, content):
    """Print one article in the script's report format."""
    print(SEPARATOR)
    print("文章标题:" + title)
    print(" 文章描述:" + description)
    print(" 文章信息: " + ' '.join(info_fields))
    print(" 文章链接:" + link)
    print(content)
    print(SEPARATOR)


def main():
    """Crawl the index page and print every article it links to.

    An article whose page cannot be fetched is reported on stderr and
    skipped, so one bad link does not abort the whole crawl.
    """
    try:
        index_soup = _fetch_soup(INDEX_URL)
    except requests.RequestException as err:
        raise SystemExit(
            "Failed to fetch {}: {}".format(INDEX_URL, err)
        ) from err

    for item in index_soup.select('li'):
        # Only <li> elements that carry a news title are news entries.
        title_node = item.select_one('.news-list-title')
        if title_node is None:
            continue

        link_node = item.select_one('a')
        href = link_node.get('href') if link_node is not None else None
        if not href:
            continue
        # Resolve relative links against the index URL; absolute links are
        # returned unchanged by urljoin.
        article_url = urljoin(INDEX_URL, href)

        title = title_node.text
        description = _first_text(item, '.news-list-description')

        try:
            article_soup = _fetch_soup(article_url)
        except requests.RequestException as err:
            print(
                "Skipping {}: {}".format(article_url, err),
                file=sys.stderr,
            )
            continue

        info_fields = _split_info(_first_text(article_soup, '.show-info'))
        content = _first_text(article_soup, '#content')

        _print_article(title, description, info_fields, article_url, content)


# Called at module level (as in the original flat script) so that both running
# and importing this file perform the crawl exactly as before.
main()