from bs4 import BeautifulSoup
import requests

# --- Part 1: download a page with requests and print its body --------------
# A timeout is passed so the script cannot hang forever on a stalled
# connection (requests applies no timeout by default).
res = requests.get('http://www.baidu.com', timeout=10)
# Force UTF-8 decoding of the response body before reading ``res.text``.
res.encoding = 'utf-8'
print(res.text)

# --- Part 2: parse a small HTML snippet with BeautifulSoup -----------------
# NOTE(review): the leading " ... " inside the literal looks like a pasted
# interactive-prompt continuation marker; it is kept as-is -- confirm.
html = """ ... <html><head>head title</head><p>history</p></html>"""
# Name the parser explicitly so the parse tree does not depend on which
# optional parsers (lxml, html5lib) happen to be installed.
soup = BeautifulSoup(html, 'html.parser')

# Pretty-printed view of the whole parse tree.
print(soup.prettify())

# CSS-selector lookup: every <p> element, the first one, and its text.
paragraphs = soup.select('p')
print(paragraphs)
print(paragraphs[0])
print(paragraphs[0].text)

# Attribute-style navigation: the first <p> tag in the document.
print(soup.p)
# ``attrs`` is the tag's attribute dictionary (``soup.p.attr`` would instead
# search for a child tag literally named <attr> and print None).
print(soup.p.attrs)

# find_all(): search by tag name, then by the value of the ``id`` attribute.
print(soup.find_all('p'))
print(soup.find_all(id='dwww'))
# ////////////////++++++++++////////
import re

# Collect every <td class="job"> cell from the parsed document.
names = soup.find_all('td', class_="job")

# The regular expression captures any 2 to 5 characters that sit between a
# '>' and a closing </a> tag, i.e. short link texts.
link_text_pattern = re.compile(r">(.{2,5})</a>")

# find_all() returns Tag objects while the regex needs a string, so each
# cell is serialised back to markup with str() before matching.
matches = [
    text
    for cell in names
    for text in link_text_pattern.findall(str(cell))
]
# Note: soup (BeautifulSoup) and re (regular expressions) can be used in combination.
# ////////////////++++++++++////////
# Select every <p> element and print the text content of each one.
links = soup.select('p')
for link in links:
    print(link.text)