举例:
"""Basic elements of the Beautiful Soup library."""
import requests
from bs4 import BeautifulSoup

# Demo: download a small HTML page and inspect it through Beautiful Soup's
# basic elements (Tag, its name and attrs, and NavigableString).
# Every "Expected:" comment below is the output recorded in the original notes;
# the actual output depends on what the server returns for `url`.
url = "https://python123.io/ws/demo.html"
# Bound the wait so an unresponsive server cannot hang the script, and fail
# with a clear HTTP error instead of parsing an error page.
r = requests.get(url, timeout=10)
r.raise_for_status()
demo = r.text
soup = BeautifulSoup(demo, "html.parser")
# print(soup.prettify())

# --- Tag: attribute-style access on the soup (soup.title, soup.a, soup.p) ---
# Expected: <title>This is a python demo page</title>
print(soup.title)
tag = soup.a
# Expected: <a class="py1" href="http://www.icourse163.org/course/BIT-268001" id="link1">Basic Python</a>
print(tag)

# --- Name: the tag's own name, then the names of its successive parents ---
# Expected: a
print(soup.a.name)
# Expected: p
print(soup.a.parent.name)
# Expected: body
print(soup.a.parent.parent.name)
# Expected: html
print(soup.a.parent.parent.parent.name)
# Expected: [document]
print(soup.a.parent.parent.parent.parent.name)

# --- Attributes: exposed as a dict on the tag ---
# Expected: {'href': 'http://www.icourse163.org/course/BIT-268001', 'class': ['py1'], 'id': 'link1'}
print(tag.attrs)
# Expected: ['py1']
print(tag.attrs['class'])
# Expected: http://www.icourse163.org/course/BIT-268001
print(tag.attrs['href'])
# Expected: <class 'dict'>
print(type(tag.attrs))
# Expected: <class 'bs4.element.Tag'>
print(type(tag))

# --- NavigableString: the text inside a tag ---
# Expected: Basic Python
print(soup.a.string)
# Expected: <p class="title"><b>The demo python introduces several python courses.</b></p>
print(soup.p)
# Expected: The demo python introduces several python courses.
print(soup.p.string)
# Expected: <class 'bs4.element.NavigableString'>
print(type(soup.p.string))
# Type of an HTML comment node
"""Basic elements of the Beautiful Soup library."""
import requests
from bs4 import BeautifulSoup

# Demo: compare the .string of a tag holding an HTML comment with the .string
# of a tag holding ordinary text. The "Expected:" comments are the outputs
# recorded in the original notes.
newHTML = "<b><!--This is a comment--></b><p>This is not a comment</p>"
newsoup = BeautifulSoup(newHTML, "html.parser")
# Expected: This is a comment
# Original note: .string only retrieves a single line of the comment;
# print(newsoup.b.contents) can retrieve multiple lines.
print(newsoup.b.string)
# Expected: This is not a comment
print(newsoup.p.string)
# Expected: <class 'bs4.element.Comment'>
print(type(newsoup.b.string))
# Expected: <class 'bs4.element.NavigableString'>
print(type(newsoup.p.string))