"""Parse a small HTML sample with BeautifulSoup and print selected parts.

The script prints the ``<head>`` tag, the ``<body>`` tag, the element(s) whose
id is ``first``, and finally every run of Chinese characters found in the
page text.
"""

import re
from typing import List

# Sample page, kept exactly as in the original exercise.
# NOTE(review): the markup contains what look like typos (`<html1>`, `<hl>`,
# an unclosed `<tr>`); they are left untouched because they are input data.
HTML_DOC = (
    '<!DOCTYPE html> <html1> <head> <meta charset="utf-8"> '
    '<title>菜鸟教程(runoob.com)</title> </head> '
    '<body> <hl>我的第一标题</hl> <p id="first">我的第一个段落。</p> </body> '
    '<table border="1"> '
    '<tr> <td>row 1, cell 1</td> <td>row 1, cell 2</td> </tr> '
    '<tr> <td>row 2, cell 1</td> <td>row 2, cell 2</td> <tr> '
    '</table> </html>'
)

# CJK Unified Ideographs (U+4E00..U+9FFF).  The original pattern had lost the
# backslashes of its \u escapes, so it matched ASCII letters/digits instead of
# Chinese text.
_CHINESE_RUN = re.compile(r"[\u4e00-\u9fff]+")


def extract_chinese(text: str) -> List[str]:
    """Return every maximal run of Chinese characters in *text*, in order.

    Punctuation (including full-width punctuation such as ``。``), ASCII
    letters, digits and whitespace act as separators and are not returned.
    An empty list is returned when *text* contains no Chinese characters.
    """
    return _CHINESE_RUN.findall(text)


def main() -> None:
    """Parse ``HTML_DOC`` and print the requested pieces of the document."""
    # Imported here so that `extract_chinese` stays usable (and testable)
    # on machines where bs4 is not installed.
    from bs4 import BeautifulSoup

    # Name the parser explicitly: without it bs4 emits GuessedAtParserWarning
    # and the resulting tree depends on which parsers happen to be installed.
    soup = BeautifulSoup(HTML_DOC, "html.parser")

    print("打印head标签和我的学号")
    print(soup.head, "我的学号:03")
    print("获取body标签内容", soup.body)
    print("获取id为first的标签对象", soup.find_all(id="first"))

    page_text = soup.text
    chinese_runs = extract_chinese(page_text)
    print("获取并打印html页面中的中文字符")
    print(chinese_runs)


if __name__ == "__main__":
    main()