有时间的时候可以看看

# from django.test import TestCase
#
# # Create your tests here.
#
#
# # 可变数据类型  [] {}
# # 不可变数据类型 数字 字符串  元组
#
# # s="hello".upper()
# # print(s)
#
#
# # l=[1,2,3]
# #
# # c=[4,5]
# #
# # l.append(c)
# # c.append(7)
# #
# # print(c)  #
# # print(l)
#
#
#
#
# # d1={"name":"yuan"}
# # d2={"age":12}
# # d1["xxx"]=d2
# #
# # d2["height"]="180cm"
# #
# # print(d1)   # {"name":"yuan","xxx":{"age":12,"height":"180cm"}}
#
#
#
#
# # d1={1:{"xxx":[12,34]},2:{"xxx":[34,56]}}
# # d2={"xxx":[777,888,999]}
# # d3={"xxx":[11,8238,99]}
# #
# # d1[2]["xxx"].append(d2["xxx"])
# # d2["xxx"].append(d3["xxx"])
# #
# # print(d1)   #  {1:{"xxx":[12,34]},2:{"xxx":[34,56,[777,888,999,[11,8238,99]]]}}
# # print(d2)   #  {"xxx":[777,888,999,[11,8238,99]]}
# # print(d3)   #  d3={"xxx":[11,8238,99]}
#
#
#
#
# #=========================================================================
# '''
# [
#   {'id': 1, 'content': '...', 'Pid': None, 'chidren_commentList': [{'id': 4, 'content': '...', 'Pid': 1, 'chidren_commentList': [{'id': 6, 'content': '...', 'Pid': 4, 'chidren_commentList': []},]}, {'id': 5, 'content': '...', 'Pid': 1, 'chidren_commentList': []},]},
#   {'id': 2, 'content': '...', 'Pid': None, 'chidren_commentList': []},
#   {'id': 3, 'content': '...', 'Pid': None, 'chidren_commentList': [{'id': 7, 'content': '...', 'Pid': 3, 'chidren_commentList': [{'id': 8, 'content': '...', 'Pid': 7, 'chidren_commentList': []},]},]},
#   {'id': 4, 'content': '...', 'Pid': 1, 'chidren_commentList': [{'id': 6, 'content': '...', 'Pid': 4, 'chidren_commentList': []},]},
#   {'id': 5, 'content': '...', 'Pid': 1, 'chidren_commentList': []},
#   {'id': 6, 'content': '...', 'Pid': 4, 'chidren_commentList': []},
#   {'id': 7, 'content': '...', 'Pid': 3, 'chidren_commentList': [{'id': 8, 'content': '...', 'Pid': 7, 'chidren_commentList': []},]},
#   {'id': 8, 'content': '...', 'Pid': 7, 'chidren_commentList': []},
#   {'id': 9, 'content': '...', 'Pid': None, 'chidren_commentList': []}
#
#  ]
#
# '''
#
# comment_list=[
#
#     {"id":1,"content":"...","Pid":None},
#     {"id":2,"content":"...","Pid":None},
#     {"id":3,"content":"...","Pid":None},
#     {"id":4,"content":"...","Pid":1},
#     {"id":5,"content":"...","Pid":1},
#     {"id":6,"content":"...","Pid":4},
#     {"id":7,"content":"...","Pid":3},
#     {"id":8,"content":"...","Pid":7},
#     {"id":9,"content":"...","Pid":None},
#
# ]
#
# comment_dict={}
#
# for comment in comment_list:
#     comment["chidren_commentList"]=[]
#     comment_dict[comment["id"]] = comment
#
# print(comment_dict)
#
# '''
#
# comment_dict:
#
# {
# 1: {'id': 1, 'content': '...', 'Pid': None, 'chidren_commentList': []},
# 2: {'id': 2, 'content': '...', 'Pid': None, 'chidren_commentList': []},
# 3: {'id': 3, 'content': '...', 'Pid': None, 'chidren_commentList': []},
# 4: {'id': 4, 'content': '...', 'Pid': 1, 'chidren_commentList': []},
# 5: {'id': 5, 'content': '...', 'Pid': 1, 'chidren_commentList': []},
# 6: {'id': 6, 'content': '...', 'Pid': 4, 'chidren_commentList': []},
# 7: {'id': 7, 'content': '...', 'Pid': 3, 'chidren_commentList': []},
# 8: {'id': 8, 'content': '...', 'Pid': 7, 'chidren_commentList': []},
# 9: {'id': 9, 'content': '...', 'Pid': None, 'chidren_commentList': []}
#
# }
#
#
# '''
#
# ret=[]
#
# for comment in comment_list:   # comment :  {'id': 1, 'content': '...', 'Pid': None, 'chidren_commentList': [{'id': 5, 'content': '...', 'Pid': 1, 'chidren_commentList': []},]},
#     pid=comment.get("Pid")
#     if  pid:
#         print(comment)         #  {'id': 4, 'content': '...', 'Pid': 1, 'chidren_commentList': []}
#         comment_dict[pid]["chidren_commentList"].append(comment)
#     else:
#         ret.append(comment)
#
#
# print(ret)
#
#
#
#


#####################################################################################################

# Sample HTML document that this script feeds to BeautifulSoup. It contains
# a title, a <div>, three <a class="sisterN"> links and one inline <script>.
html_doc = """

<html>
 <head>
  <title>
   The Dormouse's story
  </title>
 </head>
 <body>
  <p class="title">
   <b>
    The Dormouse's story
   </b>
  </p>
  <div id="d1" class="d1">
    <b>
    The Dormouse's story2
    </b></div>
  <p class="story">
   Once upon a time there were three little sisters; and their names were
   <a class="sister0" href="http://example.com/elsie" id="link1">
    Elsie
   </a>
   ,
   <a class="sister1" href="http://example.com/lacie" id="link2">
    Lacie
   </a>
   and
   <a class="sister2" href="http://example.com/tillie" id="link3">
    Tillie
   </a>
   ;
and they lived at the bottom of a well.
  </p>
   <script>alert(1234)</script>
  <p class="story sister2">
   ...
  </p>
 </body>
</html>
"""


# Step 1: parse the sample document into a BeautifulSoup tree using the
# 'html.parser' backend.
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_doc, 'html.parser')

# 第二步：
# print(soup.prettify()) # 无用

# 第三步： 查找标签

# print(soup.a) # 只能找到第一个标签
# print(soup.find_all("a")) # 找到符合条件的所有的标签

# print(soup.a["class"])

#

# print(soup.find_all(name="a",attrs={"class":"sister2"}))

# for ele_a in soup.find_all("a"):
#     print(ele_a["class"])


# for ele_a in soup.find_all('a'):
#     print(ele_a.attrs)  # {'class': ['sister0'], 'href': 'http://example.com/elsie', 'id': 'link1'}
#     del ele_a["class"]
#
# for ele_a in soup.find_all('a'):
#     print(ele_a)

# for ele in soup.find_all():
#     if ele.attrs:
#         if ele.attrs.get("class"):
#            print(ele.attrs)  # {'class': ['sister0'], 'href': 'http://example.com/elsie', 'id': 'link1'}
#            del ele["class"]
#
# print(soup)


# for ele_a in soup.find_all("a"):
#     print(ele_a.string)    # string

# Overwrite the inline code of every <script> tag with a placeholder comment,
# then print the modified document.
for ele in soup.find_all("script"):
    # Tag.string is None when the tag is empty or has more than one child;
    # calling replace_with on None would raise AttributeError, so such tags
    # are left untouched.
    script_text = ele.string
    if script_text is not None:
        script_text.replace_with("// 别瞎玩")

print(soup)

相关阅读:
【Lucene4.8教程之五】Luke
【Tika基础教程之一】Tika基础教程
 【Lucene4.8教程之四】分析
 【Lucene4.8教程之六】QueryParser与Query子类：如何生成Query对象
 【Lucene4.8教程之三】搜索
 Java路径问题最终解决方案—可定位所有资源的相对路径寻址
 java.util.logging.Logger基础教程
 【Lucene4.8教程之二】索引
 【Lucene4.8教程之一】使用Lucene4.8进行索引及搜索的基本操作
 【Heritrix基础教程之4】开始一个爬虫抓取的全流程代码分析
原文地址：https://www.cnblogs.com/ctztake/p/8001216.html