# from django.test import TestCase # # # Create your tests here. # # # # 可变数据类型 [] {} # # 不可变数据类型 数字 字符串 元组 # # # s="hello".upper() # # print(s) # # # # l=[1,2,3] # # # # c=[4,5] # # # # l.append(c) # # c.append(7) # # # # print(c) # # # print(l) # # # # # # d1={"name":"yuan"} # # d2={"age":12} # # d1["xxx"]=d2 # # # # d2["height"]="180cm" # # # # print(d1) # {"name":"yuan","xxx":{"age":12,"height":"180cm"}} # # # # # # d1={1:{"xxx":[12,34]},2:{"xxx":[34,56,[777,888,999],[11,8238,99]]}} # # d2={"xxx":[777,888,999,[11,8238,99]]} # # d3={"xxx":[11,8238,99]} # # # # d1[2]["xxx"].append(d2["xxx"]) # # d2["xxx"].append(d3["xxx"]) # # # # print(d1) # {1:{"xxx":[12,34]},2:{"xxx":[34,56,[777,888,999,[11,8238,99]]]}} # # print(d2) # {"xxx":[777,888,999,[11,8238,99]]} # # print(d3) # d3={"xxx":[11,8238,99]} # # # # # #========================================================================= # ''' # [ # {'id': 1, 'content': '...', 'Pid': None, 'chidren_commentList': [{'id': 5, 'content': '...', 'Pid': 1, 'chidren_commentList': []},]}, # {'id': 2, 'content': '...', 'Pid': None, 'chidren_commentList': []}, # {'id': 3, 'content': '...', 'Pid': None, 'chidren_commentList': [{'id': 4, 'content': '...', 'Pid': 3, 'chidren_commentList': [{'id': 6, 'content': '...', 'Pid': 4, 'chidren_commentList': []},]},]}, # {'id': 4, 'content': '...', 'Pid': 3, 'chidren_commentList': [{'id': 6, 'content': '...', 'Pid': 4, 'chidren_commentList': []},]}, # {'id': 5, 'content': '...', 'Pid': 1, 'chidren_commentList': []}, # {'id': 6, 'content': '...', 'Pid': 4, 'chidren_commentList': []}, # {'id': 7, 'content': '...', 'Pid': 3, 'chidren_commentList': []}, # {'id': 8, 'content': '...', 'Pid': 7, 'chidren_commentList': []}, # {'id': 9, 'content': '...', 'Pid': None, 'chidren_commentList': []} # # ] # # ''' # # comment_list=[ # # {"id":1,"content":"...","Pid":None}, # {"id":2,"content":"...","Pid":None}, # {"id":3,"content":"...","Pid":None}, # {"id":4,"content":"...","Pid":1}, # 
#     {"id":5,"content":"...","Pid":1},
#     {"id":6,"content":"...","Pid":4},
#     {"id":7,"content":"...","Pid":3},
#     {"id":8,"content":"...","Pid":7},
#     {"id":9,"content":"...","Pid":None},
#
# ]
#
# comment_dict={}
#
# for comment in comment_list:
#     comment["chidren_commentList"]=[]
#     comment_dict[comment["id"]] = comment
#
# print(comment_dict)
#
# '''
#
# comment_dict:
#
# {
# 1: {'id': 1, 'content': '...', 'Pid': None, 'chidren_commentList': []},
# 2: {'id': 2, 'content': '...', 'Pid': None, 'chidren_commentList': []},
# 3: {'id': 3, 'content': '...', 'Pid': None, 'chidren_commentList': []},
# 4: {'id': 4, 'content': '...', 'Pid': 1, 'chidren_commentList': []},
# 5: {'id': 5, 'content': '...', 'Pid': 1, 'chidren_commentList': []},
# 6: {'id': 6, 'content': '...', 'Pid': 4, 'chidren_commentList': []},
# 7: {'id': 7, 'content': '...', 'Pid': 3, 'chidren_commentList': []},
# 8: {'id': 8, 'content': '...', 'Pid': 7, 'chidren_commentList': []},
# 9: {'id': 9, 'content': '...', 'Pid': None, 'chidren_commentList': []}
#
# }
#
# '''
#
# ret=[]
#
# for comment in comment_list:  # comment : {'id': 1, 'content': '...', 'Pid': None, 'chidren_commentList': [{'id': 5, 'content': '...', 'Pid': 1, 'chidren_commentList': []},]},
#     pid=comment.get("Pid")
#     if pid:
#         print(comment)  # {'id': 4, 'content': '...', 'Pid': 1, 'chidren_commentList': []}
#         comment_dict[pid]["chidren_commentList"].append(comment)
#     else:
#         ret.append(comment)
#
# print(ret)

#####################################################################################################
# BeautifulSoup walkthrough: parse a small HTML document, then replace the body
# of every inline <script> element with a harmless placeholder and print the
# resulting document.

from bs4 import BeautifulSoup

# Sample document used by the walkthrough below.
html_doc = """
<html>
<head>
<title> The Dormouse's story </title>
</head>
<body>
<p class="title"> <b> The Dormouse's story </b> </p>
<div id="d1" class="d1"> <b> The Dormouse's story2 </b></div>
<p class="story">
Once upon a time there were three little sisters; and their names were
<a class="sister0" href="http://example.com/elsie" id="link1"> Elsie </a> ,
<a class="sister1" href="http://example.com/lacie" id="link2"> Lacie </a>
and
<a class="sister2" href="http://example.com/tillie" id="link3"> Tillie </a>
; and they lived at the bottom of a well.
</p>
<script>alert(1234)</script>
<p class="story sister2"> ... </p>
</body>
</html>
"""

# Step 1: parse the document with the standard-library HTML parser backend.
soup = BeautifulSoup(html_doc, 'html.parser')

# Step 2:
# print(soup.prettify())
# (not needed)

# Step 3: look up tags
# print(soup.a)  # only finds the first matching tag
# print(soup.find_all("a"))  # finds every tag that matches
# print(soup.a["class"])
#
# print(soup.find_all(name="a",attrs={"class":"sister2"}))

# for ele_a in soup.find_all("a"):
#     print(ele_a["class"])

# for ele_a in soup.find_all('a'):
#     print(ele_a.attrs)  # {'class': ['sister0'], 'href': 'http://example.com/elsie', 'id': 'link1'}
#     del ele_a["class"]
#
# for ele_a in soup.find_all('a'):
#     print(ele_a)

# for ele in soup.find_all():
#     if ele.attrs:
#         if ele.attrs.get("class"):
#             print(ele.attrs)  # {'class': ['sister0'], 'href': 'http://example.com/elsie', 'id': 'link1'}
#             del ele["class"]
#
# print(soup)

# for ele_a in soup.find_all("a"):
#     print(ele_a.string)  # string

# Overwrite the text of each inline script with a placeholder comment.
for ele in soup.find_all("script"):
    # Tag.string is None when the tag has no child or more than one child
    # (e.g. <script src="..."></script>); calling replace_with() on None would
    # raise AttributeError, so such tags are skipped.
    if ele.string is not None:
        ele.string.replace_with("// 别瞎玩")

print(soup)