def _get_new_data(self, page_url, soup, final_str_name):
    """Split the page's full text into (section name, section content) pairs.

    Headings are taken from elements with class 'section'; the plain text is
    then scanned line by line, and every run of lines between two headings is
    stored as the content of the earlier heading. Scanning stops when the
    heading `final_str_name` is reached.
    """
    con_datas = []
    res_data = {'url': page_url, 'name': 'Ant'}

    content = soup.find(attrs={'class': 'content'})
    # All section titles go into `names`; `cont` is the full page text.
    names = content.find_all(attrs={'class': 'section'})
    cont = content.get_text()

    i = 0          # cursor into the full text
    str_name = ""  # heading whose body is currently being accumulated
    for fc in names:
        name = fc.get_text().strip()
        str_cont = ""
        if name == final_str_name:
            break  # terminating section reached; stop collecting
        while True:
            # Read one line of the full text, character by character.
            line_cont = ""
            while True:
                ch = cont[i]
                i += 1
                line_cont += ch
                if ch == '\n':
                    break
            line_cont = line_cont.strip()
            if line_cont == name:
                # The next heading was reached in the text: everything
                # accumulated so far belongs to the previous heading.
                con_datas.append({'name': str_name, 'cont': str_cont})
                str_name = name
                break
            else:
                str_cont += line_cont + '\n'
    return res_data, con_datas
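
# A minimal usage sketch, under stated assumptions: the method lives in a
# parser class (called HtmlParser here, a hypothetical name), the URL and the
# final heading are illustrative, and the target page exposes a 'content' div
# containing 'section'-classed headings as the method expects.
#
#   from urllib.request import urlopen
#   from bs4 import BeautifulSoup
#
#   page_url = 'https://example.com/docs/page'   # hypothetical URL
#   soup = BeautifulSoup(urlopen(page_url).read(), 'html.parser')
#   parser = HtmlParser()                        # hypothetical owning class
#   res_data, con_datas = parser._get_new_data(page_url, soup, 'Appendix')
#   # res_data  -> {'url': page_url, 'name': 'Ant'}
#   # con_datas -> [{'name': <heading>, 'cont': <text under it>}, ...]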