"""
Fetch
Parse
Store
"""
import re
#import ast
from urllib import parse
from datetime import datetime
import requests
from scrapy import Selector
domain = "http://www.91jf.com/"
# Helper that writes a test text file (currently disabled: it is kept inside the string literal below).
'''
def write_txt():
f = open("a.txt", 'w')
f.write("hello world")
f.close()
'''
def get_nodes_json(url="http://www.91jf.com/", timeout=10):
    """Download a page and return the category-menu links found in it.

    The page is parsed with a scrapy ``Selector`` and every ``<a>`` element
    that has an ``href`` attribute and sits inside a
    ``<div class="class_child_li">`` is collected.  Each match is printed to
    stdout as it is collected.

    NOTE: despite the function name, the result is not JSON; it is the list
    of strings produced by ``Selector.xpath(...).extract()`` for the matched
    ``<a>`` elements.

    Args:
        url: Address of the page to download.  Defaults to the site home page.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A list with one extracted string per matching ``<a>`` element, in
        document order; an empty list when nothing matches.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out (propagated unchanged from ``requests.get``).
    """
    page_html = requests.get(url, timeout=timeout).text
    selector = Selector(text=page_html)
    anchors = selector.xpath("//div[@class='class_child_li']//a[@href]").extract()

    # Echo every match so a direct script run shows what was scraped.
    for anchor_html in anchors:
        print(anchor_html)

    # Return a fresh list; it is naturally empty when nothing matched.
    return list(anchors)
# Script entry point: when this file is executed directly (not imported),
# scrape the menu links once; the results are printed by get_nodes_json().
if __name__ == "__main__":
    get_nodes_json()