from bs4 import BeautifulSoup
import requests

# Every listing URL discovered so far, accumulated across all scraped pages.
link_list = []

# Seconds to wait for the server before a request is abandoned; without a
# timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def get_soup(url):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``lxml`` parser.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            exceeds ``REQUEST_TIMEOUT`` seconds.
    """
    html = requests.get(url, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(html.text, 'lxml')


def get_link_list(url, soup=None):
    """Return the rental-listing URLs found on one search-result page.

    The links are also appended to the module-level ``link_list`` so that
    it keeps a running record of everything discovered.

    Args:
        url: Address of the search-result page. Only fetched when *soup*
            is not supplied.
        soup: Already-parsed tree for *url*. Passing it avoids downloading
            the same page a second time.

    Returns:
        A new list holding the ``href`` of every listing link on this page
        only (not the links of previously scraped pages).
    """
    if soup is None:
        soup = get_soup(url)
    page_links = [
        anchor['href']
        for anchor in soup.select('#page_list > ul > li > a')
    ]
    link_list.extend(page_links)
    return page_links


def _text_of(soup, selector):
    """Return the string of the first element matching *selector*, or None."""
    tag = soup.select_one(selector)
    return tag.string if tag is not None else None


def _attr_of(soup, selector, attr):
    """Return attribute *attr* of the first match of *selector*, or None."""
    tag = soup.select_one(selector)
    return tag.get(attr) if tag is not None else None


def get_content(page_url=None):
    """Scrape and print the basic details of every listing on one page.

    Each listing page is downloaded exactly once and all fields are read
    from that single parsed tree. A field whose element (or attribute) is
    absent from the page is reported as ``None`` instead of aborting the
    whole run.

    Args:
        page_url: Address of the search-result page. When omitted, the
            module-level ``url`` variable is used.

    Returns:
        A list with one dict per listing, keyed by ``name``, ``title``,
        ``addr``, ``price`` and ``image``. Each dict is also printed.
    """
    if page_url is None:
        page_url = url  # fall back to the global set by the driver loop

    records = []
    for index_url in get_link_list(page_url, get_soup(page_url)):
        page = get_soup(index_url)
        data = {
            'name': _text_of(
                page,
                '#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a',
            ),
            'title': _text_of(page, 'div.pho_info > h4 > em'),
            # Some listings show no address text, so the address is read
            # from the element's title attribute instead.
            'addr': _attr_of(page, ' div.pho_info > p', 'title'),
            'price': _text_of(page, '#pricePart > div.day_l > span'),
            'image': _attr_of(page, '#curBigImage', 'src'),
        }
        print(data)
        records.append(data)
    return records


# Walk the search-result pages; widen the range to scrape more pages.
for i in range(1):
    url = 'http://hz.xiaozhu.com/search-duanzufang-p%d-0/' % i
    get_content(url)