Python 爬虫——小猪短租的租房信息
利用 requests 获取网页
利用 BeautifulSoup4 和 lxml 解析网页
具体代码如下:
from bs4 import BeautifulSoup
import requests
# Page to scrape; the selectors below are written against this page's markup.
url = 'http://bj.xiaozhu.com/fangzi/1508951935.html'


def _first_text(soup, selector):
    """Return the text of the first element matching *selector*, or None."""
    found = soup.select(selector)
    return found[0].text if found else None


def _first_attr(soup, selector, attr):
    """Return attribute *attr* of the first match for *selector*, or None."""
    found = soup.select(selector)
    return found[0].get(attr) if found else None


# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() prevents parsing an HTTP error page as if it were
# the expected document.
web_data = requests.get(url, timeout=10)
web_data.raise_for_status()
soup = BeautifulSoup(web_data.text, 'lxml')

# Each field falls back to None when its element is absent, instead of
# aborting the whole run with an IndexError.
title = _first_text(soup, 'div.pho_info > h4 ')
address = _first_attr(soup, 'div.pho_info > p ', 'title')
price = _first_text(soup, 'div.day_l > span')
first_pic = _first_attr(soup, '#curBigImage', 'src')
landlord_pic = _first_attr(soup, 'div.member_pic > a > img', 'src')
landlord_name = _first_text(soup, 'div.w_240 > h6 > a')

# Gender is inferred only from the presence of the "girl" icon; any page
# without that icon is reported as 'male'.
if soup.select('span[class="member_girl_ico"]'):
    landlord_gender = 'female'
else:
    landlord_gender = 'male'

data = {
    'title': title,
    'address': address,
    'price': price,
    'first_pic': first_pic,
    'landlord_pic': landlord_pic,
    'landlord_name': landlord_name,
    'landlord_gender': landlord_gender,
}
print(data)