# coding=utf-8
"""
Author: nieliangcai
version: 0.1
date: 2019/7/29 11:30
"""
import requests_html
import xlwt
import time
import openpyxl
from pprint import pprint
# Minute-resolution timestamp taken once when the module loads; it is embedded
# in the default output filename of write_house_data().
now_time = time.strftime("%Y%m%d%H%M")

# A single HTML session shared by every page request.
session = requests_html.HTMLSession()

# Area names used as search keywords, one request per entry.
House_List = [
    "徐泾北城",
    "泗泾",
    "佘山",
    "九亭",
    "宝龙广场",
    "洞泾",
    "蟠龙路",
    "宝山",
]
def write_house_data(table_info, title="地区房价%s.xlsx" % now_time):
    """Write scraped listings to an .xlsx workbook, one worksheet per area.

    :param table_info: sequence of entries where ``entry[0]`` is the worksheet
        name and ``entry[1]`` is a sequence of rows; each row is a sequence of
        cell values written left to right.
    :param title: path of the workbook to save. The default embeds the
        timestamp captured when the module was loaded.
    :return: None
    """
    # Column headers written to the first row of every worksheet.
    headers = ['Title', 'house_info', 'height', 'total_price', 'unit_price']

    workbook = openpyxl.Workbook()
    # A new Workbook starts with a default worksheet named "Sheet". Drop it
    # only when there is data to write: with no entries the workbook would be
    # left without any worksheet and could not be saved.
    if table_info:
        workbook.remove(workbook["Sheet"])

    for entry in table_info:
        address, rows = entry[0], entry[1]
        # The scraped area name is used as the worksheet name.
        sheet = workbook.create_sheet(address)

        # openpyxl rows and columns are 1-based; row 1 holds the headers.
        for column, header in enumerate(headers, start=1):
            sheet.cell(row=1, column=column, value=header)

        # Data rows start directly below the header row.
        for row_number, row in enumerate(rows, start=2):
            for column, value in enumerate(row, start=1):
                sheet.cell(row=row_number, column=column, value=value)

    workbook.save(title)
# Fetch the search results page for every area keyword, collect one row per
# listing, then print the result and export it to a workbook.
all_house = []
for house in House_List:
    URL = "https://sh.lianjia.com/ershoufang/rs%s/" % house
    # Bound the wait so a single stalled connection cannot hang the whole run.
    res = session.get(URL, timeout=10)
    title_all = res.html.find(".title>a")  # listing titles
    house_info_all = res.html.find(".houseInfo")  # listing details
    position_Info_all = res.html.find(".positionInfo")  # location
    totalPrice = res.html.find(".totalPrice")  # total price
    unitPrice = res.html.find(".unitPrice")  # unit price
    # zip() pairs the n-th match of every selector and stops at the shortest
    # result list, so a selector that matched fewer elements no longer raises
    # IndexError and discards everything scraped so far.
    list_house = [
        [title.text, info.text, position.text, total.text, unit.text]
        for title, info, position, total, unit in zip(
            title_all, house_info_all, position_Info_all, totalPrice, unitPrice
        )
    ]
    all_house.append(list_house)

# Pair each area keyword with its listings: [(area, rows), ...].
zipped = list(zip(House_List, all_house))
pprint(zipped)
write_house_data(zipped)