• 买房指南,链家最近房源


    # coding=utf-8

    """
    Author: nieliangcai
    version: 0.1

    date: 2019/7/29 11:30
    """

    import requests_html
    import xlwt
    import time
    import openpyxl
    from pprint import pprint


    # Minute-resolution local timestamp (YYYYmmddHHMM), computed once at start-up.
    now_time = time.strftime("%Y%m%d%H%M")

    # A single HTML session object reused for every page request.
    session = requests_html.HTMLSession()

    # Area keywords; each one is substituted into the search URL separately.
    House_List = [
        "徐泾北城",
        "泗泾",
        "佘山",
        "九亭",
        "宝龙广场",
        "洞泾",
        "蟠龙路",
        "宝山",
    ]


    def write_house_data(table_info, title="地区房价%s.xlsx" % now_time):
        """Write scraped listings to an .xlsx workbook, one worksheet per area.

        :param table_info: iterable of entries whose first item is the sheet
            name and whose second item is a sequence of rows; each row is a
            sequence of cell values written left to right.
        :param title: output filename. The default embeds ``now_time`` and is
            evaluated once, when the function is defined.
        :return: None
        """
        title_list = ['Title', 'house_info', 'height', 'total_price', 'unit_price']

        # Materialise once so any iterable is accepted and emptiness can be tested.
        table_info = list(table_info)

        file = openpyxl.Workbook()
        # Drop the auto-created default sheet only when real sheets will replace
        # it: openpyxl refuses to save a workbook that has no visible sheet, so
        # for empty input the default sheet is kept and an empty workbook is saved.
        if table_info:
            file.remove(file["Sheet"])

        for entry in table_info:
            address, values = entry[0], entry[1]

            # The area name doubles as the worksheet name.
            table = file.create_sheet(address)

            # openpyxl rows and columns are 1-based; row 1 holds the headers.
            for col, heading in enumerate(title_list, start=1):
                table.cell(row=1, column=col, value=heading)

            # Data rows start directly below the header row.
            for row, record in enumerate(values, start=2):
                for col, value in enumerate(record, start=1):
                    table.cell(row=row, column=col, value=value)

        file.save(title)


    # Scrape the search-result page for every area keyword and collect the
    # listings as rows of [title, house info, position, total price, unit price].
    all_house = []
    for house in House_List:
        URL = "https://sh.lianjia.com/ershoufang/rs%s/" % house
        res = session.get(URL)
        title_all = res.html.find(".title>a")  # listing titles
        house_info_all = res.html.find(".houseInfo")  # listing details
        position_Info_all = res.html.find(".positionInfo")  # location
        totalPrice = res.html.find(".totalPrice")  # total price
        unitPrice = res.html.find(".unitPrice")  # unit price

        # Walk the five result lists in lockstep. zip() stops at the shortest
        # list, so a selector that matches fewer elements than ".title>a" no
        # longer aborts the whole run with an IndexError.
        list_house = [
            [title.text, info.text, position.text, total.text, unit.text]
            for title, info, position, total, unit in zip(
                title_all, house_info_all, position_Info_all, totalPrice, unitPrice
            )
        ]
        all_house.append(list_house)

    # Pair each keyword with its rows: the (sheet_name, rows) shape that
    # write_house_data consumes.
    zipped = list(zip(House_List, all_house))
    pprint(zipped)
    write_house_data(zipped)
  • 相关阅读:
    为Android编译bash
    编译toybox
    RGB信仰灯
    如何用Fiddler抓BlueStacks的HTTPS包
    Adobe Acrobat快捷方式
    [MS-SHLLINK]: Shell Link (.LNK) Binary File Format
    BZOJ 3993 星际战争
    BZOJ 3996 线性代数
    BZOJ 1797 最小割
    BZOJ 2726 任务安排
  • 原文地址:https://www.cnblogs.com/nieliangcai/p/11263581.html
Copyright © 2020-2023  润新知