• 爬取小猪短租房.py文件


    from bs4 import BeautifulSoup
    import requests
    import time

    # Module-level accumulator: get_link_from() appends to this same list on
    # every call, so URLs gathered by earlier calls are retained.
    urls = []


    def get_link_from(page_number):
        """Collect listing detail URLs from the search result pages.

        Requests result pages 1 through ``page_number - 1`` (the upper bound of
        ``range`` is exclusive) and appends the ``detailurl`` attribute of every
        element matching ``div.result_btm_con.lodgeunitname`` to the
        module-level ``urls`` list.

        Args:
            page_number: Exclusive upper bound of the page numbers to request.
                A value of 1 or less requests nothing.

        Returns:
            The module-level ``urls`` list (the same list object on every
            call). Entries are whatever ``link.get('detailurl')`` returned.
        """
        for each_number in range(1, page_number):
            list_view = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(each_number)
            wb_data = requests.get(list_view)
            soup = BeautifulSoup(wb_data.text, 'lxml')
            urls.extend(
                link.get('detailurl')
                for link in soup.select('div.result_btm_con.lodgeunitname')
            )
        return urls

    def print_gender(class_name):
        """Translate a CSS class name into a gender label.

        Despite the name, nothing is printed; the label is returned.

        Args:
            class_name: Class-name string to translate.

        Returns:
            "女" for "member_girl_ico", "男" for "member_girl_icol", and None
            for any other value.
        """
        if class_name == "member_girl_ico":
            return "女"
        # NOTE(review): this class name contains "girl" yet maps to "男" (male);
        # it may be a typo for a different class name. Confirm against the
        # page markup before changing it.
        if class_name == "member_girl_icol":
            return "男"
        # Unrecognised class names yield None (made explicit here).
        return None

    def get_item_info(page_number):
        """Fetch each listing page found by ``get_link_from`` and print its details.

        For every URL returned by ``get_link_from(page_number)`` the page is
        downloaded, parsed with the ``lxml`` parser, and a dict of fields taken
        from the first match of each CSS selector is printed.

        Args:
            page_number: Passed straight through to ``get_link_from``.

        Returns:
            None. One dict is printed per listing.

        Raises:
            IndexError: If any selector matches nothing on a page, because the
                first match is taken with ``[0]``.
        """
        for url in get_link_from(page_number):
            wb_data = requests.get(url)
            soup = BeautifulSoup(wb_data.text, 'lxml')
            data = {
                # NOTE(review): this key has a trailing space ('title '); kept
                # as-is because it is part of the printed output.
                'title ': soup.select('div.pho_info > h4')[0].text,
                'address': soup.select('div.pho_info > p > span')[0].text.strip(' '),
                'price': soup.select('#pricePart > div.day_l > span')[0].text,
                'pic': soup.select('#curBigImage')[0].get('src'),
                'host_name': soup.select('div.w_240 > h6')[0].text,
                # Stores the raw first class name of the span, not a label.
                'host_gender': soup.select('div.w_240 > h6 > span')[0].get('class')[0],
            }
            print(data)


    # Script entry point: runs the scraper as soon as the file is executed.
    # The argument 14 is forwarded to get_link_from, whose range(1, 14) covers
    # result pages 1 through 13.
    get_item_info(14)

    
    
  • 相关阅读:
    Hibernate 工作原理及为什么要用
    一款很好用的JQuery dtree树状图插件(一)
    android PopupWindow
    android 截屏工具类
    ubuntu 中文输入法
    Google GCM推送
    windows 安装配置 ant
    (转)Angular中的拦截器Interceptor
    flex 布局 自己做的demo
    flex布局 (转)
  • 原文地址:https://www.cnblogs.com/dws-love-jfl-1314/p/6008046.html
Copyright © 2020-2023  润新知