• python 之 爬普房网


    from bs4 import BeautifulSoup
    import re
    import requests
    import pandas
    # Crawl housing listings from the Pufang site (www.0594.com).
    class down(object):
        """Scraper for housing listings on www.0594.com.

        Attributes:
            urls: Unique listing links collected by ``get_down_urls``, in the
                order they were first seen.
            nums: Number of entries in ``urls`` after the last
                ``get_down_urls`` call.
            calls: Text of every ``<dl>`` found by the most recent
                ``get_down_data`` call (spaces removed).
        """

        # Search-result pages that are scanned for listing links.
        LIST_PAGES = (
            "http://www.0594.com/list-117---1_90-90_120-2----3361---1.html?pb=&od=&hasphoto=1&ft=0",
            'http://www.0594.com/list-117---1_90-90_120-2----3361---2.html?pb=&od=&hasphoto=1&ft=0',
        )

        # Seconds to wait for the server; without a timeout requests.get can
        # block forever on a stalled connection.
        REQUEST_TIMEOUT = 10

        def __init__(self):
            self.calls = []
            self.urls = []
            self.nums = 0

        @staticmethod
        def _unique(items):
            """Return the truthy items, de-duplicated, in first-seen order."""
            return list(dict.fromkeys(item for item in items if item))

        def _fetch_soup(self, url):
            """Download ``url`` and return its parsed document."""
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            # Name the parser explicitly so results do not depend on which
            # optional parser libraries happen to be installed.
            return BeautifulSoup(response.text, "html.parser")

        def get_down_urls(self):
            """Collect listing links from every page in ``LIST_PAGES``.

            Stores the unique ``href`` values of all ``<a>`` tags found inside
            ``<div class="houseList">`` elements in ``self.urls`` and their
            count in ``self.nums``. Anchors without an ``href`` are skipped.
            """
            hrefs = []
            for page in self.LIST_PAGES:
                soup = self._fetch_soup(page)
                for container in soup.find_all("div", class_="houseList"):
                    for anchor in container.find_all("a"):
                        hrefs.append(anchor.get("href"))
            self.urls = self._unique(hrefs)
            # Keep nums in step with urls so range(nums) is always a valid
            # index range over the collected links.
            self.nums = len(self.urls)

        def get_down_data(self, straget):
            """Fetch one listing page and print the text of its detail lists.

            Args:
                straget: URL of the listing page to download.

            The text of each ``<dl>`` inside ``<div class="inforTxt">`` (with
            spaces removed) is stored in ``self.calls`` and printed.
            """
            self.calls = []
            soup = self._fetch_soup(straget)
            for box in soup.find_all("div", class_="inforTxt"):
                for entry in box.find_all("dl"):
                    self.calls.append(entry.text.replace(" ", ""))
            print(self.calls)


    # Driver: collect the listing links, then download and print each listing.
    ds = down()
    ds.get_down_urls()

    # De-duplicate while keeping the order in which links were found, and drop
    # anchors that had no href (stored as None), which requests cannot fetch.
    dss = [url for url in dict.fromkeys(ds.urls) if url]

    # Iterate over the links themselves: ds.nums is not guaranteed to match
    # the length of the de-duplicated list, so indexing by it can fail.
    for url in dss:
        ds.get_down_data(url)
  • 相关阅读:
    867. Transpose Matrix
    896. Monotonic Array
    Java并发包中线程池ThreadPoolExecutor原理探究
    Java中的线程协作之Condition
    Java中的读写锁
    Java中的锁——Lock和synchronized
    Java中的队列同步器AQS
    Java并发编程基础之volatile
    leetcode-数组中只出现一次的数字
    leetcode-比特位计数
  • 原文地址:https://www.cnblogs.com/baili-luoyun/p/8619864.html
Copyright © 2020-2023  润新知