• Python小爬虫练习


    # coding: utf-8
    __author__ = 'zhangcx'
    from urllib3 import PoolManager
    import codecs
    import json
    
    class myjob(object):
        """Page through lagou.com job-search results and print each posting.

        The search is fixed to the keyword ``Java`` and to the city encoded in
        the endpoint's query string (URL-encoded UTF-8 for Wuhan).  Paging state
        lives on the instance, so one instance walks the result set once.
        """

        # Search endpoint; the query string selects default ordering and the city.
        _URL = ('http://www.lagou.com/jobs/positionAjax.json'
                '?px=default&city=%E6%AD%A6%E6%B1%89')

        def __init__(self):
            self._page = 1              # 1-based number of the next page to fetch
            self._totalPageCount = 0    # refreshed from every response
            self._first = True          # True only until the first request is sent
            self._hasNextPage = True    # loop condition for getjob()
            self._http = PoolManager()

        def getjob(self):
            """Fetch every remaining result page and print one line per posting.

            Each line has the form ``companyShortName,positionName,salary``.
            The method returns nothing; it stops once the server reports no
            further page or the next page number would exceed the reported
            total page count.

            Raises:
                KeyError: if the decoded response lacks the ``content`` object
                    or one of the keys read from it.
                ValueError: if the response body is not valid JSON.
            """
            # Iterate instead of recursing once per page, so a long result set
            # cannot exhaust the interpreter's recursion limit.
            while self._hasNextPage:
                # Pass the form data by keyword: the third positional slot of
                # PoolManager.request() is ``fields`` in urllib3 1.x but
                # ``body`` in urllib3 2.x.
                r = self._http.request(
                    'POST',
                    self._URL,
                    fields={
                        # Sent lowercase on every request; previously the
                        # first request carried 'True' and later ones 'false'.
                        'first': 'true' if self._first else 'false',
                        'pn': '%d' % self._page,
                        'kd': 'Java',
                    },
                )
                content = json.loads(r.data.decode('utf-8'))['content']

                for item in content['result']:
                    print("{name},{positionName},{salary}".format(
                        name=item['companyShortName'],
                        positionName=item['positionName'],
                        salary=item['salary']))

                self._hasNextPage = content['hasNextPage']
                self._totalPageCount = content['totalPageCount']
                self._first = False

                # Stop at the reported page count even if the server still
                # claims that another page exists.
                if self._page + 1 > self._totalPageCount:
                    self._hasNextPage = False
                self._page += 1
    
    
    # Script entry point: crawl and print every result page when run directly.
    if __name__ == "__main__":
        myjob().getjob()



  • 相关阅读:
    csuoj-1004-Xi and Bo
    csuoj-1003-UC Browser
    网络命令
    linux网络配置
    java面向对象
    java类 面向对象
    java方法 Scanner、Random类
    java集合
    java数组
    java流程控制语句
  • 原文地址:https://www.cnblogs.com/huangzelin/p/5024452.html
Copyright © 2020-2023  润新知