• 被执行人查询


    
    
    #-*- coding: utf-8 -*-
    # search.py
    
    import urllib2
    import json
    import threading
    #import copy
    from sgmllib import SGMLParser
    
    # URL template of the detail endpoint; "{}" is filled with a record id
    # and the response body is parsed as JSON by GetIdList.get_detail.
    DETAIL_URL = "http://shixin.court.gov.cn/detail?id={}"
    # Keys read from each detail JSON object, in the order in which their
    # values are appended to an output row.
    DETAIL_KEYS = ["age", "sexy", "cardNum", "areaName", "courtName", "gistId", "regDate", "gistUnit", "duty", "performance", "disruptTypeName"]
    # Output file, opened for writing (truncating) as soon as the module loads.
    csv_file = open('search.csv', 'w')
    # Lock guarding writes to the output file.
    file_lock = threading.RLock()
    # Number of records written to the output file so far.
    num = 0
    
    class GetIdList(SGMLParser):
        def reset(self):
            self.all_data = []
            self.IDlist = []
            self.flag = False
            self.getdata = False
            SGMLParser.reset(self)
    
        def start_tr(self, attrs):
            for k,v in attrs:#遍历div的所有属性以及其值
                # tr style="height:28px;"
                if k == 'style' and v == 'height:28px;':#确定进入了<div class='entry-content'>
                    self.flag = True
                    return
    
        def end_tr(self):#遇到</div>
            self.flag = False
            if self.IDlist:
                self.get_detail(self.IDlist[1])
    
        def start_a(self, attrs):
            if self.getdata == True:
                for k,v in attrs:
                    if k == 'id':
                        self.IDlist.append(v)
    
        def start_td(self, attrs):
            if self.flag == False:
                return
            self.getdata = True
    
        def end_td(self):#遇到</p>
            if self.getdata:
                self.getdata = False
    
        def handle_data(self, text):#处理文本
            if self.getdata:
                self.IDlist.append(text)
    
        def get_detail(self, pid):
            while True:
                print pid, self.IDlist[2]
                try:
                    detail_msg = urllib2.urlopen(DETAIL_URL.format(pid)).read()
                except urllib2.HTTPError as e:
                    continue
                break
    
            detail = json.loads(detail_msg)
    
            self.IDlist = self.IDlist[:-4]
            for item in DETAIL_KEYS:
                value = detail.get(item, '')
                self.IDlist.append(value)
            import sys
    
            reload(sys)
    
            sys.setdefaultencoding('utf-8')
    
            for index,item in enumerate(self.IDlist):
                self.IDlist[index] = str(item).decode('utf-8')
    
    
            self.all_data.append(','.join(self.IDlist))
    
            self.IDlist = []
            pass
    
        def print_data(self):
            if file_lock.acquire():
                for i in self.all_data:
                    i = i.replace('
    ', '')
                    print >> csv_file, i
                    global num
                    num += 1 
                    print num
                file_lock.release()
                
    
    class MyThread(threading.Thread):
        def __init__(self, first_num, last_num):
            threading.Thread.__init__(self)
            self.first_num = first_num
            self.last_num = last_num
        
        def run(self):
            for i in range(self.first_num, self.last_num + 1):
                try_cnt = 0
                while True:
                    try:
                        try_cnt += 1
                        content = urllib2.urlopen('http://shixin.court.gov.cn/personMore.do?currentPage={}'.format(i)).read()
                    except urllib2.HTTPError as e:
                        if try_cnt < 5:
                            continue
                        # 尝试5次都失败 跳过这个id的查询
                        break
                    break
            
                # print content
                if content: 
                    t = GetIdList()
                    t.feed(content)
                    t.print_data()
                    print "*********Pag finshed: ", i 
                else:
                    break
    
    # Lower bound on the total number of list pages to query; the ranges
    # handed to the threads may cover slightly more than this.
    #all_search_num = 90000
    all_search_num = 90
    # Number of worker threads to start.
    all_thread_num = 9

    # Pages per thread; floor division plus one so that the thread ranges
    # together cover at least all_search_num pages.
    gap = all_search_num // all_thread_num + 1

    threads = []
    for thread_num in range(0, all_thread_num):
        this_thread = MyThread(thread_num * gap + 1, (thread_num + 1) * gap)
        this_thread.start()
        threads.append(this_thread)

    # Wait for every worker, then close the output file so that all buffered
    # rows are flushed to disk.
    for this_thread in threads:
        this_thread.join()
    csv_file.close()
    
    
    
     
  • 相关阅读:
    设置Kali Linux虚拟机连接网络
    修改Kali Linux终端主题
    Kali Linux修改桌面默认图标
    Metasploit技巧命令支持tips
    修改Kali Linux终端历史记录大小
    ListView 实现进度条显示
    Delphi 10.3.2 社区版的安装
    VMWare 下安装 Windows XP
    重新认识 Delphi
    VMWare 下安装 MSDN版 MS-DOS 6.22
  • 原文地址:https://www.cnblogs.com/chens-smile/p/4488417.html
Copyright © 2020-2023  润新知