• 批量去重URL地址并剔除打不开网址


    #coding=utf-8
    
    import os
    import httplib
    import socket
    
    # Maps a resolved IP address to the first host seen for it; SysDNS fills
    # this in and showDict writes its values out.
    dictlist = dict()
    
    def ReadHost(path='d:/sss.txt'):
        """Read the hosts to check from a text file, one host per line.

        Args:
            path: File to read. Defaults to the original hard-coded
                'd:/sss.txt' so zero-argument callers keep working.

        Returns:
            A list of host strings in file order, with surrounding whitespace
            (including carriage returns and newlines) removed and blank
            lines skipped.

        Raises:
            IOError: if the file cannot be opened.
        """
        hosts = []
        # Text mode plus a bare strip() copes with both LF and CRLF files;
        # a leftover carriage return would otherwise end up inside the host.
        with open(path, 'r') as obn:
            for line in obn:
                line = line.strip()
                # Skip blank lines: an empty host can never be resolved.
                if line:
                    hosts.append(line)
        return hosts
    
    def GetWebStatus(host, path='/', timeout=10):
        """Check whether an HTTP GET against *host* answers with status 200.

        Args:
            host: Host name, optionally with ':port', as accepted by
                httplib.HTTPConnection.
            path: Request target to fetch; defaults to the site root.
            timeout: Socket timeout in seconds, so a single unresponsive
                host cannot stall the whole batch.

        Returns:
            1 if the response status is exactly 200, otherwise 0. Redirects
            and other non-200 statuses count as 0, and so does any HTTP or
            socket-level failure (refused, reset, timed out, unresolvable).
        """
        conn = None
        try:
            conn = httplib.HTTPConnection(host, timeout=timeout)
            conn.request('GET', path)
            status = conn.getresponse().status
        except (httplib.HTTPException, socket.error):
            # socket.error covers connection refused/reset/timeout and DNS
            # failures; all of them mean "site does not open".
            return 0
        finally:
            # Always release the socket, even when the request failed midway.
            if conn is not None:
                conn.close()
        return 1 if status == 200 else 0
    
    def SysDNS():
        """Fill ``dictlist`` with one reachable host per resolved IP address.

        Every host returned by ReadHost() is probed with GetWebStatus();
        hosts that do not answer are skipped. For the rest, the first address
        returned by socket.getaddrinfo() is used as the de-duplication key:
        only the first host seen for a given address is kept.

        Resolution errors are skipped silently; any other exception is
        printed and the loop moves on to the next host.
        """
        for host in ReadHost():
            try:
                if GetWebStatus(host) == 0:
                    continue
                myaddrs = socket.getaddrinfo(host, None)
                if not myaddrs:
                    continue
                # Each entry is (family, type, proto, canonname, sockaddr);
                # sockaddr[0] is the IP address. Only the first entry is
                # considered, as before.
                addrs = myaddrs[0][4][0]
                if addrs not in dictlist:
                    dictlist[addrs] = host
            except (socket.herror, socket.gaierror):
                continue
            except Exception as e2:
                print(e2)
                continue
    
    
    
    def showDict(path="d:/out.txt"):
        """Write the de-duplicated hosts stored in ``dictlist`` to a file.

        Args:
            path: Output file; defaults to the original hard-coded
                'd:/out.txt' so zero-argument callers keep working.

        One host is written per line. The original passed each host string
        to writelines() without a separator, so all hosts ran together.
        """
        with open(path, "w") as fw:
            # strip() drops any stray carriage return left on the host so
            # that every entry ends with exactly one line terminator.
            fw.writelines(host.strip() + "\n" for host in dictlist.values())
    
    if __name__ == "__main__":
        # Collect the reachable, de-duplicated hosts first, then write them out.
        SysDNS()
        showDict()
    

    访问URL时可能出现的错误(Windows套接字错误码):

    [Errno 10060] 连接超时
    [Errno 10061] 连接被拒绝
    [Errno 10054] 连接被远程主机重置
    [Errno 10053] 连接被本机软件中止

  • 相关阅读:
    java IO
    免费的编程中文书籍索引-转
    js两种定义函数、继承方式及区别
    为什么这样写js:(function ($) { })(jQuery);
    Cannot open connection 解决办法
    dos下mysql登陆
    spring-AOP-1
    el表达式
    设计模式——"simple Factory"
    软件生产性控制
  • 原文地址:https://www.cnblogs.com/xiaobaichuangtianxia/p/3842562.html
Copyright © 2020-2023  润新知