• Tkinter爬虫(Zealer、Mydrivers)--with Proxy


    import urllib2
    
    class UseProxy(object):
        """Build urllib2 openers that send HTTP requests through an authenticating proxy.

        The constructor defaults are the placeholder values this script shipped
        with; pass real values instead of editing the class.
        """

        def __init__(self, user='aaaa', password='bbbb', proxyserver='xxx.yyy.zzz:8080'):
            """Store the proxy credentials and address.

            Args:
                user: Proxy account name.
                password: Proxy account password.
                proxyserver: Proxy address in ``host:port`` form.
            """
            self.user = user
            self.password = password
            self.proxyserver = proxyserver
            # Kept for backward compatibility; nothing in this class assigns it again.
            self.content = ''

        def getproxy(self):
            """Return an opener whose ``http`` requests go through the proxy.

            Only the ``http`` scheme is mapped to the proxy.

            Returns:
                The opener built by ``urllib2.build_opener`` from a
                ``ProxyHandler`` and ``HTTPHandler``.
            """
            # Credentials travel inside the proxy URL: http://user:password@host:port
            proxy = 'http://{}:{}@{}'.format(self.user, self.password, self.proxyserver)
            proxy_handler = urllib2.ProxyHandler({'http': proxy})
            return urllib2.build_opener(proxy_handler, urllib2.HTTPHandler)
    # File name of the module above: UseProxy.py

    from urlparse import urljoin
    import re
    from UseProxy import *
    from bs4 import BeautifulSoup
    
    class GetZealerVideo(object):
        """Scrape post links and their titles from the Zealer front page."""

        def __init__(self):
            self.url = 'http://www.zealer.com'
            self.content = ''
            self.lists = []

        def splitcontent(self, proxyset):
            """Download the front page and collect alternating link/title entries.

            Args:
                proxyset: Object whose ``getproxy()`` returns an opener
                    providing ``open(url)``.

            Returns:
                ``self.lists`` as ``[link, title, link, title, ...]``: each link
                is an absolute URL and each title is UTF-8 encoded. The list is
                empty when the number of title blocks and link blocks differ.
            """
            self.content = proxyset.getproxy().open(self.url).read().decode('utf-8')
            soup = BeautifulSoup(self.content, "html.parser")
            founddiv = soup.findAll('div', {'class': 'subject'})
            foundli = soup.findAll('div', {'id': re.compile("^li_layer")})
            self.lists = []
            # Titles and link containers are paired by position, so a count
            # mismatch means the pairing cannot be trusted.
            if len(founddiv) != len(foundli):
                return self.lists
            for title_div, link_div in zip(founddiv, foundli):
                matches = re.findall(r'/post/\d+', str(link_div))
                # The second match is the one used as the post link.
                # NOTE(review): presumably the first match duplicates it
                # (e.g. a thumbnail link) -- confirm against the page markup.
                if len(matches) < 2 or not title_div.contents:
                    continue
                self.lists.append(urljoin(self.url, matches[1]))
                self.lists.append(title_div.contents[0].encode('utf-8'))
            return self.lists

    if __name__ == '__main__':
        gvideo = GetZealerVideo()
        proxyset = UseProxy()
        print('.'.join(gvideo.splitcontent(proxyset)).decode('utf-8'))
    # File name of the module above: GetZealerVideo.py

    from UseProxy import *
    from bs4 import BeautifulSoup
    
    class GetMydrivers(object):
        """Scrape headline entries from the Mydrivers front page."""

        def __init__(self):
            self.url = 'http://www.mydrivers.com'
            self.content = ''
            self.lists = []

        def splitcontent(self, proxyset):
            """Download the front page and collect the first child of each title span.

            Args:
                proxyset: Object whose ``getproxy()`` returns an opener
                    providing ``open(url)``.

            Returns:
                ``self.lists``, rebuilt on every call: the first child node of
                each ``<span class="titl">`` element, in page order.
            """
            # The raw bytes are handed to BeautifulSoup, which decodes them
            # using the gb18030 hint.
            self.content = proxyset.getproxy().open(self.url).read()
            soup = BeautifulSoup(self.content, "html.parser", from_encoding="gb18030")
            # Debug aid: shows which encoding the parser settled on.
            print(soup.original_encoding)
            # Start fresh so repeated calls do not accumulate duplicates.
            self.lists = []
            for span in soup.findAll('span', {'class': 'titl'}):
                # An empty span has no first child to collect.
                if span.contents:
                    self.lists.append(span.contents[0])
            return self.lists
    
    if __name__ == '__main__':
        # Script entry: fetch the headlines through the proxy and print each one.
        scraper = GetMydrivers()
        proxy = UseProxy()
        for entry in scraper.splitcontent(proxy):
            # Each entry's UTF-8 text is re-encoded as GB18030 before printing.
            print(str(entry).decode('utf-8').encode('gb18030'))
    # File name of the module above: GetMydrivers.py


    # -*- coding: utf-8 -*-
    from Tkinter import *
    from time import ctime
    import os
    import re
    import GetZealerVideo as soup
    import GetMydrivers as mnews
    from UseProxy import *
    
    class GetResource(object):
        """Tkinter window that lists scraped Zealer and Mydrivers entries.

        Two buttons run the scrapers and fill a listbox with the results.
        Double-clicking a row that starts with ``http`` opens it in the
        default browser.
        """

        # Patterns applied to each scraped Mydrivers element's markup.
        _HREF_RE = re.compile(r'(?<=href=").+?(?=">)')
        _TEXT_RE = re.compile(r'(?<=>).+?(?=<)')

        def __init__(self):
            """Create the root window, the scrollable listbox and the two buttons."""
            self.win = Tk()

            self.l1 = StringVar(self.win)
            self.msg = ""
            self.frame = Frame(width=800, height=600, bg='white')
            # Keep the frame at its configured size instead of shrinking to fit children.
            self.frame.propagate(False)
            self.frame.pack()

            self.scroll = Scrollbar(self.frame)
            self.scroll.pack(side=RIGHT, fill=Y)
            self.listbox = Listbox(self.frame, selectbackground='blue', font='12', height=550, width=750,
                                   yscrollcommand=self.scroll.set)
            self.listbox.pack(side=TOP, fill=BOTH)
            # Two-way wiring: the listbox reports its position to the scrollbar
            # (yscrollcommand above) and dragging the scrollbar moves the listbox.
            self.scroll.config(command=self.listbox.yview)
            self.listbox.bind('<Double-1>', self.get_select)

            self.frame2 = Frame(width=800, height=50, bg='white')
            self.frame2.propagate(False)
            self.frame2.pack()
            Button(self.frame2, text=u'Get Zealer', command=self.zealer_video).pack(expand=YES)
            Button(self.frame2, text=u'Get Mydrivers', command=self.my_drivers).pack(expand=YES)

        def my_drivers(self):
            """Refill the listbox with Mydrivers entries: a link row, then a title row."""
            print('start get at: %s' % ctime())
            self.listbox.delete(0, END)
            self.getm = mnews.GetMydrivers()
            proxyset = UseProxy()
            for l in self.getm.splitcontent(proxyset):
                s = str(l).decode('utf-8')
                try:
                    link = self._HREF_RE.findall(s)[0]
                    title = self._TEXT_RE.findall(s)[0]
                except IndexError:
                    # Skip the whole entry so link and title rows stay paired.
                    continue
                self.listbox.insert(END, link + "\n")
                self.listbox.insert(END, title + "\n")
                # Redraw after each entry so rows appear while the loop runs.
                self.listbox.update()
            print('get done at: %s' % ctime())

        def zealer_video(self):
            """Refill the listbox with the entries returned by the Zealer scraper."""
            print('start get at: %s' % ctime())
            self.listbox.delete(0, END)
            self.getz = soup.GetZealerVideo()
            proxyset = UseProxy()
            for l in self.getz.splitcontent(proxyset):
                self.listbox.insert(END, l + "\n")
                self.listbox.update()
            print('get done at: %s' % ctime())

        def get_select(self, ev=None):
            """Open the double-clicked row in the browser when it is a link.

            Args:
                ev: The Tk event passed by the ``<Double-1>`` binding; unused.
            """
            # Local import keeps this method self-contained; on Windows
            # webbrowser falls back to os.startfile, elsewhere it is portable.
            import webbrowser

            self.listbox.config(selectbackground='red')
            selection = self.listbox.curselection()
            print(selection)
            if not selection:
                # A double-click on an empty list selects nothing.
                return
            self.check = self.listbox.get(selection[0])
            # Rows are stored with a trailing newline; strip it before opening.
            target = self.check.strip()
            if target.startswith('http'):
                webbrowser.open(target)
    
    def main():
        """Build the scraper window, then hand control to the Tkinter main loop."""
        # The instance stays bound for the duration of the event loop.
        app = GetResource()
        mainloop()

    # Start the GUI only when this file is executed as a script.
    if __name__ == '__main__':
        main()
  • 相关阅读:
    create_project.py报错问题,建议用回python2.7
    windows下执行build_native.sh报权限问题
    编辑器CocoStudio和CocosBuilder的对比
    双击判断
    Web文件的ContentType类型大全
    Java四类八种数据类型
    自己写的通过ADO操作mysql数据库
    使用Cout输出String和CString对象
    CString和string头文件
    C++连接mysql数据库的两种方法
  • 原文地址:https://www.cnblogs.com/guojian2080/p/4631822.html
Copyright © 2020-2023  润新知