• 07.Cookie池设计


    Cookie池设计思路:

    参考前面的IP池设计,Cookie池的设计原理基本相同:

      1.获取Cookie的来源 (可能需要IP池作为支撑)

      2.Cookie程序内管理

      3.应用到requests

    程序实现:

    selenium获取Cookie来源:

    import random
    import sys
    import time
    from concurrent.futures import ThreadPoolExecutor
    from queue import Queue
    from threading import Lock

    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException

    # The proxy-pool client lives outside this project; its directory has to be
    # on sys.path before the import below.  Raw string: the path contains
    # backslashes that are not valid escape sequences.
    sys.path.append(r"D:\Work\IPS")
    from redis_cli import IPS_

    # Shared proxy-pool client.  It is instantiated here, before the class
    # statement further down rebinds the name ``IPS_`` to the local class.
    ips = IPS_()
    
    
    class IPS_():
        """Harvest anti-bot cookies with headless Chrome and store them in a file.

        Worker threads repeatedly open one of ``IpUrls`` through a proxy taken
        from the module-level ``ips`` client, read the ``acw_tc`` and
        ``acw_sc__v2`` cookies and append them, together with the proxy and a
        timestamp, to ``COOKIE_FILE``.
        """

        # chromedriver binary handed to Selenium (raw string: backslashes).
        CHROMEDRIVER_PATH = r'D:\zhoukai_workspace\WebDriver\chromedriver.exe'
        # Cookie store, one "<cookie>+<proxy>--<timestamp>" record per line.
        COOKIE_FILE = r'D:\JR\jr\ZKGIT\ZhuGeZhaoFang\Cookie_pool\cookie.txt'
        # User agent presented by the headless browser.
        USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/98.0.4758.81 Safari/537.36')
        # A worker gives up once its failure count exceeds this value.
        MAX_FAILURES = 15
        # Records older than this many seconds are pruned by delete_cookie().
        COOKIE_TTL_SECONDS = 300

        def __init__(self):
            """Set up the proxy lock, the target URLs and the worker pool."""
            self.lock1 = Lock()
            self.IpUrls = ['https://xm.esfxiaoqu.zhuge.com/1007323/',
                           'https://xm.esfxiaoqu.zhuge.com/1001471/',
                           'https://xm.esfxiaoqu.zhuge.com/1007892/',
                           'https://xm.esfxiaoqu.zhuge.com/1003688/',
                           'https://xm.esfxiaoqu.zhuge.com/1001693/'
                           ]
            # Not used inside this class; kept for external users of the attribute.
            self.queue_ip = Queue()
            self.threadPoll = ThreadPoolExecutor(max_workers=8)

        def get_ip(self):
            """Return one proxy from the shared ``ips`` client.

            Access is serialised with ``lock1``; the ``with`` block guarantees
            the lock is released even when ``ips.one()`` raises.
            """
            with self.lock1:
                return ips.one()

        def thread_PullIP(self):
            """Queue 20 ``pullIP`` tasks on the 8-worker pool without blocking."""
            for _ in range(20):
                self.threadPoll.submit(self.pullIP)

        def _new_driver(self, ip):
            """Start a headless Chrome routed through proxy ``ip`` and return it."""
            options = webdriver.ChromeOptions()
            options.add_experimental_option('excludeSwitches', ['enable-automation'])
            options.add_argument('--headless')
            options.add_argument("--disable-blink-features=AutomationControlled")

            prefs = {
                # Do not load images.
                'profile.managed_default_content_settings.images': 2,
                # Block notification pop-ups.
                'profile.default_content_setting_values': {
                    'notifications': 2
                }
            }
            options.add_experimental_option('prefs', prefs)

            # Proxy and user agent.  Chrome's switch is ``--user-agent``; a bare
            # ``User-Agent=...`` argument is not a recognised switch.
            options.add_argument('--proxy-server=http://' + ip)
            options.add_argument('--user-agent={}'.format(self.USER_AGENT))

            # NOTE(review): ``executable_path`` is deprecated in Selenium 4 in
            # favour of a Service object -- confirm the pinned Selenium version.
            driver = webdriver.Chrome(options=options,
                                      executable_path=self.CHROMEDRIVER_PATH)
            try:
                # Hide navigator.webdriver from every document that gets loaded.
                driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                    "source": """
                                                                    Object.defineProperty(navigator, 'webdriver', {
                                                                      get: () => undefined
                                                                    })
                                                                  """
                })
                driver.implicitly_wait(5)
                driver.set_page_load_timeout(20)
                driver.set_script_timeout(20)
            except Exception:
                # Do not leak the browser process when configuration fails.
                driver.quit()
                raise
            return driver

        def pullIP(self):
            """Harvest cookies in a loop until too many attempts have failed.

            Every attempt starts a fresh browser, loads a random URL from
            ``IpUrls`` and, unless the page source still contains ``'arg1='``
            (presumably the anti-bot challenge page -- TODO confirm), appends
            the cookie record to ``COOKIE_FILE``.  A failed attempt increments
            the failure counter and switches to a new proxy; a successful one
            keeps the proxy and simply loops again.

            Returns:
                tuple: ``('', ip)`` once more than ``MAX_FAILURES`` attempts
                have failed, where ``ip`` is the last proxy fetched.
            """
            ip = self.get_ip()
            n = 0
            # The cap is checked before every attempt so that timeouts and other
            # exceptions count towards it as well.
            while n <= self.MAX_FAILURES:
                driver = None
                harvested = False
                try:
                    driver = self._new_driver(ip)
                    driver.get(random.choice(self.IpUrls))
                    time.sleep(1)
                    if 'arg1=' not in driver.page_source:
                        # get_cookie() returns None for a missing cookie; the
                        # resulting TypeError is handled below as a failure.
                        acw_tc = driver.get_cookie(name='acw_tc')['value']
                        acw_sc__v2 = driver.get_cookie(name='acw_sc__v2')['value']
                        coo = 'acw_tc={0}; acw_sc__v2={1}'.format(acw_tc, acw_sc__v2)
                        cookie = '{}+{}--{}'.format(coo, ip, time.time())
                        print(cookie)
                        # Single write so concurrent workers do not interleave
                        # a record with another record's newline.
                        with open(self.COOKIE_FILE, 'a') as f:
                            f.write(cookie + '\n')
                        harvested = True
                except TimeoutException:
                    print('关闭drive界面')
                except Exception as ex:
                    print(ex)
                    print('关闭drive界面')
                finally:
                    # driver stays None when Chrome itself failed to start.
                    if driver is not None:
                        driver.quit()

                if not harvested:
                    n += 1
                    ip = self.get_ip()
            return '', ip

        def delete_cookie(self):
            """Continuously rewrite ``COOKIE_FILE`` without expired records.

            A record is dropped when its trailing ``--<timestamp>`` is more than
            ``COOKIE_TTL_SECONDS`` old, or when the timestamp cannot be parsed.
            This method never returns.

            NOTE(review): records appended by workers between the read and the
            rewrite of one pass are lost; nothing here guards against that.
            """
            while True:
                with open(self.COOKIE_FILE, 'r') as f:
                    datas = [line.strip('\n') for line in f]

                with open(self.COOKIE_FILE, 'w') as f:
                    for data in datas:
                        try:
                            local_time = float(data.split('--')[-1])
                        except ValueError:
                            # Malformed record: drop it.
                            continue
                        if int(time.time() - local_time) > self.COOKIE_TTL_SECONDS:
                            print('{} --- 过期'.format(data))
                            continue
                        f.write(data + '\n')

                # Pause between passes instead of spinning on the file.
                time.sleep(1)

        def run(self):
            """Start the harvesting workers.

            The expiry loop ``delete_cookie`` is not started from here.
            """
            self.thread_PullIP()
    
    if __name__ == '__main__':
        # Script entry point: build the harvester and start its workers.
        cookie_pool = IPS_()
        cookie_pool.run()
  • 相关阅读:
    starUML学习笔记一
    android 反编译教程
    android asyncTask 笔记
    android v13 的新特性
    ViewPage+Fragment+indicator+Tabhost效果
    android dp sp pt mm in px
    Tabhost+framgent+ViewPager滑动效果
    android studio github 项目导入问题
    Fragment 中 onCreate和onCreateView的区别
    android 抽屉式滑动demo
  • 原文地址:https://www.cnblogs.com/zhouA/p/15964502.html
Copyright © 2020-2023  润新知