Cookie池设计思路:
参考IP池的设计,Cookie池的设计原理与其基本相同。
1.获取Cookie的来源 (可能需要IP池作为支撑)
2.Cookie程序内管理
3.应用到requests
程序实现:
使用selenium获取Cookie:
import random
import sys
import time
from concurrent.futures import ThreadPoolExecutor
from queue import Queue
from threading import Lock

from selenium import webdriver
from selenium.common.exceptions import TimeoutException

# Make this directory importable before importing redis_cli
# (presumably where the module lives -- confirm).
sys.path.append(r"D:\Work\IPS")
from redis_cli import IPS_  # noqa: E402

# Shared source of proxy addresses (see get_ip).  It must be created BEFORE
# the class below is defined: that class reuses the name ``IPS_`` and from
# then on shadows the imported one.
ips = IPS_()

# Location of the chromedriver binary handed to selenium.
CHROMEDRIVER_PATH = r'D:\zhoukai_workspace\WebDriver\chromedriver.exe'
# File that harvested cookies are appended to, one per line.
COOKIE_FILE = r'D:\JR\jr\ZKGIT\ZhuGeZhaoFang\Cookie_pool\cookie.txt'
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/98.0.4758.81 Safari/537.36'
)
# pullIP gives up once its failure counter exceeds this value.
MAX_RETRIES = 15
# Age in seconds after which delete_cookie drops a stored cookie.
COOKIE_MAX_AGE = 300


class IPS_():
    """Collect ``acw_tc`` / ``acw_sc__v2`` cookies through proxied Chrome.

    Worker threads open one of ``IpUrls`` in headless Chrome behind a proxy
    taken from the module-level ``ips`` object and append every cookie pair
    they obtain to ``COOKIE_FILE``.
    """

    def __init__(self):
        # Serialises calls to ``ips.one()`` across worker threads.
        self.lock1 = Lock()
        # Pages that are visited to obtain the cookies.
        self.IpUrls = [
            'https://xm.esfxiaoqu.zhuge.com/1007323/',
            'https://xm.esfxiaoqu.zhuge.com/1001471/',
            'https://xm.esfxiaoqu.zhuge.com/1007892/',
            'https://xm.esfxiaoqu.zhuge.com/1003688/',
            'https://xm.esfxiaoqu.zhuge.com/1001693/',
        ]
        # Not used by any method in this file; kept for external users.
        self.queue_ip = Queue()
        self.threadPoll = ThreadPoolExecutor(max_workers=8)

    def get_ip(self):
        """Return the next proxy address from ``ips.one()``.

        The call is made while holding ``lock1``; the ``with`` block
        guarantees the lock is released even if ``ips.one()`` raises.
        """
        with self.lock1:
            return ips.one()

    def thread_PullIP(self, tasks=20):
        """Submit ``tasks`` runs of :meth:`pullIP` to the thread pool.

        The pool has 8 workers, so at most 8 runs execute at once.  The
        calling thread does not wait for them.
        """
        for _ in range(tasks):
            self.threadPoll.submit(self.pullIP)

    def _build_driver(self, ip):
        """Create a headless Chrome driver that uses ``ip`` as HTTP proxy.

        If anything fails after the browser has been started, the browser
        is shut down before the exception is re-raised so it is not leaked.
        """
        options = webdriver.ChromeOptions()
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        options.add_argument('--headless')
        options.add_argument("--disable-blink-features=AutomationControlled")
        prefs = {
            # Do not load images.
            'profile.managed_default_content_settings.images': 2,
            # Do not show notification pop-ups.
            'profile.default_content_setting_values': {
                'notifications': 2
            }
        }
        options.add_experimental_option('prefs', prefs)
        # Proxy and user agent.
        options.add_argument('--proxy-server=http://' + ip)
        # Chrome's switch is ``--user-agent=``; the previous
        # ``User-Agent=...`` argument was not a switch and had no effect.
        options.add_argument('--user-agent={}'.format(USER_AGENT))

        driver = webdriver.Chrome(options=options,
                                  executable_path=CHROMEDRIVER_PATH)
        try:
            # Make navigator.webdriver read as undefined on every new page.
            driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source": """
                    Object.defineProperty(navigator, 'webdriver', {
                      get: () => undefined
                    })
                """
            })
            driver.implicitly_wait(5)
            driver.set_page_load_timeout(20)
            driver.set_script_timeout(20)
        except Exception:
            driver.quit()
            raise
        return driver

    def pullIP(self):
        """Harvest cookies in a loop, switching proxy after every failure.

        Each pass starts a fresh browser behind the current proxy, loads a
        random URL from ``IpUrls`` and, unless the page source contains
        ``'arg1='``, appends ``'<cookies>+<ip>--<timestamp>'`` to
        ``COOKIE_FILE``.  A failed pass (timeout, other error, ``'arg1='``
        page, missing cookie) increments the failure counter and fetches a
        new proxy.

        Returns:
            ``('', ip)`` once the failure counter exceeds ``MAX_RETRIES``.

        NOTE(review): a successful harvest neither returns nor resets the
        counter, so the loop keeps producing cookies with the same proxy.
        That matches the previous behaviour; confirm it is intended.
        """
        ip = self.get_ip()
        n = 0
        while True:
            # Checked before launching a browser.  Previously the limit was
            # only evaluated after a successful page load, so a run in
            # which every load failed never terminated.
            if n > MAX_RETRIES:
                return '', ip

            # Stays None if Chrome fails to start, so the cleanup below
            # cannot hit an unbound name.
            driver = None
            try:
                driver = self._build_driver(ip)
                driver.get(random.choice(self.IpUrls))
                time.sleep(1)

                # Presumably the challenge page served instead of the real
                # content -- treated as a failure for this proxy.
                if 'arg1=' in driver.page_source:
                    n += 1
                    ip = self.get_ip()
                    continue

                try:
                    # get_cookie() returns None for a missing cookie, which
                    # makes the subscript raise and counts as a failure.
                    acw_tc = driver.get_cookie(name='acw_tc')['value']
                    acw_sc__v2 = driver.get_cookie(name='acw_sc__v2')['value']
                    coo = 'acw_tc={0}; acw_sc__v2={1}'.format(acw_tc, acw_sc__v2)
                    cookie = '{}+{}--{}'.format(coo, ip, time.time())
                    print(cookie)
                    # Single write so a record and its newline stay together.
                    with open(COOKIE_FILE, 'a') as f:
                        f.write(cookie + '\n')
                except Exception as ex:
                    print(ex)
                    n += 1
                    ip = self.get_ip()
            except TimeoutException:
                n += 1
                ip = self.get_ip()
                print('关闭drive界面')
            except Exception as ex:
                print(ex)
                n += 1
                ip = self.get_ip()
                print('关闭drive界面')
            finally:
                # One cleanup point for every path, including return/continue.
                if driver is not None:
                    driver.quit()

    @staticmethod
    def _prune_cookie_file(path, max_age):
        """Rewrite ``path`` keeping only lines that are not yet expired.

        The timestamp is the text after the last ``'--'`` on a line.  Lines
        whose timestamp cannot be parsed are dropped, as before.
        """
        with open(path, 'r') as f:
            datas = [line.strip('\n') for line in f]

        kept = []
        for data in datas:
            try:
                local_time = float(data.split('--')[-1])
                age = int(time.time() - local_time)
            except (ValueError, OverflowError):
                # Malformed or non-finite timestamp: drop the line.
                continue
            if age > max_age:
                print('{} --- 过期'.format(data))
                continue
            kept.append(data)

        with open(path, 'w') as f:
            f.writelines(data + '\n' for data in kept)

    def delete_cookie(self, path='cookie.txt', max_age=COOKIE_MAX_AGE, interval=1.0):
        """Remove expired cookies from ``path`` forever; never returns.

        Args:
            path: Cookie file to sweep.
            max_age: Age in whole seconds above which a cookie is dropped.
            interval: Seconds to sleep between sweeps, so the loop does not
                spin at full speed rewriting the file.

        NOTE(review): the default is the relative ``cookie.txt`` while
        pullIP appends to the absolute ``COOKIE_FILE``; they are the same
        file only when the working directory is that folder -- confirm.
        NOTE(review): the file is truncated and rewritten without any
        coordination with pullIP, so a cookie appended during a sweep can
        be lost.
        """
        while True:
            try:
                self._prune_cookie_file(path, max_age)
            except FileNotFoundError:
                # Nothing has been harvested yet; try again later.
                pass
            time.sleep(interval)

    def run(self):
        """Start the harvesting workers.

        delete_cookie() is not started here; it has to be run separately.
        """
        self.thread_PullIP()


if __name__ == '__main__':
    IPS_().run()