1,参考:
https://stackoverflow.com/questions/43665276/how-to-run-google-chrome-headless-in-docker
https://hub.docker.com/r/browserless/chrome/dockerfile
2,Python 作为调用脚本:
# -*- coding: utf-8 -*-
"""Scrape a few fields from a car listing page using headless Chrome.

The script starts a headless Chrome session through Selenium, opens one
listing URL, waits until the contact-form container is present in the DOM,
prints the text of three elements, and always shuts the browser down.
"""
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By  # built-in locator strategies
from selenium.webdriver.support import expected_conditions as EC  # built-in expected-condition helpers
from selenium.webdriver.support.wait import WebDriverWait  # explicit wait bound to a driver

chrome_options = Options()
# chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-gpu")
# NOTE(review): the original marks this flag as "linux only"; it is probably
# needed when Chrome runs as root inside a Docker container - confirm.
# chrome_options.add_argument("--no-sandbox")
# The command-line flag alone enables headless mode; the extra
# `chrome_options.headless = True` assignment was redundant and relies on a
# setter that newer Selenium releases deprecate.
chrome_options.add_argument("--headless")

driver = webdriver.Chrome(options=chrome_options)
try:
    # Navigation happens inside the try block so the browser process is
    # released even when loading the page fails.
    driver.get(
        'https://www.********.de/angebote/opel-corsa-c-edition-klima-8xbereift-schiebedach-benzin-silber-018d31ca-5af8-4cd5-8b47-9ee198bca593?cldtidx=20&cldtsrc=listPage')

    # Wait up to 200 s, polling every 0.5 s, for the contact form container.
    WebDriverWait(driver, 200, 0.5).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'cldt-contact-form-container')))

    # find_element(By.<strategy>, ...) replaces the find_element_by_* helpers,
    # which no longer exist in current Selenium 4 releases.
    print(driver.find_element(By.CSS_SELECTOR, '.cldt-item .sc-grid-row ').text)
    print(driver.find_element(
        By.XPATH, '/html/body/div[1]/main/div[2]/div[3]/div[2]/div[1]/div[3]/span[3]').text)
    print(driver.find_element(
        By.XPATH, '/html/body/div[1]/main/div[2]/div[3]/div[2]/div[1]/div[3]/span[1]').text)
finally:
    # quit() closes every window and ends the driver session, so a separate
    # close() call beforehand is unnecessary.
    driver.quit()
3, 在docker 中运行,解决了资源问题。