• Selenium中如何抓取网络请求响应及WebSocket信息


    我们在使用Selenium测试Web或Electronjs/Cef框架应用时,有时候操作一个元素需要判断是否发送了请求以及请求的参数是否正确

    我们可以通过开启Chrome的性能日志,然后配合driver.get_log("performance")来查看请求,并对Network相关的日志进行过滤,
    实现如下:

    获取Chrome性能日志

    import json
    from pprint import pprint
    from selenium import webdriver
    
    # Dump the Network-related Chrome DevTools events that chromedriver
    # records in its "performance" log while loading a page.

    # Configure Chrome through ChromeOptions instead of the
    # `desired_capabilities` keyword, which newer Selenium 4 releases no
    # longer accept on webdriver.Chrome().
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # run without a visible window
    # Ask chromedriver to record performance (DevTools) log entries.
    options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})

    # Method-name prefixes of the events to show: responses, requests and
    # WebSocket traffic.
    network_event_prefixes = (
        'Network.response',
        'Network.request',
        'Network.webSocket',
    )

    driver = webdriver.Chrome(options=options)
    try:
        driver.get('https://httpbin.org/get')
        logs = driver.get_log("performance")
        for item in logs:
            # Each entry's "message" field is a JSON string whose inner
            # "message" object holds the event's method name and params.
            log = json.loads(item["message"])["message"]
            # Print only the Network events of interest (the original also
            # printed every entry unconditionally, defeating this filter
            # and printing matching entries twice).
            if log["method"].startswith(network_event_prefixes):
                pprint(log)
    finally:
        # Always shut the browser down so no headless Chrome process leaks.
        driver.quit()
    

    运行结果如下:

    {'method': 'Network.responseReceived',
     'params': {'frameId': '2445B94E9E1DB51A1B1F4F3B0A3F03F5',
                'loaderId': 'D0DE1754D5C5F1E54DC3B0DB2A09ADD6',
                'requestId': 'D0DE1754D5C5F1E54DC3B0DB2A09ADD6',
                'response': {'connectionId': 0,
                             'connectionReused': False,
                             'encodedDataLength': -1,
                             'fromDiskCache': False,
                             'fromPrefetchCache': False,
                             'fromServiceWorker': False,
                             'headers': {'Content-Type': 'text/plain;charset=US-ASCII'},
                             'mimeType': 'text/plain',
                             'protocol': 'data',
                             'remoteIPAddress': '',
                             'remotePort': 0,
                             'securityState': 'secure',
                             'status': 200,
                             'statusText': 'OK',
                             'url': 'data:,'},
                'timestamp': 57524.763168,
                'type': 'Document'}}
    {'method': 'Network.requestWillBeSent',
     'params': {'documentURL': 'https://httpbin.org/get',
                'frameId': '2445B94E9E1DB51A1B1F4F3B0A3F03F5',
                'hasUserGesture': False,
                'initiator': {'type': 'other'},
                'loaderId': '8BB61F3D2448E8BC91A4A5AD7E690673',
                'request': {'headers': {'Upgrade-Insecure-Requests': '1',
                                        'User-Agent': 'Mozilla/5.0 (Macintosh; '
                                                      'Intel Mac OS X 10_15_7) '
                                                      'AppleWebKit/537.36 (KHTML, '
                                                      'like Gecko) '
                                                      'HeadlessChrome/91.0.4472.114 '
                                                      'Safari/537.36'},
                            'initialPriority': 'VeryHigh',
                            'method': 'GET',
                            'mixedContentType': 'none',
                            'referrerPolicy': 'strict-origin-when-cross-origin',
                            'url': 'https://httpbin.org/get'},
                'requestId': '8BB61F3D2448E8BC91A4A5AD7E690673',
                'timestamp': 57524.961438,
                'type': 'Document',
                'wallTime': 1626501610.512192}}
    {'method': 'Network.requestWillBeSentExtraInfo',
     'params': {'associatedCookies': [],
                'headers': {':authority': 'httpbin.org',
                            ':method': 'GET',
                            ':path': '/get',
                            ':scheme': 'https',
                            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                            'accept-encoding': 'gzip, deflate, br',
                            'accept-language': 'en-US',
                            'sec-fetch-dest': 'document',
                            'sec-fetch-mode': 'navigate',
                            'sec-fetch-site': 'none',
                            'sec-fetch-user': '?1',
                            'upgrade-insecure-requests': '1',
                            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X '
                                          '10_15_7) AppleWebKit/537.36 (KHTML, '
                                          'like Gecko) '
                                          'HeadlessChrome/91.0.4472.114 '
                                          'Safari/537.36'},
                'requestId': '8BB61F3D2448E8BC91A4A5AD7E690673'}}
    {'method': 'Network.responseReceivedExtraInfo',
     'params': {'blockedCookies': [],
                'headers': {'access-control-allow-credentials': 'true',
                            'access-control-allow-origin': '*',
                            'content-length': '754',
                            'content-type': 'application/json',
                            'date': 'Sat, 17 Jul 2021 06:00:11 GMT',
                            'server': 'gunicorn/19.9.0'},
                'requestId': '8BB61F3D2448E8BC91A4A5AD7E690673',
                'resourceIPAddressSpace': 'Public'}}
    {'method': 'Network.responseReceived',
     'params': {'frameId': '2445B94E9E1DB51A1B1F4F3B0A3F03F5',
                'loaderId': '8BB61F3D2448E8BC91A4A5AD7E690673',
                'requestId': '8BB61F3D2448E8BC91A4A5AD7E690673',
                'response': {'connectionId': 12,
                             'connectionReused': False,
                             'encodedDataLength': 123,
                             'fromDiskCache': False,
                             'fromPrefetchCache': False,
                             'fromServiceWorker': False,
                             'headers': {'access-control-allow-credentials': 'true',
                                         'access-control-allow-origin': '*',
                                         'content-length': '754',
                                         'content-type': 'application/json',
                                         'date': 'Sat, 17 Jul 2021 06:00:11 GMT',
                                         'server': 'gunicorn/19.9.0'},
                             'mimeType': 'application/json',
                             'protocol': 'h2',
                             'remoteIPAddress': '52.201.75.114',
                             'remotePort': 443,
                             'requestHeaders': {':authority': 'httpbin.org',
                                                ':method': 'GET',
                                                ':path': '/get',
                                                ':scheme': 'https',
                                                'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                                                'accept-encoding': 'gzip, deflate, '
                                                                   'br',
                                                'accept-language': 'en-US',
                                                'sec-fetch-dest': 'document',
                                                'sec-fetch-mode': 'navigate',
                                                'sec-fetch-site': 'none',
                                                'sec-fetch-user': '?1',
                                                'upgrade-insecure-requests': '1',
                                                'user-agent': 'Mozilla/5.0 '
                                                              '(Macintosh; Intel '
                                                              'Mac OS X 10_15_7) '
                                                              'AppleWebKit/537.36 '
                                                              '(KHTML, like Gecko) '
                                                              'HeadlessChrome/91.0.4472.114 '
                                                              'Safari/537.36'},
                             'responseTime': 1626501611316.694,
                             'securityDetails': {'certificateId': 0,
                                                 'certificateTransparencyCompliance': 'unknown',
                                                 'cipher': 'AES_128_GCM',
                                                 'issuer': 'Amazon',
                                                 'keyExchange': 'ECDHE_RSA',
                                                 'keyExchangeGroup': 'P-256',
                                                 'protocol': 'TLS 1.2',
                                                 'sanList': ['httpbin.org',
                                                             '*.httpbin.org'],
                                                 'signedCertificateTimestampList': [],
                                                 'subjectName': 'httpbin.org',
                                                 'validFrom': 1608508800,
                                                 'validTo': 1642636799},
                             'securityState': 'secure',
                             'status': 200,
                             'statusText': '',
                             'timing': {'connectEnd': 548.386,
                                        'connectStart': 26.524,
                                        'dnsEnd': 26.524,
                                        'dnsStart': 14.11,
                                        'proxyEnd': -1,
                                        'proxyStart': -1,
                                        'pushEnd': 0,
                                        'pushStart': 0,
                                        'receiveHeadersEnd': 803.146,
                                        'requestTime': 57524.962922,
                                        'sendEnd': 548.745,
                                        'sendStart': 548.611,
                                        'sslEnd': 548.36,
                                        'sslStart': 277.934,
                                        'workerFetchStart': -1,
                                        'workerReady': -1,
                                        'workerRespondWithSettled': -1,
                                        'workerStart': -1},
                             'url': 'https://httpbin.org/get'},
                'timestamp': 57525.76746,
                'type': 'Document'}}
    

    获取请求及响应信息

    由于日志中没有请求的POST数据和响应体数据,我们可以通过执行CDP命令获取,修改后代码如下:

    import json
    from pprint import pprint
    from selenium import webdriver
    from selenium.common.exceptions import WebDriverException
    
    
    # For every response recorded in Chrome's performance log, extract the
    # request/response metadata and fetch the bodies through CDP commands.

    # Configure Chrome through ChromeOptions instead of the
    # `desired_capabilities` keyword, which newer Selenium 4 releases no
    # longer accept on webdriver.Chrome().
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # run without a visible window
    # Ask chromedriver to record performance (DevTools) log entries.
    options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})

    driver = webdriver.Chrome(options=options)
    try:
        driver.get('https://httpbin.org/get')
        logs = driver.get_log("performance")
        for item in logs:
            # Each entry's "message" field is a JSON string whose inner
            # "message" object holds the event's method name and params.
            log = json.loads(item["message"])["message"]
            if log["method"] != 'Network.responseReceived':
                continue

            params = log['params']
            response = params['response']
            url = response['url']
            # Skip the initial blank "data:," page. Requests can be filtered
            # further by resource type via params['type'] (e.g. 'Document').
            if url == 'data:,':
                continue
            print('请求', url)
            request_id = params['requestId']

            # Metadata available for assertions in a test. requestHeaders and
            # responseTime are not present on every response (the "data:,"
            # entry has neither), so read them without raising KeyError.
            request_headers = response.get('requestHeaders')
            response_headers = response['headers']
            response_time = response.get('responseTime')
            status_code = response['status']

            try:
                request_data = driver.execute_cdp_cmd(
                    'Network.getRequestPostData', {'requestId': request_id})
            except WebDriverException:
                # Raised when the request carried no POST data.
                request_data = None

            try:
                response_body = driver.execute_cdp_cmd(
                    'Network.getResponseBody', {'requestId': request_id})['body']
            except WebDriverException:
                # The body may be unavailable for this request id; keep
                # going instead of aborting the whole loop.
                response_body = None
            print('响应', response_body)
    finally:
        # Always shut the browser down so no headless Chrome process leaks.
        driver.quit()
            print('响应', response_body)
    

    执行后显示如下:

    请求 https://httpbin.org/get
    响应 {
      "args": {}, 
      "headers": {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", 
        "Accept-Encoding": "gzip, deflate, br", 
        "Accept-Language": "en-US", 
        "Cache-Control": "max-age=0", 
        "Host": "httpbin.org", 
        "Sec-Fetch-Dest": "document", 
        "Sec-Fetch-Mode": "navigate", 
        "Sec-Fetch-Site": "none", 
        "Sec-Fetch-User": "?1", 
        "Upgrade-Insecure-Requests": "1", 
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/91.0.4472.114 Safari/537.36", 
        "X-Amzn-Trace-Id": "Root=1-60f2dd9d-6533f9526707f25f7d6c38de"
      }, 
      "origin": "123.118.150.190", 
      "url": "https://httpbin.org/get"
    }
    

    参考:How to Capture Network Traffic When Scraping with Selenium & Python
    Chrome DevTools Protocol

  • 相关阅读:
    F
    E
    网上见到一同行发的隐私政策 备以后用
    Cannot connect to the Docker daemon. Is the docker daemon running on this host?
    mark
    转 随机数问题
    随机不同的数
    转 基于Quick-cocos2dx 2.2.3 的动态更新实现完整篇。(打包,服务器接口,模块自更新
    字符串
    关于cmbiling.jar cocos2dx的问题
  • 原文地址:https://www.cnblogs.com/superhin/p/15023302.html
Copyright © 2020-2023  润新知