• 自动化测试爬取抖音的用户信息


    # -*- coding: utf-8 -*-
    # TODO 抖音爬取
    """
    url:"https://www.douyin.com/aweme/v1/web/discover/search/?device_platform=webapp&aid=6383&channel=channel_pc_web&search_channel=aweme_user_web&keyword=%E5%AE%9C%E6%98%8C&search_source=switch_tab&query_correct_type=1&is_filter_search=0&offset=12&count=6&search_id=202203241001190101510740730D33498B&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1536&screen_height=864&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=99.0.4844.51&browser_online=true&engine_name=Blink&engine_version=99.0.4844.51&os_name=Windows&os_version=10&cpu_core_num=4&device_memory=8&platform=PC&downlink=5.75&effective_type=4g&round_trip_time=50&webid=7077846684190148127&msToken=yaBuTTR4uGFpgn39Hv0eT5g_nVQL8oERuLSsXvA6K0ntSj1DIbYncFLlqxo-xZhWUq8KOADdRcIPtN8fMvJDtOhCt6GFPD9U-ixbFpZK5OGzZxvXz44aB7f87gET290=&X-Bogus=DFSzswVOoeJANxo9SRS6El9WX7Jx&_signature=_02B4Z6wo000010FpbQgAAIDDwWuXS4AnaD9BaWmAALJs8TKBDtkXEkGcyTo5VcXbXPFT9YqmbKQ2nb-HKwrkB.uk4dIExcCraLfmDfpppToMLv3cJwMvL7fkFVsnpyZq.P7hhWfx1QPi4O2-12"
    例如:
    "nickname":"xxx"
    "enterprise_verify_reason":"xxx办公室"
    "unique_id":"xxx"
    "follower_count":"492710"
    """
    # @Date    : 2022/3/24 9:59
    # @Author  : layman
    import time

    import pymysql
    from selenium import webdriver
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
    )
    from selenium.webdriver.common.by import By
    
    # Open the database connection that receives the scraped rows.
    db = pymysql.connect(host='localhost', port=3306,
                         user='root', passwd='root', db='xxx', charset='utf8')

    cursor = db.cursor()
    driver = webdriver.Chrome()
    driver.get("https://www.douyin.com/search/xxx?source=switch_tab&type=user")
    # Long pause before scraping starts; presumably this leaves time for a
    # manual login / captcha in the opened browser window -- TODO confirm.
    time.sleep(200)

    # NOTE: nothing below scrolls the page (the former window.scrollTo call was
    # disabled), so each pass only re-reads the results currently in the DOM.
    insert_sql = "INSERT INTO douyin(uname, uverify, uid,ufans) VALUES (%s,%s,%s,%s)"
    # Rows already handled, so that re-reading the same list every two seconds
    # does not insert identical records over and over.
    stored_rows = set()

    try:
        # Runs until the process is interrupted (e.g. Ctrl-C) or the browser dies.
        while True:
            time.sleep(2)
            li_list = driver.find_elements(By.XPATH, '//*[@id="dark"]/div[2]/div/div[3]/div[3]/ul/li[*]')
            for li in li_list:
                try:
                    uname = li.find_element(By.XPATH, './div/a/div[1]/div[2]/p/span/span/span/span/span').text
                    try:
                        uverify = li.find_element(By.XPATH, './div/a/div[1]/div[2]/div/p').text
                    except NoSuchElementException:
                        # No verification element on this entry: store the placeholder.
                        uverify = '无认证'
                    uid = li.find_element(By.XPATH, './div/a/div[2]/span[1]/span').text
                    try:
                        ufans = li.find_element(By.XPATH, './div/a/div[2]/span[5]').text
                    except NoSuchElementException:
                        # No follower-count element on this entry: default to zero.
                        ufans = '0'
                except (NoSuchElementException, StaleElementReferenceException):
                    # A required field is missing, or the page re-rendered while
                    # this <li> was being read; skip it, the next pass retries.
                    continue

                row = (uname, uverify, uid, ufans)
                if row in stored_rows:
                    continue

                try:
                    cursor.execute(insert_sql, row)
                    db.commit()
                except pymysql.IntegrityError as exc:
                    # The table rejected the row (e.g. a duplicate key); retrying
                    # on every pass would be pointless, so remember it as handled.
                    stored_rows.add(row)
                    print("Skipped row rejected by the database: %r (%s)" % (row, exc))
                except pymysql.MySQLError as exc:
                    # Keep scraping, but report the failure instead of hiding it;
                    # the row is not marked as stored, so the next pass retries it.
                    print("Failed to insert row %r: %s" % (row, exc))
                else:
                    stored_rows.add(row)
    finally:
        # Always release the database handles and the browser, including when
        # the loop is stopped with Ctrl-C.
        try:
            cursor.close()
            db.close()
        finally:
            driver.quit()
    
    
  • 相关阅读:
    activemq安装及使用
    匿名内部类的简单使用
    Struts2中动态方法调用
    重定向和转发的区别
    result转发到一个action
    在Android Studio中导入jar包
    架构文档类别
    Linux/Unix下pid文件的作用
    【8.21校内测试】【最大生成树】【树状数组】【数学】
    【BZOJ】2653: middle
  • 原文地址:https://www.cnblogs.com/shun998/p/16186232.html
Copyright © 2020-2023  润新知