模拟登录微博相对来说并不难。验证码是常规的5个随机数字、字母的组合,识别起来也比较容易。主要会用到Selenium中的许多知识,如定位标签、输入信息、点击等。如果对Selenium的使用还不熟悉,请先移驾《Python爬虫 | Selenium详解》,相信你再来看本篇时一定可以看懂。

破解微博登录的思路:

(1)使用webdriver打开微博网页;

(2)输入用户名和密码,点击登录;

(3)对第二步的结果进行判断:

  • 情况一:用户名或者密码错误
  • 情况二:登录成功
  • 情况三:出现验证码图片,需识别
  • 情况四:其他错误

(4)本例中增加了登录成功后获得cookies的情况

import requests
from requests import RequestException
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from chaojiying import Chaojiying


# Chaojiying (the captcha-recognition service used below) credentials:
# account name, password and software ID.
# The values were left blank in the original; fill in your own before running.
CHAOJIYING_USERNAME = ''
CHAOJIYING_PASSWORD = ''
CHAOJIYING_SOFT_ID = ''
# Passed as the second argument to Chaojiying.post_pic(); presumably the
# captcha-type code defined by the service — confirm against its price list.
CHAOJIYING_KIND = 1006


class LoginWeibo():
    """
    Automate a weibo.com login with Selenium and collect the session cookies.

    The flow is: open the home page, type the credentials, click the login
    button, then classify the outcome (wrong credentials, success, or a
    captcha challenge that is sent to the Chaojiying recognition service).
    """

    def __init__(self, username, password,
                 executable_path='D:downloadpythonRelatedchromedriver.exe'):
        """
        Start Chrome and remember the credentials.

        :param username: account name typed into the login form
        :param password: password typed into the login form
        :param executable_path: location of the chromedriver binary.
            NOTE(review): the default looks like a Windows path whose
            separators were lost — confirm and pass the real path.
        """
        self.url = 'https://www.weibo.com'
        self.browser = webdriver.Chrome(executable_path=executable_path)
        # Shared explicit wait (20 s) used for elements that must appear.
        self.wait = WebDriverWait(self.browser, 20)
        self.username = username
        self.password = password
        self.chaojiying = Chaojiying(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def close(self):
        """
        Quit the browser. Nothing calls this automatically; invoke it once
        the cookies have been collected so the browser is not left running.

        :return: None
        """
        self.browser.quit()

    def open(self):
        """
        Load the home page and type the user name and password.

        :return: None
        """
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginname')))
        password = self.wait.until(EC.presence_of_element_located((By.NAME, 'password')))
        username.send_keys(self.username)
        password.send_keys(self.password)

    def get_click_button(self):
        """
        Locate the login button once it is clickable.

        :return: the button element
        """
        # Sample markup of the button:
        # <a href="javascript:void(0)" class="W_btn_a btn_32px " action-type="btn_submit" node-type="submitBtn" suda-data="key=tblog_weibologin3&amp;value=click_sign" tabindex="6"><span node-type="submitStates">登录</span></a>
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'W_btn_a')))
        return button

    def login_successfully(self):
        """
        Check whether the login succeeded.

        :return: True if the mail icon shows up within 5 seconds, else False
        """
        # This element is only visible after a successful login:
        # <em class="W_ficon ficon_mail S_ficon">I</em>
        try:
            return bool(
                WebDriverWait(self.browser, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.ficon_mail')))
            )
        except TimeoutException:
            return False

    def get_click_image(self, name='captcha.png'):
        """
        Download the captcha image shown on the login form and save it.

        :param name: file name the image bytes are written to
        :return: the raw image bytes, or None when no captcha image appears
            or the download fails
        """
        # Sample markup of the captcha image:
        # <img width="95" height="34" action-type="btn_change_verifycode" node-type="verifycode_image" src="https://login.sina.com.cn/cgi/pin.php?r=88815771&amp;s=0&amp;p=gz-66c0488ef9191010d88bea8c9f3a09fdf3bf">
        try:
            element = self.wait.until(
                EC.presence_of_element_located((By.XPATH, '//img[@action-type="btn_change_verifycode"]')))
        except (NoSuchElementException, TimeoutException):
            # wait.until() reports a missing element as TimeoutException,
            # so catching NoSuchElementException alone never fires.
            print('未找到验证码图片')
            return None

        response = get_html(element.get_attribute('src'))
        if response is None:
            # get_html() returns None on a non-200 status or a request error.
            print('验证码图片下载失败')
            return None

        image = response.content
        with open(name, 'wb') as f:
            f.write(image)
        return image

    def password_error(self):
        """
        Check whether the "wrong user name or password" popup is shown.

        :return: True if the popup appears within 5 seconds with exactly that
            message, otherwise False
        """
        try:
            element = WebDriverWait(self.browser, 5).until(
                EC.presence_of_element_located((By.XPATH, '//div[@class="W_layer W_layer_pop"]/div/p/span[2]')))
        except TimeoutException:
            return False
        message = element.text
        print(message)
        # Always return a bool, including when the popup carries another message.
        return message == '用户名或密码错误。'

    def get_cookies(self):
        """
        Read the browser's cookies.

        :return: whatever the browser's get_cookies() returns
        """
        # Query the browser once so the printed and returned values agree.
        cookies = self.browser.get_cookies()
        print(cookies)
        return cookies

    def _success_result(self):
        """Announce the successful login and wrap the cookies in a status-1 result."""
        print('登录成功')
        return {
            'status': 1,
            'content': self.get_cookies()
        }

    def _submit_captcha(self):
        """
        Fetch the captcha, have Chaojiying recognise it and type the answer.

        :return: the recognition result (read via its 'pic_str' and 'pic_id'
            keys), or None when no captcha image could be obtained
        """
        image = self.get_click_image()
        if image is None:
            return None

        result = self.chaojiying.post_pic(image, CHAOJIYING_KIND)
        print(result)

        # Sample markup of the captcha input:
        # <input type="text" class="W_input " maxlength="6" autocomplete="off" value="验证码" action-data="text=请输入验证码" action-type="text_copy" name="verifycode" node-type="verifycode" tabindex="3">
        verifycode = self.wait.until(EC.presence_of_element_located((By.NAME, 'verifycode')))
        verifycode.send_keys(result['pic_str'])
        return result

    def login(self, max_retries=3):
        """
        Run the whole login flow, retrying when the captcha answer is rejected.

        :param max_retries: extra attempts allowed after the first one; every
            attempt reloads the page and re-enters the credentials
        :return: a dict with 'status' and 'content':
            status 1 - success, content is the cookies;
            status 2 - wrong user name or password;
            status 3 - still not logged in after all attempts
        """
        for _ in range(max_retries + 1):
            # 1. Open the site and type the credentials.
            self.open()

            # 2. Click the login button.
            self.get_click_button().click()

            if self.password_error():
                print('用户名或密码错误')
                return {
                    'status': 2,
                    'content': '用户名或密码错误'
                }
            if self.login_successfully():
                return self._success_result()

            # 3. Neither outcome was detected: try the captcha path.
            result = self._submit_captcha()
            if result is None:
                # No captcha could be fetched, so there is nothing to report.
                continue

            self.get_click_button().click()
            if self.login_successfully():
                return self._success_result()

            # Report the rejected answer to Chaojiying before the next attempt.
            self.chaojiying.report_error(result['pic_id'])

        return {
            'status': 3,
            'content': '登录失败'
        }


def get_html(url, timeout=10):
    """
    Fetch a URL with a browser-like User-Agent header.

    :param url: address to request
    :param timeout: seconds requests may wait before giving up; without it a
        stalled server would block the call indefinitely
    :return: the response object when the status code is 200; None for any
        other status or when the request raises RequestException (timeouts
        included)
    """
    # Put a User-Agent in the headers so the request looks like a browser's.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code != 200:
        return None
    # Let requests use the encoding it detects from the body.
    response.encoding = response.apparent_encoding
    return response


if __name__ == '__main__':
    # Script entry point: run the login flow with placeholder credentials
    # (replace 'username' / 'password' with a real account) and keep
    # whatever login() returns in `result`.
    result = LoginWeibo('username', 'password').login()

本篇博文仅供学习交流相关的爬虫知识,请勿过度使用,如有任何纠纷,与本人无关。(瑟瑟发抖)