• Python 代码推送百度链接


    通过代码抓取个人博客某一页中的文章链接,并将这些链接批量推送到百度站长平台,以达到快速收录的目的。

    import sys
    import requests
    from bs4 import BeautifulSoup
    
    # 推送百度爬虫
    def push_page(url):
        """Submit a single URL to the Baidu link-push endpoint.

        Args:
            url: Page URL to submit; it is sent as the plain-text request body.

        Returns:
            1 if the response reports ``success == 1``, otherwise 0. Network
            errors and replies that are not the expected JSON also yield 0.
        """
        # No fixed Content-Length: requests derives it from the body, and a
        # hard-coded value would not match most submitted URLs.
        headers = {
            'User-Agent': 'curl/7.12.1',
            'Host': 'data.zz.baidu.com',
            'Content-Type': 'text/plain',
        }
        urls = "http://data.zz.baidu.com/urls?site=https://www.lyshark.com&token=MpHvVKjbs10XqaW"
        try:
            response = requests.post(urls, headers=headers, data=url, timeout=5)
            # Decode the reply as JSON rather than eval(): the body comes from
            # the network and must never be executed as Python code.
            push_status = response.json()['success']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Request failure, non-JSON body, or JSON without a 'success' field.
            return 0
        return 1 if push_status == 1 else 0
    
    # 获取路径
    def get_page(page):
        """Extract post links from a listing page and push each one to Baidu.

        Args:
            page: URL of the blog listing page to scan.

        Returns:
            1 when the page was fetched and all links were processed, 0 when
            fetching or processing failed.
        """
        try:
            # The fetch lives inside the try so that a timeout or connection
            # error is reported as 0 instead of escaping to the caller.
            html = requests.get(page, timeout=5).text
            bs = BeautifulSoup(html, "html.parser")
            ret = bs.select('div[class="container"] div[class="row"] h2[class="post-title"] a')
            for item in ret:
                push_url = item.get('href')
                if not push_url:
                    # An anchor without an href has nothing to submit.
                    continue
                push_ref = push_page(push_url)
                print("推送: {} --> 状态: {}".format(push_url,push_ref))
            return 1
        except Exception:
            # Best-effort: any failure maps to 0. Catching Exception rather
            # than using a bare except lets Ctrl-C / SystemExit propagate.
            return 0
    
    if __name__ == "__main__":
        # Entry point: the first command-line argument is the listing page URL.
        arg = sys.argv
        if len(arg) < 2:
            # Exit with a usage hint instead of an IndexError traceback.
            sys.exit("usage: {} <page_url>".format(arg[0]))
        get_page(arg[1])
    

    2.0批量推送

    import requests
    from bs4 import BeautifulSoup
    
    # 推送百度爬虫
    def push_page(url):
        """Submit a single URL to the Baidu link-push endpoint and log the result.

        Args:
            url: Page URL to submit; it is sent as the plain-text request body.

        Returns:
            1 if the response reports ``success == 1``, otherwise 0. Network
            errors and replies that are not the expected JSON yield 0 without
            printing anything.
        """
        # Content-Type is 'text/plain' (the previous 'text/plain-t' was a typo).
        # No fixed Content-Length: requests derives it from the body.
        headers = {
            'User-Agent': 'curl/7.12.1',
            'Host': 'data.zz.baidu.com',
            'Content-Type': 'text/plain',
        }
        urls = "http://data.zz.baidu.com/urls?site=https://www.lyshark.com&token=C5pA6XTWlCxdCwB"
        try:
            response = requests.post(urls, headers=headers, data=url, timeout=5)
            # Decode the reply as JSON rather than eval(): the body comes from
            # the network and must never be executed as Python code.
            push_status = response.json()['success']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Request failure, non-JSON body, or JSON without a 'success' field.
            return 0
        # The same line is logged for success and failure; only the return differs.
        print("推送页面: {} 推送状态: {}".format(url,push_status))
        return 1 if push_status == 1 else 0
    
    # 获取路径
    def get_page(page):
        """Return the post links found on one listing page.

        Args:
            page: URL of the blog listing page to scan.

        Returns:
            A list of href strings, one per matching post-title anchor. An empty
            list is returned when fetching or parsing fails, so callers can
            always extend() or iterate the result.
        """
        push_url_list = []
        try:
            # The fetch lives inside the try so that a timeout or connection
            # error yields an empty result instead of an uncaught exception.
            html = requests.get(page, timeout=5).text
            bs = BeautifulSoup(html, "html.parser")
            ret = bs.select('div[class="container"] div[class="row"] h2[class="post-title"] a')
            for item in ret:
                push_url = item.get('href')
                if not push_url:
                    # An anchor without an href has nothing to collect.
                    continue
                print("提取链接: {}".format(push_url))
                push_url_list.append(push_url)
        except Exception:
            # Previously returned 0 here, which broke callers that extend() the
            # result; an empty list is equally falsy but safe to iterate.
            return []
        return push_url_list
    
    # 生成所有页面链接
    def create_page(start,end):
        """Build the listing-page URLs for page numbers ``start`` through ``end``.

        Both bounds are inclusive. Each URL is printed after the list is built,
        in ascending page order, and the list is returned.
        """
        links = [
            f"https://www.lyshark.com/page/{number}/"
            for number in range(start, end + 1)
        ]
        for link in links:
            print("创建页面链接: {}".format(link))
        return links
    
    if __name__ == "__main__":
        # NOTE(review): this loop restarts immediately with no delay, so the
        # same pages are re-scanned and re-pushed continuously — confirm this
        # is intended, since it may exhaust the push quota.
        while True:
            push_url = []
            # Build the list of listing pages (pages 1 through 15).
            page = create_page(1,15)
            for each in page:
                # Collect the post links found on this listing page.
                ref = get_page(each)
                if ref:
                    # get_page signals failure with a falsy value; extending
                    # with a non-list result would raise TypeError.
                    push_url.extend(ref)

            # Push every collected link.
            for url in push_url:
                push_page(url)
    
  • 相关阅读:
    排列组合算法
    C++内存管理——堆&&栈
    编程之美——1.2 中国象棋将帅问题
    Gentoo: fcitx的安装
    Gentoo NTFS USB盘有写权限
    Gentoo U盘无法自动挂载,打开报告Not Authorized,xfce只有logout,suspend/shutdown灰化等问题解决方法
    Kernel: 打开CONFIG_EMBEDDED从而使更多的kernel option可以更改
    Gentoo Enable framebuffer console (没有安装X,KDE的时候)
    转载:Gentoo和Ubuntu包管理命令对比集
    Gentoo Rebuild virtualboxmodules when kernel is updated
  • 原文地址:https://www.cnblogs.com/LyShark/p/15730766.html
Copyright © 2020-2023  润新知