文章更新于:2020-04-13
注:用于记录已经上传的资源,预防重复上传。
一、已经上传审核通过的资源
二、用于采集已经上传资源的脚本
import requests
from bs4 import BeautifulSoup
# Paginated listing page of the author's previously uploaded CSDN resources;
# the page number (1..12) is appended to this base URL below.
url = "https://download.csdn.net/my/uploads/2/"
# Site prefix used to turn the relative hrefs scraped below into absolute links.
pre_down_url = "https://download.csdn.net"
# Request headers: desktop User-Agent plus a login-cookie placeholder —
# the "my uploads" page presumably requires an authenticated session.
header = {'User-Agent':'Mozilla/5.0 (Windows NT 7.0; Win64; x64; rv:75.0)',
'cookie':'登录凭据 cookie 放在这里'}
# Build each page URL, fetch it, and print every uploaded resource as a
# markdown table row: [name](absolute-link) | name
for page in range(1, 13):
    pageurl = url + str(page)
    response = requests.get(pageurl, headers=header)
    soup = BeautifulSoup(response.text, 'html.parser')
    tags = soup.find_all(class_="content")
    # Extract the resource name and link from each entry on the page.
    for item in tags:
        # NOTE: the original had a stray trailing comma here
        # (`name = item.h3.a.string,`), accidentally creating a 1-tuple
        # that was then unwrapped with `name[0]` — fixed by dropping both.
        raw_name = item.h3.a.string
        # .string is None when the <a> has nested markup; skip strip() crash
        # (the original would have printed the literal text "None").
        name = raw_name.strip() if raw_name is not None else ""
        href = item.h3.a['href'].strip()
        # Format: [name](link)|name
        print("[%s](%s%s) | %s" % (name, pre_down_url, href, name))