爬取静态数据并存储为 JSON 文件
# Fetch http://seputu.com/, collect the links listed under every element with
# class "mulu", print them, and save the result to ``1.json`` in the current
# working directory.
#
# Shape of the saved JSON (a list, one entry per "mulu" element that has an
# <h2> heading):
#   [{"title": <h2 text>, "content": [{"href": ..., "box_title": ...}, ...]}, ...]
import json

import chardet
import requests
from bs4 import BeautifulSoup

user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0'
headers = {'User-Agent': user_agent}

# requests never times out by default; bound the wait so an unresponsive
# server cannot hang the script forever (10 s is an arbitrary but generous cap).
r = requests.get('http://seputu.com/', headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page and silently
# writing an empty or meaningless 1.json.
r.raise_for_status()
# Decode using the charset detected from the raw bytes rather than the one
# requests derives from the HTTP headers.
r.encoding = chardet.detect(r.content)['encoding']
soup = BeautifulSoup(r.text, features='html.parser')

content = []
for mulu in soup.find_all(class_='mulu'):
    h2 = mulu.find('h2')
    if h2 is None:
        # Sections without a heading are skipped entirely.
        continue
    h2_title = h2.string

    # find() returns None when no matching element exists; treat a section
    # without a "box" container as having no links instead of crashing.
    box = mulu.find(class_='box')
    links = box.find_all('a') if box is not None else []

    chapters = []
    for a in links:
        href = a.get('href')
        box_title = a.get('title')
        print(href, '|||', box_title)
        chapters.append({'href': href, 'box_title': box_title})
    content.append({'title': h2_title, 'content': chapters})

# json.dump escapes non-ASCII characters by default, so the file content is
# plain ASCII; the explicit encoding just removes any dependence on the
# platform's default text encoding.
with open('1.json', 'w', encoding='utf-8') as fp:
    json.dump(content, fp=fp, indent=4)