#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Fetch the JD.com home page and print the parsed HTML document."""
from bs4 import BeautifulSoup
import requests

url = 'http://www.jd.com/'

# Send a browser-like User-Agent so the request is disguised as coming from
# a regular browser rather than a script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
}
# Alternative User-Agent string that can be used instead:
# User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36

# requests has no default timeout; without one a stalled connection would
# block the script indefinitely.
wb_data = requests.get(url, headers=headers, timeout=10)

# Parse the response body with the lxml parser.
soup = BeautifulSoup(wb_data.text, 'lxml')

# Parenthesized form works on both Python 2 (single argument) and Python 3.
print(soup)
headers 表示请求头(HTTP headers),用来把请求伪装成浏览器访问网页。
wb_data(网页数据):用 requests.get 发起请求访问,参数为 url(京东首页的网址)和 headers(伪装成浏览器的请求头)。
soup(解析后的数据):用 BeautifulSoup 解析数据,参数为 wb_data 的网页文本和 'lxml'(指定使用 lxml 解析器按该方式解析)。
打印结果如下: