1.选一个自己感兴趣的主题。
我选取的是猫途鹰旅游景点
from bs4 import BeautifulSoup import requests url = 'https://www.tripadvisor.cn/' wb_data = requests.get(url) soup = BeautifulSoup(wb_data.text,'lxml') for i in soup.select('li'): if len(i.select('.ranking'))>0: sort=i.select('.ranking')[0].text #排名 country=i.select('.countryName')[0].text #国家 city=i.select('.cityName')[0].text #城市 hotel=i.select('.hotelsCount')[0].text #酒店数 toury=i.select('.attractionCount')[0].text #景点数 eat=i.select('.eateryCount')[0].text #餐厅数 review=i.select('.reviewTitle')[0].text #印象 print(sort,country,city,review,hotel,toury,eat)
2.写一篇完整的博客,附上源代码、数据爬取及分析结果,形成一个可展示的成果。
做了一个知了词典的软件
代码如下
import urllib.request import urllib.parse import time import random import hashlib import json from window import Application from tkinter import * from tkinter import Tk,Button,Entry,Label,Text,END class YouDaoHelper(object): def __init__(self): pass def crawl(self,content): timestamp = int(time.time() * 1000) + random.randint(0, 10) u = "fanyideskweb" d = content f = str(timestamp) c = "rY0D^0'nM0}g5Mm1z%1G4" salt = hashlib.md5((u + d + f + c).encode('utf-8')).hexdigest() data = { 'i': content, 'from': 'AUTO', 'to': 'AUTO', 'smartresul': 'dict', 'client': 'fanyideskweb', 'salt': timestamp, 'sign': salt, 'doctypen': 'json', 'version': '2.1', 'keyfrom': 'fanyi.web', 'action': 'FY_BY_CLICKBUTTION', 'typoResult': 'true' } data = urllib.parse.urlencode(data).encode('utf-8') request = urllib.request.Request(url='http://fanyi.youdao.com/translate_o?' 'smartresult=dict&smartresult=rule&sessionFrom=', method='POST', data=data) response = urllib.request.urlopen(request) result_str = response.read().decode('utf-8') result_dict = json.loads(result_str) result = result_dict['translateResult'][0][0]['tgt'] return result class Application(object): def __init__(self): self.helper = YouDaoHelper() self.window = Tk() self.window.title(u'知了词典') # 创建标题 self.window.geometry('280x350+600+300') self.entry = Entry(self.window) # 创建输入框 self.entry.place(x=10, y=10, width=200, height=25) # 放置在哪个地方 self.submit_btu = Button(self.window, text=u'查询', command=self.submit) # 创建按钮 self.submit_btu.place(x=220, y=10, width=50, height=25) # 翻译结果标题 self.title_label = Label(self.window, text=u'翻译结果:') self.title_label.place(x=10, y=55) # 翻译结果 self.result_text = Text(self.window, background='#ccc') self.result_text.place(x=10, y=75, width=260, height=265) def submit(self): # 1.从输入框中获取用户输入的值 content = self.entry.get() #get的方法返回当前输入框的内容 # 2.把这个值发送给有道的服务器,进行翻译 result = self.helper.crawl(content) #3.把结果放置底部的Text控件中 self.result_text.delete(1.0,END)#把第一个的印记删除 self.result_text.insert(END,result)#将要输入的内容输入到翻译结果中 def run(self): self.window.mainloop() if __name__=='__main__': app = Application() app.run()
结果如下: