开发工具:PyCharm
开发环境:python3.6 + flask + requests
开发流程:
1. 启动一个web服务
from flask import Flask
app = Flask(__name__)
if __name__ == '__main__':
app.run(host='127.0.0.1', port=6666)
2. 增加app.route装饰器
from flask import Flask
app = Flask(__name__)
@app.route('/')
def index():
return 'Hello World'
if __name__ == '__main__':
app.run(host='127.0.0.1', port=5000)
3. 增加index.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>仿百度搜索</title>
<style type="text/css">
.align-center{
position:fixed;left:30%;top:30%;margin-left:width/2;margin-top:height/2;
}
</style>
</head>
<body>
<form action="/s" method="get">
<div class="align-center">
<input type="search" name="key"> <input type="submit" value="搜索"><br>
</div>
</form>
</body>
</html>
index.html
4. 增加 render_template
from flask import Flask
from flask import render_template
app = Flask(__name__)
@app.route('/')
def index():
return render_template('index.html')
if __name__ == '__main__':
app.run(host='127.0.0.1', port=5000)
5. 增加返回结果
@app.route('/s')
def search():
return 'Hello World'
6. spider.py
import requests
def getBdMsg(keyword):
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"}
res = requests.get('https://www.baidu.com/s?wd={}'.format(keyword), headers = headers).text
return res
7. 获取搜索框关键字,通过爬虫程序搜索,获得百度搜索结果
from flask import Flask
from flask import render_template
from flask import request
from spider import getBdMsg
app = Flask(__name__)
@app.route('/')
def index():
return render_template('index.html')
@app.route('/s')
def search():
keyword = request.args.get("key")
text = getBdMsg(keyword)
return text
if __name__ == '__main__':
app.run(host='127.0.0.1', port=5000)
8. 修改spider.py的返回结果,通过链式replace(),替换百度图标和“百度一下”
return res.replace('//www.baidu.com/img/baidu_jgylogo3.gif','static/images/google.png').replace('百度一下', 'Google')
附完整源码:
# -*- coding: utf-8 -*-
# @Time : 2018/3/19 12:46
# @Author : TanRong
# @Software: PyCharm
# @File : search.py
from flask import Flask
from flask import render_template
from spider import getBdMsg
from flask import request
# Flask(__name__).run()
app = Flask(__name__)
#app.route装饰器
@app.route('/')
def index():
return render_template('index.html')
@app.route('/s')
def search():
keyword = request.args.get('key')
text = getBdMsg(keyword)
return text
if __name__ == '__main__':
app.run()
search.py
# -*- coding: utf-8 -*-
# @Time : 2018/3/21 18:07
# @Author : TanRong
# @Software: PyCharm
# @File : spider.py
import requests
def getBdMsg(keyword):
# 必须加上请求头,这样才是浏览器请求,不然无返回结果
# F12 - NetWork - Requeset Headers - UserAgent
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"}
res = requests.get('https://www.baidu.com/s?wd={}'.format(keyword), headers = headers).text
return res.replace('//www.baidu.com/img/baidu_jgylogo3.gif','static/images/google.png').replace('百度一下','Google').replace('百度','Google') #链式replace()
if __name__ == '__main__':
getBdMsg('风景')
spider.py
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>仿百度搜索</title>
<style type="text/css">
.align-center{
position:fixed;left:30%;top:30%;margin-left:width/2;margin-top:height/2;
}
</style>
</head>
<body>
<form action="/s" method="get">
<div class="align-center">
<input type="search" name="key"> <input type="submit" value="搜索"><br>
</div>
</form>
</body>
</html>
index.html