脚本内容
// ==UserScript==
// @name 大众点评评论爬虫
// @namespace http://tampermonkey.net/
// @version 0.1
// @description crawl is great
// @author 陈祥安
// @include http://www.dianping.com/shop*
// @match http://www.dianping.com/ajax/json/shopDynamic/allReview*
// @require http://cdn.bootcss.com/jquery/1.11.2/jquery.js
// @grant GM_xmlhttpRequest
// ==/UserScript==
(function() {
/**
 * Evaluate an XPath expression and collect every matching node into an array.
 *
 * @param {string} xpath - XPath expression to evaluate.
 * @param {Node} [context] - Node the expression is evaluated against; when
 *     omitted, the page document is used.
 * @returns {Node[]} The matched nodes in iteration order (empty if none match).
 *     Any error raised during evaluation propagates to the caller.
 */
var $x = function (xpath, context) {
    // Use the document that owns the context node when there is one,
    // otherwise fall back to the page's own document.
    const ownerDoc = (context && context.ownerDocument) || window.document;
    const root = context || ownerDoc;
    const iterator = ownerDoc.evaluate(xpath, root, null, XPathResult.ANY_TYPE, null);

    const matches = [];
    // iterateNext() returns null once the result set is exhausted.
    for (let current = iterator.iterateNext(); current; current = iterator.iterateNext()) {
        matches.push(current);
    }
    return matches;
};
// Local endpoint that receives the scraped comments.
const server_url = 'http://127.0.0.1:9090/comment/';

// Scrape once the page has fully loaded, then upload the comment texts.
window.addEventListener('load', () => {
    // Dismiss the bonus pop-up if the page rendered one. A jQuery object is
    // always truthy, so the presence test has to look at its length.
    const closeBtn = $(".J-bonus-close");
    console.log("准备关闭", closeBtn);
    if (closeBtn.length > 0) {
        closeBtn.click();
    }

    // Each match is the <p class="desc"> holding one comment's text.
    const descNodes = $x("//ul[@class='comment-list J-list']/li[@class='comment-item']/div[@class='content']//p[@class='desc']");
    const dataList = descNodes.map((node) => {
        console.log(node);
        return { "data": node.innerText };
    });

    GM_xmlhttpRequest({
        method: "POST",
        url: server_url,
        // Declare the payload type so the receiver can treat the body as JSON.
        headers: { "Content-Type": "application/json" },
        data: JSON.stringify({ 'name': "爬虫", "dataList": dataList }),
        onload: function (response) {
            // Response handling goes here.
            console.log(response);
            console.log("dataList", dataList);
        },
        onerror: function (response) {
            // Surface network failures (e.g. the local server is not running)
            // instead of dropping them silently.
            console.error("comment upload failed", response);
        }
    });
});
})();
python代码
# @Author : cxa
# @File : server.py
# @Software: PyCharm
import json
from flask import Flask, request, render_template
# Flask application instance; the route and error-handler decorators in this file register on it.
app = Flask(__name__)
@app.route('/')
def index():
    """Serve the root URL with a single HTML heading naming the API."""
    landing_html = "<h1>大众点评API</h1>"
    return landing_html
@app.route('/comment/', methods=['GET', 'POST'])
def login():
    """Receive scraped comments as a JSON request body and echo them back.

    POST: the raw body is decoded as UTF-8, parsed as JSON, printed to
    stdout and returned as a JSON response. A body that is not valid UTF-8
    JSON yields HTTP 400 with a JSON error message.

    GET: returns a short JSON usage hint, so the view always produces a
    response (a view that returns None makes Flask fail the request).
    """
    # Imported locally because the module-level flask import does not
    # include jsonify.
    from flask import jsonify

    if request.method != 'POST':
        return jsonify({"message": "POST a JSON body to this endpoint"})

    raw_body = request.get_data()
    try:
        result = json.loads(raw_body.decode("utf-8"))
    except ValueError:
        # ValueError covers both UnicodeDecodeError and json.JSONDecodeError.
        return jsonify({"error": "request body must be valid UTF-8 JSON"}), 400

    print(result)
    # jsonify serialises lists as well as dicts, unlike returning the object directly.
    return jsonify(result)
@app.errorhandler(404)
def miss(e):
    """Handle 404 errors by rendering 404.html with a 404 status.

    The error argument ``e`` is accepted but not used.
    """
    not_found_page = render_template('404.html')
    return not_found_page, 404
@app.errorhandler(500)
def error(e):
    """Handle 500 errors by rendering 500.html with a 500 status.

    The error argument ``e`` is accepted but not used.
    """
    server_error_page = render_template('500.html')
    return server_error_page, 500
if __name__ == '__main__':
    # Bind to loopback only: debug=True enables the interactive debugger,
    # which can execute arbitrary code and must not be reachable from other
    # hosts. The userscript client posts to 127.0.0.1, so loopback suffices.
    app.run(host='127.0.0.1', port=9090, debug=True)