app.py
"""Flask front end for the paper-search site: HTML pages plus two JSON endpoints."""
from flask import Flask, jsonify, render_template, request, json

import mysqlUtil

app = Flask(__name__)

# Search-field label sent by the page -> column name handed to mysqlUtil.
# NOTE(review): '关键词' (keyword) is mapped to the abstract column, exactly as
# in the previous if/elif chain; confirm whether a dedicated keyword column
# was intended.
_SEARCH_COLUMNS = {
    '题目': 'title',
    '摘要': 'abstract',
    '作者': 'zuozhe',
    '关键词': 'abstract',
}


@app.route("/c1")
def hellv():
    """Render the ``view.html`` page."""
    return render_template('view.html')


@app.route("/text")
def hellt():
    """Render the ``text.html`` page."""
    return render_template('text.html')


@app.route("/findlunwen")
def hello():
    """Placeholder view for ``/findlunwen``.

    NOTE(review): this view returns ``None``, which Flask does not accept as a
    response, so requests to this URL fail. The template it was meant to
    render is not visible here, so the behaviour is deliberately left as-is.
    """
    return


@app.route("/")
def hellp():
    """Render the ``find.html`` page."""
    return render_template('find.html')


@app.route("/c2", methods=['POST', 'GET'])
def wordcloud():
    """Return the rows of ``mysqlUtil.select_key()`` as JSON.

    Returns:
        A JSON object with two parallel lists: ``keyword`` (first column of
        each row) and ``value`` (second column of each row).
    """
    keywords = []
    values = []
    rows = mysqlUtil.select_key()
    print(rows)
    for row in rows:
        keywords.append(row[0])  # keyword
        values.append(row[1])  # value
    return jsonify({"keyword": keywords, "value": values})


@app.route("/c3", methods=['POST', 'GET'])
def select_lunwen():
    """Search papers and return the matches as JSON.

    Query-string parameters:
        tiaojian: which field to search; one of the keys of ``_SEARCH_COLUMNS``.
        firinput: the search text, passed through to ``mysqlUtil`` unchanged.
        jingzhun: ``'精准'`` selects ``mysqlUtil.select_lunwenj``; any other
            value (or none) selects ``mysqlUtil.select_lunwenm``.
            NOTE(review): presumably exact vs. fuzzy matching; confirm in
            mysqlUtil.

    Returns:
        A JSON object with parallel lists ``title``, ``zuozhe``, ``time`` and
        ``lianjie``. The lists are empty when ``tiaojian`` is missing or not a
        recognised field (previously that case raised UnboundLocalError).
    """
    tiaojian = request.args.get("tiaojian")
    firinput = request.args.get("firinput")
    jingzhun = request.args.get("jingzhun")

    if jingzhun == '精准':
        search = mysqlUtil.select_lunwenj
    else:
        search = mysqlUtil.select_lunwenm

    column = _SEARCH_COLUMNS.get(tiaojian)
    # An unknown or absent field yields an empty result instead of a crash.
    rows = search(column, firinput) if column is not None else []

    titles = []
    links = []
    authors = []
    times = []
    for row in rows:
        titles.append(row[0])  # title
        links.append(row[1])  # link
        authors.append(row[3])  # zuozhe (author)
        times.append(row[4])  # time
    return jsonify({"title": titles, "zuozhe": authors, "time": times, "lianjie": links})


if __name__ == '__main__':
    app.run(debug=True, host='127.0.0.1', port=5000)
Keyword.py
# -*- coding: utf-8 -*-
"""Count word frequencies over stored abstracts and save the top words as keywords."""
import sys
sys.path.append('../')
import jieba
import jieba.analyse
import mysqlUtil
from collections import Counter
from optparse import OptionParser

# Words excluded from the frequency count. Defined at module level so that
# getKey also works when this module is imported rather than run as a script.
nolist = {
    'are', 'is', 'am', 'and', 'of', 'but', 'so', 'which', 'where', 'when',
    'how', 'what', 'that', 'who', 'whose', 'in', 'at', 'with', 'for', 'the',
    'a', 'an', 'to', 'on', 'we', 'We', 'this', 'by', 'from', 'our', 'as',
    'The', 'can', 'he', 'He', 'be', 'In',
}


def getKey(str, top_n=20):
    """Count words across the given rows and store the most frequent ones.

    Args:
        str: iterable of rows whose first element is the text to segment with
            ``jieba.lcut``. (The name shadows the builtin; it is kept because
            it is part of the existing signature.)
        top_n: how many of the most frequent words to pass to
            ``mysqlUtil.insert_key`` and print. Defaults to 20.

    Returns:
        A list of ``(word, count)`` tuples for every counted word, ordered by
        count from highest to lowest.
    """
    counts = Counter()
    for row in str:
        for word in jieba.lcut(row[0]):
            # Single-character tokens and stop words are not counted.
            if len(word) == 1 or word in nolist:
                continue
            counts[word] += 1

    # most_common() sorts by count, descending; ties keep first-seen order.
    items = counts.most_common()

    # Slicing tolerates fewer than top_n distinct words (indexing did not).
    for word, count in items[:top_n]:
        mysqlUtil.insert_key(word, count)
        print('{0:<5}{1:<5}'.format(word, count))
    return items


if __name__ == '__main__':
    res = mysqlUtil.select_ab()
    getKey(res)
lunwenSpideer.py
# -*- coding:utf-8 -*-
"""Crawl workshop papers from openaccess.thecvf.com and store them via ``Mysql``."""
import requests
import re
import json
import Mysql

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36"
}

BASE_URL = 'https://openaccess.thecvf.com'
# Seconds to wait for each response, so one stalled request cannot hang the crawl.
REQUEST_TIMEOUT = 30

# NOTE(review): the patterns in this file had evidently lost their backslashes
# (`w+`, `s+`, unescaped `[`...`]`), and the newline-stripping `replace` calls
# had become `replace(' ', '')`, which cannot coexist with patterns that
# contain spaces. They are restored here as `\w`, `\s`, `\[`, `\]` and '\n';
# confirm against the live pages.

# One conference entry on the top-level menu: (name, main path, workshops path).
_CONFERENCE_RE = re.compile(
    r'<dd>(.*?) \[<a href="(.*?)">Main Conference</a>\] '
    r'\[<a href="(.*?)/menu.*?">Workshops</a>\]</dd>'
)
# The <dl> list on a workshops menu page.
_MENU_RE = re.compile(r'<dl>(.*?)</dl>', re.DOTALL)
# One workshop entry inside that list: (page name, link text).
_WORKSHOP_RE = re.compile(
    r'<a href="/?(?:\w+/)?(\w+)">(.*?)</a><br><br>.*?</dd>', re.DOTALL
)
# Link to a single paper page on a workshop listing.
_PAPER_LINK_RE = re.compile(r'<dt class="ptitle"><br><a href="(.*?)">')
# Paper page: (pdf url, abstract, author, title, booktitle, month, year).
_PAPER_RE = re.compile(
    r'<meta name="citation_pdf_url" content="(.*?)">.*?'
    r'<div id="abstract">(.*?)</div>.*?'
    r'author\s+=\s+\{(.*?)\}.*?'
    r'title\s+=\s+\{(.*?)\}.*?'
    r'booktitle\s+=\s+\{(.*?)\}.*?'
    r'month\s+=\s+\{(.*?)\}.*?'
    r'year\s+=\s+\{(.*?)\}',
    re.DOTALL,
)


def _fetch(url):
    """Return the response text of *url*, sending the module-level headers."""
    return requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT).text


def getData():
    """Walk every conference's workshop pages and insert each paper found.

    For each paper page that matches the expected layout, one record
    ``[author, title, booktitle, 'month,year', abstract, pdf_url]`` is passed
    to ``Mysql.insert_item`` wrapped in a single-element list. Pages that do
    not match are skipped instead of aborting the crawl.

    Raises:
        requests.RequestException: if a request fails or times out.
    """
    index_html = _fetch(BASE_URL + '/menu').replace('\n', '').replace('<br>', '')
    conferences = _CONFERENCE_RE.findall(index_html)
    print("会议有"+str(len(conferences)))
    print(conferences)

    # Only the workshops path of each conference is crawled.
    for _name, _main_path, workshop_path in conferences:
        workshop_root = BASE_URL + workshop_path
        menu_url = workshop_root + '/menu'
        # '.py' is stripped from the page text before the hrefs are parsed.
        menu_html = _fetch(menu_url).replace('.py', '')
        print(menu_url)

        menus = _MENU_RE.findall(menu_html)
        if not menus:
            # No <dl> list on this menu page; nothing to crawl here.
            continue
        workshops = _WORKSHOP_RE.findall(menus[0])
        print(workshops)

        for page_name, _link_text in workshops:
            listing_url = workshop_root + '/' + page_name
            print(listing_url)

            for paper_path in _PAPER_LINK_RE.findall(_fetch(listing_url)):
                paper_url = BASE_URL + paper_path  # paper link
                print(paper_url)
                paper_html = _fetch(paper_url).replace('\n', '')
                found = _PAPER_RE.findall(paper_html)
                print(found)
                if not found:
                    # Page layout did not match; skip rather than IndexError.
                    continue

                pdf_url, abstract, author, title, booktitle, month, year = found[0]
                record = [
                    author,              # author
                    title,               # title
                    booktitle,           # book title
                    month + ',' + year,  # date
                    abstract,            # abstract
                    pdf_url,             # link
                ]
                Mysql.insert_item([record])


if __name__ == '__main__':
    getData()