package com.originalityTest; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.List; import java.util.Set; import java.util.concurrent.ArrayBlockingQueue; import org.springframework.data.mongodb.core.MongoTemplate; import org.springframework.data.mongodb.core.query.Query; import org.yqm.nlp.cn.seg.ISegTagger; import org.yqm.nlp.cn.seg.impl.CharNgramSegTagger; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.mongodb.BasicDBObject; import com.mongodb.DB; import com.mongodb.DBCollection; import com.mongodb.DBCursor; import com.mongodb.DBObject; import com.mongodb.MongoClient; import com.mongodb.MongoClientOptions; import com.mongodb.ServerAddress; import com.originalityTest.Test.Consumer; import com.originalityTest.Test.Producer; import redis.clients.jedis.Jedis; import redis.clients.jedis.JedisPool; import redis.clients.jedis.JedisPoolConfig; import us.codecraft.background.entity.KeywordDetailed; import us.codecraft.background.solr.SolrService; import us.codecraft.background.solr.VSMTextSimilarity; import us.codecraft.webmagic.main.CollectInterface; import us.codecraft.webmagic.main.testMain; import us.codecraft.webmagic.model.samples.iask.IaskQuestionModel; import us.codecraft.webmagic.utils.HttpUtils; import us.codecraft.webmagic.utils.MongoUtils; /** * *----------------------------------------------------------------------------- * <br>Copyright (c) 2018 深圳问我时代科技有限公司 * * <p>跑360采集数据跑SEO质量得分 </p> * * @project name : webmagic-samples * @package name : com.originalityTest * @file name : RunJob.java * @author : flm * @date : 2018年10月29日 <br> * *----------------------------------------------------------------------------- */ public class RunJob { protected static String host = "192.168.1.90"; //192.168.9.40:30000 protected static int port = 30000; protected static String dbname = "5118baiduzhidao"; protected static DB mongoDB = null; protected static DB mongoDBURL = null; protected static boolean isTestFlag = false; protected static int initDate = 1000*60*10; private int queueSize = 10000000; private ArrayBlockingQueue<BasicDBObject> queue = new ArrayBlockingQueue<BasicDBObject>(queueSize); static { MongoClientOptions.Builder buide = new MongoClientOptions.Builder(); buide.connectionsPerHost(100);// 与目标数据库可以建立的最大链接数 buide.connectTimeout(1000 * 60 * 20);// 与数据库建立链接的超时时间 buide.maxWaitTime(100 * 60 * 5);// 一个线程成功获取到一个可用数据库之前的最大等待时间 buide.threadsAllowedToBlockForConnectionMultiplier(100); buide.maxConnectionIdleTime(0); buide.maxConnectionLifeTime(0); buide.socketTimeout(0); buide.socketKeepAlive(true); MongoClientOptions myOptions = buide.build(); try { MongoClient mongoClient = new MongoClient(new ServerAddress(host, port), myOptions); mongoDB = mongoClient.getDB(dbname); mongoDBURL = mongoClient.getDB("seo_keyword"); } catch (UnknownHostException e) { e.printStackTrace(); System.exit(0); } } /** * 手动执行方法 * @param args * @throws Exception */ public static void main(String[] args) throws Exception { RunJob runJob = new RunJob(); Producer producer = runJob.new Producer(); Consumer consumer1 = runJob.new Consumer(1); Consumer consumer2 = runJob.new Consumer(2); Consumer consumer3 = runJob.new Consumer(3); Consumer consumer4 = runJob.new Consumer(4); Consumer consumer5 = runJob.new Consumer(5); Consumer consumer6 = runJob.new Consumer(6); Consumer consumer7 = runJob.new Consumer(7); Consumer consumer8 = runJob.new Consumer(8); Consumer consumer9 = runJob.new Consumer(9); Consumer consumer10 = runJob.new Consumer(10); // 生产数据 producer.start(); // 跑数据原创度 consumer1.start(); consumer2.start(); consumer3.start(); consumer4.start(); consumer5.start(); consumer6.start(); consumer7.start(); consumer8.start(); consumer9.start(); consumer10.start(); } class Consumer extends Thread{ int i; public Consumer(int i){ this.i = i; } @Override public void run() { consume(); } private void consume() { while(true){ try { System.out.println("队列获取 队列i:"+i); DBCollection collQuestion = mongoDB.getCollection("soQA"); DBCollection collection = mongoDBURL.getCollection("domain"); BasicDBObject d = queue.take(); List<DBObject> answers= (List<DBObject>) d.get("answers"); String questionTxt = d.getString("title") + d.getString("quest"); String questionTitle = ""; if(d.getString("title")!=null&&d.getString("title")!=""){ questionTitle = d.getString("title"); }else{ questionTitle = d.getString("quest"); } float score = 0F; float answerLenOriginality = OriginalityUtitls.getAnswerLenOriginality(answers); float answerSizeOriginality = OriginalityUtitls.getAnswerrSizeOriginality(answers); float goodOriginality = OriginalityUtitls.getGoodOriginality(answers); float questionOriginality = OriginalityUtitls.getQuestionOriginality(questionTxt, answers); float titleOriginality = OriginalityUtitls.getTitleOriginality(questionTitle); float wenwoOriginality = OriginalityUtitls.getWenwoOriginality(questionTitle, collection); score += Float.valueOf(questionOriginality*0.3+""); score += Float.valueOf(answerLenOriginality*0.2+""); score += Float.valueOf(answerSizeOriginality*0.1+""); score += Float.valueOf(goodOriginality*0.05+""); score += Float.valueOf(titleOriginality*0.2+""); score += Float.valueOf(titleOriginality*0.2+""); score += Float.valueOf(wenwoOriginality*0.1+""); System.out.println("score :"+score); d.put("originality", score); d.put("run", 0); collQuestion.save(d); } catch (InterruptedException e) { e.printStackTrace(); } } } } class Producer extends Thread{ @Override public void run() { produce(); } private void produce() { try { DBCollection collQuestion = mongoDB.getCollection("soQA"); DBObject idQuery = new BasicDBObject(); idQuery.put("state",1); DBCursor lists = collQuestion.find(idQuery); lists.addOption(com.mongodb.Bytes.QUERYOPTION_NOTIMEOUT); int i = 0; while(lists.hasNext()){ BasicDBObject d = (BasicDBObject)lists.next(); queue.put(d); System.out.println((++i)+"条记录 ,向队列取中插入一个元素,队列剩余空间:"+(queueSize-queue.size())); try { Thread.sleep(500); // 控制生产速度,防止队列满 } catch (Exception e) { System.err.println("Thread.sleep....."); } } } catch (InterruptedException e) { e.printStackTrace(); } } } }