注意,整个任务的流程如下:
* Task执行过程:环境配置---> 获取输入数据 ---> map操作(数据打标签,存入hbase)---> groupby(分组)
---> reduce(根据标签聚合) --->sink(存入mongodb)
1、Task任务
package com.youfan.task;

import com.youfan.entity.YearBase;
import com.youfan.map.YearBaseMap;
import com.youfan.reduce.YearBaseReduce;
import com.youfan.util.MongoUtils;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.utils.ParameterTool;
import org.bson.Document;

import java.util.List;

/**
 * Batch job that counts users per "year base" label and accumulates the counts in MongoDB.
 *
 * <p>Pipeline: configure environment -> read input text -> map (label each record, store the
 * label in HBase) -> groupBy("groupfield") -> reduce (sum counts per label) -> write the totals
 * to the {@code yearbasestatics} collection of the {@code portrait} database.
 *
 * <p>Expected program argument: {@code --input <path>}.
 */
public class YearBaseTask {

    private static final String COLLECTION = "yearbasestatics";
    private static final String DATABASE = "portrait";

    /**
     * Runs the job.
     *
     * @param args command-line arguments parsed by {@link ParameterTool}; {@code input} is the
     *             path of the text file to read
     */
    public static void main(String[] args) {
        final ParameterTool params = ParameterTool.fromArgs(args);

        // set up the execution environment
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // make parameters available in the web interface
        env.getConfig().setGlobalJobParameters(params);

        // get input data
        DataSet<String> text = env.readTextFile(params.get("input"));
        DataSet<YearBase> mapResult = text.map(new YearBaseMap());
        DataSet<YearBase> reduceResult =
                mapResult.groupBy("groupfield").reduce(new YearBaseReduce());

        try {
            // collect() itself triggers execution of the job and returns every reduced object.
            // No env.execute() call may follow: with no new sink defined, Flink rejects it with
            // "No new data sinks have been defined since the last execution".
            List<YearBase> resultList = reduceResult.collect();
            for (YearBase yearBase : resultList) {
                String yeartype = yearBase.getYeartype();
                Long count = yearBase.getCount();

                Document doc = MongoUtils.findoneby(COLLECTION, DATABASE, yeartype);
                if (doc == null) {
                    // first time this label is seen: create a fresh statistics document
                    doc = new Document();
                    doc.put("info", yeartype);
                    doc.put("count", count);
                } else {
                    // label already present: add this run's count to the stored total;
                    // a document without a "count" field is treated as zero
                    Long previous = doc.getLong("count");
                    long total = (previous == null ? 0L : previous) + count;
                    doc.put("count", total);
                }
                MongoUtils.saveorupdatemongo(COLLECTION, DATABASE, doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
2、map任务
package com.youfan.map; import com.yangwj.entity.YearBase; import com.yangwj.util.DateUtils; import com.yangwj.util.HbaseUtils; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.functions.MapFunction; /** * */ public class YearBaseMap implements MapFunction<String, YearBase>{ @Override public YearBase map(String s) throws Exception { if(StringUtils.isBlank(s)){ return null; } String[] userinfos = s.split(","); String userid = userinfos[0]; String username = userinfos[1]; String sex = userinfos[2]; String telphone = userinfos[3]; String email = userinfos[4]; String age = userinfos[5]; String registerTime = userinfos[6]; String usetype = userinfos[7];//'终端类型:0、pc端;1、移动端;2、小程序端' String yearbasetype = DateUtils.getYearbasebyAge(age);//打标签 String tablename = "userflaginfo"; String rowkey = userid; String famliyname = "baseinfo"; String colum = "yearbase";//年代 HbaseUtils.putdata(tablename,rowkey,famliyname,colum,yearbasetype);//打标签,存入Hbase HbaseUtils.putdata(tablename,rowkey,famliyname,"age",age); YearBase yearBase = new YearBase(); String groupfield = "yearbase=="+yearbasetype;//用于reduce分组 yearBase.setYeartype(yearbasetype); yearBase.setCount(1l); yearBase.setGroupfield(groupfield); return yearBase; } }
3、reduce任务
package com.yangwj.reduce; import com.yangwj.entity.YearBase; import org.apache.flink.api.common.functions.ReduceFunction; /** * */ public class YearBaseReduce implements ReduceFunction<YearBase>{ @Override public YearBase reduce(YearBase yearBase, YearBase t1) throws Exception { String yeartype = yearBase.getYeartype(); Long count1 = yearBase.getCount(); Long count2 = t1.getCount(); YearBase finalyearBase = new YearBase(); finalyearBase.setYeartype(yeartype); finalyearBase.setCount(count1+count2); return finalyearBase; } }
4、HbaseUtils
package com.youfan.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.Map;
import java.util.Set;

/**
 * Static helpers for reading and writing single rows in HBase over one shared connection.
 *
 * <p>The tables are expected to exist already, e.g.
 * {@code create "userflaginfo","baseinfo"} and {@code create "tfidfdata","baseinfo"}.
 *
 * Created by li on 2019/1/5.
 */
public class HbaseUtils {
    private static Admin admin = null;
    private static Connection conn = null;

    static {
        // build the HBase configuration
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.rootdir", "hdfs://192.168.80.134:9000/hbase");
        // must be set when running from an IDE, otherwise the cluster cannot be located
        conf.set("hbase.zookeeper.quorum", "192.168.80.134");
        conf.set("hbase.client.scanner.timeout.period", "600000");
        conf.set("hbase.rpc.timeout", "600000");
        try {
            conn = ConnectionFactory.createConnection(conf);
            // obtain the admin handle
            admin = conn.getAdmin();
        } catch (IOException e) {
            // NOTE(review): on failure conn stays null and every later call fails with a
            // NullPointerException; consider failing fast here instead.
            e.printStackTrace();
        }
    }

    /**
     * Writes several columns of one row in a single put.
     *
     * @param tablename  table to write to
     * @param rowkey     row key
     * @param famliyname column family
     * @param datamap    column qualifier to value; {@code null} adds no columns, and a
     *                   {@code null} value is stored as the text {@code "null"}
     * @throws Exception if the table cannot be opened or the put fails
     */
    public static void put(String tablename, String rowkey, String famliyname,
                           Map<String, String> datamap) throws Exception {
        // try-with-resources releases the table even when the put fails
        try (Table table = conn.getTable(TableName.valueOf(tablename))) {
            Put put = new Put(Bytes.toBytes(rowkey));
            if (datamap != null) {
                byte[] family = Bytes.toBytes(famliyname);
                for (Map.Entry<String, String> entry : datamap.entrySet()) {
                    put.addColumn(family,
                            Bytes.toBytes(entry.getKey()),
                            Bytes.toBytes(entry.getValue() + ""));
                }
            }
            table.put(put);
        }
        System.out.println("ok");
    }

    /**
     * Reads one cell as text.
     *
     * @param tablename  table to read from
     * @param rowkey     row key
     * @param famliyname column family
     * @param colum      column qualifier
     * @return the cell value decoded as UTF-8, or {@code null} when the cell does not exist
     * @throws Exception if the table cannot be opened or the get fails
     */
    public static String getdata(String tablename, String rowkey, String famliyname,
                                 String colum) throws Exception {
        try (Table table = conn.getTable(TableName.valueOf(tablename))) {
            Result result = table.get(new Get(Bytes.toBytes(rowkey)));
            byte[] value = result.getValue(Bytes.toBytes(famliyname), Bytes.toBytes(colum));
            // Bytes.toString decodes as UTF-8, matching the encoding used when writing
            return value == null ? null : Bytes.toString(value);
        }
    }

    /**
     * Writes one cell.
     *
     * @param tablename  table to write to
     * @param rowkey     row key
     * @param famliyname column family
     * @param colum      column qualifier
     * @param data       value to store, encoded as UTF-8
     * @throws Exception if the table cannot be opened or the put fails
     */
    public static void putdata(String tablename, String rowkey, String famliyname,
                               String colum, String data) throws Exception {
        try (Table table = conn.getTable(TableName.valueOf(tablename))) {
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes(famliyname), Bytes.toBytes(colum), Bytes.toBytes(data));
            table.put(put);
        }
    }
}
5、MongoUtils
package com.youfan.util; import com.alibaba.fastjson.JSONObject; import com.mongodb.MongoClient; import com.mongodb.client.FindIterable; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCursor; import com.mongodb.client.MongoDatabase; import org.bson.Document; import org.bson.types.ObjectId; /** * */ public class MongoUtils { private static MongoClient mongoClient = new MongoClient("192.168.80.134",27017); public static Document findoneby(String tablename, String database,String yearbasetype){ MongoDatabase mongoDatabase = mongoClient.getDatabase(database); MongoCollection mongoCollection = mongoDatabase.getCollection(tablename); Document doc = new Document(); doc.put("info", yearbasetype); FindIterable<Document> itrer = mongoCollection.find(doc); MongoCursor<Document> mongocursor = itrer.iterator(); if(mongocursor.hasNext()){ return mongocursor.next(); }else{ return null; } } ` ` public static void saveorupdatemongo(String tablename,String database,Document doc) { MongoDatabase mongoDatabase = mongoClient.getDatabase(database); MongoCollection<Document> mongocollection = mongoDatabase.getCollection(tablename); if(!doc.containsKey("_id")){ ObjectId objectid = new ObjectId(); doc.put("_id", objectid); mongocollection.insertOne(doc); return; } Document matchDocument = new Document(); String objectid = doc.get("_id").toString(); matchDocument.put("_id", new ObjectId(objectid)); FindIterable<Document> findIterable = mongocollection.find(matchDocument); if(findIterable.iterator().hasNext()){ mongocollection.updateOne(matchDocument, new Document("$set",doc)); try { System.out.println("come into saveorupdatemongo ---- update---"+ JSONObject.toJSONString(doc)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }else{ mongocollection.insertOne(doc); try { System.out.println("come into saveorupdatemongo ---- insert---"+JSONObject.toJSONString(doc)); }catch (Exception e) { // TODO Auto-generated catch block 
e.printStackTrace(); } } } }