• Spark sql读取数据库和ES数据进行处理代码


    读取数据库数据和ElasticSearch数据进行连接处理

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    
    import org.apache.log4j.Level;
    import org.apache.log4j.Logger;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.Function;
    import org.apache.spark.sql.DataFrame;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SQLContext;
    
    import com.dinpay.bdp.rcp.domain.FlowMain;
    import com.dinpay.bdp.rcp.util.CodisUtil;
    import com.dinpay.bdp.rcp.util.Constant;
    import com.google.gson.Gson;
    
    import redis.clients.jedis.Jedis;
    
    /**
     * 首页的数据,定时Job定时刷新
     */
    public class MainFlowProcedure{
         private static Logger logger = Logger.getLogger(MainFlowProcedure.class.getSimpleName());
    
         public static void main(String[] args) {
             
           //屏蔽日志
           Logger.getLogger("org.apache.spark").setLevel(Level.WARN);
           
           //配置SparkConf
           SparkConf conf = new SparkConf().setAppName("MainFlowProcedure").setMaster("local[2]");
           JavaSparkContext sc =new JavaSparkContext(conf);
           SQLContext sqlContext = new SQLContext(sc);
           
           registerTable(sqlContext,"t_sys_attention_library");
           registerTable(sqlContext,"t_sys_big_order");
           registerTable(sqlContext,"t_sys_doubtful_order");
           registerTable(sqlContext,"t_rc_event");
           registerESTable(sqlContext, "t_order");
    
           sqlContext.sql("select merchant_id,count(order_id) as txcnt ,sum(tx_money) as txamount from t_order group by merchant_id")
                        .registerTempTable("t_order_merchant");
        
           sqlContext.sql("select t2.merchant_id,count(t1.order_id) as bigcnt from t_sys_big_order t1 join t_order t2 on t1.order_id = t2.order_id group by t2.merchant_id")
                        .registerTempTable("t_big_merchant");

    sqlContext.sql("select t2.merchant_id,count(t1.order_id) as dbtcnt from t_sys_doubtful_order t1 join t_order t2 on t1.order_id = t2.order_id group by t2.merchant_id") .registerTempTable("t_doubt_merchant"); sqlContext.sql("select merchant_id,count(*) as rccnt from t_rc_event group by merchant_id") .registerTempTable("t_rc_merchant"); sqlContext.sql("select t1.merchant_id,t2.txcnt,t3.dbtcnt,t4.bigcnt,t2.txamount,t5.rccnt from t_sys_attention_library t1 left join t_order_merchant t2 on t1.merchant_id = t2.merchant_id left join t_doubt_merchant t3 on t1.merchant_id = t3.merchant_id left join t_big_merchant t4 on t1.merchant_id = t4.merchant_id left join t_rc_merchant t5 on t1.merchant_id = t5.merchant_id") .registerTempTable("t_attention"); //生成结果集 DataFrame resultDF =sqlContext.sql("select t.merchant_id,t.txcnt,t.dbtcnt,t.bigcnt,t.txamount,t.rccnt from t_attention t"); List<FlowMain> flowMains = resultDF.javaRDD().map(new Function<Row,FlowMain>(){public FlowMain call(Row row){
             FlowMain flowMain = new FlowMain(); flowMain.setMerchantId(row.getString(
    0)); flowMain.setTxCnt(row.isNullAt(1)?0:row.getInt(1)); flowMain.setSuspectCnt(row.isNullAt(2)?0:row.getInt(2)); flowMain.setBigAmountCnt(row.isNullAt(3)?0:row.getInt(3)); flowMain.setTxAmount(row.isNullAt(4)?0.0:row.getDouble(4)); flowMain.setTxRate("偏高"); flowMain.setRcEventCnt(row.isNullAt(5)?0:row.getInt(5)); return flowMain; } }).collect(); Gson gson = new Gson(); String res = gson.toJson(flowMains); //连接codis进行操作,每次将新生成的数据,放到对应的key中 Jedis jedis = CodisUtil.getJedis() ; jedis.set("mainFlow", res); logger.info("插入到Codis数据完成!!!"); sc.stop(); } //获取数据库的表注册为临时表 private static void registerTable(SQLContext sqlContext,String dbtable){ Map<String,String> mcOptions =new HashMap<String, String>(); mcOptions.put("url", Constant.URL); mcOptions.put("driver", Constant.DRIVER); mcOptions.put("dbtable", dbtable); mcOptions.put("user", Constant.USER); mcOptions.put("password", Constant.PASSWD); DataFrame jdbcDF = sqlContext.read().format("jdbc").options(mcOptions).load(); jdbcDF.registerTempTable(dbtable); } //获取ElasticSearch中的索引注册为表 private static void registerESTable(SQLContext sqlContext,String index){ Map<String,String> esOptions =new HashMap<String, String>(); esOptions.put("es.nodes", Constant.ESNODES); esOptions.put("es.port", Constant.ESPORT); esOptions.put("es.index.auto.create", "true"); DataFrame OrderDF = sqlContext.read().format("org.elasticsearch.spark.sql") .options(esOptions) .load(index+"/"+index); OrderDF.registerTempTable(index); } }
  • 相关阅读:
    idea 快捷键
    vue.js
    破解idea
    如何进行反向迭代以及如何实现反向迭代?
    如何使用生成器函数实现可迭代对象?
    从一个实际小例子来看python中迭代器的应用
    MySql中常用的内置函数
    linux服务器重启oracle服务。
    oracle里面清除服务器上所有的oracle服务。
    刷机后的环境变量
  • 原文地址:https://www.cnblogs.com/atomicbomb/p/6678531.html
Copyright © 2020-2023  润新知