• es


    package es
    
    import java.io.InputStream
    import java.text.SimpleDateFormat
    import java.util.{Calendar, Date, Properties}
    
    import org.elasticsearch.spark.rdd.EsSpark
    import org.apache.spark.SparkConf
    import org.apache.spark.sql.SparkSession
    import org.slf4j.LoggerFactory
    
    /**
      * Spark batch job that reads `sospdm.tdm_wbank_opts_t_goods_info_ed` through Hive
      * and writes every row as a document to the Elasticsearch resource
      * `idx_shop_goods_addcart/idx_shop_goods_addcart`.
      *
      * The Elasticsearch host is read from the classpath resource `/elastic.properties`
      * under the key named by [[ENVIRONMENT_SETING]]. Each document id is the
      * concatenation of `shop_id`, `gds_cd` and `gds_add_time`.
      */
    object ShoppingcartMarketToEsD {

      //  private val log = LoggerFactory.getLogger(ShoppingcartMarketToEsD.getClass)

      val prop = new Properties()
      val is: InputStream = this.getClass().getResourceAsStream("/elastic.properties")
      // getResourceAsStream returns null when the resource is absent; fail with a
      // readable message instead of a NullPointerException inside Properties.load.
      if (is == null) {
        throw new IllegalStateException("Classpath resource /elastic.properties was not found")
      }
      // Always release the stream, even when loading fails.
      try prop.load(is) finally is.close()
      val ENVIRONMENT_SETING = "es_host_prd"
      val host = prop.getProperty(ENVIRONMENT_SETING)

      /** Elasticsearch resource ("index/type") the documents are written to. */
      private val EsResource = "idx_shop_goods_addcart/idx_shop_goods_addcart"

      /** Columns of the query result that are copied into each Elasticsearch document. */
      private val DocumentFields: Seq[String] = Seq(
        "pid", "shop_id", "gds_cd", "gds_nm", "gds_add_num", "gds_add_time",
        "gds_price", "expect_tran_price", "l4_gds_grp_cd", "l4_gds_grp_nm",
        "category_cd", "category_nm", "brand_cd", "brand_nm",
        "create_user", "update_user", "create_time",
        // update_time was selected and read but never added to the document before.
        "update_time",
        "@timestamp"
      )

      /** Columns whose string values are concatenated, in this order, to form the document id. */
      private val IdFields: Seq[String] = Seq("shop_id", "gds_cd", "gds_add_time")

      /**
        * Formats the current instant in UTC as `yyyy-MM-ddTHH:mm:ss.SSS+0000`.
        *
        * The formatter itself is switched to UTC, so the result does not depend on the
        * JVM default time zone or on daylight-saving transitions. A new formatter is
        * created per call because `SimpleDateFormat` is not thread-safe.
        */
      private def currentUtcTimestamp(): String = {
        val formatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS")
        formatter.setTimeZone(java.util.TimeZone.getTimeZone("UTC"))
        formatter.format(new Date()) + "+0000"
      }

      /**
        * Converts one result row into an `(id, document)` pair for `saveToEsWithMeta`.
        *
        * Values are read untyped and rendered with `toString`, so the conversion does
        * not depend on a column being String-typed. Columns holding SQL NULL are left
        * out of the document instead of raising a NullPointerException.
        *
        * @param row a row containing every column listed in `DocumentFields`
        * @return the document id and the field-name -> string-value map
        * @throws IllegalArgumentException if one of the id columns is NULL
        */
      private def toDocument(row: org.apache.spark.sql.Row): (String, Map[String, String]) = {
        val document: Map[String, String] = DocumentFields.flatMap { name =>
          Option(row.get(row.fieldIndex(name))).map(value => name -> value.toString)
        }.toMap

        // NOTE(review): the id parts are joined without a separator (unchanged from the
        // previous behavior so existing document ids stay stable).
        val id = IdFields.map { name =>
          document.getOrElse(
            name,
            throw new IllegalArgumentException(
              s"Column '$name' is NULL but is required to build the document id"))
        }.mkString

        (id, document)
      }

      /**
        * Job entry point: configures Spark for Elasticsearch, runs the Hive query and
        * saves the result to Elasticsearch.
        *
        * @param args command-line arguments (not used)
        */
      def main(args: Array[String]): Unit = {
        if (host == null) {
          throw new IllegalStateException(
            s"Property '$ENVIRONMENT_SETING' is missing from /elastic.properties")
        }

        val sparkConf = new SparkConf().setAppName("ShoppingcartMarketToEsD")
        //    sparkConf.set("spark.sql.hive.metastorePartitionPruning", "false")
        sparkConf.set("es.nodes", host)
        sparkConf.set("es.nodes.wan.only", "true")
        //    sparkConf.set("es.port", "9200")
        //    sparkConf.set("es.index.auto.create", "true")
        //    sparkConf.set("es.batch.size.entries", "5000")
        //    sparkConf.set("es.write.operation", "upsert")

        val session = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()
        try {
          session.sql("use sospdm")
          // SQL function that stamps each row with the current UTC time.
          session.udf.register("get_utc_time", () => currentUtcTimestamp())

          val querySql = "select pid,shop_id,gds_cd,gds_nm,gds_add_num,gds_add_time,gds_price,expect_tran_price,l4_gds_grp_cd,l4_gds_grp_nm,category_cd,category_nm,brand_cd,brand_nm,'null' as create_user,'null' as update_user,create_time,update_time,get_utc_time() as `@timestamp` from sospdm.tdm_wbank_opts_t_goods_info_ed"
          val resultDF = session.sql(querySql)
          // Preview rows only when the selected environment key is not a "prd" one.
          if (!ENVIRONMENT_SETING.contains("prd")) {
            resultDF.show(10)
          }

          val documents = resultDF.rdd.map(row => toDocument(row))
          EsSpark.saveToEsWithMeta(documents, EsResource)
        } finally {
          // Release cluster resources whether or not the job succeeded.
          session.stop()
        }
      }

    }
  • 相关阅读:
    .net经典笔试题
    asp.net面试题
    Java 面 试 题
    .net面试题集
    J2EE面试题集锦
    linux下查看硬件信息
    linux下软件的安装[转]
    在Linux系统下优化Oracle具体步骤
    制作Linux的优盘启动盘
    两端对齐,图片文字列表
  • 原文地址:https://www.cnblogs.com/yin-fei/p/10879835.html
Copyright © 2020-2023  润新知