• spark_hive_to_hbase


    The job below reads the Hive table sospdm.tdm_super_cust_status_init with Spark SQL and bulk-writes each row as an HBase Put into ns_sospdm:mds_offline_data (column family bean_json), using the old mapred TableOutputFormat API together with saveAsHadoopDataset:

    import java.io.InputStream
    import java.util.Properties

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.client.Put
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable
    import org.apache.hadoop.hbase.mapred.TableOutputFormat
    import org.apache.hadoop.hbase.util.Bytes
    import org.apache.hadoop.mapred.JobConf
    import org.apache.spark.SparkConf
    import org.apache.spark.sql.SparkSession
    import org.slf4j.LoggerFactory

    object Hive_ODS_PaidMember {

      private val logger = LoggerFactory.getLogger(Hive_ODS_PaidMember.getClass)

      // ZooKeeper connection settings are externalized in hbase.properties on the classpath.
      val prop = new Properties()
      val is: InputStream = this.getClass.getResourceAsStream("/hbase.properties")
      prop.load(is)

      val configuration = HBaseConfiguration.create()
      configuration.set("hbase.zookeeper.property.clientPort", prop.getProperty("hbase.clientPort"))
      configuration.set("hbase.zookeeper.quorum", prop.getProperty("hbase.quorum"))
      // configuration.set("hbase.master", "slave01-sit.cnsuning.com:60000")

      def main(args: Array[String]): Unit = {

        // Old mapred-style output format; saveAsHadoopDataset below expects a JobConf.
        val jobConf = new JobConf(configuration)
        jobConf.setOutputFormat(classOf[TableOutputFormat])
        jobConf.set(TableOutputFormat.OUTPUT_TABLE, "ns_sospdm:mds_offline_data")

        val sparkConf = new SparkConf().setAppName("HiveToHbase")
        sparkConf.set("spark.sql.hive.metastorePartitionPruning", "false")
        val querySql = "select * from sospdm.tdm_super_cust_status_init"
        val session = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()
        val resultDF = session.sql(querySql)

        val tuple = resultDF.rdd
          // Drop rows without a rowkey (replaces the project-specific CommonUtils.isEmpty helper).
          .filter(row => Option(row.getAs[String]("cust_num")).exists(_.trim.nonEmpty))
          .map(row => {
            val cust_num: String = row.getAs[String]("cust_num")
            // Note: the remaining fields are assumed non-null; Bytes.toBytes throws on null.
            val paid_type: String = row.getAs[String]("paid_type")
            val eff_date: String = row.getAs[String]("eff_date")
            val levl_change_time: String = row.getAs[String]("levl_change_time")

            // Reverse the customer number for the rowkey so sequential IDs spread
            // across regions instead of hot-spotting on a single region.
            val put = new Put(Bytes.toBytes(cust_num.reverse))
            put.addColumn(Bytes.toBytes("bean_json"), Bytes.toBytes("cust_num"), Bytes.toBytes(cust_num))
            put.addColumn(Bytes.toBytes("bean_json"), Bytes.toBytes("postPaidType"), Bytes.toBytes(paid_type))
            put.addColumn(Bytes.toBytes("bean_json"), Bytes.toBytes("postEffDate"), Bytes.toBytes(eff_date))
            put.addColumn(Bytes.toBytes("bean_json"), Bytes.toBytes("levelChgTs"), Bytes.toBytes(levl_change_time))

            (new ImmutableBytesWritable, put)
          })

        tuple.saveAsHadoopDataset(jobConf)
      }
    }
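
    Two runtime prerequisites are implied but not shown in the post: the target table with the bean_json column family must already exist, and an hbase.properties file exposing the two keys read above must be on the classpath. A minimal sketch, assuming placeholder ZooKeeper host names and the default client port 2181:

    # HBase shell: create the namespace and table the job writes to
    create_namespace 'ns_sospdm'
    create 'ns_sospdm:mds_offline_data', 'bean_json'

    # src/main/resources/hbase.properties -- keys match the prop.getProperty calls above
    hbase.quorum=zk01.example.com,zk02.example.com,zk03.example.com
    hbase.clientPort=2181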

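    To spot-check the load afterwards, a plain HBase client can fetch one row back. This read-back sketch is not from the original post; the customer number and connection settings are hypothetical, and it assumes the HBase 1.x+ client API:

    import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
    import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
    import org.apache.hadoop.hbase.util.Bytes

    object VerifyPaidMemberLoad {
      def main(args: Array[String]): Unit = {
        val conf = HBaseConfiguration.create()
        conf.set("hbase.zookeeper.quorum", "zk01.example.com")    // assumption: your quorum
        conf.set("hbase.zookeeper.property.clientPort", "2181")   // assumption: default port
        val conn = ConnectionFactory.createConnection(conf)
        try {
          val table = conn.getTable(TableName.valueOf("ns_sospdm:mds_offline_data"))
          // Rowkeys were written reversed, so reverse the customer number when looking one up.
          val get = new Get(Bytes.toBytes("1234567890".reverse))  // hypothetical cust_num
          val result = table.get(get)
          val paidType = Bytes.toString(
            result.getValue(Bytes.toBytes("bean_json"), Bytes.toBytes("postPaidType")))
          println(s"postPaidType = $paidType")
        } finally conn.close()
      }
    }
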
  • Original article: https://www.cnblogs.com/yin-fei/p/12055077.html