• spark_hive_to_hbase


    The job below reads the Hive table sospdm.tdm_super_cust_status_init with Spark SQL and bulk-writes each row as an HBase Put into ns_sospdm:mds_offline_data (column family bean_json), using the old mapred TableOutputFormat API together with saveAsHadoopDataset:

    import java.io.InputStream
    import java.util.Properties

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.client.Put
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable
    import org.apache.hadoop.hbase.mapred.TableOutputFormat
    import org.apache.hadoop.hbase.util.Bytes
    import org.apache.hadoop.mapred.JobConf
    import org.apache.spark.SparkConf
    import org.apache.spark.sql.SparkSession
    import org.slf4j.LoggerFactory

    object Hive_ODS_PaidMember {

      private val logger = LoggerFactory.getLogger(Hive_ODS_PaidMember.getClass)

      // ZooKeeper connection settings are externalized in hbase.properties on the classpath.
      val prop = new Properties()
      val is: InputStream = this.getClass.getResourceAsStream("/hbase.properties")
      prop.load(is)

      val configuration = HBaseConfiguration.create()
      configuration.set("hbase.zookeeper.property.clientPort", prop.getProperty("hbase.clientPort"))
      configuration.set("hbase.zookeeper.quorum", prop.getProperty("hbase.quorum"))
      // configuration.set("hbase.master", "slave01-sit.cnsuning.com:60000")

      def main(args: Array[String]): Unit = {

        // Old mapred-style output format; saveAsHadoopDataset below expects a JobConf.
        val jobConf = new JobConf(configuration)
        jobConf.setOutputFormat(classOf[TableOutputFormat])
        jobConf.set(TableOutputFormat.OUTPUT_TABLE, "ns_sospdm:mds_offline_data")

        val sparkConf = new SparkConf().setAppName("HiveToHbase")
        sparkConf.set("spark.sql.hive.metastorePartitionPruning", "false")
        val querySql = "select * from sospdm.tdm_super_cust_status_init"
        val session = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()
        val resultDF = session.sql(querySql)

        val tuple = resultDF.rdd
          // Drop rows without a rowkey (replaces the project-specific CommonUtils.isEmpty helper).
          .filter(row => Option(row.getAs[String]("cust_num")).exists(_.trim.nonEmpty))
          .map(row => {
            val cust_num: String = row.getAs[String]("cust_num")
            // Note: the remaining fields are assumed non-null; Bytes.toBytes throws on null.
            val paid_type: String = row.getAs[String]("paid_type")
            val eff_date: String = row.getAs[String]("eff_date")
            val levl_change_time: String = row.getAs[String]("levl_change_time")

            // Reverse the customer number for the rowkey so sequential IDs spread
            // across regions instead of hot-spotting on a single region.
            val put = new Put(Bytes.toBytes(cust_num.reverse))
            put.addColumn(Bytes.toBytes("bean_json"), Bytes.toBytes("cust_num"), Bytes.toBytes(cust_num))
            put.addColumn(Bytes.toBytes("bean_json"), Bytes.toBytes("postPaidType"), Bytes.toBytes(paid_type))
            put.addColumn(Bytes.toBytes("bean_json"), Bytes.toBytes("postEffDate"), Bytes.toBytes(eff_date))
            put.addColumn(Bytes.toBytes("bean_json"), Bytes.toBytes("levelChgTs"), Bytes.toBytes(levl_change_time))

            (new ImmutableBytesWritable, put)
          })

        tuple.saveAsHadoopDataset(jobConf)
      }
    }
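
    Two runtime prerequisites are implied but not shown in the post: the target table with the bean_json column family must already exist, and an hbase.properties file exposing the two keys read above must be on the classpath. A minimal sketch, assuming placeholder ZooKeeper host names and the default client port 2181:

    # HBase shell: create the namespace and table the job writes to
    create_namespace 'ns_sospdm'
    create 'ns_sospdm:mds_offline_data', 'bean_json'

    # src/main/resources/hbase.properties -- keys match the prop.getProperty calls above
    hbase.quorum=zk01.example.com,zk02.example.com,zk03.example.com
    hbase.clientPort=2181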

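    To spot-check the load afterwards, a plain HBase client can fetch one row back. This read-back sketch is not from the original post; the customer number and connection settings are hypothetical, and it assumes the HBase 1.x+ client API:

    import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
    import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
    import org.apache.hadoop.hbase.util.Bytes

    object VerifyPaidMemberLoad {
      def main(args: Array[String]): Unit = {
        val conf = HBaseConfiguration.create()
        conf.set("hbase.zookeeper.quorum", "zk01.example.com")    // assumption: your quorum
        conf.set("hbase.zookeeper.property.clientPort", "2181")   // assumption: default port
        val conn = ConnectionFactory.createConnection(conf)
        try {
          val table = conn.getTable(TableName.valueOf("ns_sospdm:mds_offline_data"))
          // Rowkeys were written reversed, so reverse the customer number when looking one up.
          val get = new Get(Bytes.toBytes("1234567890".reverse))  // hypothetical cust_num
          val result = table.get(get)
          val paidType = Bytes.toString(
            result.getValue(Bytes.toBytes("bean_json"), Bytes.toBytes("postPaidType")))
          println(s"postPaidType = $paidType")
        } finally conn.close()
      }
    }
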
  • Original article: https://www.cnblogs.com/yin-fei/p/12055077.html