• Spark 读取HBase数据


    Spark 1.6.2 读取 HBase 1.2.3,需要在 classpath 中加入以下 jar 包:

    //hbase-common-1.2.3.jar
    //hbase-protocol-1.2.3.jar
    //hbase-server-1.2.3.jar
    //htrace-core-3.1.0-incubating.jar
    //metrics-core-2.2.0.jar

      // Reads the HBase table "knowledge" into a Spark DataFrame, registers it as the
      // temporary table "user_knowledge" and prints every row returned by a SQL query.
      // Only the application name is configured here; no master URL is set in this code.
      val sparkConf = new SparkConf().setAppName("User")

      // Create the Spark context.
      val sc = new SparkContext(sparkConf)

      // Everything that uses the context runs inside try/finally so the context is
      // stopped even when the HBase read or the SQL query throws.
      try {
        val sqlContext = new SQLContext(sc)
        // Brings the implicit RDD-to-DataFrame conversion (toDF) into scope.
        import sqlContext.implicits._

        // Create the HBase configuration: ZooKeeper quorum hosts and client port.
        val hBaseConf = HBaseConfiguration.create()
        hBaseConf.set("hbase.zookeeper.quorum", "192.168.1.1,192.168.1.2,192.168.1.3")
        hBaseConf.set("hbase.zookeeper.property.clientPort", "2182")
        // Name of the table to read.
        hBaseConf.set(TableInputFormat.INPUT_TABLE, "knowledge")

        // Read HBase through newAPIHadoopRDD; each element is an
        // (ImmutableBytesWritable, Result) pair.
        val hbaseRDD = sc.newAPIHadoopRDD(
          hBaseConf,
          classOf[TableInputFormat],
          classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
          classOf[org.apache.hadoop.hbase.client.Result])

        // Keep only the Result half of each pair.
        val resRDD = hbaseRDD.map { case (_, result) => result }

        // Column family name, encoded once here rather than four times per row
        // inside the mapping closure below.
        val family = Bytes.toBytes("behavior")

        // Map each Result to (row key, four "behavior" columns) as strings and
        // convert the RDD of tuples into a DataFrame with named columns.
        val user_knowledge = resRDD.map { r =>
          (Bytes.toString(r.getRow),
            Bytes.toString(r.getValue(family, Bytes.toBytes("reg_id"))),
            Bytes.toString(r.getValue(family, Bytes.toBytes("create_user_id"))),
            Bytes.toString(r.getValue(family, Bytes.toBytes("knowledge_id"))),
            Bytes.toString(r.getValue(family, Bytes.toBytes("create_time"))))
        }.toDF("row", "reg_id", "create_user_id", "knowledge_id", "create_time")

        user_knowledge.registerTempTable("user_knowledge")

        // Test query against the registered temporary table.
        val df2 = sqlContext.sql("SELECT * FROM user_knowledge")

        // Print the rows that were read. collect() materializes the whole result
        // in the driver, so this is only suitable for small tables.
        df2.collect().foreach(println)
      } finally {
        sc.stop()
      }
  • 相关阅读:
    Nexus3.0私服搭建
    JavaScript
    Spring基础
    Hibernate注解
    HTML5
    Apache Tomcat
    Java安装(Ubuntu)
    C++ 日期 & 时间
    C++ 引用
    C++ 指针
  • 原文地址:https://www.cnblogs.com/fesh/p/5996656.html
Copyright © 2020-2023  润新知