HBase Operations
hadoop@dblab-VirtualBox:~$ start-all.sh
hadoop@dblab-VirtualBox:~$ jps
hadoop@dblab-VirtualBox:~$ start-hbase.sh
hadoop@dblab-VirtualBox:~$ hbase shell
hbase(main):001:0> list
hbase(main):002:0> disable 'student'
hbase(main):003:0> drop 'student'
hbase(main):004:0> list
hbase(main):005:0> create 'student','info'
hbase(main):006:0> list
hbase(main):007:0> put 'student','1','info:name','zhangsan'
hbase(main):008:0> put 'student','1','info:gender','Female'
hbase(main):009:0> put 'student','1','info:age','23'
hbase(main):010:0> put 'student','2','info:name','lisi'
hbase(main):011:0> put 'student','2','info:gender','Male'
hbase(main):012:0> put 'student','2','info:age','24'
hbase(main):013:0> scan 'student'
hbase(main):014:0> get 'student','1'
hbase(main):015:0> get 'student','1','info:name'
hbase(main):016:0> exit
hadoop@dblab-VirtualBox:~$
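The same table can also be prepared from Scala with the HBase client API instead of the shell. The following is only a minimal sketch, assuming the hbase-client 1.1.5 dependency from the pom.xml below and a running local HBase; the object name CreateStudentTable is made up for illustration.

package dblab.SparkHbaseDemo

import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.ConnectionFactory

object CreateStudentTable {
  def main(args: Array[String]): Unit = {
    // Reads hbase-site.xml from the classpath; assumes HBase is already running
    val conf = HBaseConfiguration.create()
    val conn = ConnectionFactory.createConnection(conf)
    val admin = conn.getAdmin
    val tableName = TableName.valueOf("student")
    // Drop the table first if it already exists, mirroring disable/drop in the shell
    if (admin.tableExists(tableName)) {
      admin.disableTable(tableName)
      admin.deleteTable(tableName)
    }
    // Create the table with a single column family "info"
    val desc = new HTableDescriptor(tableName)
    desc.addFamily(new HColumnDescriptor("info"))
    admin.createTable(desc)
    admin.close()
    conn.close()
  }
}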
SparkOperateHBase.scala
package dblab.SparkHbaseDemo

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase._
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf

object SparkOperateHBase {
  def main(args: Array[String]) {
    val conf = HBaseConfiguration.create()
    val sc = new SparkContext(new SparkConf().setAppName("SparkOperateHBase").setMaster("local"))
    // Set the name of the table to query
    conf.set(TableInputFormat.INPUT_TABLE, "student")
    val stuRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])
    val count = stuRDD.count()
    println("Students RDD Count:" + count)
    stuRDD.cache()
    // Iterate over the rows and print each one
    stuRDD.foreach({ case (_, result) =>
      val key = Bytes.toString(result.getRow)
      val name = Bytes.toString(result.getValue("info".getBytes, "name".getBytes))
      val gender = Bytes.toString(result.getValue("info".getBytes, "gender".getBytes))
      val age = Bytes.toString(result.getValue("info".getBytes, "age".getBytes))
      println("Row key:" + key + " Name:" + name + " Gender:" + gender + " Age:" + age)
    })
  }
}
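By default TableInputFormat scans every column of the table. If only some columns are needed, the scan can be narrowed before building the RDD by also setting TableInputFormat.SCAN_COLUMNS on the same configuration object. The snippet below is a small, untested variation of the code above; the RDD name nameAgeRDD is made up for illustration.

// Restrict the scan to the name and age columns (space-delimited family:qualifier list).
// This goes right after conf.set(TableInputFormat.INPUT_TABLE, "student").
conf.set(TableInputFormat.SCAN_COLUMNS, "info:name info:age")
val nameAgeRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
  classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
  classOf[org.apache.hadoop.hbase.client.Result])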
SparkWriteHBase.scala
package dblab.SparkHbaseDemo

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.spark._
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes

object SparkWriteHBase {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("SparkWriteHBase").setMaster("local")
    val sc = new SparkContext(sparkConf)
    sc.hadoopConfiguration.set(TableOutputFormat.OUTPUT_TABLE, "student")
    val job = Job.getInstance(sc.hadoopConfiguration)
    job.setOutputKeyClass(classOf[ImmutableBytesWritable])
    job.setOutputValueClass(classOf[Result])
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
    // Build two rows of sample data
    val indataRDD = sc.makeRDD(Array("4,wangwu,Male,26", "5,chengxin,Female,27"))
    val rdd = indataRDD.map(_.split(',')).map { arr =>
      // Set the row key
      val put = new Put(Bytes.toBytes(arr(0)))
      // Set the info:name column
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(arr(1)))
      // Set the info:gender column
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes(arr(2)))
      // Set the info:age column; store the age as a string so the read
      // example above can decode it with Bytes.toString
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(arr(3)))
      // Build a (key, Put) pair as one element of the RDD
      (new ImmutableBytesWritable, put)
    }
    rdd.saveAsNewAPIHadoopDataset(job.getConfiguration())
  }
}
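As an alternative to TableOutputFormat, rows can also be written through the HBase client directly inside foreachPartition, which avoids the MapReduce Job setup. The sketch below is a rough, untested equivalent of SparkWriteHBase under the same dependencies; the object name SparkWriteHBaseDirect and the sample row are made up for illustration.

package dblab.SparkHbaseDemo

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

object SparkWriteHBaseDirect {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("SparkWriteHBaseDirect").setMaster("local"))
    val indataRDD = sc.makeRDD(Array("6,zhaoliu,Male,25"))
    indataRDD.map(_.split(',')).foreachPartition { iter =>
      // Open one connection per partition; HBase connections are not serializable,
      // so they must be created on the executor, not on the driver
      val conn = ConnectionFactory.createConnection(HBaseConfiguration.create())
      val mutator = conn.getBufferedMutator(TableName.valueOf("student"))
      iter.foreach { arr =>
        val put = new Put(Bytes.toBytes(arr(0)))
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(arr(1)))
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes(arr(2)))
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(arr(3)))
        mutator.mutate(put)
      }
      mutator.close() // flushes any buffered puts
      conn.close()
    }
  }
}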
pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>dblab</groupId>
  <artifactId>SparkHbaseDemo</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>${project.artifactId}</name>
  <properties>
    <maven.compiler.source>1.6</maven.compiler.source>
    <maven.compiler.target>1.6</maven.compiler.target>
    <encoding>UTF-8</encoding>
    <scala.version>2.11</scala.version>
    <spark.version>2.1.0</spark.version>
    <hbase.version>1.1.5</hbase.version>
  </properties>
  <dependencies>
    <!-- Spark -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${scala.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <!-- HBase -->
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>${hbase.version}</version>
    </dependency>
  </dependencies>
  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
  </build>
</project>