HBASE_HAD


    HBase shell operations

    hadoop@dblab-VirtualBox:~$ start-all.sh
    
    hadoop@dblab-VirtualBox:~$ jps
    
    hadoop@dblab-VirtualBox:~$ start-hbase.sh
    
    hadoop@dblab-VirtualBox:~$ hbase shell
    
    hbase(main):001:0> list
    
    hbase(main):002:0> disable 'student'
    
    hbase(main):003:0> drop 'student'
    
    hbase(main):004:0> list
    
    hbase(main):005:0> create 'student','info'
    
    hbase(main):006:0> list
    
    hbase(main):007:0> put 'student','1','info:name','zhangsan'
    
    hbase(main):008:0> put 'student','1','info:gender','Female'
    
    hbase(main):009:0> put 'student','1','info:age','23'
    
    hbase(main):010:0> put 'student','2','info:name','lisi'
    
    hbase(main):011:0> put 'student','2','info:gender','Male'
    
    hbase(main):012:0> put 'student','2','info:age','24'
    
    hbase(main):013:0> scan 'student'
    
    hbase(main):014:0> get 'student','1'
    
    hbase(main):015:0> get 'student','1','info:name'
    
    hbase(main):016:0> exit
    
    hadoop@dblab-VirtualBox:~$ 
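
    The shell session above drops any existing 'student' table, recreates it with a single 'info' column family, inserts two rows keyed '1' and '2', and reads them back. The same table setup can also be done from Scala through the HBase Admin API. The sketch below is a minimal equivalent of "create 'student','info'"; the object name CreateStudentTable is made up for illustration, and only the table and column-family names come from the session above.

    package dblab.SparkHbaseDemo
    
    import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
    import org.apache.hadoop.hbase.client.ConnectionFactory
    
    // Sketch: create the 'student' table with an 'info' column family,
    // roughly equivalent to "create 'student','info'" in the HBase shell.
    object CreateStudentTable
    {
      def main(args: Array[String]): Unit =
      {
        val conf = HBaseConfiguration.create()
        val connection = ConnectionFactory.createConnection(conf)
        val admin = connection.getAdmin
        val tableName = TableName.valueOf("student")
        if (!admin.tableExists(tableName)) {
          val desc = new HTableDescriptor(tableName)
          desc.addFamily(new HColumnDescriptor("info"))
          admin.createTable(desc)
        }
        admin.close()
        connection.close()
      }
    }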
    

      

    SparkOperateHBase.scala

    package dblab.SparkHbaseDemo
    
    import org.apache.hadoop.conf.Configuration 
    import org.apache.hadoop.hbase._
    import org.apache.hadoop.hbase.client._
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat 
    import org.apache.hadoop.hbase.util.Bytes 
    import org.apache.spark.SparkContext 
    import org.apache.spark.SparkContext._
    import org.apache.spark.SparkConf
    
    object SparkOperateHBase
    {
      def main(args: Array[String])
      {
        val conf = HBaseConfiguration.create()
        
        val sc = new SparkContext(new SparkConf().setAppName("SparkOperateHBase").setMaster("local"))
        
        // Set the table to read from
        conf.set(TableInputFormat.INPUT_TABLE, "student")
        
        val stuRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], 
            classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], 
            classOf[org.apache.hadoop.hbase.client.Result])
        
        // Cache the RDD before the first action so the HBase scan is not re-run by later operations
        stuRDD.cache()
        
        val count = stuRDD.count()
        
        println("Students RDD Count:" + count)
        
        // Iterate over the results and print each row
        stuRDD.foreach({ case (_,result) => 
          val key = Bytes.toString(result.getRow)
          val name = Bytes.toString(result.getValue("info".getBytes,"name".getBytes))
          val gender = Bytes.toString(result.getValue("info".getBytes,"gender".getBytes))
          val age = Bytes.toString(result.getValue("info".getBytes,"age".getBytes))
          
          println("Row key:"+key+" Name:"+name+" Gender:"+gender+" Age:"+age)
        })
      }
    }
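
    By default TableInputFormat scans the whole 'student' table. If only some columns or a row-key range are needed, the same Configuration object can carry extra TableInputFormat properties before newAPIHadoopRDD is called. The snippet below is a sketch that assumes the same conf, imports, and SparkContext as SparkOperateHBase above; the column list and row-key bounds are illustrative values, not part of the original program.

    // Sketch: restrict the scan before building the RDD (values are illustrative)
    conf.set(TableInputFormat.INPUT_TABLE, "student")
    // Space-separated family:qualifier columns to return
    conf.set(TableInputFormat.SCAN_COLUMNS, "info:name info:age")
    // Row-key range [start, stop)
    conf.set(TableInputFormat.SCAN_ROW_START, "1")
    conf.set(TableInputFormat.SCAN_ROW_STOP, "3")
    
    val partialRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
        classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
        classOf[org.apache.hadoop.hbase.client.Result])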
    

      

    SparkWriteHBase.scala

    package dblab.SparkHbaseDemo
    
    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
    import org.apache.spark._
    import org.apache.hadoop.mapreduce.Job
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable
    import org.apache.hadoop.hbase.client.Result
    import org.apache.hadoop.hbase.client.Put
    import org.apache.hadoop.hbase.util.Bytes
    
    object SparkWriteHBase
    {
      def main(args: Array[String]): Unit = 
      {
        val sparkConf = new SparkConf().setAppName("SparkWriteHBase").setMaster("local")
        
        val sc = new SparkContext(sparkConf)
        
        sc.hadoopConfiguration.set(TableOutputFormat.OUTPUT_TABLE, "student")
        
        // Job.getInstance replaces the deprecated Job constructor
        val job = Job.getInstance(sc.hadoopConfiguration)
        job.setOutputKeyClass(classOf[ImmutableBytesWritable])
        // The values written below are Put objects, so the output value class is Put
        job.setOutputValueClass(classOf[Put])
        job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
        
        // The line below builds an RDD with two records to insert (row key, name, gender, age)
        val indataRDD = sc.makeRDD(Array("4,wangwu,Male,26","5,chengxin,Female,27"))
        
        val rdd = indataRDD.map(_.split(',')).map{arr => {
          // Use the first field as the row key
          val put = new Put(Bytes.toBytes(arr(0)))
          // Set the info:name column
          put.addColumn(Bytes.toBytes("info"),Bytes.toBytes("name"),Bytes.toBytes(arr(1)))
          // Set the info:gender column
          put.addColumn(Bytes.toBytes("info"),Bytes.toBytes("gender"),Bytes.toBytes(arr(2)))
          // Set the info:age column; store it as a string so the read path (Bytes.toString) stays consistent
          put.addColumn(Bytes.toBytes("info"),Bytes.toBytes("age"),Bytes.toBytes(arr(3)))
          // Build a key-value pair as one element of the output RDD
          (new ImmutableBytesWritable, put)
        }}
        
        rdd.saveAsNewAPIHadoopDataset(job.getConfiguration())
      }
    }
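
    After SparkWriteHBase runs, the new rows '4' and '5' should appear in scan 'student' from the HBase shell. They can also be checked programmatically with the HBase client Get API. The sketch below is a verification snippet; the object name VerifyWrite is made up for illustration, and only the table name, column family, and row key come from the code above.

    package dblab.SparkHbaseDemo
    
    import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
    import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
    import org.apache.hadoop.hbase.util.Bytes
    
    // Sketch: read back one of the rows written by SparkWriteHBase.
    object VerifyWrite
    {
      def main(args: Array[String]): Unit =
      {
        val conf = HBaseConfiguration.create()
        val connection = ConnectionFactory.createConnection(conf)
        val table = connection.getTable(TableName.valueOf("student"))
        val result = table.get(new Get(Bytes.toBytes("4")))
        val name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
        val age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
        println("Row 4 -> name:" + name + " age:" + age)
        table.close()
        connection.close()
      }
    }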
    

      

    pom.xml

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
      <modelVersion>4.0.0</modelVersion>
      <groupId>dblab</groupId>
      <artifactId>SparkHbaseDemo</artifactId>
      <version>0.0.1-SNAPSHOT</version>
      <name>${project.artifactId}</name>
     
      <properties>
        <maven.compiler.source>1.6</maven.compiler.source>
        <maven.compiler.target>1.6</maven.compiler.target>
        <encoding>UTF-8</encoding>
        <scala.version>2.11</scala.version>
        <spark.version>2.1.0</spark.version>
        <hbase.version>1.1.5</hbase.version>
      </properties>
    
      <dependencies>
        <!-- Spark -->
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-core_${scala.version}</artifactId>
          <version>${spark.version}</version>
        </dependency>
        
        <!-- Hbase -->
        <dependency>
          <groupId>org.apache.hbase</groupId>
          <artifactId>hbase-server</artifactId>
          <version>${hbase.version}</version>
        </dependency>
        
        <dependency>
          <groupId>org.apache.hbase</groupId>
          <artifactId>hbase-common</artifactId>
          <version>${hbase.version}</version>
        </dependency>
        
        <dependency>
          <groupId>org.apache.hbase</groupId>
          <artifactId>hbase-client</artifactId>
          <version>${hbase.version}</version>
        </dependency>
        
      </dependencies>
    
      <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <testSourceDirectory>src/test/scala</testSourceDirectory>
        <plugins>
          <!-- Assumed addition: without a Scala compiler plugin, "mvn package" will not
               compile the .scala sources; the plugin version here is an assumption. -->
          <plugin>
            <groupId>net.alchim31.maven</groupId>
            <artifactId>scala-maven-plugin</artifactId>
            <version>3.2.2</version>
            <executions>
              <execution>
                <goals>
                  <goal>compile</goal>
                  <goal>testCompile</goal>
                </goals>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
    </project>