• Hbase Scala代码


    HBase 老版本代码(HBase 1.x,对应下方依赖 1.4.13)

    公共方法
      def getConn() = {
        val conf = HBaseConfiguration.create
        conf.set("hbase.zookeeper.quorum","hadoop1,hadoop2,hadoop3")
        val conn = ConnectionFactory.createConnection(conf)//重量级
        val admin = conn.getAdmin;//轻量级
        (admin,conn)
      }

      val admin = getConn()._1
      val conn = getConn()._2
    依赖
            <!-- Maven dependencies for the HBase 1.x examples; every org.apache.hbase artifact below is pinned to 1.4.13. -->
            <dependency>
                <groupId>io.netty</groupId>
                <artifactId>netty-all</artifactId>
                <version>4.1.17.Final</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-client</artifactId>
                <version>1.4.13</version>
            </dependency>

            <!-- HBase project POM, referenced with type "pom". -->
            <!-- NOTE(review): probably redundant next to the concrete hbase-* modules; confirm before removing. -->
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase</artifactId>
                <version>1.4.13</version>
                <type>pom</type>
            </dependency>

            <!-- NOTE(review): hadoop-core 0.20.2 looks much older than the HBase release used here; -->
            <!-- confirm it does not clash with the Hadoop libraries HBase brings in transitively. -->
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-core</artifactId>
                <version>0.20.2</version>
            </dependency>

            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-server</artifactId>
                <version>1.4.13</version>
            </dependency>

            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-common</artifactId>
                <version>1.4.13</version>
            </dependency>
            <dependency>
                <groupId>org.antlr</groupId>
                <artifactId>antlr4-runtime</artifactId>
                <version>4.7</version>
            </dependency>
            <dependency>
                <groupId>commons-cli</groupId>
                <artifactId>commons-cli</artifactId>
                <version>1.4</version>
            </dependency>
            <dependency>
                <groupId>commons-lang</groupId>
                <artifactId>commons-lang</artifactId>
                <version>2.6</version>
            </dependency>
    DDL
        import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, HColumnDescriptor}
        import org.apache.hadoop.hbase.{HTableDescriptor, NamespaceDescriptor, TableName}
        import org.apache.hadoop.hbase.client.{ ConnectionFactory, Delete, Get, HBaseAdmin}
        import org.apache.hadoop.hbase.client.{Put, Result, ResultScanner, Scan}
        import org.apache.hadoop.hbase.filter.{ FilterList, SingleColumnValueFilter}
        import org.apache.hadoop.hbase.util.Bytes
        import org.apache.spark.sql.SparkSession

        admin.tableExists(TableName.valueOf("bigdata","student"))//表是否存在

        val hTableDescriptor = new HTableDescriptor(TableName.valueOf("bigdata","student"))//创建表
        hTableDescriptor.addFamily(new HColumnDescriptor("info"));
        admin.createTable(hTableDescriptor)

        admin.disableTables("student")//删除表
        admin.deleteTables("student")

        val namesp = NamespaceDescriptor.create("bigdata").build()//创建命名空间
        admin.createNamespace(namesp)
    DML
        val addtable = conn.getTable(TableName.valueOf("bigdata","student"))//添加数据
        val addput = new Put(Bytes.toBytes("row_key"))
        addput.addColumn(Bytes.toBytes("info"),Bytes.toBytes("column_name"),Bytes.toBytes("column_value"))
        addtable.put(addput)//可以方法多个put对象
        addtable.close()

        val gettable = conn.getTable(TableName.valueOf("bigdata","student"))//获取数据
        val get = new Get(Bytes.toBytes("row_key"))
        get.setMaxVersions()//设置最大版本
        get.addFamily(Bytes.toBytes("info"))//指定列簇
        get.addColumn(Bytes.toBytes("info"),Bytes.toBytes("name"))//指定列簇+列
        val result = gettable.get(get).rawCells()//可以方法多个put对象
        for(cell <- result){
          Bytes.toString(CellUtil.cloneRow(cell))//row_key
          Bytes.toString(CellUtil.cloneFamily(cell))//列簇
          Bytes.toString(CellUtil.cloneQualifier(cell))//列名
          Bytes.toString(CellUtil.cloneValue(cell)) //value
        }
        gettable.close()

        val scantable = conn.getTable(TableName.valueOf("bigdata","student"))//scan数据
        val resultScanner = scantable.getScanner(new Scan(Bytes.toBytes("1001"),Bytes.toBytes("1003")))
        import scala.collection.JavaConversions._
        for (result <- resultScanner) {//循环结果
          for(cell <- result.rawCells()){//循环每一个cell
            Bytes.toString(CellUtil.cloneRow(cell))//row_key
          }
          println()
        }

        val deletetable = conn.getTable(TableName.valueOf("bigdata","student"))//删除数据
        val delete = new Delete(Bytes.toBytes("row_key"))
        //生产不用addColumn 因为历史版本会出现
        delete.addColumn(Bytes.toBytes("info"),Bytes.toBytes("name"))//找到最大时间戳,给这个版本添加删除标记
        delete.addColumn(Bytes.toBytes("info"),Bytes.toBytes("name"),1563502276059L)//删除这个时间戳的数据,小于的不删除
        delete.addFamily(Bytes.toBytes("info"))//删除列簇
        delete.addColumns(Bytes.toBytes("info"),Bytes.toBytes("name"))//删除所有版本数据 小于等于这个时间戳的
        deletetable.delete(delete)//可以接受Delete数组

    HBase 新版本代码(HBase 2.x,对应下方依赖 2.4.11)

    依赖
            <!-- Maven dependencies for the HBase 2.x examples; every org.apache.hbase artifact below is pinned to 2.4.11. -->
            <dependency>
                <groupId>commons-cli</groupId>
                <artifactId>commons-cli</artifactId>
                <version>1.4</version>
            </dependency>
            <dependency>
                <groupId>commons-lang</groupId>
                <artifactId>commons-lang</artifactId>
                <version>2.6</version>
            </dependency>
            <!-- NOTE(review): hadoop-core 0.20.2 looks much older than the HBase release used here; -->
            <!-- confirm it does not clash with the Hadoop libraries HBase brings in transitively. -->
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-core</artifactId>
                <version>0.20.2</version>
            </dependency>
            <!-- javax.el is declared explicitly at 3.0.1-b06 and excluded from each HBase artifact below. -->
            <dependency>
                <groupId>org.glassfish</groupId>
                <artifactId>javax.el</artifactId>
                <version>3.0.1-b06</version>
            </dependency>
            <dependency>
                <groupId>io.netty</groupId>
                <artifactId>netty-all</artifactId>
                <version>4.1.17.Final</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-client</artifactId>
                <version>2.4.11</version>
                <exclusions>
                    <exclusion>
                        <groupId>org.glassfish</groupId>
                        <artifactId>javax.el</artifactId>
                    </exclusion>
                </exclusions>
            </dependency>
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-server</artifactId>
                <version>2.4.11</version>
                <exclusions>
                    <exclusion>
                        <groupId>org.glassfish</groupId>
                        <artifactId>javax.el</artifactId>
                    </exclusion>
                </exclusions>
            </dependency>

            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-common</artifactId>
                <version>2.4.11</version>
                <exclusions>
                    <exclusion>
                        <groupId>org.glassfish</groupId>
                        <artifactId>javax.el</artifactId>
                    </exclusion>
                </exclusions>
            </dependency>
            <dependency>
                <groupId>org.antlr</groupId>
                <artifactId>antlr4-runtime</artifactId>
                <version>4.7</version>
            </dependency>
    公共方法
      def getConn() = {
        val conf = HBaseConfiguration.create
        conf.set("hbase.zookeeper.quorum","hadoop1,hadoop2,hadoop3")
        val conn = ConnectionFactory.createConnection(conf)//重量级
        val admin = conn.getAdmin;//轻量级
        (admin,conn)
      }
        val admin = getConn()._1
        val conn = getConn()._2
    DDL
        admin.tableExists(TableName.valueOf("bigdata","student"))//表是否存在

        //创建表 表的build对象
        val tableCreateBuilder = TableDescriptorBuilder.newBuilder(TableName.valueOf("bigdata","student"))
        val columFanilyCreateBuild = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("info"))//列簇build对象
        columFanilyCreateBuild.setMaxVersions(5)
        tableCreateBuilder.setColumnFamily(columFanilyCreateBuild.build())
        admin.createTable(tableCreateBuilder.build())

        //修改表   不修改其他表属性
        val desc = admin.getDescriptor(TableName.valueOf("bigdata","student"))
        val tableModBuilder = TableDescriptorBuilder.newBuilder(desc)
        val famdesc = desc.getColumnFamily(Bytes.toBytes("info"))
        val columFanilyModBuilder = ColumnFamilyDescriptorBuilder.newBuilder(famdesc)//列簇build对象 不修改其他列簇属性
        columFanilyModBuilder.setMaxVersions(5)
        tableModBuilder.setColumnFamily(columFanilyModBuilder.build())
        admin.modifyTable(tableModBuilder.build())

        admin.disableTable(TableName.valueOf("bigdata","student"))//删除表
        admin.deleteTable(TableName.valueOf("bigdata","student"))

        val namesp = NamespaceDescriptor.create("bigdata").build()//创建命名空间
        admin.createNamespace(namesp)
    DML
        val addtable = conn.getTable(TableName.valueOf("bigdata","student"))//添加数据
        val addput = new Put(Bytes.toBytes("row_key"))
        addput.addColumn(Bytes.toBytes("info"),Bytes.toBytes("column_name"),Bytes.toBytes("column_value"))
        addtable.put(addput)//可以方法多个put对象
        addtable.close()

        val gettable = conn.getTable(TableName.valueOf("bigdata","student"))//获取数据
        val get = new Get(Bytes.toBytes("row_key"))
        get.readVersions(10)
        get.addFamily(Bytes.toBytes("info"))//指定列簇
        get.addColumn(Bytes.toBytes("info"),Bytes.toBytes("name"))//指定列簇+列
        val result = gettable.get(get).rawCells()//可以方法多个put对象
        for(cell <- result){
          Bytes.toString(CellUtil.cloneRow(cell))//row_key
          Bytes.toString(CellUtil.cloneFamily(cell))//列簇
          Bytes.toString(CellUtil.cloneQualifier(cell))//列名
          Bytes.toString(CellUtil.cloneValue(cell)) //value
        }
        gettable.close()

        val scantable = conn.getTable(TableName.valueOf("bigdata","student"))//scan数据
        val scan = new Scan
        val filterList = new FilterList()
        //new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("0003")));  rowkey过滤器
        //new PrefixFilter("00".getBytes());  rowkey过滤器ColumnValueFilter
        //new FamilyFilter(CompareFilter.CompareOp.LESS, new SubstringComparator("f2"));  列簇过滤器
        //new QualifierFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("name"));  列过滤器
        //new ColumnPrefixFilter(prefix);根据列(又名限定符)名称的前导部分进行过滤
        //new MultipleColumnPrefixFilter(prefixes);byte[][] prefixes = new byte[][] {Bytes.toBytes("abc"), Bytes.toBytes("xyz")}; 多个前缀过滤
        //new ColumnRangeFilter(startColumn, true, endColumn, true); 列区间过滤
        //new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("8"));  值过滤器
        //new ColumnValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("8"));  只匹配获取的单元格
        //new SingleColumnValueFilter("f1".getBytes(), "name".getBytes(), CompareFilter.CompareOp.EQUAL, "刘备".getBytes());  满足条件的整列值的所有字段
        //new SingleColumnValueExcludeFilter("f1".getBytes(), "name".getBytes(), CompareFilter.CompareOp.EQUAL, "刘备".getBytes());  排除满足条件的整列值的所有字段
        //new PageFilter(pageSize);  分页过滤
        //comp=new RegexStringComparator("my.")值比较的正则表达式   new SingleColumnValueFilter("f1".getBytes(), "name".getBytes(), CompareFilter.CompareOp.EQUAL,comp);
        //comp=new SubstringComparator("y val");字串包含   new SingleColumnValueFilter("f1".getBytes(), "name".getBytes(), CompareFilter.CompareOp.EQUAL,comp);



        val nameFilter = new ColumnValueFilter(Bytes.toBytes("info"),Bytes.toBytes("name"),CompareOperator.EQUAL,Bytes.toBytes("zhangsan"))
        filterList.addFilter(nameFilter)
        //保留没有当前列的数据
        val singleFilter = new SingleColumnValueFilter(Bytes.toBytes("info"),Bytes.toBytes("name"),CompareOperator.EQUAL,Bytes.toBytes("zhangsan"))
        filterList.addFilter(singleFilter)
        scan.setFilter(filterList)
        scan.withStartRow(Bytes.toBytes("1001"))
        scan.withStopRow(Bytes.toBytes("1003"))
        val resultScanner = scantable.getScanner(scan)
        import scala.collection.JavaConversions._
        for (result <- resultScanner) {//循环结果
          for(cell <- result.rawCells()){//循环每一个cell
            Bytes.toString(CellUtil.cloneRow(cell))//row_key
          }
          println()
        }

        val deletetable = conn.getTable(TableName.valueOf("bigdata","student"))//删除数据
        val delete = new Delete(Bytes.toBytes("row_key"))
        //生产不用addColumn 因为历史版本会出现
        delete.addColumn(Bytes.toBytes("info"),Bytes.toBytes("name"))//找到最大时间戳,给这个版本添加删除标记
        delete.addColumn(Bytes.toBytes("info"),Bytes.toBytes("name"),1563502276059L)//删除这个时间戳的数据,小于的不删除
        delete.addFamily(Bytes.toBytes("info"))//删除列簇
        delete.addColumns(Bytes.toBytes("info"),Bytes.toBytes("name"))//删除所有版本数据 小于等于这个时间戳的
        deletetable.delete(delete)//可以接受Delete数组
  • 相关阅读:
    Spark_3:Spark集群搭建
    Spark_2:Spark 快速入门教程
    Spark快速大数据分析_11:第十一章
    Spark快速大数据分析_10:第十章
    Spark快速大数据分析_9:第九章
    Spark快速大数据分析_8:第八章
    Spark快速大数据分析_7:第七章
    Spark快速大数据分析_6:第六章
    Spark快速大数据分析_5:第五章
    java 内部类详解
  • 原文地址:https://www.cnblogs.com/wuxiaolong4/p/16731678.html
Copyright © 2020-2023  润新知