1. Experiment content and completion: (specific experiment steps and screenshot descriptions)

(I) Implement the following functions in a program, and complete the same tasks with the HBase Shell commands provided with Hadoop:

(1) List information about all tables in HBase, such as the table names.
Shell sub-tasks:
List all tables
List a table's structure
Check whether a table exists
Check whether a table is enabled
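For the HBase Shell half of this task, the standard built-in commands are used; the 'student' table from the later steps serves only as an example:

hbase> list                   # list all tables
hbase> describe 'student'     # show a table's structure (column families and their settings)
hbase> exists 'student'       # check whether the table exists
hbase> is_enabled 'student'   # check whether the table is enabled (available)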
Source code:

package cn.wl.edu.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;

public class ListTables {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    // List every table known to HBase and print its name.
    public static void listTables() throws IOException {
        init();
        HTableDescriptor[] hTableDescriptors = admin.listTables();
        for (HTableDescriptor hTableDescriptor : hTableDescriptors) {
            System.out.println("table name:" + hTableDescriptor.getNameAsString());
        }
        close();
    }

    // Create the connection and the Admin instance.
    public static void init() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Release the Admin instance and the connection.
    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        try {
            listTables();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

(2) Print all records of a specified table to the terminal;
Source code:

package cn.wl.edu.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

import java.io.IOException;

public class ListTableData {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    // Scan the whole table and print every cell of every row.
    public static void getData(String tableName) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Scan scan = new Scan();
        ResultScanner scanner = table.getScanner(scan);
        for (Result result : scanner) {
            printRecord(result);
        }
        System.out.println("finish!");
        close();
    }

    // Print row key, column family, qualifier, value and timestamp of each cell.
    public static void printRecord(Result result) throws IOException {
        for (Cell cell : result.rawCells()) {
            System.out.println("行键:" + new String(CellUtil.cloneRow(cell)));
            System.out.print("列族: " + new String(CellUtil.cloneFamily(cell)));
            System.out.print(" 列: " + new String(CellUtil.cloneQualifier(cell)));
            System.out.print(" 值: " + new String(CellUtil.cloneValue(cell)));
            System.out.println(" 时间戳: " + cell.getTimestamp());
        }
    }

    public static void init() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        try {
            getData("person");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
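In the HBase Shell the same records are printed with a full-table scan, for example:

hbase> scan 'person'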
(3) Add and delete a specified column family or column in an existing table;

Adding data

Source code:

package cn.wl.edu.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;

public class InsertRow {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    // Insert a single cell: row rowKey, column colFamily:col, value val.
    public static void insertRow(String tableName, String rowKey, String colFamily, String col, String val) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Put put = new Put(rowKey.getBytes());
        put.addColumn(colFamily.getBytes(), col.getBytes(), val.getBytes());
        table.put(put);
        System.out.println("insert finish!");
        table.close();
        close();
    }

    public static void init() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        try {
            insertRow("student", "s002", "score", "math", "100");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
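The shell equivalent of this insert uses put with the same table, row key, column and value; a new column family can also be added to an existing table with alter (the family name 'contact' below is only an illustration):

hbase> put 'student','s002','score:math','100'
hbase> alter 'student', NAME => 'contact'      # add a new column family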
Deleting data

Source code:

package cn.wl.edu.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

public class DeleteRow {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    public static void deleteRow(String tableName, String rowKey, String colFamily, String col) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Delete delete = new Delete(rowKey.getBytes());
        // addFamily removes every cell of the colFamily family in this row;
        // addColumn removes only the single cell colFamily:col.
        // In practice one of the two is enough, depending on whether a whole
        // family or a single column is to be deleted.
        delete.addFamily(Bytes.toBytes(colFamily));
        delete.addColumn(Bytes.toBytes(colFamily), Bytes.toBytes(col));
        table.delete(delete);
        System.out.println("delete successful!");
        table.close();
        close();
    }

    public static void init() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        try {
            deleteRow("student", "s002", "score", "math");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
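The corresponding shell commands delete either a single column (cell) or a whole column family:

hbase> delete 'student','s002','score:math'                  # delete one column of the row
hbase> alter 'student', NAME => 'score', METHOD => 'delete'  # drop the 'score' column family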
(4) Clear all records of a specified table;

Source code:

package cn.wl.edu.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;

public class TruncateTable {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    // Remove all records by truncating the table (it must be disabled first).
    public static void clearRows(String tableName) throws IOException {
        init();
        TableName tablename = TableName.valueOf(tableName);
        admin.disableTable(tablename);
        admin.truncateTable(tablename, false);   // false: do not preserve region splits
        System.out.println("truncate table successful!");
        close();
    }

    public static void init() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        try {
            clearRows("student");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
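In the shell the same effect is achieved with truncate, which disables, drops and recreates the table:

hbase> truncate 'student'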
(5) Count the number of rows in a table.

Source program:

package cn.wl.edu.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

import java.io.IOException;

public class CountTable {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    // Count rows by scanning the table and counting the returned results.
    public static void countTable(String tableName) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        Scan scan = new Scan();
        ResultScanner scanner = table.getScanner(scan);
        int num = 0;
        for (Result result = scanner.next(); result != null; result = scanner.next()) {
            num++;
        }
        System.out.println("行数:" + num);
        scanner.close();
        table.close();
        close();
    }

    public static void init() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        try {
            countTable("student");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

(II) HBase database operations

1. Given the following tables and data from a relational database, convert them into tables suitable for HBase storage and insert the data:

Student table (Student)
S_No (student number)   S_Name (name)   S_Sex (sex)   S_Age (age)
2015001                 Zhangsan        male          23
2015002                 Mary            female        22
2015003                 Lisi            male          24

Course table (Course)
C_No (course number)   C_Name (course name)   C_Credit (credits)
123001                 Math                   2.0
123002                 Computer Science       5.0
123003                 English                3.0

Course selection table (SC)
SC_Sno (student number)   SC_Cno (course number)   SC_Score (score)
2015001                   123001                   86
2015001                   123003                   69
2015002                   123002                   77
2015002                   123003                   99
2015003                   123001                   98
2015003                   123002                   95
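One possible mapping (an assumption, not the only valid design) is one HBase table per relational table, using the primary key as the row key and a single column family for the remaining attributes; for SC the row key is the concatenation of student number and course number. A sketch of the shell commands, showing only the first record of each table (the remaining put statements are analogous; the family names 'info' and 'score' are part of the assumed design):

hbase> create 'Student','info'
hbase> put 'Student','2015001','info:S_Name','Zhangsan'
hbase> put 'Student','2015001','info:S_Sex','male'
hbase> put 'Student','2015001','info:S_Age','23'
hbase> create 'Course','info'
hbase> put 'Course','123001','info:C_Name','Math'
hbase> put 'Course','123001','info:C_Credit','2.0'
hbase> create 'SC','score'
hbase> put 'SC','2015001_123001','score:SC_Score','86'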
2. Implement the following functions in a program:

(1) createTable(String tableName, String[] fields)
Create a table; the parameter tableName is the table name, and the string array fields holds the names of the record's fields. If a table named tableName already exists in HBase, the old table must be deleted first and the new table created afterwards.

Source code:

package cn.wl.edu.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;

public class CreateTable {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    public static void createTable(String tableName, String[] fields) throws IOException {
        init();
        TableName tablename = TableName.valueOf(tableName);
        // If the table already exists, delete it first and then recreate it.
        if (admin.tableExists(tablename)) {
            System.out.println("table exists, deleting it first!");
            admin.disableTable(tablename);
            admin.deleteTable(tablename);
        }
        HTableDescriptor hTableDescriptor = new HTableDescriptor(tablename);
        for (String str : fields) {
            // Each element of fields becomes one column family of the new table.
            HColumnDescriptor hColumnDescriptor = new HColumnDescriptor(str);
            hTableDescriptor.addFamily(hColumnDescriptor);
        }
        admin.createTable(hTableDescriptor);
        close();
    }

    public static void init() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        String[] fields = { "Score" };
        try {
            createTable("person", fields);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

(2) addRecord(String tableName, String row, String[] fields, String[] values)
Add the data in values to the cells of table tableName identified by row (given as S_Name) and the string array fields. If an element of fields has a column qualifier under its column family, it is written as "columnFamily:column". For example, to add scores for the three columns "Math", "Computer Science" and "English" at once, fields is {"Score:Math", "Score:Computer Science", "Score:English"} and values holds the three scores.

Source code:

package cn.wl.edu.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

import java.io.IOException;

public class AddRecord {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    public static void addRecord(String tableName, String row, String[] fields, String[] values) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        for (int i = 0; i < fields.length; i++) {
            Put put = new Put(row.getBytes());
            // Each element of fields is written as "columnFamily:column".
            String[] cols = fields[i].split(":");
            put.addColumn(cols[0].getBytes(), cols[1].getBytes(), values[i].getBytes());
            table.put(put);
        }
        table.close();
        close();
    }

    public static void init() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        String[] fields = {"Score:Math", "Score:Computer Science", "Score:English"};
        String[] values = {"99", "80", "100"};
        try {
            addRecord("person", "Score", fields, values);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("finish!");
    }
}
Science", "Score:English"}; String[] values = {"99", "80", "100"}; try { addRecord("person", "Score", fields, values); } catch (IOException e) { e.printStackTrace(); } System.out.println("finish!"); } } (3) scanColumn(String tableName, String column) 浏览表 tableName 某一列的数据,如果某一行记录中该列数据不存在,则返回 null。要求 当参数 column 为某一列族名称时,如果底下有若干个列限定符,则要列出每个列限定符代表 的列的数据;当参数 column 为某一列具体名称(例如“Score:Math”)时,只需要列出该列的 数据。 源代码: package cn.wl.edu.hbase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.util.Bytes; import java.io.IOException; public class ScanColumn { public static Configuration configuration; public static Connection connection; public static Admin admin; public static void scanColumn(String tableName, String column) throws IOException { init(); Table table = connection.getTable(TableName.valueOf(tableName)); Scan scan = new Scan(); scan.addFamily(Bytes.toBytes(column)); ResultScanner scanner = table.getScanner(scan); for (Result result = scanner.next(); result != null; result = scanner.next()) { showCell(result); } System.out.println("finish!"); table.close(); close(); } public static void showCell(Result result) { Cell[] cells = result.rawCells(); for (Cell cell : cells) { System.out.println("RowName:" + new String(CellUtil.cloneRow(cell)) + " "); System.out.println("Timetamp:" + cell.getTimestamp() + " "); System.out.println("column Family:" + new String(CellUtil.cloneFamily(cell)) + " "); System.out.println("row Name:" + new String(CellUtil.cloneQualifier(cell)) + " "); System.out.println("value:" + new String(CellUtil.cloneValue(cell)) + " "); } } public static void init() { configuration = HBaseConfiguration.create(); configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase"); try { connection = ConnectionFactory.createConnection(configuration); admin = connection.getAdmin(); } catch (IOException e) { e.printStackTrace(); } } // 关闭连接 public static void close() { try { if (admin != null) { admin.close(); } if (null != connection) { connection.close(); } } catch (IOException e) { e.printStackTrace(); } } public static void main(String[] args) { try { scanColumn("person", "Score"); } catch (IOException e) { e.printStackTrace(); } } } (4) modifyData(String tableName, String row, String column) 修改表 tableName,行 row(可以用学生姓名 S_Name 表示),列 column 指定的单元格的 数据。 源代码: package cn.wl.edu.hbase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.*; import java.io.IOException; public class ModifyData { public static long ts; public static Configuration configuration; public static Connection connection; public static Admin admin; public static void modifyData(String tableName, String row, String column, String val) throws IOException { init(); Table table = connection.getTable(TableName.valueOf(tableName)); Put put = new Put(row.getBytes()); Scan scan = new Scan(); ResultScanner resultScanner = table.getScanner(scan); for (Result r : resultScanner) { for (Cell cell : r.getColumnCells(row.getBytes(), column.getBytes())) { ts = cell.getTimestamp(); } } put.addColumn(row.getBytes(), column.getBytes(), ts, val.getBytes()); table.put(put); System.out.println("modify successful!"); table.close(); close(); } public 
(5) deleteRow(String tableName, String row)
Delete the record of the row specified by row in table tableName.

Source code:

package cn.wl.edu.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

public class DeleteRow {
    public static Configuration configuration;
    public static Connection connection;
    public static Admin admin;

    public static void deleteRow(String tableName, String row) throws IOException {
        init();
        Table table = connection.getTable(TableName.valueOf(tableName));
        // A Delete built from the row key alone removes every cell of the row,
        // across all column families.
        Delete delete = new Delete(row.getBytes());
        table.delete(delete);
        System.out.println("delete successful!");
        table.close();
        close();
    }

    public static void init() {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void close() {
        try {
            if (admin != null) {
                admin.close();
            }
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        try {
            deleteRow("student", "s002");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

2. Problems encountered in the experiment: (description and screenshots)

When running most of the programs in Eclipse, execution took a very long time and neither reported an error nor printed any result.

3. Solutions: (list the problems encountered and how they were solved, as well as any unresolved problems):

After going through the HBase installation and configuration tutorial again, I found that one step had been missed. The problem was resolved after adding the required paths to the hbase-env.sh file.
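The exact lines that were added are not reproduced here; for a typical local installation the additions to hbase-env.sh are along these lines (the paths below are only assumptions and must match the actual install locations):

# assumed example only - adjust paths to the local installation
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HBASE_CLASSPATH=/usr/local/hbase/conf
export HBASE_MANAGES_ZK=true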