Hadoop的文件操作位于包org.apache.hadoop.fs里面,能够进行新建、删除、修改等操作。
比较重要的几个类:
(1)Configuration:HDFS的配置信息;
(2)FileSystem: HDFS文件系统;
(3)Path: HDFS文件或目录的路径;
(4)FileStatus: 文件或目录的状态信息(元数据,如修改时间、长度等);
(5)BlockLocation: 文件块的位置;
(6)DistributedFileSystem: 分布式文件系统;
(7)DatanodeInfo:数据节点信息。
代码:
package com.hellohadoop; import java.io.IOException; import java.text.SimpleDateFormat; import java.util.Date; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataOutputStream; public class FileOperator { static Configuration conf; static FileSystem hdfs; public static void main(String[] args) throws Exception{ Init(); Upload(); // Create(); // CreateFolder(); // Rename(); // Delete(); // IsExist(); // GetTime(); // GetAllFiles(); // GetLocations(); // GetAllHosts(); } // 初始化 public static void Init() throws Exception{ conf = new Configuration(); hdfs = FileSystem.get(conf); } // 上传文件 public static void Upload() throws Exception{ // 本地文件 Path src = new Path("F:\Hadoop\DataFiles\data.txt"); // 上传路径 Path dst = new Path("hdfs://master:9000/user/Administrator/"); // 上传文件 hdfs.copyFromLocalFile(src, dst); System.out.println("Upload to " + conf.get("fs.default.name")); } // 创建文件 public static void Create() throws Exception{ byte[] buff = "Hello Hadoop!".getBytes(); // 新建文件路径 Path dfs = new Path("hdfs://master:9000/user/Administrator/hello"); FSDataOutputStream outputStream = hdfs.create(dfs); outputStream.write(buff, 0, buff.length); System.out.println("Created!"); } // 创建文件夹 public static void CreateFolder() throws Exception{ // 新建文件路径 Path dfs = new Path("hdfs://master:9000/user/Administrator/helloDir"); hdfs.mkdirs(dfs); System.out.println("Created!"); } // 重新命名HDFS文件 public static void Rename() throws Exception{ Path of = new Path("hdfs://master:9000/user/Administrator/data.txt"); Path nf = new Path("hdfs://master:9000/user/Administrator/newdata.txt"); boolean isOk = hdfs.rename(of, nf); String res = isOk? 
"Yes": "No"; System.out.println("Result:" + res); } // 删除HDFS文件 public static void Delete() throws Exception{ Path file = new Path("hdfs://master:9000/user/Administrator/helloDir"); // hdfs.delete(file, false);// 第二个为是否递归删除,如果目录下面有文件,是否递归删除为false会报错 hdfs.delete(file, true); } // 查看某个文件是否存在 public static void IsExist() throws IOException{ Path file = new Path("hdfs://master:9000/user/Administrator/hello1"); String res = hdfs.exists(file)? "Yes": "No"; System.out.println("Result:" + res); } // 查看文件最后修改时间 public static void GetTime() throws Exception{ Path file = new Path("hdfs://master:9000/user/Administrator/hello"); FileStatus fs = hdfs.getFileStatus(file); long mTime = fs.getModificationTime(); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss"); String sTime = sdf.format(new Date(mTime)); System.out.println("Modified Time:" + sTime); } // 递归查看文件目录 public static void GetAllFiles() throws Exception{ Path file = new Path("hdfs://master:9000/"); AllFile(file); } public static void AllFile(Path p) throws Exception{ FileStatus[] fs = hdfs.listStatus(p); for (FileStatus f: fs){ // 是文件输出路径 if (hdfs.isFile(f.getPath())){ System.out.println(f.getPath()); } // 是目录继续递归 else{ System.out.println(); System.out.println(f.getPath() + ":"); AllFile(f.getPath()); } } } // 查看文件在HDFS中的位置 public static void GetLocations() throws Exception{ Path file = new Path("hdfs://master:9000/user/Administrator/stsme_20150330.sql"); FileStatus fs = hdfs.getFileStatus(file); BlockLocation[] blk = hdfs.getFileBlockLocations(fs, 0, fs.getLen()); for(int i=0; i<blk.length; i++){ String[] host = blk[i].getHosts(); System.out.println("Block " + i + " Location:" + host[0]); } } // 查看HDFS集群上所有节点 public static void GetAllHosts() throws Exception{ DistributedFileSystem dfs = (DistributedFileSystem) hdfs; DatanodeInfo[] dns = dfs.getDataNodeStats(); for (int i=0; i<dns.length; i++){ System.out.println("Datanode " + i + " Name:" + dns[i].getHostName()); } } }