1. Upload a local file to HDFS
// Upload a local file to HDFS
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFile {
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            String str_src = "/usr/local/myjar/mongo/地图数据/Zhengye_Drive_Testing_Data/solu"
                    + "/solu_Yanming_DriveTesting_09-04.16-17.16-27_True_TA.json";
            String str_dst = "hdfs://node4:9000/user/hadoop/TestFile.json";
            Path src = new Path(str_src); // local path
            Path dst = new Path(str_dst); // HDFS path
            FileSystem hdfs = dst.getFileSystem(conf);
            //FileSystem hdfs = FileSystem.get(URI.create(str_dst), conf); // this works too
            // In pseudo-distributed mode either form works; calling FileSystem.get(conf)
            // directly may fail with a "Wrong FS" error (see the note below).
            hdfs.copyFromLocalFile(src, dst);
            System.out.println("Upload to " + conf.get("fs.default.name"));
            FileStatus files[] = hdfs.listStatus(dst);
            for (FileStatus file : files) {
                System.out.println(file.getPath());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
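copyFromLocalFile also has an overload that makes the overwrite behavior explicit; a minimal sketch, reusing the same src and dst as above:

hdfs.copyFromLocalFile(false, true, src, dst); // delSrc=false keeps the local file; overwrite=true replaces an existing TestFile.json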
A "Wrong FS" error may occur here; see these posts for fixes:
http://blog.csdn.net/kurama_sai/article/details/8604640
http://blog.itpub.net/22846396/viewspace-1119945
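In short, the error usually means the Configuration still points at the local file system. A minimal sketch of the usual fix, assuming the node4 address used throughout this article:

Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://node4:9000"); // the key is "fs.default.name" on older Hadoop 1.x
FileSystem hdfs = FileSystem.get(conf); // now resolves hdfs:// paths instead of the local FS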
2. Create a file in HDFS and write a line of text
// Create a file and write a line of text into it
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateFile {
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            byte[] buff = "This is a test line.".getBytes();
            String dsf = "hdfs://node4:9000/user/hadoop/Test";
            Path pathdsf = new Path(dsf);
            FileSystem hdfs = pathdsf.getFileSystem(conf);
            FSDataOutputStream outputStream = hdfs.create(pathdsf);
            outputStream.write(buff, 0, buff.length);
            outputStream.close(); // close the stream so the data is actually persisted
            System.out.println("Finish write!");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
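If other readers need to see the data before the stream is closed, FSDataOutputStream (Hadoop 2.x) exposes explicit flush calls; a small sketch:

outputStream.hflush(); // data reaches the datanodes and becomes visible to new readers
outputStream.hsync();  // additionally asks the datanodes to sync the data to disk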
3. Delete a file
Configuration conf = new Configuration();
Path path_del = new Path("hdfs://node4:9000/user/hadoop/Test2");
FileSystem hdfs = path_del.getFileSystem(conf);
boolean isDeleted = hdfs.delete(path_del, false);
//hdfs.delete(path_del, true); // recursive delete: if path_del is a directory,
//                             // the directory and all files under it are removed
System.out.println("delete? " + isDeleted);
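A common pattern is to check for existence first, so delete() is not called on a missing path; a short sketch with a hypothetical directory name:

Path dir = new Path("hdfs://node4:9000/user/hadoop/tmp_dir"); // hypothetical directory
if (hdfs.exists(dir)) {
    hdfs.delete(dir, true); // true: remove the directory and all of its children
}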
4. Rename a file
Configuration conf = new Configuration();
Path path_fr = new Path("hdfs://node4:9000/user/hadoop/Test");
Path path_to = new Path("hdfs://node4:9000/user/hadoop/Test2");
FileSystem hdfs = path_fr.getFileSystem(conf);
boolean isRename = hdfs.rename(path_fr, path_to); // rename the file
System.out.println("is rename? " + isRename);
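rename() also doubles as a "move" when the destination sits in another directory. A sketch with a hypothetical target path; note that rename() returns false rather than creating missing parent directories:

Path from = new Path("hdfs://node4:9000/user/hadoop/Test2");
Path to = new Path("hdfs://node4:9000/user/hadoop/backup/Test2"); // hypothetical target
hdfs.mkdirs(to.getParent()); // rename() fails if the target directory does not exist
boolean moved = hdfs.rename(from, to);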
5. Inspect a file's attributes and file-system information
Configuration conf = new Configuration();
Path findf = new Path("hdfs://node4:9000/user/hadoop/hadoop.txt");
FileSystem hdfs = findf.getFileSystem(conf);

// Check whether an HDFS file or directory exists
boolean isExists = hdfs.exists(findf);
System.out.println("exists? " + isExists);

// Inspect the attributes of an HDFS file
FileStatus filestatus = hdfs.getFileStatus(findf);
long modificationTime = filestatus.getModificationTime(); // last modification time
System.out.println("Modification time is: " + modificationTime);
long blocksize = filestatus.getBlockSize(); // block size
System.out.println("Block size is: " + blocksize);

// Find where the file's blocks are stored in the HDFS cluster
BlockLocation[] blkLocations = hdfs.getFileBlockLocations(filestatus, 0, filestatus.getLen());
for (int i = 0; i < blkLocations.length; i++) {
    // each block may be replicated on several hosts, so iterate over all of them
    // (the original indexed the host array with the block index, which can
    // throw ArrayIndexOutOfBoundsException)
    for (String host : blkLocations[i].getHosts()) {
        System.out.println("block " + i + " location: " + host);
    }
}

// Inspect the HDFS file system itself
System.out.println("scheme: " + hdfs.getScheme());
System.out.println("used: " + hdfs.getUsed());
System.out.println("canonical service name: " + hdfs.getCanonicalServiceName());
System.out.println("default block size: " + hdfs.getDefaultBlockSize(findf));
Output:
exists? true
Modification time is: 1430225267896
Block size is: 134217728
block 0 location: node4
scheme: hdfs
used: 0
canonical service name: 192.168.1.160:9000
default block size: 134217728
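The modification time printed above is epoch milliseconds; wrapping it in a plain java.util.Date makes it human-readable:

System.out.println("Modification time is: " + new java.util.Date(modificationTime));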
6. Read the contents of a file in HDFS
The following code prints the contents of the Test file:
String dsf = "hdfs://node4:9000/user/hadoop/Test";
Configuration conf = new Configuration();
Path pathdsf = new Path(dsf);
FileSystem fs = FileSystem.get(URI.create(dsf), conf);
//FileSystem fs = pathdsf.getFileSystem(conf); // this works too
FSDataInputStream hdfsInStream = fs.open(pathdsf);
byte[] ioBuffer = new byte[1024];
int readLen = hdfsInStream.read(ioBuffer);
while (readLen != -1) {
    System.out.write(ioBuffer, 0, readLen);
    readLen = hdfsInStream.read(ioBuffer);
}
hdfsInStream.close();
fs.close();
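Hadoop also ships a small helper, org.apache.hadoop.io.IOUtils, that replaces the manual read loop; a minimal sketch reusing fs and pathdsf from above:

FSDataInputStream in = fs.open(pathdsf);
try {
    IOUtils.copyBytes(in, System.out, 4096, false); // 4096-byte buffer; false: leave the streams open
} finally {
    IOUtils.closeStream(in);
}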
7. Get the names of all nodes in the cluster
Configuration conf = new Configuration();
Path path = new Path("hdfs://node4:9000/user/hadoop");
FileSystem fs = path.getFileSystem(conf);
// DistributedFileSystem and DatanodeInfo come from the hadoop-hdfs module
DistributedFileSystem dfs = (DistributedFileSystem) fs;
DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();
String[] names = new String[dataNodeStats.length];
for (int i = 0; i < dataNodeStats.length; i++) {
    names[i] = dataNodeStats[i].getHostName();
    System.out.println("no." + i + ", name:" + names[i]);
}
The output is the node names:
no.0, name:node4
no.1, name:node3
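DatanodeInfo carries more than the hostname; a short sketch of a few other fields it exposes, reusing dataNodeStats from above:

for (DatanodeInfo node : dataNodeStats) {
    System.out.println(node.getHostName()
            + " capacity=" + node.getCapacity()     // configured capacity in bytes
            + " used=" + node.getDfsUsed()          // bytes used by HDFS
            + " remaining=" + node.getRemaining()); // bytes still available
}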