• Hadoop java文件操作


    一、向HDFS用户目录下上传文本文件helloworld.txt,并从HDFS中读取文件到控制台上。

    1. 上传文件(注:原文此处贴出的代码与下文“2. 读文件”相同,疑为误贴;上传本地文件可使用 FileSystem.copyFromLocalFile 方法实现)

    package hdfs;
    
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.FSDataInputStream;
    
    public class readHDFS {
        /**
         * Opens /user/hadoop/helloworld.txt on HDFS and prints its content
         * to the console, one line at a time.
         *
         * Fixes over the original: the file name matches the one stated in
         * the exercise ("helloworld.txt", not "helloword.txt"), the loop
         * reads until EOF instead of a hard-coded 3 lines, and
         * try-with-resources guarantees every stream is closed even when an
         * exception occurs.
         */
        public static void main(String[] args) {
            String filename = "/user/hadoop/helloworld.txt";
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://localhost:9000");
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            // FileSystem, FSDataInputStream and BufferedReader are all
            // AutoCloseable; try-with-resources closes them in reverse order.
            try (FileSystem fs = FileSystem.get(conf);
                 FSDataInputStream is = fs.open(new Path(filename));
                 BufferedReader buff = new BufferedReader(new InputStreamReader(is))) {
                String line;
                // Read the whole file: readLine() returns null at EOF.
                while ((line = buff.readLine()) != null) {
                    System.out.println(line);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    

    2. 读文件

    package hdfs;
    
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.FSDataInputStream;
    
    public class readHDFS {
        /**
         * Reads the text file /user/hadoop/helloworld.txt from HDFS and
         * writes every line to standard output.
         *
         * Fixes over the original: corrected file name ("helloworld.txt"),
         * reads to end-of-file rather than exactly 3 lines, and uses
         * try-with-resources so the reader, the HDFS stream and the
         * FileSystem handle are always released.
         */
        public static void main(String[] args) {
            String filename = "/user/hadoop/helloworld.txt";
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://localhost:9000");
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            try (FileSystem fs = FileSystem.get(conf);
                 FSDataInputStream is = fs.open(new Path(filename));
                 BufferedReader buff = new BufferedReader(new InputStreamReader(is))) {
                // readLine() returns null once the file is exhausted.
                String line;
                while ((line = buff.readLine()) != null) {
                    System.out.println(line);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    

    二、 编写一个Java程序,查询分布式文件系统中的某文件的所有者、文件路径、文件长度、块大小、副本数目、访问时间和修改时间等信息。

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileSystem;
    
    import org.apache.hadoop.fs.*;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    
    public class FileLocation {
        /**
         * Queries an HDFS file and prints its metadata: path, owner/group,
         * permission, length, block size, replication factor, access time,
         * modification time, and the host of each data block.
         *
         * Fixes over the original: the file-level metadata is printed once
         * instead of once per block, the stray no-op call to getAccessTime()
         * is removed, the SimpleDateFormat is created once, hosts[0] is
         * guarded against an empty host list, and the FileSystem is closed.
         */
        public static void main(String[] args) {
            try {
                Configuration conf = new Configuration();
                conf.set("fs.defaultFS", "hdfs://localhost:9000");
                conf.set("fs.hdfs.impl",
                        "org.apache.hadoop.hdfs.DistributedFileSystem");
                FileSystem fs = FileSystem.get(conf);
                Path path = new Path("/user/hadoop/test/demo1.txt");
                FileStatus filestatus = fs.getFileStatus(path);

                // File-level metadata: print exactly once.
                System.out.println("文件路径:" + filestatus.getPath());
                System.out.println("块的大小:" + filestatus.getBlockSize());
                System.out.println("文件所有者:" + filestatus.getOwner() + ":"
                        + filestatus.getGroup());
                System.out.println("文件权限:" + filestatus.getPermission());
                System.out.println("文件长度:" + filestatus.getLen());
                System.out.println("备份数:" + filestatus.getReplication());
                // Reuse one formatter for both timestamps (millis since epoch).
                SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                System.out.println("访问时间:"
                        + formatter.format(new Date(filestatus.getAccessTime())));
                System.out.println("修改时间:"
                        + formatter.format(new Date(filestatus.getModificationTime())));

                // Per-block location info: one line per block.
                BlockLocation[] blks = fs.getFileBlockLocations(filestatus, 0,
                        filestatus.getLen());
                for (int i = 0; i < blks.length; i++) {
                    String[] hosts = blks[i].getHosts();
                    // A block may report no hosts (e.g. datanode unavailable).
                    String host = hosts.length > 0 ? hosts[0] : "unknown";
                    System.out.println("block_" + i + "_location:" + host);
                }
                fs.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    

    三、编写一个Java程序,进行写文件操作

    package hdfs;
    
    import org.apache.hadoop.conf.Configuration;  
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.Path;
    
    public class writeHDFS {
    
    	/**
    	 * @param args
    	 */
    	public static void main(String[] args) {
    		// TODO Auto-generated method stub
                try {
                        Configuration conf = new Configuration();  
                        conf.set("fs.defaultFS","hdfs://localhost:9000");
                        conf.set("fs.hdfs.impl","org.apache.hadoop.hdfs.DistributedFileSystem");
                        FileSystem fs = FileSystem.get(conf);
                        byte[] buff = "你的学号:123456 
    你的名字:张三 
    当前日期:2020.10.12".getBytes(); // 要写入的内容
                        String filename = "test1"; //要写入的文件名
                        FSDataOutputStream os = fs.create(new Path(filename));
                        os.write(buff,0,buff.length);
                        System.out.println("Create:"+ filename);
                        os.close();
                        fs.close();
                } catch (Exception e) {  
                        e.printStackTrace();  
                }  
        }  
    }
    

    四、编写一个Java程序,判断文件是否存在

    package hdfs;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    public class HDFSFileIfExist {
    	  /**
    	   * Checks whether the path "test" exists in HDFS and prints the
    	   * result to the console.
    	   *
    	   * Fix over the original: the FileSystem handle is opened in a
    	   * try-with-resources block so it is always closed (the original
    	   * leaked it).
    	   */
    	  public static void main(String[] args){
    	        String fileName = "test";
    	        Configuration conf = new Configuration();
    	        conf.set("fs.defaultFS", "hdfs://localhost:9000");
    	        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
    	        try (FileSystem fs = FileSystem.get(conf)) {
    	            if (fs.exists(new Path(fileName))) {
    	                System.out.println("文件存在");
    	            } else {
    	                System.out.println("文件不存在");
    	            }
    	        } catch (Exception e){
    	            e.printStackTrace();
    	        }
    	    }
    	}
    
    
    
  • 相关阅读:
    [译]6.1. Data Structures Featured in This Chapter 本章涉及到的数据结构
    Linux中进程结构描述符
    How to uninstall GRUB
    [每日一点]msgsnd函数代码跟踪
    开始从代码入手学习内核
    剖析MagicAjax
    Castle实践6-TypedFactory Facility
    移植MSPetShop3到Castle MonoRail -Model与DAL层的移植(AR)
    热血江湖外挂之【热血江湖自补器 Version 0.1】
    对 "闭包closure" 的一些见解
  • 原文地址:https://www.cnblogs.com/Lin1031/p/14350169.html
Copyright © 2020-2023  润新知