• Hadoop Java Hdfs API


    1. 在本地文件系统生成一个文本文件,读入文件,将其第101-120字节的内容写入HDFS成为一个新文件
    2. 在HDFS中生成文本文件,读入这个文件,将其第101-120字节的内容写入本地文件系统成为一个新文件

    环境部署:http://www.cnblogs.com/dopeter/p/4630791.html

    FileBuilder.java

    生成文件的工具类,包含在本地生成文件,在Hadoop生成文件,读取Hadoop指定目录的文件

      1 package story;
      2 
      3 import java.io.ByteArrayInputStream;
      4 import java.io.ByteArrayOutputStream;
      5 import java.io.FileNotFoundException;
      6 import java.io.FileWriter;
      7 import java.io.IOException;
      8 import java.io.InputStream;
      9 import java.io.OutputStream;
     10 import java.io.PrintWriter;
     11 import java.io.UnsupportedEncodingException;
     12 import java.net.URI;
     13 
     14 import org.apache.hadoop.conf.Configuration;
     15 import org.apache.hadoop.fs.FileSystem;
     16 import org.apache.hadoop.fs.Path;
     17 import org.apache.hadoop.io.IOUtils;
     18 import org.apache.hadoop.util.Progressable;
     19 
     20 public class FileBuilder {
     21     
     22     //build default test data
     23     public static String BuildTestFileContent()
     24     {
     25         StringBuilder contentBuilder=new StringBuilder();
     26         
     27         for(int loop=0;loop<100;loop++)
     28             contentBuilder.append(String.valueOf(loop));
     29         
     30         String content =contentBuilder.toString();
     31         
     32         return content;
     33     }    
     34     
     35     //build local file
     36     public static void BuildLocalFile(String buildPath,String content) throws FileNotFoundException, UnsupportedEncodingException
     37     {        
     38         /*
     39         FileWriter fileWriter;
     40         try {
     41             fileWriter = new FileWriter(buildPath);
     42             
     43             fileWriter.write(content);        
     44             fileWriter.close();
     45         } catch (IOException e) {            
     46             e.printStackTrace();
     47         }
     48         */
     49         
     50 
     51 
     52             PrintWriter out = new java.io.PrintWriter(new java.io.File(buildPath), "UTF-8");
     53             String text = new java.lang.String(content);
     54             out.print(text);
     55             out.flush();
     56             out.close();
     57 
     58     }
     59     
     60     //upload file to hadoop
     61     public static void BuildHdfsFile(String buildPath,byte[] fileContent) throws IOException
     62     {
     63         //convert to inputstream
     64         InputStream inputStream=new ByteArrayInputStream(fileContent); 
     65         
     66         //hdfs upload
     67         Configuration conf = new Configuration();
     68         
     69         FileSystem fs = FileSystem.get(URI.create(buildPath), conf);
     70         OutputStream outputStream = fs.create(new Path(buildPath), new Progressable() {
     71             public void progress() {
     72                 System.out.print(".");        
     73             }
     74         });
     75         
     76         IOUtils.copyBytes(inputStream, outputStream, fileContent.length, true);
     77     }
     78     
     79     //wrapper for upload file
     80     public static void BuildHdfsFile(String buildPath,String fileContent) throws IOException
     81     {
     82         BuildHdfsFile(buildPath,fileContent.getBytes());
     83     }
     84     
     85     //download file from hadoop
     86     public static byte[] ReadHdfsFile(String readPath)throws IOException
     87     {
     88         byte[] fileBuffer;
     89         Configuration conf = new Configuration();
     90         FileSystem fs = FileSystem.get(URI.create(readPath), conf);
     91         InputStream in = null;
     92         ByteArrayOutputStream out=new ByteArrayOutputStream();
     93         try {
     94         in = fs.open(new Path(readPath));
     95         IOUtils.copyBytes(in, out, 4096, false);
     96         
     97         fileBuffer=out.toByteArray();
     98         } finally {
     99         IOUtils.closeStream(in);
    100         }
    101         
    102         return fileBuffer;
    103     }
    104     
    105 }
    View Code

    FileContentHandler.java

    文件内容的处理类,读取本地文件时设置起始Position与截取的长度,读取从Hadoop下载的文件时设置起始Position与截取的长度

     1 package story;
     2 
     3 import java.io.IOException;
     4 import java.io.RandomAccessFile;
     5 import java.io.UnsupportedEncodingException;
     6 
     /**
      * Helpers that cut a fixed-length slice out of a local file or out of an
      * in-memory byte buffer.
      */
     public class FileContentHandler {

         /**
          * Reads {@code readLength} bytes from a local file, starting at the
          * zero-based byte offset {@code beginPosition}. The file length is
          * printed to standard output as a side effect.
          *
          * @param filePath      path of the local file to read
          * @param beginPosition zero-based offset of the first byte to read
          * @param readLength    number of bytes to read
          * @return the requested bytes, or an empty array when the requested range
          *         does not lie entirely inside the file or the file cannot be read
          * @throws IllegalArgumentException if {@code readLength} is negative
          */
         public static byte[] GetContentByLocalFile(String filePath,long beginPosition,int readLength)
         {
             if (readLength < 0) {
                 throw new IllegalArgumentException("readLength must not be negative: " + readLength);
             }

             RandomAccessFile accessFile = null;
             try {
                 accessFile = new RandomAccessFile(filePath, "r");
                 long length = accessFile.length();
                 System.out.println(length);

                 // Phrased as a subtraction so a very large beginPosition cannot
                 // overflow; a slice ending exactly at end-of-file is accepted.
                 if (beginPosition < 0 || length < readLength || beginPosition > length - readLength) {
                     return new byte[0];
                 }

                 byte[] readBuffer = new byte[readLength];
                 accessFile.seek(beginPosition);
                 // readFully keeps reading until the buffer is full, whereas a
                 // single read() call may legally return fewer bytes.
                 accessFile.readFully(readBuffer);
                 return readBuffer;
             } catch (IOException e) {
                 // Best effort: report the problem and signal "nothing read".
                 e.printStackTrace();
                 return new byte[0];
             } finally {
                 if (accessFile != null) {
                     try {
                         accessFile.close();
                     } catch (IOException ignored) {
                         // Closing a read-only handle failed; nothing left to recover.
                     }
                 }
             }
         }

         /**
          * Decodes a slice of {@code buffer} as UTF-8 text and prints the result
          * to standard output. If the slice cuts through a multi-byte character,
          * the damaged bytes decode to replacement characters.
          *
          * @param buffer        bytes to take the slice from
          * @param beginPosition zero-based index of the first byte of the slice
          * @param readLength    number of bytes in the slice
          * @return the decoded text
          * @throws ArrayIndexOutOfBoundsException if the slice does not lie entirely inside {@code buffer}
          * @throws UnsupportedEncodingException   if UTF-8 is not supported
          */
         public static String GetContentByBuffer(byte[] buffer,int beginPosition,int readLength) throws UnsupportedEncodingException
         {
             if (beginPosition < 0 || readLength < 0 || beginPosition > buffer.length - readLength) {
                 throw new ArrayIndexOutOfBoundsException(
                         "Slice at offset " + beginPosition + " with length " + readLength
                         + " is outside a buffer of " + buffer.length + " bytes");
             }

             String content = new String(buffer, beginPosition, readLength, "UTF-8");
             System.out.println(content);

             return content;
         }

     }
    View Code

    UploadStory.java

    1的流程代码

     1 package story;
     2 
     3 public class UploadStory {
     4     
     5     //public static void main(String[] args) throws Exception {}
     6     
     7     public static void main(String[] args) throws Exception {        
     8         //also define value of parameter from arguments.
     9         String localFilePath="F:/bulid.txt";
    10         String hdfsFilePath="hdfs://hmaster0:9000/user/14699_000/input/build.txt";
    11         int readBufferSize=20;
    12         long fileBeginReadPosition=101;
    13         
    14         //upload story begin.
    15         
    16         //build local file        
    17         FileBuilder.BuildLocalFile(localFilePath,FileBuilder.BuildTestFileContent());        
    18         //read file        
    19         byte[] uploadBuffer=FileContentHandler.GetContentByLocalFile(localFilePath, fileBeginReadPosition, readBufferSize);    
    20         //upload 
    21         if(uploadBuffer!=null&&uploadBuffer.length>0)        
    22             FileBuilder.BuildHdfsFile(hdfsFilePath, uploadBuffer);
    23 
    24     }
    25     
    26 }
    View Code

    DownloadStory.java

    2的流程代码

     1 package story;
     2 
     3 public class DownloadStory {
     4     
     5     //public static void main(String[] args) throws Exception {        }
     6     
     7     
     8     public static void main(String[] args) throws Exception {        
     9         //also define value of parameter from arguments.
    10         String localFilePath="F:/bulid.txt";
    11         String hdfsFilePath="hdfs://hmaster0:9000/user/14699_000/input/build2.txt";
    12         int readBufferSize=20;
    13         int fileBeginReadPosition=101;
    14         
    15         //build file to hadoop
    16         FileBuilder.BuildHdfsFile(hdfsFilePath, FileBuilder.BuildTestFileContent());
    17         
    18         //download file 
    19         byte[] readBuffer=FileBuilder.ReadHdfsFile(hdfsFilePath);
    20         
    21         //handle buffer
    22         String content=FileContentBuilder.GetContentByBuffer(readBuffer, fileBeginReadPosition, readBufferSize);
    23         
    24         //write to local file
    25         FileBuilder.BuildLocalFile(localFilePath, content);
    26     }
    27     
    28 }
    View Code
  • 相关阅读:
    [LeetCode 220.] 存在重复元素 III
    C++ 构造函数 & 析构函数
    [LeetCode 891.] 子序列宽度之和【hard】
    [LeetCode 447.] Number of Boomerangs
    HJ93 数组分组
    HJ77 火车进站
    [LeetCode 338.] 比特位计数
    线段树
    大数量问题的一般解决方法
    字典树
  • 原文地址:https://www.cnblogs.com/dopeter/p/4631840.html
Copyright © 2020-2023  润新知