• Reading an HDFS file with the Java API

  The controller below lists an HDFS directory, finds the file named jsonObj, reads its contents into a string, and deserializes that JSON into a TableStatistic bean with fastjson. A FileSystem handle is created per user through a proxy UserGroupInformation and cached for reuse.


    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.charset.StandardCharsets;
    import java.security.PrivilegedExceptionAction;
    import java.text.SimpleDateFormat;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ConcurrentMap;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.FsStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.security.UserGroupInformation;
    import org.springframework.stereotype.Controller;
    import org.springframework.web.bind.annotation.RequestMapping;
    import org.springframework.web.bind.annotation.RequestMethod;
    import org.springframework.web.bind.annotation.ResponseBody;
    
    import com.alibaba.fastjson.JSON;
    import com.alibaba.fastjson.JSONObject;
    import bean.TableStatistic;
    
    
    @Controller
    @RequestMapping("/dfview")
    public class DataFrameViewController extends BaseController {
    
        private ConcurrentMap<String, UserGroupInformation> cache = new ConcurrentHashMap<String, UserGroupInformation>();
        private ConcurrentMap<String, FileSystem> fileSystemCache = new ConcurrentHashMap<String, FileSystem>();
        // Lazily initialized in loadHadoopConf(); assigning a fresh Configuration here
        // would make loadHadoopConf() return early and skip the *-site.xml resources.
        private Configuration hadoopConf;
        private static final String HDFS_JSON_NAME = "jsonObj";
    
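        /**
         * Web endpoint: reads the file named HDFS_JSON_NAME from a fixed HDFS
         * directory and returns its JSON content as a TableStatistic bean.
         */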
        @RequestMapping(value = "/getDFviewOfColumn", method = { RequestMethod.GET })
        @ResponseBody
        public TableStatistic getDFviewOfTable(String tableName)
                throws Exception {
            String user = "bi";
            String dirpath = "/user/cbt/datax/temp_transfer/zzzdes";
            Path homePath = new Path(dirpath);
            FileSystem fs = this.createFileSystem(user);
            FileStatus[] stats = fs.listStatus(homePath);
            StringBuilder txtContent = new StringBuilder();
            for (FileStatus stat : stats) {
                if (stat.isFile()
                        && HDFS_JSON_NAME.equalsIgnoreCase(stat.getPath().getName())) {
                    // Buffer the whole file, then decode once, so multi-byte UTF-8
                    // characters are never split across read() calls.
                    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
                    InputStream in = fs.open(stat.getPath());
                    try {
                        byte[] buffer = new byte[4096];
                        int len;
                        while ((len = in.read(buffer)) != -1) {
                            bytes.write(buffer, 0, len);
                        }
                    } finally {
                        in.close();
                    }
                    txtContent.append(new String(bytes.toByteArray(), StandardCharsets.UTF_8));
                    break;
                }
            }
            TableStatistic ts = JSON.parseObject(txtContent.toString(), TableStatistic.class);
            return ts;
        }
    
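        /** Standalone smoke test: lists the directory, dumps jsonObj, and parses it. */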
        public static void main(String[] args) throws Exception {
            DataFrameViewController controller = new DataFrameViewController();
            FileSystem fs = controller.createFileSystem("bi");
            Path homePath = new Path("/user/cbt/datax/temp_transfer/zzzdes");
            System.out.println("***********************************");
            FileStatus[] stats = fs.listStatus(homePath);
            for (int i = 0; i < stats.length; ++i) {
            if (stats[i].isFile()) {
                FileStatus file = stats[i];
                // Only the file named jsonObj carries the statistics JSON.
                if (!"jsonObj".equalsIgnoreCase(file.getPath().getName())) {
                    continue;
                }
                // Same chunked read as above: buffer all bytes, decode once.
                ByteArrayOutputStream bytes = new ByteArrayOutputStream();
                InputStream in = fs.open(file.getPath());
                try {
                    byte[] buffer = new byte[4096];
                    int len;
                    while ((len = in.read(buffer)) != -1) {
                        bytes.write(buffer, 0, len);
                    }
                } finally {
                    in.close();
                }
//                IOUtils.copyBytes(fs.open(file.getPath()), System.out, 4096, false);
                String txtContent = new String(bytes.toByteArray(), StandardCharsets.UTF_8);
                System.out.print(txtContent);
                System.out.println("************************************************");
                JSONObject jb = JSON.parseObject(txtContent);
                System.out.println("********!!!!! : " + jb.get("colUnique"));
                TableStatistic ts = JSON.parseObject(txtContent, TableStatistic.class);
                System.out.println("********!!!!! : " + ts.getColUnique().toString());

                } else if (stats[i].isDirectory()) {
                    System.out.println(stats[i].getPath().toString());
                } else if (stats[i].isSymlink()) {
                    System.out.println("&&&&&&&&" + stats[i].getPath().toString());
                }
    
            }
            // FsStatus reports overall capacity/usage of the filesystem.
            FsStatus fsStatus = fs.getStatus(homePath);
            System.out.println("used " + fsStatus.getUsed() + " of " + fsStatus.getCapacity() + " bytes");
        }
    
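        /**
         * Returns a per-user FileSystem, creating it inside the proxy user's
         * UGI context on first use and caching it afterwards.
         */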
        public FileSystem createFileSystem(String user) throws Exception {
            // Serve from the cache first so the configuration is not mutated needlessly.
            FileSystem cached = fileSystemCache.get(user);
            if (cached != null) {
                return cached;
            }
            final Configuration conf = loadHadoopConf();
            conf.set("hadoop.job.ugi", user);
//            conf.set("HADOOP_USER_NAME", user);
            UserGroupInformation ugi = getProxyUser(user);
            FileSystem fs = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
                public FileSystem run() throws Exception {
                    return FileSystem.get(conf);
                }
            });
            fileSystemCache.put(user, fs);
            return fs;
        }
    
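        // SimpleDateFormat is not thread-safe, so each thread keeps its own instance.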
        public static final ThreadLocal<SimpleDateFormat> appDateFormat = new ThreadLocal<SimpleDateFormat>() {
            @Override
            public SimpleDateFormat initialValue() {
                return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            }
        };
    
        private static final String[] HADOOP_CONF_FILES = { "core-site.xml",
                "hdfs-site.xml" };
    
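        /** Loads core-site.xml and hdfs-site.xml from the classpath once, then caches the result. */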
        private Configuration loadHadoopConf() {
            if (hadoopConf != null) {
                return hadoopConf;
            }
            Configuration conf = new Configuration();
            for (String fileName : HADOOP_CONF_FILES) {
                try {
                    InputStream inputStream = DataFrameViewController.class
                            .getClassLoader().getResourceAsStream(fileName);
                    conf.addResource(inputStream);
                } catch (Exception ex) {
                    // A missing config file is tolerated; Hadoop defaults apply.
                }
            }
            // Cache the loaded configuration for subsequent calls.
            hadoopConf = conf;
            return conf;
        }
    
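        /** Shutdown hook: closes all filesystems opened for cached UGIs and clears both caches. */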
        public void destroy() {
            for (UserGroupInformation ugi : cache.values()) {
                try {
                    FileSystem.closeAllForUGI(ugi);
                } catch (IOException ioe) {
                    // Best-effort cleanup: keep closing the remaining filesystems.
//                    Logger.error("Exception occurred while closing filesystems for "
//                            + ugi.getUserName(), ioe);
                }
            }
            fileSystemCache.clear();
            cache.clear();
        }
    
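        /** Returns a cached remote-user UGI for the given username, creating it on first use. */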
        private UserGroupInformation getProxyUser(String user) throws IOException {
            cache.putIfAbsent(user, UserGroupInformation.createRemoteUser(user));
            return cache.get(user);
        }
    }
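
  For cases where only the raw read is needed, without the Spring controller, caching, or proxy-user plumbing, here is a minimal sketch. It is an illustration rather than part of the original post: the fs.defaultFS URI is a placeholder, and in a real deployment the namenode address would come from core-site.xml on the classpath.

    import java.io.InputStream;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;

    public class HdfsReadExample {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // Placeholder namenode address; normally supplied by core-site.xml.
            conf.set("fs.defaultFS", "hdfs://namenode:8020");
            FileSystem fs = FileSystem.get(conf);
            InputStream in = fs.open(new Path("/user/cbt/datax/temp_transfer/zzzdes/jsonObj"));
            try {
                // Stream the file contents to stdout in 4 KB chunks.
                IOUtils.copyBytes(in, System.out, 4096, false);
            } finally {
                IOUtils.closeStream(in);
            }
        }
    }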
  • Original post: https://www.cnblogs.com/drawwindows/p/5451907.html