Complete Notes on HDFS Operations


    Deploying the server

    1: Download hadoop-1.0.0.tar.gz

    2: Install JDK 1.6

    3: Edit the configuration files

    core-site.xml (fs.default.name tells clients where the NameNode listens):

    <configuration>
      <property>
        <name>fs.default.name</name>
        <value>hdfs://10.53.132.52:9000</value>
      </property>
    </configuration>

    hadoop-env.sh

    export JAVA_HOME=/usr/java/jdk1.6.0_29

    hdfs-site.xml (a replication factor of 1 suits a single-node cluster; dfs.permissions=false turns off permission checks, which keeps testing simple):

    <configuration>
      <property>
        <name>dfs.replication</name>
        <value>1</value>
      </property>
      <property>
        <name>dfs.permissions</name>
        <value>false</value>
      </property>
    </configuration>

    mapred-site.xml (mapred.job.tracker is the JobTracker address):

    <configuration>
      <property>
        <name>mapred.job.tracker</name>
        <value>10.53.132.52:9001</value>
      </property>
    </configuration>

    In masters and slaves, set this machine's IP address (localhost will not work); see the example below.
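
    For a single-node setup like this one, both files end up holding just that one address, e.g.:

    $ cat conf/masters conf/slaves
    10.53.132.52
    10.53.132.52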

    Give the local account passwordless SSH access to itself:

    $ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
    $ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
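
    If key-based login still prompts for a password, permissions are the usual culprit; tighten them and confirm that a non-interactive login works:

    $ chmod 600 ~/.ssh/authorized_keys
    $ ssh 10.53.132.52 date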

    4: Start it up

    The filesystem must be formatted before the first run; after that, skip this step.

    $ bin/hadoop namenode -format

    Start:

    $ bin/start-all.sh

    Verify with jps:

    [root@linux52 ~]# jps
    20255 NameNode
    20565 SecondaryNameNode
    20824 TaskTracker
    20397 DataNode
    20677 JobTracker
    24738 Jps
    [root@linux52 ~]#
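
    Beyond jps, dfsadmin (part of Hadoop 1.x) quickly confirms that the DataNode registered with the NameNode:

    $ bin/hadoop dfsadmin -report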

    Trying the Java API

    1: Required libraries

    1:JDK 1.6
    2:commons-lang-2.2.jar
    3:commons-configuration-1.6.jar
    4:hadoop-core-1.0.0.jar
    5:commons-logging-1.1.1.jar
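
    As a rough sketch of compiling against these jars (the jar locations are assumptions; adjust the paths, and add your own main class to the classpath when running):

    $ javac -cp hadoop-core-1.0.0.jar:commons-logging-1.1.1.jar:commons-lang-2.2.jar:commons-configuration-1.6.jar HDFSUtil.java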

    2: Code

    import java.util.Iterator;
    import java.util.Map.Entry;

    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

    public class HDFSUtil {
        // commons-logging (listed among the jars above) stands in for the original custom Logs class
        private static final Log log = LogFactory.getLog(HDFSUtil.class);

        /**
         * Connect to the NameNode and return a FileSystem handle.
         */
        public synchronized static FileSystem getFileSystem(String ip, int port) {
            FileSystem fs = null;
            String url = "hdfs://" + ip + ":" + String.valueOf(port);
            Configuration config = new Configuration();
            config.set("fs.default.name", url);
            try {
                fs = FileSystem.get(config);
            } catch (Exception e) {
                log.error("connect to " + url + " failed: " + e);
            }
            return fs;
        }

        /**
         * Print a status report for every datanode.
         */
        public synchronized static void listNode(FileSystem fs) {
            DistributedFileSystem dfs = (DistributedFileSystem) fs;
            try {
                DatanodeInfo[] infos = dfs.getDataNodeStats();
                for (DatanodeInfo node : infos) {
                    System.out.println("HostName: " + node.getHostName() + "\n"
                            + node.getDatanodeReport());
                    System.out.println("--------------------------------");
                }
            } catch (Exception e) {
                log.error("list datanodes failed: " + e);
            }
        }

        /**
         * Print the effective configuration.
         */
        public synchronized static void listConfig(FileSystem fs) {
            Iterator<Entry<String, String>> entries = fs.getConf().iterator();
            while (entries.hasNext()) {
                Entry<String, String> item = entries.next();
                log.info(item.getKey() + ": " + item.getValue());
            }
        }

        /**
         * Create a directory, including any missing parents.
         */
        public synchronized static void mkdirs(FileSystem fs, String dirName) {
            // Building the path as workDir + "/" + dirName used to yield
            // hdfs://10.53.132.52:9000/user/guoyanwei//tmp/testdir, hence the absolute URI.
            String dir = "hdfs://10.53.132.52:9000" + dirName;
            Path src = new Path(dir);
            try {
                boolean succ = fs.mkdirs(src);
                if (succ) {
                    log.info("create directory " + dir + " succeeded.");
                } else {
                    log.info("create directory " + dir + " failed.");
                }
            } catch (Exception e) {
                log.error("create directory " + dir + " failed: " + e);
            }
        }

        /**
         * Delete a directory and everything below it.
         */
        public synchronized static void rmdirs(FileSystem fs, String dirName) {
            String dir = "hdfs://10.53.132.52:9000" + "/" + dirName;
            Path src = new Path(dir);
            try {
                boolean succ = fs.delete(src, true); // true = recursive
                if (succ) {
                    log.info("remove directory " + dir + " succeeded.");
                } else {
                    log.info("remove directory " + dir + " failed.");
                }
            } catch (Exception e) {
                log.error("remove directory " + dir + " failed: " + e);
            }
        }

        /**
         * Upload a local file or directory to HDFS.
         */
        public synchronized static void upload(FileSystem fs, String local,
                String remote) {
            Path dst = new Path("hdfs://10.53.132.52:9000" + remote);
            Path src = new Path(local);
            try {
                // delSrc = false (keep the local copy), overwrite = true
                fs.copyFromLocalFile(false, true, src, dst);
                log.info("upload " + local + " to " + remote + " succeeded.");
            } catch (Exception e) {
                log.error("upload " + local + " to " + remote + " failed: " + e);
            }
        }

        /**
         * Download an HDFS file or directory to the local filesystem.
         */
        public synchronized static void download(FileSystem fs, String local,
                String remote) {
            Path remotePath = new Path("hdfs://10.53.132.52:9000/" + remote);
            Path localPath = new Path(local);
            try {
                fs.copyToLocalFile(false, remotePath, localPath);
                log.info("download from " + remote + " to " + local + " succeeded.");
            } catch (Exception e) {
                log.error("download from " + remote + " to " + local + " failed: " + e);
            }
        }

        /**
         * Convert a byte count into a human-readable string.
         */
        public synchronized static String convertSize(long size) {
            String result;
            if (size < 1024) {
                result = String.valueOf(size) + " B";
            } else if (size < 1024 * 1024) {
                result = String.valueOf(size / 1024) + " KB";
            } else if (size < 1024 * 1024 * 1024) {
                result = String.valueOf(size / 1024 / 1024) + " MB";
            } else {
                result = String.valueOf(size / 1024 / 1024 / 1024) + " GB";
            }
            return result;
        }

        /**
         * Recursively walk files and directories on HDFS.
         */
        public synchronized static void listFile(FileSystem fs, String path) {
            Path dst = new Path("hdfs://10.53.132.52:9000/" + path);
            try {
                FileStatus[] fList = fs.listStatus(dst);
                for (FileStatus f : fList) {
                    if (null != f) {
                        if (f.isDir()) {
                            // recurse on the path component only, so the scheme and
                            // authority are not prepended a second time
                            listFile(fs, f.getPath().toUri().getPath());
                        } else {
                            System.out.println(convertSize(f.getLen()) + "\t\t" + f.getPath());
                        }
                    }
                }
            } catch (Exception e) {
                log.error("list " + path + " failed: " + e);
            }
        }

        /**
         * Create a file and write a string into it. writeUTF() prefixes the
         * payload with a 2-byte length, so read it back with readUTF() below
         * rather than treating the file as plain text.
         */
        public synchronized static void write(FileSystem fs, String path,
                String data) {
            Path dst = new Path("hdfs://10.53.132.52:9000/" + path);
            try {
                FSDataOutputStream dos = fs.create(dst);
                dos.writeUTF(data);
                dos.close();
                log.info("write content to " + path + " succeeded.");
            } catch (Exception e) {
                log.error("write content to " + path + " failed: " + e);
            }
        }

        /**
         * Read back a string written with writeUTF().
         */
        public synchronized static String read(FileSystem fs, String path) {
            String content = null;
            Path dst = new Path("hdfs://10.53.132.52:9000/" + path);
            try {
                FSDataInputStream dis = fs.open(dst);
                content = dis.readUTF();
                dis.close();
                log.info("read content from " + path + " succeeded.");
            } catch (Exception e) {
                log.error("read content from " + path + " failed: " + e);
            }
            return content;
        }
    }
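
    One caveat: write() stores data with writeUTF(), whose 2-byte length prefix means the file is not plain text (hadoop fs -cat will show the prefix). Below is a minimal drop-in addition to HDFSUtil that writes raw UTF-8 bytes instead; the name writeText is illustrative, not from the original.

        /**
         * Sketch of a variant of write(): stores plain UTF-8 bytes with no
         * length prefix, producing an ordinary text file.
         */
        public synchronized static void writeText(FileSystem fs, String path,
                String data) {
            Path dst = new Path("hdfs://10.53.132.52:9000/" + path);
            try {
                FSDataOutputStream dos = fs.create(dst);
                dos.write(data.getBytes("UTF-8"));
                dos.close();
                log.info("write text to " + path + " succeeded.");
            } catch (Exception e) {
                log.error("write text to " + path + " failed: " + e);
            }
        }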

    3: Invocation

    public static void main(String[] args) throws Exception {
        FileSystem fs = null;
        try {
            fs = HDFSUtil.getFileSystem("10.53.132.52", 9000);

            String dirName = "/tmp/test/aa";
            String localSrc = "d://8.log";
            String dst = "/tmp/8.log";

            // upload a local file
            //HDFSUtil.upload(fs, localSrc, dst);
            //HDFSUtil.listNode(fs);
            // create a directory
            //HDFSUtil.mkdirs(fs, dirName);
            // remove a directory
            //HDFSUtil.rmdirs(fs, dirName);
            // download a file to the local disk
            //HDFSUtil.download(fs, localSrc, dst);
            // create a file
            //HDFSUtil.write(fs, "/tmp/9.log", "test-测试");
            // read a file
            //String content = HDFSUtil.read(fs, "/tmp/9.log");
            //System.out.println(content);
            // walk a directory tree
            //HDFSUtil.listFile(fs, "/tmp/");
            // list datanodes
            //HDFSUtil.listNode(fs);
            // dump the configuration
            //HDFSUtil.listConfig(fs);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (fs != null) {
                fs.close();
            }
        }

        System.out.println("over");
    }

    4: Admin web interfaces

    http://10.53.132.52:50070 (NameNode / HDFS status)
    http://10.53.132.52:50030 (JobTracker / MapReduce status)

    Execution results

    Afterword

    1: For masses of small files, a dedicated store such as TFS deserves a closer look.

    2: For access from other languages, Avro deserves a closer look.
