http://wuhongyu.javaeye.com/blog/461477
前段时间有这样的一个需求: 尽可能快地算出Hadoop系统里文件的MD校验码. 我曾经想过利用Java的Digest类来读取文件, 然后把工作交给Digest类来完成, 可没想到的是Java的这个类算MD校验码的效率极其低下: 我在XP上算50M的文件, 居然花费了8秒钟的时间, 于是直接放弃. 这个Demo我也顺便推荐一下:
http://www.javablogging.com/?s=Java+Digest+SHA1+MD5
后来google该问题的解决方案, 有一个比较有意思的帖子Fast MD5 Implementation in Java, 呵呵, 由于时间关系, 没来得及看, 分享一下:
http://www.twmacinta.com/myjava/fast_md5.php
Fast MD5.. 总给我一个不太舒服的感觉, 最后还是决定放弃使用. 由于Hadoop运行于Linux 环境, 回过神来我想到了Linux上的openssl库. 大伙都知道openssl 里就有计算数据摘要的命令, 例如: $ cat XXX | openssl sha1.
这里有一个问题, 由于hadoop的文件是存在于集群的, 而openssl 是接受本地文件作为参数的, 我可以把文件从HDFS里读出, 然后再构造文件供openssl 计算. 不过这种方式太囧.
我的解决方法是利用Runtime获取运行时对象, 直接执行openssl sha1命令. 其实当系统执行该命令的时候会等待输入, 此时我可以通过文件输入流读出数据, 再写到该进程等待输入的输出流上。
Java 代码
- // TODO this is a job. not a filedescriptor
- protected void handle(FileDescriptor job) throws IOException {
- if (job == null || job.path == null) {
- log.warn("Invalid job: " + job);
- }
- // wait for GC.
- Configuration conf = new Configuration();
- Path path = new Path(job.path);
- FileSystem fs = path.getFileSystem(conf);
- if (!fs.exists(path) || !fs.isFile(path)) {
- log.warn("*HADLEJOB file does not exist for path: " + path);
- return;
- }
- InputStream is = fs.open(path);
- // Run a command
- String cmd = "openssl sha1";
- boolean completed = false;
- Process process = null; // Sub process used to execute the command
- int exitCode;
- process = Runtime.getRuntime().exec(cmd);
- BufferedOutputStream os = new BufferedOutputStream(process.getOutputStream(), 4096);
- byte buf[] = new byte[4096];
- while (true) {
- int bytesRead = is.read(buf);
- if (bytesRead == -1) {
- break;
- }
- os.write(buf, 0, bytesRead);
- }
- os.flush();
- os.close();
- final BufferedReader errReader =
- new BufferedReader(new InputStreamReader(process.getErrorStream()));
- BufferedReader inReader =
- new BufferedReader(new InputStreamReader(process.getInputStream()));
- final StringBuffer errMsg = new StringBuffer();
- // read error and input streams as this would free up the buffers
- // free the error stream buffer
- Thread errThread = new Thread(){
- public void run() {
- try {
- String line = errReader.readLine();
- while ((line != null) && !isInterrupted()) {
- errMsg.append(line);
- errMsg.append(System.getProperty("line.separator"));
- line = errReader.readLine();
- }
- } catch (IOException ioe) {
- log.warn("Error reading the error stream", ioe);
- }
- }
- };
- try {
- errThread.start();
- } catch (IllegalStateException ise) {
- }
- try {
- String line = inReader.readLine();
- if (line == null) {
- throw new IOException("Exception a line not the end of stream");
- }
- job.set_sha1(line.trim());
- // clear the input stream buffer
- line = inReader.readLine();
- while (line != null) {
- line = inReader.readLine();
- }
- // wait for the process to finish and check the exit code
- exitCode = process.waitFor();
- try {
- errThread.join();
- } catch (InterruptedException ie) {
- log.warn("Interrupted while reading the error stream", ie);
- }
- completed = true;
- if (exitCode != 0) {
- throw new IOException(exitCode + errMsg.toString());
- }
- } catch (InterruptedException ie) {
- throw new IOException(ie.toString());
- } finally {
- // close the input stream
- try {
- inReader.close();
- } catch (IOException e) {
- log.warn("Error whilke closing the input stream", e);
- }
- if (!completed) {
- errThread.interrupt();
- }
- try {
- errReader.close();
- }catch(IOException ioe){
- log.warn("Error while closing the error stream", ioe);
- }
- process.destroy();
- }
- }
该执行方法运行于线程池中, 下一个帖子我将记录真正应用中线程池的实现, 提供线程池的大小设置, shrink等实现.
犯了一个低级错误
Java代码
- # if (job == null || job.path == null) {
- # log.warn("Invalid job: " + job);
- # }
改成:
Java代码
- if (job == null || job.path == null) {
- log.warn("Invalid job: " + job);
- return;
- }