import com.bigdata.kafka.originalLog.HDFSOutputStreamPool;
import com.bigdata.kafka.originalLog.StringUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

import java.io.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;

/**
 * Test class for HDFS reads and appends, date parsing, and access-log splitting.
 */
public class TestHDFS {

    static List<File> filelist = new ArrayList<File>();

    /**
     * Reads an HDFS file into memory and prints it.
     */
    @Test
    public void testReadHDFS() throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = fs.open(new Path("hdfs://mycluster/user/centos/words.txt"));
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        IOUtils.copyBytes(in, baos, 1024);
        in.close();
        System.out.println(new String(baos.toByteArray()));
    }

    /**
     * Merges all local .txt files under a directory into one HDFS file via append.
     * Note: this opens and closes the append stream once per line, which is very
     * costly; a variant that hoists the stream out of the loop is sketched after
     * this class.
     */
    @Test
    public void testReadHDFS1() throws IOException {
        List<File> filelist = getFileList("E:\\MyProject\\bigdata\\data");
        System.out.println(filelist.size());
        FSDataOutputStream out = null;
        for (File f : filelist) {
            BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
            String line = null;
            while ((line = in.readLine()) != null) {
                String newline = line + "________2100100";
                // out = HDFSOutputStreamPool.getInstance().takeOutputStream("/test/merge.txt");
                Configuration conf = new Configuration();
                FileSystem fs = FileSystem.get(conf);
                out = fs.append(new Path("/test/merge.txt")); // the target file must already exist
                out.write(newline.getBytes());
                out.write("\n".getBytes()); // newline keeps each appended record on its own line
                out.hsync();
                out.close();
            }
            in.close();
        }
    }

    /**
     * Recursively collects all .txt files under strPath into the static filelist.
     */
    public static List<File> getFileList(String strPath) {
        File dir = new File(strPath);
        File[] files = dir.listFiles(); // all entries directly under this directory
        if (files != null) {
            for (int i = 0; i < files.length; i++) {
                String fileName = files[i].getName();
                if (files[i].isDirectory()) { // directory: recurse; results accumulate in filelist
                    getFileList(files[i].getAbsolutePath());
                } else if (fileName.endsWith("txt")) { // keep files whose name ends with .txt
                    String strFileName = files[i].getAbsolutePath();
                    System.out.println("---" + strFileName);
                    filelist.add(files[i]);
                }
            }
        }
        return filelist;
    }

    /**
     * Round-trips a log timestamp such as "28/Feb/2017:12:17:48".
     */
    @Test
    public void testDate() throws ParseException {
        SimpleDateFormat sdf = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss", Locale.US);
        Date date = new Date();
        System.out.println(sdf.format(date));
        Date d = sdf.parse("28/Feb/2017:12:17:48");
        System.out.println(d);
        SimpleDateFormat localSDF = new SimpleDateFormat("yyyy/MM/dd/HH/mm", Locale.US);
        System.out.println(localSDF.format(d));
    }

    @Test
    public void test1() {
        String log = /* new String(msg) */
                "s203|||192.168.231.1|||-|||28/Feb/2017:15:34:45 +0800|||GET /eshop/phone/mi.html HTTP/1.0|||200|||213|||-|||ApacheBench/2.3|||-";
        String[] arr = StringUtil.splitLog(log);
        // hostname
        String hostname = StringUtil.getHostname(arr);
        // date string in yyyy/MM/dd/HH/mm form
        String dateStr = StringUtil.formatYyyyMmDdHhMi(arr);
        // target path on HDFS
        String rawPath = "/user/centos/eshop/raw/" + dateStr + "/" + hostname + ".log";
        // write the data to HDFS (left unimplemented here; see the sketch after this class)
        System.out.println(log);
    }
}
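
/*
 * A minimal sketch (not part of the original tests, reusing the imports above)
 * of the merge in testReadHDFS1 with the FileSystem and append stream hoisted
 * out of the loop, in the spirit of the commented-out HDFSOutputStreamPool
 * line. Like the test, it assumes the target file already exists on HDFS and
 * that append is enabled on the cluster; mergeInto is a hypothetical helper.
 */
class HdfsAppendMergeSketch {

    static void mergeInto(List<File> localFiles, String hdfsPath) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Open the append stream once for the whole merge instead of once per line.
        FSDataOutputStream out = fs.append(new Path(hdfsPath));
        try {
            for (File f : localFiles) {
                BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
                try {
                    String line;
                    while ((line = in.readLine()) != null) {
                        // Same record suffix and separator as testReadHDFS1.
                        out.write((line + "________2100100\n").getBytes());
                    }
                } finally {
                    in.close();
                }
            }
            out.hsync(); // flush to the datanodes once, after all records are written
        } finally {
            out.close();
        }
    }
}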
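
/*
 * A minimal sketch (not part of the original tests) of the "write the data to
 * HDFS" step that test1 only notes in a comment: create the raw-log file on
 * first write, append on later writes. writeLog is a hypothetical helper; the
 * path layout is the rawPath built in test1.
 */
class HdfsRawLogWriteSketch {

    static void writeLog(String rawPath, String log) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(rawPath);
        // append() fails if the file does not exist yet, so create it on first use.
        FSDataOutputStream out = fs.exists(p) ? fs.append(p) : fs.create(p);
        try {
            out.write((log + "\n").getBytes());
            out.hsync();
        } finally {
            out.close();
        }
    }
}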