ZKe
-------------------
XML数据的一个块内的所有属性,转换成TXT文件的一行。众所周知XML文件是通过类似HTML的标签进行数据的定义如图所示
属性有id, article, discuss, insertTime, origin, person_id, time, transmit,整个数据由RECORD标签括住。
这是一个典型的括号匹配问题,可以定义一个信号量标记数据的开始与结束,另外可以声明一个String类型的变量作为数据缓冲区,遇到</RECORD>标签就将该变量的值写入新文件,遇到<RECORD>便清空该变量。遇到任一属性字段的标签,便将其内容写入缓冲变量。
处理方法:
/** Text of the record currently being assembled; {@code null} until the first {@code <RECORD>} line is seen. */
private String oneLine = null;

/**
 * {@code true} only directly after the call to {@link #process(String)} that consumed a
 * {@code </RECORD>} line, i.e. when {@link #oneLine} holds a complete record.
 */
private boolean canPrint = false;

/** {@code true} while inside an {@code <article>} element whose closing tag has not been read yet. */
private boolean inArticle = false;

/** Separator placed between the fields of one output line. */
private static final String FIELD_SEPARATOR = " | ";

/** Field tags in output order; the last one is not followed by a separator. */
private static final String[] FIELD_TAGS = {
    "id", "article", "discuss", "insertTime", "origin", "person_id", "time", "transmit"
};

/**
 * Consumes one line of the XML input and updates {@link #oneLine} and {@link #canPrint}.
 *
 * <ul>
 *   <li>{@code <RECORDS>} / {@code </RECORDS>} lines are ignored.</li>
 *   <li>{@code <RECORD>} starts a new, empty record.</li>
 *   <li>{@code </RECORD>} marks the record as ready: {@code canPrint} becomes {@code true}
 *       for this call only, provided a record was actually opened.</li>
 *   <li>A line starting with one of the field tags appends that field's text, followed by
 *       {@code " | "} for every field except {@code transmit}.</li>
 *   <li>An {@code <article>} whose closing tag is not on the same line is continued on the
 *       following lines until {@code </article>} is found; the pieces are concatenated
 *       without any extra character in between.</li>
 * </ul>
 *
 * Leading and trailing whitespace of the line is ignored, so indented XML is handled.
 * Any other line is skipped.
 *
 * @param line one line of the input file; {@code null} is treated as a line without content
 */
private void process(String line) {
    // The flag describes this call only; otherwise every line following </RECORD>
    // (e.g. </RECORDS> or a blank line) would make the caller write the record again.
    canPrint = false;
    if (line == null) {
        return;
    }
    // Trim before any tag comparison so that indented structural tags are recognised too.
    String text = line.trim();

    if (text.startsWith("<RECORDS>") || text.startsWith("</RECORDS>")) {
        return;
    }
    if (text.startsWith("<RECORD>")) {
        oneLine = "";
        inArticle = false;
        return;
    }
    if (text.startsWith("</RECORD>")) {
        inArticle = false;
        // Only report a record when one was opened; oneLine is null before the first <RECORD>.
        canPrint = oneLine != null;
        return;
    }

    if (inArticle) {
        int close = text.indexOf("</article>");
        if (close == -1) {
            // Middle line of a multi-line article: keep its text.
            append(text);
        } else {
            append(text.substring(0, close));
            append(FIELD_SEPARATOR);
            inArticle = false;
        }
        return;
    }

    for (int i = 0; i < FIELD_TAGS.length; i++) {
        String open = "<" + FIELD_TAGS[i] + ">";
        if (!text.startsWith(open)) {
            continue;
        }
        String close = "</" + FIELD_TAGS[i] + ">";
        int end = text.indexOf(close, open.length());
        boolean lastField = i == FIELD_TAGS.length - 1;
        if (end != -1) {
            append(text.substring(open.length(), end));
            if (!lastField) {
                append(FIELD_SEPARATOR);
            }
        } else {
            // No closing tag on this line: take the remainder instead of computing
            // an end index from the line length, which could be out of range.
            append(text.substring(open.length()));
            if ("article".equals(FIELD_TAGS[i])) {
                inArticle = true; // separator is added once </article> is reached
            } else if (!lastField) {
                append(FIELD_SEPARATOR);
            }
        }
        return;
    }
}

/** Appends {@code piece} to {@link #oneLine}, treating a {@code null} buffer as empty. */
private void append(String piece) {
    oneLine = (oneLine == null ? "" : oneLine) + piece;
}
XML数据的读取使用BufferedReader,写入TXT使用BufferedWriter,注意其中信号量的控制
public void printToTXTFile(){ File file =new File(this.path); File targetFile = new File("/root/myCodes/finalClassDesign/stardardAllData.txt"); try { FileReader fr = new FileReader(file); BufferedReader br = new BufferedReader(fr); FileWriter fw = new FileWriter(targetFile); BufferedWriter bw = new BufferedWriter(fw); String line = ""; while((line = br.readLine())!= null){ process(line); if(canPrint){ bw.write(oneLine); bw.newLine(); // System.out.println(oneLine); } } bw.flush(); bw.close(); fw.close(); br.close(); fr.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
其他部分就是类里面的其他属性了,比如源文件路径,目标文件路径,函数调用了,省略自己补充
转换后的TXT文件内容如下,效果挺好。我是用"|"作为分隔符,其实有弊端:"|"在正则表达式里是"或"(分支选择)运算符,用String.split按它切分时必须转义成"\\|"。大家改成逗号","或者分号";"甚至斜杠"/"什么的即可