参考:
http://blog.csdn.net/qy20115549/article/details/53556928
<tr> <td class='center'> <font class='blue'> 北京 </font> </td> <td> 36175 </td> <td> 2.03 </td> </tr> <tr> <td class='center'> <font class='blue'> 盐城 </font> </td> <td> 5466 </td> <td> -0.26 </td> </tr>
package Jsoup;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Reads an HTML fragment from a text file and tries to extract the city,
 * price and change values from each {@code <tr>} row with Jsoup.
 *
 * <p>This listing intentionally parses the raw fragment as-is. When the file
 * holds only bare {@code <tr>}/{@code <td>} rows (no enclosing
 * {@code <table>}), Jsoup's document parser discards those row tags, so the
 * {@code tr} selection below comes back empty and nothing is printed.
 */
public class Main {

    /**
     * Entry point: loads the file, parses it and prints the city of every row found.
     *
     * @param args unused
     * @throws IOException if the input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // Backslashes in a Windows path must be escaped inside a Java string literal.
        File file = new File("C:\\Users\\michael\\Desktop\\test.txt");

        // Read the whole file as UTF-8; try-with-resources closes the reader
        // (and the underlying stream) even if reading fails part-way.
        StringBuilder html = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line);
            }
        }

        Document document = Jsoup.parse(html.toString());
        Elements rows = document.select("tr");
        for (Element row : rows) {
            Elements cells = row.select("td");
            String city = row.select("td[class=center]").text();
            // price and rose are parsed to show how the numeric cells are
            // extracted; only the city is printed.
            int price = Integer.parseInt(cells.get(1).text());
            // Parse directly as double: going through float would carry
            // float rounding error into the result.
            double rose = Double.parseDouble(cells.get(2).text()) * 0.01;
            System.out.println(city);
        }
    }
}
问题分析
原来在解析html片段时,Jsoup会自动将其补全为完整的html文档。由于上面的片段只是表格里面的一部分内容,所以需要补全。但Jsoup只会添加 `<html>`、`<head>`、`<body>` 等标签信息,而且有可能会把一些信息(例如缺少外层 `<table>` 的 `<tr>`、`<td>` 标签)删除。
如下为上面html片段被Jsoup自动补全后的结果。这对解析没有什么帮助。
<html> <head></head> <body> <font class="blue">北京</font>361752.03 <font class="blue">盐城</font>5466-0.26 </body> </html>
解决办法
将表格对应的标签补齐。
// Wrap the bare <tr> rows in a complete table skeleton so that Jsoup keeps the
// <tr>/<td> tags instead of discarding them. The two replace() calls strip the
// {"data":" prefix and "} suffix from the raw string held in html.
String cc = "<html> <body> <table> <tbody>"
        + html.replace("{\"data\":\"", "").replace("\"}", "")
        + "</tbody> </table> </body> </html>";
Document document = Jsoup.parse(cc);
Elements elements = document.select("tr");
// Dump the parsed document to confirm the table structure was preserved.
System.out.println(document);
for (Element ele : elements) {
    Elements cells = ele.select("td");
    String city = ele.select("td[class=center]").text();
    // price and rose are parsed to show how the numeric cells are extracted;
    // only the city is printed.
    int price = Integer.parseInt(cells.get(1).text());
    // Parse directly as double: going through float would carry float
    // rounding error into the result.
    double rose = Double.parseDouble(cells.get(2).text()) * 0.01;
    System.out.println(city);
}