使用 Java 解析 HTML 很简单:借助 jsoup 库(jsoup.jar)即可,它的选择器用法和 jQuery 差不多。
下面是两个例子
1.解析web页面
1 import org.jsoup.Connection; 2 import org.jsoup.Jsoup; 3 import org.jsoup.nodes.Document; 4 import org.jsoup.nodes.Element; 5 import org.jsoup.select.Elements; 6 7 8 public class ParseWebPage { 9 10 11 12 public static void main(String[] args) throws Exception { 13 14 Connection conn = Jsoup.connect("http://www.hao123.com"); 15 Document document = conn.get(); 16 17 //解析出 class为feedback的li标签 的后代a标签元素 18 Elements elements = document.select("li.feedback a"); 19 20 for (Element element : elements) { 21 System.out.println(element.html()); 22 System.out.println(element.attr("href")); 23 } 24 25 } 26 27 28 }
2.解析本地页面
1 import java.io.File; 2 3 import org.jsoup.Jsoup; 4 import org.jsoup.nodes.Document; 5 import org.jsoup.nodes.Element; 6 import org.jsoup.select.Elements; 7 8 public class ParseLocalPage { 9 10 public static void main(String[] args) throws Exception { 11 File file =new File("E:/JavaScriptDojo/jqueryui/测试Button.html"); 12 Document document = Jsoup.parse(file, "utf-8"); 13 14 Elements es = document.select("#getDPvalues"); 15 16 for (Element element : es) { 17 System.out.printf("%s\t%s\n" ,element.html() ,element.val()); 18 } 19 } 20 }
很简单吧