HtmlUnit 的 jar 包下载地址:http://sourceforge.net/projects/htmlunit/files/htmlunit/
Demo 代码如下:
public class AutoLogin { /** 登录页面 */ private static final String LOGIN_URL = "http://website/login.aspx"; /** 任务列表页面 */ private static final String TASK_LIST_URL = "http://website/Banli.aspx"; /** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { testHomePage(); } public static void testHomePage() throws Exception { final WebClient webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_8); webClient.getOptions().setThrowExceptionOnScriptError(false); //此行必须要加 webClient.getOptions().setCssEnabled(false); // webClient.getOptions().setJavaScriptEnabled(true); // webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); webClient.getOptions().setTimeout(300000); // 获取首页 HtmlPage page = (HtmlPage) webClient.getPage(LOGIN_URL); // 根据form的名字获取页面表单,也可以通过索引来获取:page.getForms().get(0) final HtmlForm form = page.getFormByName("form1"); // 用户名/密码 HtmlTextInput textUserName = form.getInputByName("txtUserName"); textUserName.setText("username"); HtmlPasswordInput txtPwd = form.getInputByName("txtPwd"); txtPwd.setText("pass"); //调用JS触发登录按钮 Page page1 = page.executeJavaScript("$('#btn').click()").getNewPage(); page1 = webClient.getPage(TASK_LIST_URL); System.out.println("*************************************************************************************"); System.out.println(page1.getWebResponse().getContentAsString()); System.out.println("*************************************************************************************"); System.out.println(""); System.out.println("Cookies : " + webClient.getCookieManager().getCookies().toString()); } }
搞不清 ASP.NET 内部是什么逻辑,试了很多方法都不行,查看了无数网站,无意中看到了下面这个配置:http://stackoverflow.com/questions/20352284/scraping-aspx-page-using-htmlunit
1 import java.net.MalformedURLException; 2 3 import com.gargoylesoftware.htmlunit.BrowserVersion; 4 import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; 5 import com.gargoylesoftware.htmlunit.WebClient; 6 import com.gargoylesoftware.htmlunit.html.HtmlElement; 7 import com.gargoylesoftware.htmlunit.html.HtmlPage; 8 9 public class teste { 10 11 public static void main(String args[]) throws FailingHttpStatusCodeException, MalformedURLException, IOException 12 { 13 HtmlPage page = null; 14 String url = "http://www.bmfbovespa.com.br/cias-listadas/empresas-listadas/BuscaEmpresaListada.aspx?Idioma=pt-br"; 15 16 WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17); 17 18 webClient.getOptions().setThrowExceptionOnScriptError(false); 19 webClient.getOptions().setCssEnabled(false); 20 webClient.getOptions().setJavaScriptEnabled(false); 21 webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); 22 webClient.getOptions().setTimeout(30000); 23 24 page = webClient.getPage( url ); 25 26 System.out.println("Current page: Empresas Listadas | BM&FBOVESPA"); 27 28 HtmlElement theElement1 = (HtmlElement) page.getElementById("ctl00_contentPlaceHolderConteudo_BuscaNomeEmpresa1_btnTodas"); 29 page = theElement1.click(); 30 31 System.out.println(page.asText()); 32 33 System.out.println("Test has completed successfully"); 34 } 35 36 }
最后测试下来,如果不加 webClient.getOptions().setThrowExceptionOnScriptError(false); 这一行,就会一直报下面这个错误:
1 Exception in thread "main" ======= EXCEPTION START ======== 2 Exception class=[java.lang.RuntimeException] 3 com.gargoylesoftware.htmlunit.ScriptException: Exception invoking click 4 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954) 5 at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) 6 at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) 7 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) 8 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) 9 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) 10 at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) 11 at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:878) 12 at com.suypower.AutoLogin12345.testHomePage(AutoLogin12345.java:48) 13 at com.suypower.AutoLogin12345.main(AutoLogin12345.java:23) 14 Caused by: java.lang.RuntimeException: Exception invoking click 15 at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:181) 16 at net.sourceforge.htmlunit.corejs.javascript.FunctionObject.call(FunctionObject.java:449) 17 at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1536) 18 at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) 19 at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105) 20 at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:411) 21 at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:309) 22 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3286) 23 at 
net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115) 24 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) 25 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) 26 ... 9 more 27 Caused by: com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot read property "nodeName" from null (http://xxxx/305000772#7) 28 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954) 29 at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) 30 at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) 31 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) 32 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) 33 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) 34 at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) 35 at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:354) 36 at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:415) 37 at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:271) 38 at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:293) 39 at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:799) 40 at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source) 41 at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:756) 42 at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170) 43 at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072) 44 at 
org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206) 45 at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330) 46 at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3126) 47 at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2093) 48 at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:920) 49 at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499) 50 at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452) 51 at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) 52 at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1039) 53 at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:252) 54 at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:198) 55 at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:271) 56 at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:159) 57 at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:478) 58 at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:352) 59 at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPageIfPossible(BaseFrameElement.java:183) 60 at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPage(BaseFrameElement.java:121) 61 at com.gargoylesoftware.htmlunit.html.HtmlPage.loadFrames(HtmlPage.java:1893) 62 at com.gargoylesoftware.htmlunit.html.HtmlPage.initialize(HtmlPage.java:227) 63 at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:485) 64 at com.gargoylesoftware.htmlunit.WebClient.loadDownloadedResponses(WebClient.java:2135) 65 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.doProcessPostponedActions(JavaScriptEngine.java:982) 66 at 
com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.processPostponedActions(JavaScriptEngine.java:1072) 67 at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:789) 68 at com.gargoylesoftware.htmlunit.html.HtmlImageInput.click(HtmlImageInput.java:152) 69 at com.gargoylesoftware.htmlunit.javascript.host.html.HTMLInputElement.click(HTMLInputElement.java:477) 70 at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 71 at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 72 at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 73 at java.lang.reflect.Method.invoke(Method.java:606) 74 at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:153) 75 ... 19 more 76 Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot read property "nodeName" from null (http://xxxx/305000772#7) 77 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3935) 78 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3919) 79 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:3944) 80 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:3960) 81 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefReadError(ScriptRuntime.java:3971) 82 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getObjectProp(ScriptRuntime.java:1519) 83 at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1243) 84 at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) 85 at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:118) 86 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) 87 at 
com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) 88 ... 65 more 89 Enclosed exception: 90 java.lang.RuntimeException: Exception invoking click 91 at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:181) 92 at net.sourceforge.htmlunit.corejs.javascript.FunctionObject.call(FunctionObject.java:449) 93 at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1536) 94 at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) 95 at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105) 96 at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:411) 97 at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:309) 98 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3286) 99 at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115) 100 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) 101 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) 102 at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) 103 at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) 104 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) 105 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) 106 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) 107 at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) 108 at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:878) 109 at 
com.suypower.AutoLogin12345.testHomePage(AutoLogin12345.java:48) 110 at com.suypower.AutoLogin12345.main(AutoLogin12345.java:23) 111 Caused by: com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot read property "nodeName" from null (http://xxx/305000772#7) 112 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954) 113 at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628) 114 at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513) 115 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836) 116 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812) 117 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800) 118 at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910) 119 at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:354) 120 at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:415) 121 at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:271) 122 at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:293) 123 at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:799) 124 at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source) 125 at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:756) 126 at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170) 127 at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072) 128 at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206) 129 at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330) 130 at 
org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3126) 131 at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2093) 132 at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:920) 133 at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499) 134 at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452) 135 at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) 136 at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1039) 137 at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:252) 138 at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:198) 139 at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:271) 140 at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:159) 141 at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:478) 142 at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:352) 143 at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPageIfPossible(BaseFrameElement.java:183) 144 at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPage(BaseFrameElement.java:121) 145 at com.gargoylesoftware.htmlunit.html.HtmlPage.loadFrames(HtmlPage.java:1893) 146 at com.gargoylesoftware.htmlunit.html.HtmlPage.initialize(HtmlPage.java:227) 147 at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:485) 148 at com.gargoylesoftware.htmlunit.WebClient.loadDownloadedResponses(WebClient.java:2135) 149 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.doProcessPostponedActions(JavaScriptEngine.java:982) 150 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.processPostponedActions(JavaScriptEngine.java:1072) 151 at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:789) 152 at 
com.gargoylesoftware.htmlunit.html.HtmlImageInput.click(HtmlImageInput.java:152) 153 at com.gargoylesoftware.htmlunit.javascript.host.html.HTMLInputElement.click(HTMLInputElement.java:477) 154 at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 155 at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 156 at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 157 at java.lang.reflect.Method.invoke(Method.java:606) 158 at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:153) 159 ... 19 more 160 Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot read property "nodeName" from null (http://xxx/305000772#7) 161 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3935) 162 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3919) 163 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:3944) 164 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:3960) 165 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefReadError(ScriptRuntime.java:3971) 166 at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getObjectProp(ScriptRuntime.java:1519) 167 at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1243) 168 at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798) 169 at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:118) 170 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827) 171 at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939) 172 ... 65 more 173 ======= EXCEPTION END ========
希望能帮助到你,晚安!