• htmlunit模拟登录


    htmlunit jar项目路径:http://sourceforge.net/projects/htmlunit/files/htmlunit/

    demo代码如下

    /**
     * Demo that logs in to a web site with HtmlUnit and then prints the task-list page
     * fetched with the same (now authenticated) client.
     */
    public class AutoLogin {

        /** Login page. */
        private static final String LOGIN_URL = "http://website/login.aspx";
        /** Task list page. */
        private static final String TASK_LIST_URL = "http://website/Banli.aspx";

        /**
         * Entry point; simply runs {@link #testHomePage()}.
         *
         * @param args command-line arguments (unused)
         * @throws Exception if any step of the login flow fails
         */
        public static void main(String[] args) throws Exception {
            testHomePage();
        }

        /**
         * Fills in the login form, triggers the login button through JavaScript, then loads the
         * task-list page with the same client and prints its content and the client's cookies.
         *
         * @throws Exception if a page cannot be loaded or the expected form/inputs are missing
         */
        public static void testHomePage() throws Exception {
            final WebClient webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_8);
            try {
                // Required: without this, script errors on the page abort the run with a ScriptException.
                webClient.getOptions().setThrowExceptionOnScriptError(false);
                webClient.getOptions().setCssEnabled(false);
                // Optional switches, left disabled in this demo:
                // webClient.getOptions().setJavaScriptEnabled(true);
                // webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
                webClient.getOptions().setTimeout(300000);

                // Load the login page.
                final HtmlPage page = webClient.getPage(LOGIN_URL);

                // Look the form up by name; it could also be fetched by index: page.getForms().get(0)
                final HtmlForm form = page.getFormByName("form1");

                // User name / password.
                final HtmlTextInput textUserName = form.getInputByName("txtUserName");
                textUserName.setText("username");
                final HtmlPasswordInput txtPwd = form.getInputByName("txtPwd");
                txtPwd.setText("pass");

                // Trigger the login button through JavaScript. The resulting page is not needed:
                // the task list is requested explicitly below using the same client.
                page.executeJavaScript("$('#btn').click()");

                final Page taskListPage = webClient.getPage(TASK_LIST_URL);

                System.out.println("*************************************************************************************");
                System.out.println(taskListPage.getWebResponse().getContentAsString());
                System.out.println("*************************************************************************************");
                System.out.println("");
                System.out.println("Cookies : " + webClient.getCookieManager().getCookies().toString());
            } finally {
                // Release the client's windows and background JavaScript threads even on failure.
                // NOTE(review): closeAllWindows() fits the HtmlUnit release implied by
                // INTERNET_EXPLORER_8; later releases offer close() instead - confirm against the jar in use.
                webClient.closeAllWindows();
            }
        }
    }
    

      搞不清ASP.NET内部是什么逻辑,试了很多方法都不行,查看了无数网站,无意中看到了这个配置:http://stackoverflow.com/questions/20352284/scraping-aspx-page-using-htmlunit

     1 import java.net.MalformedURLException;
     2 
     3 import com.gargoylesoftware.htmlunit.BrowserVersion;
     4 import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
     5 import com.gargoylesoftware.htmlunit.WebClient;
     6 import com.gargoylesoftware.htmlunit.html.HtmlElement;
     7 import com.gargoylesoftware.htmlunit.html.HtmlPage;
     8 
     9 public class teste {
    10 
    11     /**
    12      * Loads the page at the hard-coded BM&amp;FBOVESPA URL, clicks the element with id
    13      * {@code ctl00_contentPlaceHolderConteudo_BuscaNomeEmpresa1_btnTodas} and prints the
    14      * text of the page returned by that click.
    15      *
    16      * @param args command-line arguments (unused)
    17      * @throws FailingHttpStatusCodeException declared by the HtmlUnit calls; throwing on
    18      *         failing status codes is switched off for this client
    19      * @throws MalformedURLException if the URL is malformed
    20      * @throws java.io.IOException if loading the page or performing the click fails
    21      * @throws IllegalStateException if the element to click is not present in the page
    22      */
    23     public static void main(String args[]) throws FailingHttpStatusCodeException, MalformedURLException, java.io.IOException
    24     {
    25         String url = "http://www.bmfbovespa.com.br/cias-listadas/empresas-listadas/BuscaEmpresaListada.aspx?Idioma=pt-br";
    26         String buttonId = "ctl00_contentPlaceHolderConteudo_BuscaNomeEmpresa1_btnTodas";
    27 
    28         WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
    29         try {
    30             webClient.getOptions().setThrowExceptionOnScriptError(false);
    31             webClient.getOptions().setCssEnabled(false);
    32             // JavaScript is turned off for this client, so no page scripts run on load or click.
    33             webClient.getOptions().setJavaScriptEnabled(false);
    34             webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
    35             webClient.getOptions().setTimeout(30000);
    36 
    37             HtmlPage page = webClient.getPage( url );
    38 
    39             System.out.println("Current page: Empresas Listadas | BM&FBOVESPA");
    40 
    41             HtmlElement theElement1 = (HtmlElement) page.getElementById(buttonId);
    42             if (theElement1 == null) {
    43                 // Fail with a clear message instead of a NullPointerException on click().
    44                 throw new IllegalStateException("Element not found: " + buttonId);
    45             }
    46             page = theElement1.click();
    47 
    48             System.out.println(page.asText());
    49 
    50             System.out.println("Test has completed successfully");
    51         } finally {
    52             // Release the client's windows and threads even when a step above fails.
    53             webClient.closeAllWindows();
    54         }
    55     }
    56 
    57 }

    最后测试下来,如果不加 webClient.getOptions().setThrowExceptionOnScriptError(false);,就一直报这个错误:

      1 Exception in thread "main" ======= EXCEPTION START ========
      2 Exception class=[java.lang.RuntimeException]
      3 com.gargoylesoftware.htmlunit.ScriptException: Exception invoking click
      4     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954)
      5     at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628)
      6     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513)
      7     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836)
      8     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812)
      9     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800)
     10     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910)
     11     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:878)
     12     at com.suypower.AutoLogin12345.testHomePage(AutoLogin12345.java:48)
     13     at com.suypower.AutoLogin12345.main(AutoLogin12345.java:23)
     14 Caused by: java.lang.RuntimeException: Exception invoking click
     15     at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:181)
     16     at net.sourceforge.htmlunit.corejs.javascript.FunctionObject.call(FunctionObject.java:449)
     17     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1536)
     18     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798)
     19     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105)
     20     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:411)
     21     at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:309)
     22     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3286)
     23     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115)
     24     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827)
     25     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939)
     26     ... 9 more
     27 Caused by: com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot read property "nodeName" from null (http://xxxx/305000772#7)
     28     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954)
     29     at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628)
     30     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513)
     31     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836)
     32     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812)
     33     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800)
     34     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910)
     35     at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:354)
     36     at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:415)
     37     at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:271)
     38     at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:293)
     39     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:799)
     40     at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source)
     41     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:756)
     42     at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170)
     43     at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072)
     44     at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206)
     45     at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330)
     46     at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3126)
     47     at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2093)
     48     at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:920)
     49     at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499)
     50     at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452)
     51     at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
     52     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1039)
     53     at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:252)
     54     at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:198)
     55     at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:271)
     56     at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:159)
     57     at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:478)
     58     at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:352)
     59     at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPageIfPossible(BaseFrameElement.java:183)
     60     at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPage(BaseFrameElement.java:121)
     61     at com.gargoylesoftware.htmlunit.html.HtmlPage.loadFrames(HtmlPage.java:1893)
     62     at com.gargoylesoftware.htmlunit.html.HtmlPage.initialize(HtmlPage.java:227)
     63     at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:485)
     64     at com.gargoylesoftware.htmlunit.WebClient.loadDownloadedResponses(WebClient.java:2135)
     65     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.doProcessPostponedActions(JavaScriptEngine.java:982)
     66     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.processPostponedActions(JavaScriptEngine.java:1072)
     67     at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:789)
     68     at com.gargoylesoftware.htmlunit.html.HtmlImageInput.click(HtmlImageInput.java:152)
     69     at com.gargoylesoftware.htmlunit.javascript.host.html.HTMLInputElement.click(HTMLInputElement.java:477)
     70     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
     71     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
     72     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
     73     at java.lang.reflect.Method.invoke(Method.java:606)
     74     at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:153)
     75     ... 19 more
     76 Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot read property "nodeName" from null (http://xxxx/305000772#7)
     77     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3935)
     78     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3919)
     79     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:3944)
     80     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:3960)
     81     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefReadError(ScriptRuntime.java:3971)
     82     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getObjectProp(ScriptRuntime.java:1519)
     83     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1243)
     84     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798)
     85     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:118)
     86     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827)
     87     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939)
     88     ... 65 more
     89 Enclosed exception: 
     90 java.lang.RuntimeException: Exception invoking click
     91     at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:181)
     92     at net.sourceforge.htmlunit.corejs.javascript.FunctionObject.call(FunctionObject.java:449)
     93     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1536)
     94     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798)
     95     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105)
     96     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:411)
     97     at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:309)
     98     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3286)
     99     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115)
    100     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827)
    101     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939)
    102     at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628)
    103     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513)
    104     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836)
    105     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812)
    106     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800)
    107     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910)
    108     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:878)
    109     at com.suypower.AutoLogin12345.testHomePage(AutoLogin12345.java:48)
    110     at com.suypower.AutoLogin12345.main(AutoLogin12345.java:23)
    111 Caused by: com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot read property "nodeName" from null (http://xxx/305000772#7)
    112     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954)
    113     at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628)
    114     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513)
    115     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836)
    116     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812)
    117     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800)
    118     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910)
    119     at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:354)
    120     at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:415)
    121     at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:271)
    122     at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:293)
    123     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:799)
    124     at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source)
    125     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:756)
    126     at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170)
    127     at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072)
    128     at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206)
    129     at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330)
    130     at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3126)
    131     at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2093)
    132     at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:920)
    133     at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499)
    134     at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452)
    135     at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
    136     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1039)
    137     at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:252)
    138     at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:198)
    139     at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:271)
    140     at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:159)
    141     at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:478)
    142     at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:352)
    143     at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPageIfPossible(BaseFrameElement.java:183)
    144     at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPage(BaseFrameElement.java:121)
    145     at com.gargoylesoftware.htmlunit.html.HtmlPage.loadFrames(HtmlPage.java:1893)
    146     at com.gargoylesoftware.htmlunit.html.HtmlPage.initialize(HtmlPage.java:227)
    147     at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:485)
    148     at com.gargoylesoftware.htmlunit.WebClient.loadDownloadedResponses(WebClient.java:2135)
    149     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.doProcessPostponedActions(JavaScriptEngine.java:982)
    150     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.processPostponedActions(JavaScriptEngine.java:1072)
    151     at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:789)
    152     at com.gargoylesoftware.htmlunit.html.HtmlImageInput.click(HtmlImageInput.java:152)
    153     at com.gargoylesoftware.htmlunit.javascript.host.html.HTMLInputElement.click(HTMLInputElement.java:477)
    154     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    155     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    156     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    157     at java.lang.reflect.Method.invoke(Method.java:606)
    158     at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:153)
    159     ... 19 more
    160 Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot read property "nodeName" from null (http://xxx/305000772#7)
    161     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3935)
    162     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3919)
    163     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:3944)
    164     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:3960)
    165     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefReadError(ScriptRuntime.java:3971)
    166     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getObjectProp(ScriptRuntime.java:1519)
    167     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1243)
    168     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798)
    169     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:118)
    170     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827)
    171     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939)
    172     ... 65 more
    173 ======= EXCEPTION END ========

    希望能帮助到你,晚安!

  • 相关阅读:
    Android第二次作业
    六月十二课下作业
    第十三周课下作业
    第十三周上机练习
    第十二周课下作业
    第十二周上机作业
    十一周课下作业
    第十一周上机作业
    第十周上机练习
    android-7增删改查
  • 原文地址:https://www.cnblogs.com/yimu/p/LOVE_HCJ.html
Copyright © 2020-2023  润新知