在做采集时，有些网页因服务器限制，用 WebClient 或 WebRequest 无法获取 HTML。这时可以改用 WebBrowser 控件来绕过对方服务器的限制，但在实例化 WebBrowser 时会报上面的错误。查了一些资料后，最终写出下面的代码，能够实现功能，不过还可以再优化，留待以后有空时研究。下面是实现代码：
// Shared result slot: GetHtmlWithBrowser resets it to string.Empty and, when the page
// reaches the Complete state, stores the browser's DocumentText here; RunWithSingleThread
// copies the value into its ref parameter. Being a single static field, it is shared by
// every caller, so overlapping calls overwrite each other's result.
private static string htmlstr;
/// <summary>
/// Loads the given address in a WinForms <see cref="WebBrowser"/> control and stores the
/// resulting document text in <c>htmlstr</c>. If the page does not reach the
/// <see cref="WebBrowserReadyState.Complete"/> state within the timeout,
/// <c>htmlstr</c> is left as <see cref="string.Empty"/>.
/// </summary>
/// <param name="url">
/// The address to load; its <c>ToString()</c> value is parsed as a <see cref="Uri"/>.
/// Typed as <c>object</c> so the method matches <c>ParameterizedThreadStart</c>.
/// </param>
/// <remarks>
/// The WebBrowser control wraps an ActiveX component and must be created on a
/// single-threaded-apartment (STA) thread.
/// </remarks>
private static void GetHtmlWithBrowser(object url)
{
    // Upper bound on how long to wait for the page to finish loading.
    const double TimeoutSeconds = 10;

    htmlstr = string.Empty;

    // Dispose the control (and its underlying ActiveX instance) on every exit path.
    using (WebBrowser wb = new WebBrowser())
    {
        wb.AllowNavigation = true;
        wb.Url = new Uri(url.ToString());

        // UtcNow is immune to DST / local clock adjustments while measuring elapsed time.
        DateTime started = DateTime.UtcNow;

        // Keep pumping messages only while the page is still loading AND the time budget
        // is not yet spent. The original used "||", which forced a minimum 10-second wait
        // and never timed out when the page failed to complete.
        while (wb.ReadyState != WebBrowserReadyState.Complete
               && (DateTime.UtcNow - started).TotalSeconds < TimeoutSeconds)
        {
            // The control only makes progress when this thread's message queue is pumped.
            Application.DoEvents();
        }

        // On timeout the state is not Complete and htmlstr stays empty.
        if (wb.ReadyState == WebBrowserReadyState.Complete)
        {
            htmlstr = wb.DocumentText;
        }
    }
}
/// <summary>
/// Runs the browser-based download on a dedicated single-threaded-apartment (STA)
/// thread and returns the captured HTML through <paramref name="html"/>.
/// </summary>
/// <param name="url">The address to load; passed unchanged to the worker thread.</param>
/// <param name="html">
/// Receives the value of <c>htmlstr</c> after the worker thread has finished.
/// </param>
/// <remarks>
/// This call blocks the calling thread until the worker thread returns.
/// </remarks>
public static void RunWithSingleThread(object url, ref string html)
{
    Thread worker = new Thread(new ParameterizedThreadStart(GetHtmlWithBrowser));
    worker.IsBackground = true;

    // SetApartmentState replaces the obsolete Thread.ApartmentState property setter;
    // it must be called before the thread is started.
    worker.SetApartmentState(ApartmentState.STA);
    worker.Start(url);

    // Wait for the worker before reading the shared field; without this the caller
    // would read htmlstr before the worker had a chance to fill it in.
    worker.Join();

    html = htmlstr;
}