用 C# WinForms 的 WebBrowser 控件读取网页内容,分析数据,做一些采集工作。
如果是同一个域名下面还是好办的,基本上用HtmlAgilityPack就完全可以解决问题。
但是现在遇到跨域问题,比如我需要打开页面上存在的广告联盟的地址,进行保存。
这就牵扯到跨域了。常见的错误是:拒绝访问。
"Access is denied. (Exception from HRESULT: 0x80070005 (E_ACCESSDENIED))"。
因为你没有在一个网站的页面中访问或修改另一个网站数据的权限。
怎么办?很苦恼吧。现在就告诉大家一个好办法。
直接上代码了。
下面是工具类,大家可以保存成一个单独的类文件。需要添加对 mshtml 的引用。
using System;
using System.Runtime.InteropServices;
using System.Windows.Forms;
using mshtml;

namespace WebBrowserTest
{
    /// <summary>
    /// The COM IServiceProvider interface (not the System.IServiceProvider .NET interface).
    /// Used to ask a COM object for another service it can provide.
    /// </summary>
    [ComImport(), ComVisible(true), Guid("6D5140C1-7436-11CE-8034-00AA006009FA"),
     InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
    public interface IServiceProvider
    {
        /// <summary>
        /// Queries the object for a service.
        /// </summary>
        /// <param name="guidService">Identifier of the requested service.</param>
        /// <param name="riid">Identifier of the interface wanted on that service.</param>
        /// <param name="ppvObject">Receives the requested interface.</param>
        /// <returns>The raw HRESULT (PreserveSig); negative values indicate failure.</returns>
        [return: MarshalAs(UnmanagedType.I4)]
        [PreserveSig]
        int QueryService(ref Guid guidService, ref Guid riid, [MarshalAs(UnmanagedType.Interface)] out object ppvObject);
    }

    /// <summary>Command status flags returned by <see cref="IWebBrowser2.QueryStatusWB"/>.</summary>
    public enum OLECMDF
    {
        OLECMDF_DEFHIDEONCTXTMENU = 0x20,
        OLECMDF_ENABLED = 2,
        OLECMDF_INVISIBLE = 0x10,
        OLECMDF_LATCHED = 4,
        OLECMDF_NINCHED = 8,
        OLECMDF_SUPPORTED = 1
    }

    /// <summary>Command identifiers accepted by <see cref="IWebBrowser2.ExecWB"/>.</summary>
    public enum OLECMDID
    {
        OLECMDID_PAGESETUP = 8,
        OLECMDID_PRINT = 6,
        OLECMDID_PRINTPREVIEW = 7,
        OLECMDID_PROPERTIES = 10,
        OLECMDID_SAVEAS = 4
    }

    /// <summary>Execution options accepted by <see cref="IWebBrowser2.ExecWB"/>.</summary>
    public enum OLECMDEXECOPT
    {
        OLECMDEXECOPT_DODEFAULT,
        OLECMDEXECOPT_PROMPTUSER,
        OLECMDEXECOPT_DONTPROMPTUSER,
        OLECMDEXECOPT_SHOWHELP
    }

    /// <summary>
    /// Managed declaration of the COM IWebBrowser2 interface.
    /// Do not reorder members: COM interop maps the declaration order to the
    /// interface's vtable layout.
    /// </summary>
    [ComImport, Guid("D30C1661-CDAF-11d0-8A3E-00C04FC9E26E"), TypeLibType(TypeLibTypeFlags.FOleAutomation | TypeLibTypeFlags.FDual | TypeLibTypeFlags.FHidden)]
    public interface IWebBrowser2
    {
        [DispId(100)]
        void GoBack();

        [DispId(0x65)]
        void GoForward();

        [DispId(0x66)]
        void GoHome();

        [DispId(0x67)]
        void GoSearch();

        [DispId(0x68)]
        void Navigate([In] string Url, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);

        [DispId(-550)]
        void Refresh();

        [DispId(0x69)]
        void Refresh2([In] ref object level);

        [DispId(0x6a)]
        void Stop();

        [DispId(200)]
        object Application { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xc9)]
        object Parent { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xca)]
        object Container { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xcb)]
        object Document { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

        [DispId(0xcc)]
        bool TopLevelContainer { get; }

        [DispId(0xcd)]
        string Type { get; }

        [DispId(0xce)]
        int Left { get; set; }

        [DispId(0xcf)]
        int Top { get; set; }

        [DispId(0xd0)]
        int Width { get; set; }

        [DispId(0xd1)]
        int Height { get; set; }

        [DispId(210)]
        string LocationName { get; }

        [DispId(0xd3)]
        string LocationURL { get; }

        [DispId(0xd4)]
        bool Busy { get; }

        [DispId(300)]
        void Quit();

        [DispId(0x12d)]
        void ClientToWindow(out int pcx, out int pcy);

        [DispId(0x12e)]
        void PutProperty([In] string property, [In] object vtValue);

        [DispId(0x12f)]
        object GetProperty([In] string property);

        [DispId(0)]
        string Name { get; }

        [DispId(-515)]
        int HWND { get; }

        [DispId(400)]
        string FullName { get; }

        [DispId(0x191)]
        string Path { get; }

        [DispId(0x192)]
        bool Visible { get; set; }

        [DispId(0x193)]
        bool StatusBar { get; set; }

        [DispId(0x194)]
        string StatusText { get; set; }

        [DispId(0x195)]
        int ToolBar { get; set; }

        [DispId(0x196)]
        bool MenuBar { get; set; }

        [DispId(0x197)]
        bool FullScreen { get; set; }

        [DispId(500)]
        void Navigate2([In] ref object URL, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);

        [DispId(0x1f5)]
        OLECMDF QueryStatusWB([In] OLECMDID cmdID);

        [DispId(0x1f6)]
        void ExecWB([In] OLECMDID cmdID, [In] OLECMDEXECOPT cmdexecopt, ref object pvaIn, IntPtr pvaOut);

        [DispId(0x1f7)]
        void ShowBrowserBar([In] ref object pvaClsid, [In] ref object pvarShow, [In] ref object pvarSize);

        [DispId(-525)]
        WebBrowserReadyState ReadyState { get; }

        [DispId(550)]
        bool Offline { get; set; }

        [DispId(0x227)]
        bool Silent { get; set; }

        [DispId(0x228)]
        bool RegisterAsBrowser { get; set; }

        [DispId(0x229)]
        bool RegisterAsDropTarget { get; set; }

        [DispId(0x22a)]
        bool TheaterMode { get; set; }

        [DispId(0x22b)]
        bool AddressBar { get; set; }

        [DispId(0x22c)]
        bool Resizable { get; set; }
    }

    /// <summary>
    /// Helper for reading the document of a frame whose content comes from
    /// another domain than the hosting page.
    /// </summary>
    class CorssDomainHelper
    {
        // These fields are passed by ref to QueryService, so they cannot be readonly.
        private static Guid IID_IWebBrowserApp = new Guid("0002DF05-0000-0000-C000-000000000046");

        private static Guid IID_IWebBrowser2 = new Guid("D30C1661-CDAF-11D0-8A3E-00C04FC9E26E");

        /// <summary>
        /// Utility for IE cross domain access: gets the document hosted in a frame window.
        /// </summary>
        /// <param name="htmlWindow">The frame window; may be null.</param>
        /// <returns>
        /// The frame's document, or null in case of failure (null window, the
        /// window does not expose the browser service, or any unexpected error).
        /// </returns>
        public static IHTMLDocument3 GetDocumentFromWindow(IHTMLWindow2 htmlWindow)
        {
            if (htmlWindow == null)
            {
                return null;
            }

            // First try the usual way to get the document.
            try
            {
                IHTMLDocument2 doc = htmlWindow.document;
                return (IHTMLDocument3)doc;
            }
            catch (COMException)
            {
                // Not expected to be raised here, but if it is, fall through
                // to the service-provider route below.
            }
            catch (UnauthorizedAccessException)
            {
                // Access denied: fall through to the service-provider route below.
            }
            catch (Exception)
            {
                return null;
            }

            // At this point the error was E_ACCESSDENIED because the frame contains
            // a document from another domain. IE tries to prevent a cross frame
            // scripting security issue.
            try
            {
                // Convert IHTMLWindow2 to IWebBrowser2 using IServiceProvider.
                IServiceProvider sp = htmlWindow as IServiceProvider;
                if (sp == null)
                {
                    return null;
                }

                // Use IServiceProvider.QueryService to get the IWebBrowser2 object.
                // QueryService is PreserveSig, so failure is reported through the
                // returned HRESULT instead of an exception and must be checked.
                object brws;
                int hr = sp.QueryService(ref IID_IWebBrowserApp, ref IID_IWebBrowser2, out brws);
                if (hr < 0 || brws == null)
                {
                    return null;
                }

                // Get the document from IWebBrowser2.
                IWebBrowser2 browser = brws as IWebBrowser2;
                if (browser == null)
                {
                    return null;
                }

                return browser.Document as IHTMLDocument3;
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            return null;
        }
    }
}
调用方法:
/// <summary>
/// Usage sample: walks every frame of the page loaded in a WebBrowser and
/// enumerates the links of each frame document, including frames whose
/// content comes from another domain.
/// </summary>
public void test()
{
    WebBrowser browser = new WebBrowser();

    // Document is null until a page has been loaded into the control, so
    // bail out instead of failing with a NullReferenceException.
    if (browser.Document == null || browser.Document.Window == null || browser.Document.Window.Frames == null)
    {
        return;
    }

    HtmlWindowCollection frames = browser.Document.Window.Frames;

    for (int i = 0; i < frames.Count; i++)
    {
        IHTMLDocument3 baiduDoc = CorssDomainHelper.GetDocumentFromWindow(frames[i].DomWindow as IHTMLWindow2);

        if (baiduDoc == null || baiduDoc.documentElement == null)
        {
            continue;
        }

        // Skip the frame when its document is missing or is not an HTMLDocument.
        HTMLDocument frameDoc = baiduDoc.documentElement.document as HTMLDocument;
        if (frameDoc == null)
        {
            continue;
        }

        IHTMLElementCollection linkss = frameDoc.links;

        foreach (mshtml.IHTMLElement element in linkss)
        {
            // Add your own processing code here.
        }
    }
}
原文出自:http://www.cnblogs.com/Leo_wl/p/3181353.html