WebBrowser 操作记要
WebBrowser 操作记要 WebBrowser1.GoHome; //到浏览器默认主页 WebBrowser1.Refresh; //刷新 WebBrowser1.GoBack; //后退 WebBrowser1.GoForward; //前进 WebBrowser1.Navigate('...'); //打开指定页面 WebBrowser1.Navigate('about:blank'); //打开空页面 ________________________________________ //打开空页面, 并写入... WebBrowser1.Navigate('about: <head> <title>标题> </title> <body>页面内容 </body>'); ________________________________________ //读取网页脚本中的变量: procedure TForm1.Button1Click(Sender: TObject); var s: string; i: Integer; begin s := WebBrowser1.OleObject.document.Script.str; i := WebBrowser1.OleObject.document.Script.num; ShowMessage(s); //Hello ShowMessage(IntToStr(i)); //99 //也可以这样读: s := WebBrowser1.OleObject.document.parentWindow.str; i := WebBrowser1.OleObject.document.parentWindow.num; ShowMessage(s); //Hello ShowMessage(IntToStr(i)); //99 end; &lt;br&gt;假如网页中有这样的语句:&lt;br&gt;&amp;amp;lt;script&amp;amp;gt;&lt;br&gt;var&lt;br&gt; str = "Hello";&lt;br&gt; i = 99;&lt;br&gt;&amp;amp;lt;/script&amp;amp;gt; ________________________________________ //调用网页脚本中的函数: procedure TForm1.Button1Click(Sender: TObject); begin WebBrowser1.OleObject.document.parentWindow.MB(); //HTML-Js //如需指定脚本语言, 需要: WebBrowser1.OleObject.document.parentWindow.execScript('MB()','JavaScript'); //HTML-Js end; &lt;br&gt;假如有这样的脚本:&lt;br&gt;&amp;amp;lt;script&amp;amp;gt;&lt;br&gt;function MB(){ &lt;br&gt; alert('HTML-Js');&lt;br&gt;}&lt;br&gt;&amp;amp;lt;/script&amp;amp;gt; ________________________________________ //判断网页及内部框架网页是否全部下载完毕 procedure TForm1.WebBrowser1DocumentComplete(ASender: TObject; const pDisp: IDispatch; var URL: OleVariant); begin if WebBrowser1.Application = pDisp then begin Text := '网页下载完毕!'; end; end; ________________________________________ //改变背景色或背景图片: WebBrowser1.OleObject.document.body.bgcolor := '#FF0000'; WebBrowser1.OleObject.document.body.background := '...图片地址'; ________________________________________ //操作有 ID 标签的对象: var s: string; begin s := WebBrowser1.OleObject.document.getElementByID('span1').innerText; ShowMessage(s); //这是 span1 标签中的内容 //或者: s := WebBrowser1.OleObject.document.parentWindow.span1.innerText; ShowMessage(s); //这是 span1 标签中的内容 //隐藏它: WebBrowser1.OleObject.document.parentWindow.span1.style.display := 'none'; end; &lt;br&gt;假如网页中有这样的内容:&lt;br&gt;&amp;amp;lt;span id=span1&amp;amp;gt;这是 span1 标签中的内容&amp;amp;lt;/span&amp;amp;gt;&lt;br&gt; ________________________________________ //获取网页源代码 var s: string; begin s := WebBrowser1.OleObject.document.body.innerHTML; //body内的所有代码 s := WebBrowser1.OleObject.document.body.outerHTML; //body内的所有代码, 包含body标签 s := WebBrowser1.OleObject.document.documentElement.innerHTML; //html内的所有代码 end; //获取网页全部源代码 uses ActiveX; var ms: TMemoryStream; begin if not Assigned(WebBrowser1.Document) then Exit; ms := TMemoryStream.Create; (WebBrowser1.Document as IPersistStreamInit).Save(TStreamAdapter.Create(ms), True); ms.Position := 0; Memo1.Lines.LoadFromStream(ms, TEncoding.UTF8); // Memo1.Lines.LoadFromStream(ms, TEncoding.Default); {GB2312 等双字节} ms.Free; end; ________________________________________ //WebBrowser 中的右键菜单 //先要添加ApplicationEvents1,指定其Message事件 //屏蔽右键菜单 procedure TForm1.ApplicationEvents1Message(var Msg: tagMSG; var Handled: Boolean); begin with Msg do begin if not IsChild(WebBrowser1.Handle, hWnd) then Exit; Handled := (message = WM_RBUTTONDOWN) or (message = WM_RBUTTONUP) or (message = WM_CONTEXTMENU); end; end; //替换右键菜单 procedure TForm1.ApplicationEvents1Message(var Msg: tagMSG; var Handled: Boolean); var mPoint: TPoint; begin if IsChild(WebBrowser1.Handle, Msg.Hwnd) and ((Msg.Message = WM_RBUTTONDOWN) or (Msg.Message = WM_RBUTTONUP)) then begin GetCursorPos(mPoint); //得到光标位置 PopupMenu1.Popup(mPoint.X, mPoint.Y); //弹出popupmenu1的菜单 Handled:=True; end; end; ________________________________________ //新页面写入 begin WebBrowser1.Navigate('about:blank'); WebBrowser1.OleObject.Document.Writeln('ok'); end; //从流中写入: var ms: TMemoryStream; begin ms := TMemoryStream.Create; Memo1.Lines.SaveToStream(ms); ms.Position := 0; (WebBrowser1.Document as IPersistStreamInit).Load(TStreamAdapter.Create(ms)); ms.Free; end; //禁止提示脚步错误 procedure TForm1.WebBrowser1NavigateComplete2(ASender: TObject; const pDisp: IDispatch; var URL: OleVariant); begin WebBrowser1.Silent := True; end; //禁止弹出窗口 procedure TForm1.WebBrowser1NewWindow2(ASender: TObject; var ppDisp: IDispatch; var Cancel: WordBool); begin Cancel := True; end; ________________________________________ procedure TMainFrm.btnTestClick(Sender: TObject); var HtmlDoc:IHTMLDocument2; myitem:Olevariant; i:integer; str:string; begin myitem:= Web.Document; if myitem.frames.length <>0 then myitem:=myitem.frames.item(2).document; for i := 0 to myitem.all.length - 1 do begin if myitem.all.item(i).tagName = 'SELECT' then // 下拉框选择 begin myitem.all.item(i).selectedindex:= myitem.all.item(i).options.length-1; if strtoint(myitem.all.item(i).value) <0 then myitem.all.item(i).value:=0; end; if myitem.all.item(i).tagName = 'INPUT' then begin if Uppercase(myitem.all.item(i).type)='SUBMIT' then//提交表单 myitem.all.item(i).click; end; end; end;
webbrowser本窗口打开
unit Unit1; interface uses Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms, Dialogs, OleCtrls, SHDocVw, StdCtrls; type TForm1 = class(TForm) Button1: TButton; WebBrowser1: TWebBrowser; procedure Button1Click(Sender: TObject); procedure WebBrowser1NewWindow2(Sender: TObject; var ppDisp: IDispatch; var Cancel: WordBool); procedure tempWBBeforeNavigate2(Sender: TObject; const pDisp: IDispatch; var URL, Flags, TargetFrameName, PostData, Headers: OleVariant; var Cancel: WordBool); procedure WebBrowser1DocumentComplete(Sender: TObject; const pDisp: IDispatch; var URL: OleVariant); private { Private declarations } public { Public declarations } end; var Form1: TForm1; implementation {$R *.dfm} var tempWB : TWebBrowser; procedure TForm1.Button1Click(Sender: TObject); begin Self.WebBrowser1.Navigate('http://www.google.com.hk'); end; procedure TForm1.WebBrowser1NewWindow2(Sender: TObject; var ppDisp: IDispatch; var Cancel: WordBool); begin if not Assigned(tempWB) then tempWB := TWebBrowser.Create(Self); tempWB.OnBeforeNavigate2 := Self.tempWBBeforeNavigate2; ppDisp := tempWB.OleObject; end; procedure TForm1.tempWBBeforeNavigate2(Sender: TObject; const pDisp: IDispatch; var URL, Flags, TargetFrameName, PostData, Headers: OleVariant; var Cancel: WordBool); begin Self.WebBrowser1.Navigate(Url); Cancel := True; end; procedure TForm1.WebBrowser1DocumentComplete(Sender: TObject; const pDisp: IDispatch; var URL: OleVariant); begin if Assigned(tempWB) then FreeAndNil(tempWB); end; end.
webbrowser获取页面全部链接
unit Unit1; interface uses Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms, Dialogs, StdCtrls, OleCtrls, SHDocVw; type TForm1 = class(TForm) WebBrowser1: TWebBrowser; Button1: TButton; Edit1: TEdit; ListBox1: TListBox; procedure Button1Click(Sender: TObject); procedure WebBrowser1DocumentComplete(Sender: TObject; const pDisp: IDispatch; var URL: OleVariant); private { Private declarations } public { Public declarations } end; var Form1: TForm1; implementation {$R *.dfm} procedure TForm1.Button1Click(Sender: TObject); var i: Integer; begin webbrowser1.Navigate(edit1.Text); end; procedure TForm1.WebBrowser1DocumentComplete(Sender: TObject; const pDisp: IDispatch; var URL: OleVariant); var i:integer; begin for i := 0 to Webbrowser1.OleObject.Document.links.Length - 1 do Listbox1.Items.Add(Webbrowser1.OleObject.Document.Links.Item(i)); end; end.
操作WebBrowser 元素值
1. 自动填写表单并发布 两种方法 var i:integer; Doc:IHTMLDocument2; input:OleVariant; userinputelement,pwdinputelement,ValidateElement:ihtmlinputelement; ValidateImage: IHTMLImgElement; imagecount:integer; form:ihtmlformelement; myitem:Olevariant; begin Doc:=WebBrowser1.document as ihtmldocument2; if doc=nil then exit; // 第一种方式 userinputelement:=(doc.all.item('nicknameId',0) as ihtmlinputelement); userinputelement.value:=edit7.text; pwdinputelement:=(doc.all.item('pwd',0) as ihtmlinputelement); pwdinputelement.value:=edit8.text; pwdinputelement:=(doc.all.item('name',0) as ihtmlinputelement); pwdinputelement.value:=edit9.text; myitem:=WebBrowser1.document; // 第二种方式 并可操作 combo radio select 元素表 for i:=0 to myitem.all.length-1 do begin /// if myitem.all.item(i).tagName = 'SELECT' then // 下拉框选择 begin if myitem.all.item(i).Name='birth_year' then myitem.all.item(i).value:='1980'; // end; if myitem.all.item(i).tagName = 'INPUT' then begin if Uppercase(myitem.all.item(i).type)='RADIO' then begin if myitem.all.item(i).value='男生' then myitem.all.item(i).checked:=true; // 选中值是 '求'的选项 end; if Uppercase(myitem.all.item(i).type)=Text then begin myitem.all.item(i).value=''; end; end; end; 2.操作超链接 var i:integer; myitem:Olevariant; begin //xid_reg_handle myitem:=WebBrowser1.document; // 第一种方式 aVal:=myitem.getElementById('xid_reg_handle').href; myitem.getElementById('xid_reg_handle').click; // 模拟点击超链接 showmessage(InttosTr(myitem.Links.length)); for i:=0 to myitem.Links.length-1 do begin // myitem.Links.item(i).href // hrefUrl 可获取 if myitem.Links.item(i).innertext='名称' then // <a href=''> 名称' </a> myitem.Links.item(i).click;// 模拟点击超链接 end; end;
通过MSHTML实现一个HTML解析类
最近经常会模拟网页提交返回网页源码,然后获得网页中相应的元素,于是需要常常解析Html中相应的各种元素,网络是个好东西,搜索一番,就找到了好几个Delphi版本的HtmlParser的类库,试着使用了几个,发现解析起来都不完整,或多或少的回出现一些问题!于是想到了如果界面上有一个浏览器,我们可以通过WebBrowser的Document接口对网页元素进行操作,很是方便!但是模拟网页提交,界面上是不一定要出现WebBrowser的,肯定有办法,不通过WebBrowser就直接解析HTML的,那便是我不要WebBrowser这个外壳,只要他里面的Document文档接口对象就能实现对Html的解析了,查找了一番MSDN,然后Google一下,果然可行,构建方法如下:
//创建IHTMLDocument2接口 CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc); 接口创建好了之后就能够对文档元素进行解析了,很是爽快! 结合了我自己的特有操作,我对Combobox,Table,Frame等一些网页元素做了相应的封装,实现了一个HTMLParser,大致代码如下: 这里只给出声明,代码请在最后下载 复制代码 代码 (******************************************************) (* 得闲工作室 *) (* 网页元素操作类库 *) (* *) (* DxHtmlElement Unit *) (* Copyright(c) 2008-2010 不得闲 *) (* email:appleak46@yahoo.com.cn QQ:75492895 *) (******************************************************) unit DxHtmlElement; interface uses Windows,sysUtils,Clipbrd,MSHTML,ActiveX,OleCtrls,Graphics,TypInfo; {Get EleMent Type} function IsSelectElement(eleElement: IHTMLElement): Boolean; function IsPwdElement(eleElement: IHTMLElement): Boolean; function IsTextElement(element: IHTMLElement): boolean; function IsTableElement(element: IHTMLElement): Boolean; function IsElementCollection(element: IHTMLElement): Boolean; function IsChkElement(element: IHTMLElement): boolean; function IsRadioBtnElement(element: IHTMLElement): boolean; function IsMemoElement(element: IHTMLElement): boolean; function IsFormElement(element: IHTMLElement): boolean; function IsIMGElement(element: IHTMLElement): boolean; function IsInIMGElement(element: IHTMLElement): boolean; function IsLabelElement(element: IHTMLElement): boolean; function IsLinkElement(element: IHTMLElement): boolean; function IsListElement(element: IHTMLElement): boolean; function IsControlElement(element: IHTMLElement): boolean; function IsObjectElement(element: IHTMLElement): boolean; function IsFrameElement(element: IHTMLElement): boolean; function IsInPutBtnElement(element: IHTMLElement): boolean; function IsInHiddenElement(element: IHTMLElement): boolean; function IsSubmitElement(element: IHTMLElement): boolean; {Get ImgElement Data} function GetPicIndex(doc: IHTMLDocument2; Src: string; Alt: string): Integer; function GetPicElement(doc: IHTMLDocument2;imgName: string;src: string;Alt: string): IHTMLImgElement; function GetRegCodePic(doc: IHTMLDocument2;ImgName: string; Src: string; Alt: string): TPicture; overload; function GetRegCodePic(doc: IHTMLDocument2;Index: integer): TPicture; overload; function GetRegCodePic(doc: IHTMLDocument2;element: IHTMLIMGElement): TPicture;overload; type TObjectFromLResult = function(LRESULT: lResult;const IID: TIID; WPARAM: wParam;out pObject): HRESULT; stdcall; TEleMentType = (ELE_UNKNOW,ELE_TEXT,ELE_PWD,ELE_SELECT,ELE_CHECKBOX,ELE_RADIOBTN,ELE_MEMO,ELE_FORM,ELE_IMAGE, ELE_LABEL,ELE_LINK,ELE_LIST,ELE_CONTROL,ELE_OBJECT,ELE_FRAME,ELE_INPUTBTN,ELE_INIMAGE,ELE_INHIDDEN); function GetElementType(element: IHTMLELEMENT): TEleMentType; function GetElementTypeName(element: IHTMLELEMENT): string; function GetHtmlTableCell(aTable: IHTMLTable;aRow,aCol: Integer): IHTMLElement; function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHTMLTable; function GetWebBrowserHtmlTableCellText(Doc: IHTMLDocument2; const TableIndex, RowIndex, ColIndex: Integer;var ResValue: string): Boolean; function GetHtmlTableRowHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement; function GetWebBrowserHtmlTableCellHtml(Doc: IHTMLDocument2; const TableIndex,RowIndex,ColIndex: Integer;var ResValue: string): Boolean; function GeHtmlTableHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement; function GetWebBrowserHtmlTableHtml(Doc: IHTMLDocument2; const TableIndex,RowIndex: Integer;var ResValue: string): Boolean; type TDxWebFrameCollection = class; TDxWebElementCollection = class; TLoadState = (Doc_Loading,Doc_Completed,Doc_Invalidate); TDxWebFrame = class private FFrame: IHTMLWINDOW2; FElementCollections: TDxWebElementCollection; FWebFrameCollections: TDxWebFrameCollection; function GetSrc: string; function GetElementCount: integer; function GetWebFrameCollections: TDxWebFrameCollection; function GetElementCollections: TDxWebElementCollection; function GetDocument: IHTMLDOCUMENT2; function GetReadState: TLoadState; function GetIsLoaded: boolean; procedure SetFrame(const Value: IHTMLWINDOW2); function GetName: string; public Constructor Create(IFrame: IHTMLWINDOW2); Destructor Destroy;override; property Frame: IHTMLWINDOW2 read FFrame write SetFrame; property Src: string read GetSrc; property Document: IHTMLDOCUMENT2 read GetDocument; property Name: string read GetName; property Frames: TDxWebFrameCollection read GetWebFrameCollections; property ElementCount: integer read GetElementCount; property ElementCollections: TDxWebElementCollection read GetElementCollections; property ReadyState: TLoadState read GetReadState; property IsLoaded: boolean read GetIsLoaded; end; TDxWebFrameCollection = Class private FFrameCollection: IHTMLFramesCollection2; Frame: TDxWebFrame; function GetCount: integer; function GetFrameInterfaceByIndex(index: integer): IHTMLWINDOW2; function GetFrameInterfaceByName(Name: string): IHTMLWINDOW2; function GetFrameByIndex(index: integer): TDxWebFrame; function GetFrameByName(Name: string): TDxWebFrame; procedure SetFrameCollection(const Value: IHTMLFramesCollection2); public Constructor Create(ACollection: IHTMLFramesCollection2); Destructor Destroy;override; property FrameCollection: IHTMLFramesCollection2 read FFrameCollection write SetFrameCollection; property Count: integer read GetCount; property FrameInterfaceByIndex[index: integer]: IHTMLWINDOW2 read GetFrameInterfaceByIndex; property FrameInterfaceByName[Name: string]: IHTMLWINDOW2 read GetFrameInterfaceByName; property FrameByIndex[index: integer]: TDxWebFrame read GetFrameByIndex; property FrameByName[Name: string]: TDxWebFrame read GetFrameByName; end; TDxWebElementCollection = class private FCollection: IHTMLElementCollection; FChildCollection: TDxWebElementCollection; function GetCollection(index: String): TDxWebElementCollection; function GetCount: integer; function GetElement(itemName: string; index: integer): IHTMLElement; function GetElementByName(itemName: string): IHTMLELEMENT; function GetElementByIndex(index: integer): IHTMLELEMENT; procedure SetCollection(const Value: IHTMLElementCollection); public Constructor Create(ACollection: IHTMLElementCollection); Destructor Destroy;override; property Collection: IHTMLElementCollection read FCollection write SetCollection; property ChildElementCollection[index: String]: TDxWebElementCollection read GetCollection; property ElementCount: integer read GetCount; property Element[itemName: string;index: integer]: IHTMLElement read GetElement; property ElementByName[itemName: string]: IHTMLELEMENT read GetElementByName; property ElementByIndex[index: integer]: IHTMLELEMENT read GetElementByIndex; end; TLinkCollection = class(TDxWebElementCollection) end; TDxWebTable = class; TDxTableCollection = class private FTableCollection: IHTMLElementCollection; FDocument: IHTMLDOCUMENT2; FWebTable: TDxWebTable; function GetTableInterfaceByName(AName: string): IHTMLTABLE; procedure SetDocument(Value: IHTMLDOCUMENT2); function GetTableInterfaceByIndex(index: integer): IHTMLTABLE; function GetCount: integer; function GetTableByIndex(index: integer): TDxWebTable; function GetTableByName(AName: string): TDxWebTable; public Constructor Create(Doc: IHTMLDOCUMENT2); destructor Destroy;override; property TableInterfaceByName[AName: string]: IHTMLTABLE read GetTableInterfaceByName; property TableInterfaceByIndex[index: integer]: IHTMLTABLE read GetTableInterfaceByIndex; property TableByName[AName: string]: TDxWebTable read GetTableByName; property TableByIndex[index: integer]: TDxWebTable read GetTableByIndex; property Document: IHTMLDOCUMENT2 read FDocument write SetDocument; property Count: integer read GetCount; end; TDxWebTable = class private FTableInterface: IHTMLTABLE; function GetRowCount: integer; procedure SetTableInterface(const Value: IHTMLTABLE); function GetCell(ACol, ARow: integer): string; function GetRowColCount(RowIndex: integer): integer; function GetInnerHtml: string; function GetInnerText: string; function GetCellElement(ACol, ARow: Integer): IHTMLTableCell; public Constructor Create(ATable: IHTMLTABLE); property TableInterface: IHTMLTABLE read FTableInterface write SetTableInterface; property RowCount: integer read GetRowCount; property Cell[ACol: integer;ARow: integer]: string read GetCell; property CellElement[ACol: Integer;ARow: Integer]: IHTMLTableCell read GetCellElement; property RowColCount[RowIndex: integer]: integer read GetRowColCount; property InnerHtml: string read GetInnerHtml; property InnerText: string read GetInnerText; end; TDxWebCombobox = class private FHtmlSelect: IHTMLSelectElement; function GetCount: Integer; procedure SetItemIndex(const Value: Integer); function GetItemIndex: Integer; function GetName: string; procedure SetName(const Value: string); function GetValue: string; procedure SetValue(const Value: string); procedure SetCombInterface(const Value: IHTMLSelectElement); function GetItemByName(EleName: string): string; function GetItemByIndex(index: integer): string; function GetItemAttribute(index: Integer; AttribName: string): OleVariant; public constructor Create(AWebCombo: IHTMLSelectElement); procedure Add(Ele: IHTMLElement); procedure Insert(Ele: IHTMLElement;Index: Integer); procedure Remove(index: Integer); property CombInterface: IHTMLSelectElement read FHtmlSelect write SetCombInterface; property Count: Integer read GetCount; property ItemIndex: Integer read GetItemIndex write SetItemIndex; property ItemByIndex[index: integer]: string read GetItemByIndex; property ItemByName[EleName: string]: string read GetItemByName; property ItemAttribute[index: Integer;AttribName: string]: OleVariant read GetItemAttribute; property Name: string read GetName write SetName; property value: string read GetValue write SetValue; end; implementation end. 复制代码 HTMLParser解析类的代码实现单元 复制代码 代码 (******************************************************) (* 得闲工作室 *) (* HTML解析单元库 *) (* *) (* DxHtmlParser Unit *) (* Copyright(c) 2008-2010 不得闲 *) (* email:appleak46@yahoo.com.cn QQ:75492895 *) (******************************************************) unit DxHtmlParser; interface uses Windows,MSHTML,ActiveX,DxHtmlElement,Forms; type TDxHtmlParser = class private FHtmlDoc: IHTMLDocument2; FHTML: string; FWebTables: TDxTableCollection; FWebElements: TDxWebElementCollection; FWebComb: TDxWebCombobox; procedure SetHTML(const Value: string); function GetWebCombobox(AName: string): TDxWebCombobox; public constructor Create; destructor Destroy;override; property HTML: string read FHTML write SetHTML; property WebTables: TDxTableCollection read FWebTables; property WebElements: TDxWebElementCollection read FWebElements; property WebCombobox[Name: string]: TDxWebCombobox read GetWebCombobox; end; implementation { TDxHtmlParser } constructor TDxHtmlParser.Create; begin CoInitialize(nil); //创建IHTMLDocument2接口 CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc); Assert(FHtmlDoc<>nil,'构建HTMLDocument接口失败'); FHtmlDoc.Set_designMode('On'); //设置为设计模式,不执行脚本 while not (FHtmlDoc.readyState = 'complete') do begin sleep(1); Application.ProcessMessages; end; FWebTables := TDxTableCollection.Create(FHtmlDoc); FWebElements := TDxWebElementCollection.Create(nil); FWebComb := TDxWebCombobox.Create(nil); end; destructor TDxHtmlParser.Destroy; begin FWebTables.Free; FWebElements.Free; FWebComb.Free; CoUninitialize; inherited; end; function TDxHtmlParser.GetWebCombobox(AName: string): TDxWebCombobox; begin if FWebElements.Collection <> nil then begin FWebComb.CombInterface := FWebElements.ElementByName[AName] as IHTMLSelectElement; Result := FWebComb; end else Result := nil; end; procedure TDxHtmlParser.SetHTML(const Value: string); begin if FHTML <> Value then begin FHTML := Value; FHtmlDoc.body.innerHTML := FHTML; FWebElements.Collection := FHtmlDoc.all; end; end; end.
用MSHTML控制网页中所有元素
http://www.cnblogs.com/yuanbao/archive/2007/09/03/878213.html 前些日子用VS2005中的WebBrowser来控制网页中的元素,虽然VS2005封装了很多不错的功能,但用起来总觉得有所欠缺。比如我想如得到框架页内网页的源码,找来的去,就是找不到合适的方法。 MSHTML是微软公司的一个COM组件,该组件封装了HTML语言中的所有元素及其属性,通过其提供的标准接口,可以访问指定网页的所有元素.MSHTML对象模型是由一些对象和集合组成的.处于根部的是HTML,描述了打开页面的1个窗口,包括一系列集合和对象。如Frames集合,History,Location,Navigator,Document,Vi—sum,Event对象等.其中描述呈现在客户窗口实际网页的是Document对象。由一系列的属性、方法、对象和集合组成.其中All集合中包含网页中所有标记(Tag)元素,其主要的方法和属性有: (1)Length(长度):即标记出现的个数,可以把标记的集合理解为从0开始的一维数组,其次序按照标记在网页位置排列; (2)Tags(标记):用于过滤出给定标记的集合,如Doc.Al1.Tags(P)得到所有分段标记P; (3)Item(项目):用于选择集合中的某1个元素,如object.item(0)得到集合的第1个元素,而object.item(i)得到第i+1个元素. 此外,IHTMLElement也是个常用的集合对象,代表网页中指定标记的集合,通过这个集合对象,可以得到网页上特定标记的内容.IHTMLElement有4个主要属性: (1)InnerText:开始标记和结束标记之间的文本; (2)InnerHTML:开始标记和结束标记之间的文本和HTML; (3)OuterText:对象的文本; (4)OuterHTML:对象的文本和HTML. 自动提交 procedure TForm1.Button1Click(Sender: TObject); var Doc:IHTMLDocument2; input:OleVariant; userinputelement,pwdinputelement:ihtmlinputelement; begin doc:=webbrowser1.document as ihtmldocument2; userinputelement:=(doc.all.item('user'(也就是网页中用户名控件的名字),0) as ihtmlinputelement); userinputelement.value:=edit1.text;(也就是你要向网页输入的东西) pwdinputelement:=(doc.all.item('password',0) as ihtmlinputelement); pwdinputelement.value:=edit2.text; input:=doc.all.item('submit',0); input.click; end; 当提交数据按钮没有NAME属性时,采用如下方法: procedure TForm1.Button1Click(Sender: TObject); var Doc:IHTMLDocument2; form:ithmlformelement; userinputelement,pwdinputelement:ihtmlinputelement; begin doc:=webbrowser1.document as ihtmldocument2; userinputelement:=(doc.all.item('user'(也就是网页中用户名控件的名字),0) as ihtmlinputelement); userinputelement.value:=edit1.text;(也就是你要向网页输入的东西) pwdinputelement:=(doc.all.item('password',0) as ihtmlinputelement); pwdinputelement:=edit2.text; form:=(doc.all.item('login_form',0) as ihtmlformelement): form.submit; end; 当前页为框架页时,采用如下方法: procedure TForm1.Button1Click(Sender: TObject); var doc2:IHTMLDocument2; o : Olevariant; ole_index: OleVariant; frame_dispatch: IDispatch; frame_win: IHTMLWindow2; frame_doc: IHTMLDocument2; begin begin doc2 := WebBrowser1.Document as IHTMLDocument2; ole_index:=0; frame_dispatch := doc2.Frames.Item(ole_index); if frame_dispatch <> nil then begin frame_win := frame_dispatch as IHTMLWindow2; frame_doc := frame_win.document; // memo1.lines.add(IHTMLDocument2(frame_doc).body.outerHTML); End; end; 0 0 (请您对文章做出评价)
TWebBrowser设置
去除滚动条和边框 ((WebBrowser1.Document as IHTMLDocument2).body as HTMLBody).scroll := 'no'; (WebBrowser1.Document as IHTMLDocument2).body.style.border := '0'; (WebBrowser1.Document as IHTMLDocument2).body.style.borderStyle := 'none'; (WebBrowser1.Document as IHTMLDocument2).body.style.margin := '0'; (WebBrowser1.Document as IHTMLDocument2).body.style.padding := '0'; (WebBrowser1.Document as IHTMLDocument2).body.style.overflow := 'hidden'; 模拟点击网页中的按钮 {模拟一个页面} WebBrowser1.Navigate( 'about:<head><title>标题</title><body bgcolor=#ff0000>'+ '<form method="POST" action="http://del.cnblogs.com">'+ '<input type="submit" value="提交" id="btnID" name="btnName">'+ '</form></body>'); {假如知道按钮名称, 譬如是: btnName} WebBrowser1.OleObject.document.all.item('btnName').click; WebBrowser1.OleObject.document.all.item('btnName', 0).click; {假如知道按钮的 ID, 譬如是: btnID} WebBrowser1.OleObject.document.getElementByID('btnID').click; {假如只知道是第几个按钮, 譬如是第一个} WebBrowser1.OleObject.document.getElementsByTagName('input').item(0).click; MaxScrollHeight := doc.body.getAttribute('ScrollHeight', 0); //获得滚动条最大高度 MaxScrollWidth := doc.body.getAttribute('ScrollWidth', 0);//获得滚动条最大宽度 Form1.WebBrowser1.OleObject.Document.ParentWindow.ScrollBy(MaxScrollWidth ,MaxScrollHeight ); //滚动到最右最下 //MaxScrollHeight := doc.body.getAttribute('ScrollHeight', 0); //获得滚动条最大高度 MaxScrollWidth := doc.body.getAttribute('ScrollWidth', 0);//获得滚动条最大宽度
Webbrowser 一些特殊網頁元素的訪問
在論壇上偶有朋友問及網頁中 表格數據的讀取!今天再遇到。便先寫在這。以後再遇到其他的元素再一一添加 <一>.delphi 中 webbrowser 對表格數據的讀取 var ovTable: OleVariant; i,j: integer; begin ovTable:=Wb.OleObject.Document.all.tags('TABLE').item(1);//取第二表格集合 for i := 1 to (ovTable.Rows.Length - 1) do //循環行 begin for j := 1 to (ovTable.Rows.Item(i).Cells.Length ) do// 循環列 begin 單元格數據 := ovTable.Rows.Item(i).Cells.Item(j-1).InnerText; end; end; end; ==============================2011年6月22日================================= <二>對css中定義的背景圖片讀取方法 WebBrowser1.OleObject.document.getElementById('bgDiv').currentStyle.BackGroundImage WebBrowser1.OleObject.document.body.currentStyle.BackGroundImage ==============================2011年9月25日================================= <三> Delphi歷遍网页中指定标签内的子元素 首先取得我们想要提取的标签,比如www.baidu.com首页的某个 div id 为 ‘nv’ 申明 tags为 OleVariant; tags:= :=wb.OleObject.document.all.item('nv',0).all; 获取nv标签内的第一个字元素A 的outerhtml为: str := tags.item(0,0).outerhtml;//其中0就代理第一个元素 ==============================2013年2月28日================================= document.getElementById('bet-race-num-1').parentNode.nextSibling.firstChild.value='abc'
通过webbrowser读取网页上确定标签ID的值
Edit1.text := ((wbmap.document as IHTMLDocument2).all.item('tname', 0) as ihtmlinputelement).value dit1.text:= wbmap.OleObject.document.getElementByID('tname').value; wbmap:webbrowser; tname:网页标签ID名;
js nextSibling属性和previousSibling属性
1:nextSibling属性 该属性表示当前节点的下一个节点(其后的节点与当前节点同属一个级别);如果其后没有与其同级的节点,则返回null。 需要特别注意的是:该属性在不同的浏览器中的执行结果并不都相同,见下面例示: 先来看一个例子: view plaincopy to clipboardprint? <body> <div> <input id="a4" type="button" onclick="alert(this.nextSibling);" value="d" /> <input id="a5" type="button" onclick="alert(this.nextSibling);" value="e" /> </div> </bod 该对象的结构表面上看,div的nextSibling只有2项——两个input节点。但实际上有5项——/n,input,/n,input,/n。这是因为input作为创建各种表单输入控件的标签,无论是生成button、checkbox、radio...等或其他表单控件,IE都会自动在后面创建一个1字节位的空白。 IE将跳过在节点之间产生的空格文档节点(如:换行字符),而Mozilla不会这样——FF会把诸如空格换行之类的排版元素视作节点读取,因此,在ie中用nextSibling便可读取到的下一个节点元素,在FF中就需要这样写:nextSibling.nextSibling了。 opera和safari对nextSibling的处理方式与FF一致 2:previousSibling属性 该属性与nextSibling属性的作用正好相反。例如:someTagObject.nextSibling.previousSibling其实返回的是该标签元素本身,但前提必须是:该标签元素的后面必须有一个同级的元素,否则就返回null了。 3:通过nextSibling或者 previousSibling所获得的HTML标签元素对象的属性问题 一般先通过nextSibling.nodeName来获知其标签名,或者通过nextSibling.nodeType来获知其标签类型,然后,如果该nextSibling.nodeName = #text,则通过nextSibling.nodeValue来获知其文本值;否则,可以通过nextSibling.innerHTML等其他常用标签元素属性来获取其属性。
遍历li
var i, j,m,n: integer; ovTable,ovTableul: OleVariant; //这一部分是取得“无序列表”的部分 ovTable:=Webbrowser1.OleObject.Document.getElementsByTagName('ul').item(0); ovTableul:=ovTable.getElementsByTagName('li'); n:=ovTableul.Length; if n>0 then begin for i:=0 to n-1 do begin self.Memo1.Lines.Add(ovTableul.item(i).InnerText); end; end;
Webbrowser无Name及ID时自动点击按钮
procedure TForm1.Button1Click(Sender: TObject); var i:integer; begin for i:=0 to wb1.OleObject.document.getElementsByTagName('a').length-1 do begin if wb1.OleObject.document.getElementsByTagName('a').item(i).innerhtml='唯一关键字1' then begin memo1.Lines.Add(wb1.OleObject.document.getElementsByTagName('a').item(i+1).innerhtml); if wb1.OleObject.document.getElementsByTagName('a').item(i+1).innerhtml<>'唯一关键字2'then wb1.OleObject.document.getElementsByTagName('a').item(i+1).click; end; if wb1.OleObject.document.getElementsByTagName('a').item(i).innerhtml='唯一关键字2' then begin memo1.Lines.Add(wb1.OleObject.document.getElementsByTagName('a').item(i-1).innerhtml); if wb1.OleObject.document.getElementsByTagName('a').item(i-1).innerhtml<>'唯一关键字1'then begin wb1.OleObject.document.getElementsByTagName('a').item(i-1).click; break; end; end; end; end; 以上代码基本实现了无name和无id的自动点击。
设置WebBrowser 代理服务器 与 UserAgent
uses UrlMon, WinInet; {------------------------------------------------------------------------------- 过程名: SetProcessProxy 作者: kelei 日期: 2013.08.03 参数: aProxyServer代理服务器; aProxyPort代理服务器端口 返回值: True设置成功 SetProcessProxy('127.0.0.1', 80); -------------------------------------------------------------------------------} function SetProcessProxy(const aProxyServer: string; const aProxyPort: Integer): Boolean; var vProxyInfo: TInternetProxyInfo; begin vProxyInfo.dwAccessType := INTERNET_OPEN_TYPE_PROXY; vProxyInfo.lpszProxy := PChar(Format('http=%s:%d', [aProxyServer, aProxyPort])); vProxyInfo.lpszProxyBypass := PChar(''); Result := UrlMkSetSessionOption(INTERNET_OPTION_PROXY, @vProxyInfo, SizeOf(vProxyInfo, 0) = S_OK; end; {------------------------------------------------------------------------------- 过程名: SetProcessUserAgent 作者: kelei 日期: 2013.08.03 参数: aUserAgent HTTP请求头UserAgent内容 返回值: True设置成功 SetProcessUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3') -------------------------------------------------------------------------------} function SetProcessUserAgent(const aUserAgent: string): Boolean; begin Result := UrlMkSetSessionOption(URLMON_OPTION_USERAGENT, PChar(aUserAgent), Length(aUserAgent), 0) = S_OK; end;
WebBrowser 点击任意元素 或图片
procedure TForm1.btnClickUrlClick(Sender: TObject); var J:integer; spDisp: IDispatch; IDoc1: IHTMLDocument2; ielc: IHTMLElementCollection ; ihtml:IHTMLElement; iane:IHTMLAnchorElement; begin WebNav.Document.QueryInterface(IHTMLDocument2,iDoc1); ielc:=idoc1.Get_all; for J:=0 to ielc.length-1 do begin Application.ProcessMessages; spDisp := ielc.item(J, 0); if SUCCEEDED(spDisp.QueryInterface(IHTMLAnchorElement ,iane))then begin if iane.href='http://www.nq51.com/' then //这里我在网页里的url是http://www.nq51.com调用的时候自动加上了'/'需要注意一下 begin ihtml:=ielc.item(J,0) as IHTMLElement; ihtml.click; end; end; end; end;
WebBrowser自动填表
1 <input type="text" name="xxx" size="20"> 对于网页这种连接 我们可以用如下方式实现填写内容。 var o: Olevariant; all: IHTMLElementCollection; item: IHTMLElement; begin o := WebBrowser1.oleobject.document.all.item('xxx', 0); o.value:='myValue'; 2 o := Web1.oleobject.document.all.item('username',0); o.value:='liupan9999'; Memo1.Lines.Add('填入密码'); o := Web1.oleobject.document.all.item('password',0); o.value:='songbai1'; Memo1.Lines.Add('登录'); Web1.oleobject.document.Forms.Item('loginform', 0).submit;
delphi 几个实用的HTML解析函数
1)HTML 标签值攫取函数,任意标签哦,纯字符串分析,可以配合IDHTTP编程 uses StrUtils; function ExtractHtmlTagValues(const HtmlText: string; TagName, AttribName: string; var Values: TStringList): integer; function FindFirstCharAfterSpace(const Line: string; StartPos: integer): Integer; var i: integer; begin Result := -1; for i := StartPos to Length(Line) do begin if (Line[i] <> ' ') then begin Result := i; exit; end; end; end; function FindFirstSpaceAfterChars(const Line: string; StartPos: integer): Integer; begin Result := PosEx(' ', Line, StartPos); end; function FindFirstSpaceBeforeChars(const Line: string; StartPos: integer): Integer; var i: integer; begin Result := 1; for i := StartPos downto 1 do begin if (Line[i] = ' ') then begin Result := i; exit; end; end; end; var InnerTag: string; LastPos, LastInnerPos: Integer; SPos, LPos, RPos: Integer; AttribValue: string; ClosingChar: char; TempAttribName: string; begin Result := 0; LastPos := 1; while (true) do begin // find outer tags '<' & '>' LPos := PosEx('<', HtmlText, LastPos); if (LPos <= 0) then break; RPos := PosEx('>', HtmlText, LPos+1); if (RPos <= 0) then LastPos := LPos + 1 else LastPos := RPos + 1; // get inner tag InnerTag := Copy(HtmlText, LPos+1, RPos-LPos-1); InnerTag := Trim(InnerTag); // remove spaces if (Length(InnerTag) < Length(TagName)) then continue; // check tag name if (SameText(Copy(InnerTag, 1, Length(TagName)), TagName)) then begin // found tag AttribValue := ''; LastInnerPos := Length(TagName)+1; while (LastInnerPos < Length(InnerTag)) do begin // find first '=' after LastInnerPos RPos := PosEx('=', InnerTag, LastInnerPos); if (RPos <= 0) then break; // this way you can check for multiple attrib names and not a specific attrib SPos := FindFirstSpaceBeforeChars(InnerTag, RPos); TempAttribName := Trim(Copy(InnerTag, SPos, RPos-SPos)); if (true) then begin // found correct tag LPos := FindFirstCharAfterSpace(InnerTag, RPos+1); if (LPos <= 0) then begin LastInnerPos := RPos + 1; continue; end; LPos := FindFirstCharAfterSpace(InnerTag, LPos); // get to first char after '=' if (LPos <= 0) then continue; if ((InnerTag[LPos] <> '"') and (InnerTag[LPos] <> '''')) then begin // AttribValue is not between '"' or ''' so get it RPos := FindFirstSpaceAfterChars(InnerTag, LPos+1); if (RPos <= 0) then AttribValue := Copy(InnerTag, LPos, Length(InnerTag)-LPos+1) else AttribValue := Copy(InnerTag, LPos, RPos-LPos+1); end else begin // get url between '"' or ''' ClosingChar := InnerTag[LPos]; RPos := PosEx(ClosingChar, InnerTag, LPos+1); if (RPos <= 0) then AttribValue := Copy(InnerTag, LPos+1, Length(InnerTag)-LPos-1) else AttribValue := Copy(InnerTag, LPos+1, RPos-LPos-1) end; if (SameText(TempAttribName, AttribName)) and (AttribValue <> '') then begin Values.Add(AttribValue); inc(Result); end; end; if (RPos <= 0) then LastInnerPos := Length(InnerTag) else LastInnerPos := RPos+1; end; end; end; end; 用法示例: 取得页面中所有链接 var Links : TStringList; LinkFound,i : Integer; begin Links := TStringList.Create; LinkFound := ExtractHtmlTagValues(HtmlText,'A','HREF',Links); for i:=0 to LinkFound-1 do begin //Add your own codes here end; Links.Free; end; 2)表单元素值攫取函数,可以从HTML文本中按照给定的Input名称解析出其Value function GetValByName(S, Sub: string) : string; var EleS,EleE,iPos: Integer; ELeStr,ValSt: String; St,Ct : Integer; function FindEleRange(str: string ; front : boolean; posi : integer): Integer; var i: integer; begin if Front then begin for i:=posi-1 downto 1 do if Str[i]='<' then begin Result := i; break; end; end else begin for i := posi+1 to length(Str) do if Str[i]='>' then begin Result := i; break; end; end; end; function FindEnd (str : string; posi : integer) : Integer; var i: integer; begin for i:=posi to length(str) do begin if (str[i] ='"') or (str[i] ='''') or (str[i] =' ') then begin result := i-1; break; end; end; end; begin iPos := Pos('name="'+lowercase(Sub)+'"',lowercase(S)); if iPos = 0 then iPos := Pos('name='+lowercase(Sub),lowercase(S)); if iPos = 0 then iPos := Pos('name='''+lowercase(Sub)+'''',lowercase(S)); if iPos = 0 then exit; EleS := FindEleRange(S,TRUE,iPos); EleE := FindEleRange(S,FALSE,iPos); EleStr := Copy(S,EleS,EleE-EleS+1); ValSt := 'value="'; iPos := Pos(ValSt,EleStr); if iPos = 0 then begin ValSt := 'value='''; iPos := Pos(ValSt,EleStr); end; if iPos = 0 then begin ValSt := 'value='; iPos := Pos(ValSt,EleStr); end; St := iPos+length(ValSt); Ct := FindEnd(EleStr,St)-St+1; Result := Copy(EleStr,St,Ct); end; 用法示例: 取得页面中名为 Submit 的表单项的值 var InputValue : String; begin InputValue := GetValByName(HtmlText,'Submit'); end; 3)取某两个字符串中间的字符 function getStrFromHtml(var Source: String; SbStr, bStr, eStr: String): String; var I: Integer; sbPos, bPos, ePos: Integer; S: String; begin S := Source; Result := '' ; if SBStr <> '' then Begin sbPos := Pos(UpperCase(SbStr), UpperCase(S)); if sbPos > 0 then Delete(S, 1, sbPos - 1 + length(sbStr)) Else Exit; End; bPos := Pos(UpperCase(bStr), UpperCase(S)); if bPos > 0 then Delete(S, 1, bPos - 1 + length(bStr)) Else Exit; ePos := pos(UpperCase(eStr), UpperCase(S)); if ePos > 0 then Delete(S, ePos, length(S)); Result := S; end; 用法实例: FUserID := getStrFromHtml(reqStr, 'id="userID"', 'value="', '"');
WebBorwser 解决无法模拟Enter
procedure TForm1.ApplicationEvents1Message(var Msg: tagMSG; var Handled: Boolean); { fixes the malfunction of some keys within webbrowser control } const StdKeys = [VK_TAB, VK_RETURN]; { standard keys } ExtKeys = [VK_DELETE, VK_BACK, VK_LEFT, VK_RIGHT]; { extended keys } fExtended = $01000000; { extended key flag } begin Handled := False; with Msg do if ((Message > = WM_KEYFIRST) and (Message <= WM_KEYLAST)) and ((wParam in StdKeys) or {$IFDEF VER120}(GetKeyState(VK_CONTROL) < 0) or {$ENDIF} (wParam in ExtKeys) and ((lParam and fExtended) = fExtended)) then try if IsChild(WebBrowser1.Handle, hWnd) then { handles all browser related messages } begin with WebBrowser1.Application as IOleInPlaceActiveObject do Handled := TranslateAccelerator(Msg) = S_OK; if not Handled then begin Handled := True; TranslateMessage(Msg); DispatchMessage(Msg); end; end; except end; end; // IEMessageHandler uses activex, OleCtrls......
设置webbrowser 为单独代理不影响IE
我看到有一个VB写的程序,webbrowser可以单独设置代理,360,ie8和火狐的IP不变,(测试网页www.myip.cn或者百度 我的IP)360网络检查也没有看到代理,但是那个程序确实是通过http代理浏览网页,(代理IP如211.136.10.25:80)各位大虾知道在delphi程序中怎么实现吗,有代码更好,我自己网上找了一段VB代码,但测试不成功,100分,不够的话我再加分 VB代码如下 [程序设计]设置程序中使用的WebBrowser控件的代理,不影响系统IE浏览器 Option Explicit Private Declare Function InternetSetOption Lib "wininet.dll" Alias "InternetSetOptionA" (ByVal hInternet As Long, ByVal dwOption As Long, ByRef lpBuffer As Any, ByVal dwBufferLength As Long) As Long Private Type INTERNET_PROXY_INFO dwAccessType As Long lpszProxy As String lpszProxyBypass As String End Type Private Const INTERNET_OPTION_PROXY = 38 Private Const INTERNET_OPTION_SETTINGS_CHANGED = 39 Private Const INTERNET_OPEN_TYPE_DIRECT = 1 Private Const INTERNET_OPEN_TYPE_PROXY = 3 Private Sub SetProxy(aStrIP As String, aStrPort As String, aBolUseProxy As Boolean) Dim strProxy As String Dim inf As INTERNET_PROXY_INFO aStrIP = Trim(aStrIP) aStrPort = Trim(aStrPort) If (aStrIP + aStrPort = "") Or Not aBolUseProxy Then strProxy = "" Else strProxy = "http=" + aStrIP + ":" + aStrPort End If If Trim(strProxy) <> "" Then inf.dwAccessType = INTERNET_OPEN_TYPE_PROXY inf.lpszProxy = strProxy inf.lpszProxyBypass = "" Call InternetSetOption(0, INTERNET_OPTION_PROXY, inf, Len(inf)) Call InternetSetOption(0, INTERNET_OPTION_SETTINGS_CHANGED, "", 0) Else inf.dwAccessType = INTERNET_OPEN_TYPE_DIRECT inf.lpszProxy = "" inf.lpszProxyBypass = "" Call InternetSetOption(0, INTERNET_OPTION_PROXY, inf, Len(inf)) Call InternetSetOption(0, INTERNET_OPTION_SETTINGS_CHANGED, "", 0) End If End Sub '===使用代理上网 Private Sub Command1_Click() SetProxy txtIP.Text, txtPort.Text, True WebBrowser1.Navigate "http://ipseeker.cn" End Sub '===不使用代理上网 Private Sub Command2_Click() SetProxy txtIP.Text, txtPort.Text, False WebBrowser1.Navigate "http://ipseeker.cn" End Sub Private Sub Form_Load() WebBrowser1.Navigate "http://ipseeker.cn" End Sub 原文地址 http://www.agoil.cn/bbs/read-htm-tid-207697.html
提取 webbrowser鼠标单击的超链接的文字内容
我使用万一的代码做了个例子 应该能满足你的需求 a.html <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=gb2312" /> <title>无标题文档</title> </head> <body> <label>btn <input id="aa" type="button" onclick="location='http://www.google.com'" name="Submit" value="提交" /> </label> <a id="bb" href="http://www.baidu.com">ahref</a> <br /> <label>btn <input id="aa" type="button" onclick="" name="Submit" value="提交" /> </label> <a href="http://so.com" target="_blank">so</a> </body> </html> 单元文件. 窗体上一个memo, 一个webBrowser unit Unit1; interface uses Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms, Dialogs, OleCtrls, SHDocVw, MSHTML, ActiveX, StdCtrls; type TObjectProcedure = procedure of object; TEventObject = class(TInterfacedObject, IDispatch) private FOnEvent: TObjectProcedure; protected function GetTypeInfoCount(out Count: Integer): HResult; stdcall; function GetTypeInfo(index, LocaleID: Integer; out TypeInfo): HResult; stdcall; function GetIDsOfNames(const IID: TGUID; Names: Pointer; NameCount, LocaleID: Integer; DispIDs: Pointer): HResult; stdcall; function Invoke(dispid: Integer; const IID: TGUID; LocaleID: Integer; Flags: Word; var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult; stdcall; public constructor Create(const OnEvent: TObjectProcedure); property OnEvent: TObjectProcedure read FOnEvent write FOnEvent; end; TForm1 = class(TForm) wb1: TWebBrowser; mmo1: TMemo; procedure wb1BeforeNavigate2(ASender: TObject; const pDisp: IDispatch; var URL, Flags, TargetFrameName, PostData, Headers: OleVariant; var Cancel: WordBool); procedure FormCreate(Sender: TObject); procedure wb1TitleChange(ASender: TObject; const Text: WideString); procedure wb1DocumentComplete(ASender: TObject; const pDisp: IDispatch; var URL: OleVariant); private { Private declarations } FCurrHrefText : string; procedure Document_OnMouseOver; public { Public declarations } end; var Form1: TForm1; htmlDoc: IHTMLDocument2; implementation {$R *.dfm} procedure TForm1.Document_OnMouseOver; var element: IHTMLElement; begin FCurrHrefText := ''; if htmlDoc = nil then Exit; element := htmlDoc.parentWindow.event.srcElement; mmo1.Clear; if LowerCase(element.tagName) = 'a' then begin mmo1.Lines.Add('LINK info...'); mmo1.Lines.Add(Format('HREF : %s', [element.getAttribute('href', 0)])); mmo1.Lines.Add(Format('title : %s', [element.innerText])); FCurrHrefText := element.innerText; end else if LowerCase(element.tagName) = 'img' then begin mmo1.Lines.Add('IMAGE info...'); mmo1.Lines.Add(Format('SRC : %s', [element.getAttribute('src', 0)])); end else begin mmo1.Lines.Add(Format('TAG : %s', [element.tagName])); mmo1.Lines.Add(Format('TAG : %s', [element.getAttribute('value', 0)])); end; end; procedure TForm1.FormCreate(Sender: TObject); begin wb1.Navigate(ExtractFilePath(Application.ExeName)+'a.html'); //wb1.Navigate('http://passport.csdn.net/UserLogin.aspx'); Mmo1.Clear; Mmo1.Lines.Add('Move your mouse over the document...'); end; procedure TForm1.wb1BeforeNavigate2(ASender: TObject; const pDisp: IDispatch; var URL, Flags, TargetFrameName, PostData, Headers: OleVariant; var Cancel: WordBool); begin //如果是直接打开新窗口, 也是同理获得元素信息 if Pos('http:', URL) > 0 then begin ShowMessage('当前URL描述:' + FCurrHrefText); // Cancel := True; end; htmlDoc := nil; end; procedure TForm1.wb1DocumentComplete(ASender: TObject; const pDisp: IDispatch; var URL: OleVariant); begin if Assigned(wb1.Document) then begin htmlDoc := wb1.Document as IHTMLDocument2; if htmlDoc.frames.length > 0 then begin htmlDoc := (IDispatch(htmlDoc.frames.item(0)) as IHTMLWindow2).Document; end; htmlDoc.onmouseover := (TEventObject.Create(Document_OnMouseOver) as IDispatch); end; end; procedure TForm1.wb1TitleChange(ASender: TObject; const Text: WideString); begin end; { TEventObject } constructor TEventObject.Create(const OnEvent: TObjectProcedure); begin inherited Create; FOnEvent := OnEvent; end; function TEventObject.GetIDsOfNames(const IID: TGUID; Names: Pointer; NameCount, LocaleID: Integer; DispIDs: Pointer): HResult; begin Result := E_NOTIMPL; end; function TEventObject.GetTypeInfo(index, LocaleID: Integer; out TypeInfo): HResult; begin Result := E_NOTIMPL; end; function TEventObject.GetTypeInfoCount(out Count: Integer): HResult; begin Result := E_NOTIMPL; end; function TEventObject.Invoke(dispid: Integer; const IID: TGUID; LocaleID: Integer; Flags: Word; var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult; begin if (dispid = DISPID_VALUE) then begin if Assigned(FOnEvent) then FOnEvent; Result := S_OK; end else Result := E_NOTIMPL; end; end. 刚才看错了 正确的处理方法如下 //单元文件 //窗口控件及命名见单元文件内的定义 //已在Delphi xe测试通过 unit Unit11; interface uses Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms, Dialogs, OleCtrls, SHDocVw, ComCtrls, activeX; type TForm11 = class(TForm) pgc1: TPageControl; ts1: TTabSheet; ts2: TTabSheet; wb1: TWebBrowser; wb2: TWebBrowser; procedure FormCreate(Sender: TObject); procedure pgc1Change(Sender: TObject); private { Private declarations } //当前激活的WebBrowser控件 {当激活的WebBrowser控件变化时更新该字段的值, 在IEMessageHandler中使用} FCurrBW : TWebBrowser; procedure IEMessageHandler(var Msg: TMsg; var Handled: Boolean); public { Public declarations } end; var Form11: TForm11; implementation {$R *.dfm} procedure TForm11.IEMessageHandler(var Msg: TMsg; var Handled: Boolean); const StdKeys = [VK_TAB, VK_RETURN]; { 标准键 } ExtKeys = [VK_DELETE, VK_BACK, VK_LEFT, VK_RIGHT]; { 扩展键 } fExtended = $01000000; { 扩展键标志 } begin Handled := False; if (FCurrBW = nil) then begin Handled := False; Exit; end; with Msg do begin if ((Message >= WM_KEYFIRST) and (Message <= WM_KEYLAST)) and ((wParam in StdKeys) or (GetKeyState(VK_CONTROL) < 0) or (wParam in ExtKeys) and ((lParam and fExtended) = fExtended)) then begin try with FCurrBW.Application as IOleInPlaceActiveObject do Handled := TranslateAccelerator(Msg) = S_OK; if not Handled then begin Handled := True; TranslateMessage(Msg); DispatchMessage(Msg); end; except end; end; end; end; procedure TForm11.pgc1Change(Sender: TObject); begin case pgc1.ActivePageIndex of 0 : FCurrBW := wb1; 1 : FCurrBW := wb2; end; end; procedure TForm11.FormCreate(Sender: TObject); begin FCurrBW := wb1; Application.OnMessage := IEMessageHandler; wb1.Navigate('http://bbs.csdn.net/topics/390341172?page=1#post-393434373'); wb2.Navigate('http://bbs.csdn.net/topics/390341172?page=1#post-393434373'); end; end.
getElementBy系列
getElementBy系列 WEB标准下可以通过getElementById(), getElementsByName(), and getElementsByTagName_r()访问DOCUMENT中的任一个标签 1、getElementById() getElementById()可以访问DOCUMENT中的某一特定元素,顾名思义,就是通过ID来取得元素,所以只能访问设置了ID的元素。 比如说有一个DIV的ID为docid:<div id="docid"></div> 那么就可以用getElementById("docid")来获得这个元素。返回具有指定 ID 属性值的第一个 2.getElementsByName() 这个是通过NAME来获得元素,但不知大家注意没有,这个是GET ELEMENTS,复数ELEMENTS代表获得的不是一个元素,为什么呢? 因 为DOCUMENT中每一个元素的ID是唯一的,但NAME却可以重复。打个比喻就像人的身份证号是唯一的(理论上,虽然现实中有重复),但名字重复的却 很多。如果一个文档中有两个以上的标签NAME相同,那么getElementsByName()就可以取得这些元素组成一个数组。 比如有两个DIV: <div name="docname" id="docid1"></div> <div name="docname" id="docid2"></div> 那么可以用getElementsByName("docname")获得这两个DIV,用getElementsByName("docname")[0]访问第一个DIV。 3、getElementsByTagName_r() 这 个呢就是通过TAGNAME(标签名称)来获得元素,一个DOCUMENT中当然会有相同的标签,所以这个方法也是取得一个数组。可以用 getElementsByTagName_r("div")来访问它们,用getElementsByTagName_r("div")[0]访问第一个 DIV,用getElementsByTagName_r("div")[1]访问第二个DIV。 如: <body> <div name="docname" id="docid1" onClick="bgcolor()"></div> <div name="docname" id="docid2" onClick="bgcolor()"></div> </body> </html> <script language="JavaScript" type="text/JavaScript"> <!-- function bgcolor(){ var docnObj=document.getElementsByTagName_r("div"); docnObj[0].style.backgroundColor = "black"; docnObj[1].style.backgroundColor = "black"; } --> </script> 总 结一下标准DOM,访问某一特定元素尽量用标准的getElementById(),访问标签用标准的getElementByTagName(),但 IE不支持getElementsByName(),所以就要避免使用getElementsByName(),但 getElementsByName()和不符合标准的document.all[]也不是全无是处,它们有自己的方便之处,用不用那就看网站的用户使用 什么浏览器,由你自己决定了。 Javascript中的getElementById十分常用,但在标准的页面中,一个id只能出现一次, 如果我想同时控制多个元素,例如点一个链接,让多个层隐藏,该怎么做?用class,当然,同一个class是可以允许在页面中重复出现的,那么有没有 getElementByClass呢?没有,但是可以解决: //创建一个数组 var allPageTags = new Array(); function hideDivWithClasses(theClass) { var allPageTags=document.getElementsByTagName_r("div"); //遍历页面中的所有标签 for (i=0; i<allPageTags.length; i++) //找到我们需要改变的class if (allPageTags[i].className==theClass) { //改变这个class的样式 allPageTags[i].style.display='none'; } } } ——————————————应用(照片日记编辑图文排版)—————————— //图文右对齐 function phototxtright() { var Tags = new Array(); var j=0; var theClassl='photo_edit'; var theClassc='photo_editcenter'; var theClassr='photo_editright'; var allHTMLTags=document.getElementsByTagName_r('*'); for (var i=0; i<allHTMLTags.length; i++) { if (allHTMLTags[i].className==theClassl||allHTMLTags[i].className==theClassc) { Tags[j]=allHTMLTags[i]; Tags[j].className="photo_editright"; j++; } } document.getElementByIdx_x("edit_box").style.textAlign="right"; } //改变字号 function setfontsize(num) { var Tags = new Array(); var j=0; var theClasstxt='nofocused'; var allHTMLTags=document.getElementsByTagName_r('*'); for (var i=0; i<allHTMLTags.length; i++) { if (allHTMLTags[i].className==theClasstxt) { Tags[j]=allHTMLTags[i]; Tags[j].style.fontSize= num+"px"; j++; } } document.getElementByIdx_x("edit_box").style.fontSize= num+"px"; } =================================== 详见: http://hi.baidu.com/dandan_ze/item/b91adaa7f30ef0dc5bf19116
利用WebBorwser和MSHTML.tlb做广告过滤器完全源码公开
程序组成: 两个引用对象:Microsoft HTML Object Library,Microsoft Internet Object 两个窗体: frmAbout.frm frmMenu.frm 两个*.bas: APIs.bas,mSysTray.bas 两个Class: MyIE.cls, windows.cls(其中windows.cls是collection对象的扩展,放MyIE.cls) 下面公开这两个主要类的代码(如要全部代码请留email,要看演示上www.jjsoft.cn,版权归作者,要用于商业目的请和作者联系fazhu@163.net) myIE.cls ------------------------------------------------------------------------------------------------------ Option EXPlicit Private WithEvents mIE As SHDocVw.InternetExplorer Private WithEvents IE_IFrame As MSHTML.HTMLIFrame Private WithEvents win2 As MSHTML.HTMLWindow2 Private WithEvents doc2 As MSHTML.HTMLDocument '/////////////////////////////////////////////////////// '判断Frame对象 Private tmpIE_IFrame As MSHTML.HTMLIFrame Private IE_FCols As MSHTML.FramesCollection '/////////////////////////////////////////////////////// Private body As MSHTML.HTMLBody Private IElements As MSHTML.IHTMLElement Private mHWnd As Long Private mDoc As MSHTML.IHTMLDocument2 Private isLoaded As Integer Private isClicked As Integer Private isCleaned As Integer Private tmpState As String Private Const FlashClassID As String = "CLSID:D27CDB6E-AE6D-11CF-96B8-444553540000" 'determine the refresh button is clicked 'Private m_nPageCounter As Integer 'Private m_nObjCounter As Integer Private m_bIsRefresh As Boolean Private mSArrays As Variant Private mPtr As POINTAPI '////////////////////////////////////////// Public Function Banding(item As SHDocVw.InternetExplorer) As SHDocVw.InternetExplorer On Error GoTo Err Dim tmpName As String, tmpie As SHDocVw.InternetExplorer 'Dim tmpdoc As MSHTML.HTMLDocument Set tmpie = item If (tmpie Is Nothing) Then Exit Function If Not (TypeOf item Is IWebBrowser2) Then Exit Function tmpName = tmpie.FullName tmpName = Mid(tmpName, InStrRev(tmpName, "") + 1) If UCase(tmpName) = "IEXPLORE.EXE" Then Set mIE = tmpie mHWnd = mIE.hwnd ' Call BandingDoc(mIE2) End If tmpName = "" Set tmpie = Nothing Set Banding = mIE Bye: If Not (tmpie Is Nothing) Then Set tmpie = Nothing Exit Function Err: MsgBox "Error:" & Err.Description & " in Banding" Resume Bye End Function Public Property Get IEHandle() As Long IEHandle = mHWnd End Property Private Sub Class_Initialize() m_bIsRefresh = True '//////////////////////// '非弹出式广告特征集 mSArrays = Array("input", "a", "iframe", "area", "frame") '//////////////////////// End Sub Private Sub Class_Terminate() Set mDoc = Nothing Set mIE = Nothing End Sub Private Sub mIE_BeforeNavigate2(ByVal pDisp As Object, URL As Variant, Flags As Variant, TargetFrameName As Variant, PostData As Variant, Headers As Variant, Cancel As Boolean) On Error Resume Next Dim tmpie As SHDocVw.InternetExplorer If Not (mDoc Is Nothing) Then Set mDoc = Nothing Else Exit Sub End If Call BandingDoc("mIE_BeforeNavigate2") 'm_nPageCounter = m_nPageCounter + 1 End Sub Private Sub mIE_DocumentComplete(ByVal pDisp As Object, URL As Variant) On Error Resume Next 'm_nPageCounter = m_nPageCounter - 1 Call BandingDoc("mIE_DocumentComplete") If m_bIsRefresh Then If (tmpState = "interactive") Then _ isLoaded = 1 Call BandingDoc2(mIE) Else If (tmpState = "complete") Then _ isLoaded = 1 Call BandingDoc2(mIE) End If End Sub Private Sub mIE_DownloadBegin() On Error Resume Next If Not (mDoc Is Nothing) Then Set mDoc = Nothing Call BandingDoc("mIE_DownloadBegin") 'Remarked by zdj 2004-02-02 'If m_bIsRefresh = False Then m_bIsRefresh = True 'm_nObjCounter = m_nObjCounter + 1 End Sub Private Sub mIE_DownloadComplete() 'm_nObjCounter = m_nObjCounter - 1 'Call BandingDoc("mIE_DownloadComplete") 'If (tmpState = "complete") Then ' isLoading = 0 ' Call BandingDoc2(mIE) 'End If '//////////////////////////////////////////// 'The refresh button is clicked 'If Not (m_bIsRefresh) Then m_bIsRefresh = True 'If m_nObjCounter = 1 Then m_nObjCounter = 0 'Remarked by zdj 2004-02-02 'If (m_bIsRefresh) Then ' isLoaded = 1 ' Call BandingDoc2(mIE) 'End If ' '//////////////////////////////////////////// End Sub Private Sub BandingDoc(ByVal strWhere As String) On Error GoTo Err: If mIE Is Nothing Then Exit Sub End If If mDoc Is Nothing Then Set mDoc = mIE.document tmpState = mDoc.readyState If tmpState <> "complete" Then isLoaded = 0 'Debug.Print mDoc.readyState & " " & strWhere Bye: Exit Sub Err: If Err.Number = -2147467259 Then Resume Bye MsgBox Err.Number & Err.Description & strWhere Resume Bye End Sub Private Sub mIE_NavigateComplete2(ByVal pDisp As Object, URL As Variant) 'm_nPageCounter = m_nPageCounter + 1 'm_nObjCounter = m_nObjCounter + 1 'Remarked by zdj 2004-02-02 'm_bIsRefresh = False End Sub Private Sub mIE_NewWindow2(ppDisp As Object, Cancel As Boolean) Dim tmpobj As IHTMLDocument2, tmpString As String Dim notPopups As Boolean, tmpobj2 As IHTMLElement Dim i As Integer If (BlockedPopups = True) Then GetCursorPos mPtr Set tmpobj = mIE.document Set tmpobj2 = tmpobj.elementFromPoint(mPtr.X, mPtr.Y) If tmpobj2 Is Nothing Then notPopups = Not (isLoaded = 0) Else If (tmpobj2.document.activeElement) Is Nothing Then notPopups = Not (isLoaded = 0) Else tmpString = LCase(tmpobj2.document.activeElement.tagName) For i = LBound(mSArrays) To UBound(mSArrays) If tmpString = CStr(mSArrays(i)) Then notPopups = True Exit For End If Next i End If End If If notPopups = False Then Cancel = True If EnabledBeep Then Beep 500, 100 isCleaned = isCleaned + 1 End If End If Set tmpobj2 = Nothing Set tmpobj = Nothing End Sub Private Sub BandingDoc2(ByVal pDisp As Object) On Error Resume Next Dim tmpdoc As Object, iwin As MSHTML.HTMLWindow2 Dim tmpdoc2 As MSHTML.HTMLDocument Dim i As Integer, j As Integer Dim ii As Integer, jj As Integer Dim k As Integer, killed As Boolean If TypeOf pDisp Is IWebBrowser2 Then Call pDisp.ExecWB(OLECMDID_SHOWMESSAGE, OLECMDEXECOPT_DONTPROMPTUSER) Set tmpdoc = pDisp.document If TypeName(tmpdoc) = "HTMLDocument" Then Set doc2 = tmpdoc Set win2 = doc2.parentWindow Set body = doc2.body 'Skip the error message 'win2.clearTimeout (0) '绑定flash对象 If (BlockedFlash = True) Then i = cleanFlash(doc2.All.tags("OBJECT"), doc2.All.tags("EMBED")) End If '绑定动画对象 If (BlockedAnimate = True) Then j = cleanAnimated(doc2.All.tags("IMG")) End If '///////////////////////////////// If (BlockedFlying = True) Then k = cleanFlyingAds(doc2.All.tags("DIV")) End If '//////////////////////////////////////////////// '过滤框架中的广告 If TypeName(doc2.body) = "HTMLFrameSetSite" Then If doc2.readyState = "complete" Then win2.Status = "正在阻止框架中的广告..." ii = RecursivlyFlash(doc2.frames) jj = RecursivlyAnimate(doc2.frames) 'win2.Status = "阻止完毕!" End If End If '//////////////////////////////////////////////// '////////////////////////////////// ' skip the onload event in body tag 'body.onload = "" body.onunload = "" '////////////////////////////////// killed = (isCleaned > 0 Or i > 0 Or j > 0 Or ii > 0 Or jj > 0 Or k > 0) If (killed) Then Call showAlertInfo(isCleaned + i + j + ii + jj + k) End If End If End If isCleaned = 0 Set tmpdoc = Nothing End Sub Private Function cleanFlash(ByVal item As MSHTML.IHTMLElementCollection, ByVal item2 As MSHTML.IHTMLElementCollection) As Integer On Error GoTo Errs Dim i As Integer Dim objelments As MSHTML.HTMLObjectElement, objstyle As MSHTML.IHTMLStyle Dim objembed As MSHTML.HTMLEmbed '网页中无此标签的对象 If (item Is Nothing) Then Exit Function i = 0 '///////////////////////////////////////////////////////// For Each objelments In item 'DoEvents If Not (objelments Is Nothing) Then If (item.Length = 0) Then Exit For If UCase(objelments.classid) = FlashClassID Then Set objstyle = objelments.Style With objstyle .visibility = "Hidden" '.Width = 0 '.Height = 0 End With Set objstyle = Nothing i = i + 1 End If End If Next objelments '////////////////////////////////////////////////////////// '网页中无此标签的对象 If (item2 Is Nothing) Then Exit Function For Each objembed In item2 'DoEvents If Not (objembed Is Nothing) Then If (item2.Length = 0) Then Exit For If InStr(1, LCase(objembed.src), ".swf") > 0 Then Set objstyle = objembed.Style With objstyle .visibility = "Hidden" '.Width = 0 '.Height = 0 End With Set objstyle = Nothing End If End If Next objembed cleanFlash = i Bye: Exit Function Errs: cleanFlash = -1 Resume Bye End Function Private Function cleanAnimated(ByVal item As MSHTML.IHTMLElementCollection) As Integer On Error GoTo Errs Dim i As Integer Dim objImgs As MSHTML.IHTMLImgElement, objImg As MSHTML.HTMLImg Dim objstyle As MSHTML.IHTMLStyle '网页中无此标签的对象 If (item Is Nothing) Then Exit Function i = 0 For Each objImgs In item If Not (objImgs Is Nothing) Then If (item.Length = 0) Then Exit For Set objImg = objImgs Set objstyle = objImg.Style If InStr(1, LCase(objImg.src), ".gif") > 0 Then DoEvents With objstyle .visibility = "hidden" '.Width = 0 '.Height = 0 End With i = i + 1 End If End If Set objstyle = Nothing Set objImg = Nothing Next objImgs cleanAnimated = i Bye: Exit Function Errs: cleanAnimated = -1 Resume Bye End Function Private Function RecursivlyFlash(ByRef frame As FramesCollection) As Integer On Error GoTo Errs Dim X As Object, ihtmle As IHTMLElementCollection Dim i As Integer, spWin As IHTMLWindow2 Set X = frame.document.frames If X.Length = 0 Then Exit Function For i = 0 To X.Length - 1 'DoEvents Call RecursivlyFlash(X(i)) Set ihtmle = X(i).document.All If BlockedFlash Then RecursivlyFlash = cleanFlash(ihtmle.tags("OBJECT"), ihtmle.tags("EMBED")) End If Set ihtmle = Nothing Next i Bye: Exit Function Errs: RecursivlyFlash = -1 Resume Bye End Function Private Function RecursivlyAnimate(ByRef frame As FramesCollection) As Integer On Error GoTo Errs Dim X As Object, ihtmle As IHTMLElementCollection Dim i As Integer, spWin As IHTMLWindow2 Set X = frame.document.frames If X.Length = 0 Then Exit Function For i = 0 To X.Length - 1 'DoEvents Call RecursivlyAnimate(X(i)) Set ihtmle = X(i).document.All If BlockedAnimate Then RecursivlyAnimate = cleanAnimated(ihtmle.tags("IMG")) End If Set ihtmle = Nothing Next i Bye: Exit Function Errs: RecursivlyAnimate = -1 Resume Bye End Function Private Function cleanFlyingAds(ByVal item As MSHTML.IHTMLElementCollection) As Integer On Error GoTo Errs Dim i As Integer, l As Integer, j As Integer Dim tmpobj As Object l = item.Length For i = 0 To l - 1 DoEvents Set tmpobj = item(i) If (tmpobj.Style.position = "absolute") Then tmpobj.Style.visibility = "hidden" j = j + 1 End If Set tmpobj = Nothing Next i cleanFlyingAds = j Bye: Exit Function Errs: cleanFlyingAds = -1 Resume Bye End Function '///////////////////////////////////////////////////////////// '显示警告语 Private Sub showAlertInfo(ByVal Count As Integer) With win2 .Status = "已阻止网页中符合条件的" & Count & "个广告!(www.jjsoft.cn)" End With End Sub '//////////////////////////////////////////////////////////// Private Sub AlertBeep() Beep 500, 500 End Sub Private Sub win2_onunload() On Error Resume Next ' the refresh button is clicked If mDoc.readyState = "complete" Then m_bIsRefresh = True isLoaded = 1 End Sub ------------------------------------------------------------------------------------------------------ Windows.cls '局部变量,保存集合 Private mCol As Collection Private WithEvents winShell As SHDocVw.ShellWindows Private Function Add(Key As SHDocVw.InternetExplorer) As MyIE '创建新对象 Dim objNewMember As MyIE Set objNewMember = New MyIE '设置传入方法的属性 If Not objNewMember.Banding(Key) Is Nothing Then mCol.Add objNewMember, CStr(objNewMember.IEHandle) End If '返回已创建的对象 Set Add = objNewMember Set objNewMember = Nothing End Function Public Property Get item(vntIndexKey As Variant) As MyIE '引用集合中的一个元素时使用。 'vntIndexKey 包含集合的索引或关键字, '这是为什么要声明为 Variant 的原因 '语法:Set foo = x.Item(xyz) or Set foo = x.Item(5) Set item = mCol(vntIndexKey) End Property Public Property Get Count() As Long '检索集合中的元素数时使用。语法:Debug.Print x.Count Count = mCol.Count End Property Public Sub Remove(vntIndexKey As Variant) '删除集合中的元素时使用。 'vntIndexKey 包含索引或关键字,这是为什么要声明为 Variant 的原因 '语法:x.Remove(xyz) mCol.Remove vntIndexKey End Sub Public Property Get NewEnum() As IUnknown '本属性允许用 For...Each 语法枚举该集合。 Set NewEnum = mCol.[_NewEnum] End Property Private Sub Class_Initialize() '创建类后创建集合 Call Refresh End Sub Private Sub Class_Terminate() '类终止后破坏集合 Set mCol = Nothing Set winShell = Nothing End Sub Private Sub Refresh() On Error GoTo Proc_Err Dim SWs As New SHDocVw.ShellWindows Dim var As SHDocVw.InternetExplorer Set mCol = Nothing Set mCol = New Collection For Each var In SWs Add var Next If ObjPtr(winShell) <> ObjPtr(SWs) Then Set winShell = SWs End If Set SWs = Nothing Set var = Nothing Exit Sub Proc_Err: End Sub Private Sub winShell_WindowRegistered(ByVal lCookie As Long) Call Refresh End Sub Private Sub winShell_WindowRevoked(ByVal lCookie As Long) Call Refresh End Sub -----------------------------------------------------------------------------------------------------
始终用WebBrowser打开网页
要在同一个WebBrowser里显示,可以这样: 再放一个小的WebBrowser2,设置它在WebBrowser1下面(设置Visible为False好象无效), // 在WebBrowser1的OnNewWindow2事件中: procedure TForm1.WebBrowser1NewWindow2(Sender: TObject; var ppDisp: IDispatch; var Cancel: WordBool); begin ppDisp := WebBrowser2.Application; // 新的窗口先指向WebBrowser2 end; // 在WebBrowser2的OnBeforeNavigate2事件中: procedure TForm1.WebBrowser2BeforeNavigate2(Sender: TObject; const pDisp: IDispatch; var URL, Flags, TargetFrameName, PostData, Headers: OleVariant; var Cancel: WordBool); begin WebBrowser1.Navigate(string(URL)); // 再指回WebBrowser1 Cancel := True; end;
关于delphi点击webbrowser中任意一点的问题
有时候我们需要delphi载入webbrowser1打开网页的时候 需要点击某一个点的位置 可能是坐标 可能是按钮 可能是其他的控件 应该如何来实现呢? 这里来简单说明一下点击坐标的过程 点击过程很明显我们移动鼠标来点击或者发送消息来点击 移动鼠标点击的比较常见 这里详细说明一下发送消息来点击的办法 发送消息来点击的思路是sendmessage()发送消息来实现的 导入句柄 点击的就可以了。但是这里的句柄(webbrowser的句柄)其实是不好找的。如果找到了合适的正确的句柄点击起来还是非常容易的 这里有一个过程可以清楚的说明sendmessage的点击过程 procedure sendclick(var x,y:integer) ; begin SendMessage(GetWindow(GetWindow(form1.WebBrowser1.Handle, GW_CHILD), GW_CHILD),WM_LBUTTONDOWN, ////MK_LBUTTON, MAKELONG(strtoint(edit2.Text),strtoint(edit3.Text))); MK_LBUTTON, MAKELONG(x,y)); sleep(500); SendMessage(GetWindow(GetWindow(form1.WebBrowser1.Handle, GW_CHILD), GW_CHILD),WM_LBUTTONUP, ////MK_LBUTTON, MAKELONG(strtoint(edit2.Text),strtoint(edit3.Text))); MK_LBUTTON, MAKELONG(x,y)) ; PostMessage(GetWindow(GetWindow(form1.WebBrowser1.Handle, GW_CHILD), GW_CHILD),WM_LBUTTONDOWN, ////MK_LBUTTON, MAKELONG(strtoint(edit2.Text),strtoint(edit3.Text))); MK_LBUTTON, MAKELONG(x,y)); sleep(500); PostMessage(GetWindow(GetWindow(form1.WebBrowser1.Handle, GW_CHILD), GW_CHILD),WM_LBUTTONUP, ////MK_LBUTTON, MAKELONG(strtoint(edit2.Text),strtoint(edit3.Text))); MK_LBUTTON, MAKELONG(x,y)) ; end; 这里定义了一个过程 GetWindow(GetWindow(form1.WebBrowser1.Handle, GW_CHILD), GW_CHILD) 这是webbrowser的句柄 整个过程发送了 sendmessage和postmessage2个包 这里是防止一次没点中 所以再补点一次 关于点击的坐标是过程中导入的参数 x,y。 这里的X Y坐标是相对于窗体的坐标 就是相对于webbrowser的坐标 所以必须要用spy++来查找点击的坐标
具有自动查找Web页面上所有链接的网络浏览器
具有自动查找Web页面上所有链接的网络浏览器 1. 概述 WEB的应用已经深入到现在社会的方方面面,作为一个软件开发人员或其他技术人员,都有可能遇见在Internet上查询大量的资料和信息的情况,一般来说用的最多的就是WEB的搜索Engine。当我们使用它查出大量的资料链接后,还有可能遇到更多的链接,但要自己去看他们是否是链接,那确实是一件很麻烦的事情。这篇文章就是来讲,如何用Delphi的MSHTML_TLB.pas来开发一个具有自动查找Web页面上所有链接的简单网络浏览器。我是在IE5的环境下写的这个程序,当然它可以向下兼容,如IE4。 2.关于MSHTML_TLB.pas MSHTML_TLB.pas是Deliphi里面自带的一个类。它的含义是Microsoft HTML对象库。它不能够包含在所有的工程或程序中,原因是它实在是太大了,整个文件的代码共有241,899l行,那么长。大小约有12M。下面我们来看看它是如何加入到程序中的。 1. 首先,我们打开Delphi,建立一个新的application。我把form1保存为MainFrm.pas,把application保存为FindLinks.dPR. 2. 要想实现IE 的功能我们就必须要使用Microsoft HTML对象库(MSHTML type library.)如何实现呢?如图1, Project->Import Type Library: 然后你会看到关于"Microsoft HTML Object Library (Version 4.0)"的列表,如图2。 接下来可能会遇到一些问题。比如,在列表里面没有出现"Microsoft HTML Object Library (Version 4.0)"。这是为什么呢?那是IE的问题,由于IE版本的不同(我用的是IE5)。我建议最好是先查询你的计算机里面有没有mshtml.tlb这个文件。 在9x里面它是存在与C:WINDOWSSYSTEM目录里面,在2000里面它在WINNTsystem32目录里面。如果找到了这个文件,就可以用图2的click on the "Add..." button,然后选择mshtml.tlb,就可以了,如果没有找到它,那说明你没有安装IE或你的IE版本太低,请升级IE。 最后,当我们选择了倒入的库后,会等待一段时间,因为它实在是太长了,不过请千万不要因为是死机了。它会给自动查找提供很多帮助。 3. 工程实现。 界面设计如下图: 使用以下组件: 控件 命名 TEXT TLabel lblURL 资料网址 TEdit edtURL http://www.huihu.com TButton btnFindLinks 查询连接 TListBox lstbxLinks null 4. 程序设计 1. 在Form1的interface部分,在uses后面加入,OleCtrls, SHDocVw, and OleServer.这些所应用的类,都是基于我们所要创建的TinternetExplorer的,它是IE的ActiveX的对象。但是这里还有其它的方式(TinternetExplorer)进行,我们采用TwebBrowser 控制在我们的form1。 2. 我们在private里面加入如下代码: FInternetExplorer: TInternetExplorer; procedure WebBrowserDocumentComplete(Sender: TObject; var pDisp: OleVariant; var URL: OleVariant); 最后用Ctrl-Shift-C完成类的声明。 3. 在impelmentation后面加入如下声明: uses MSHTML_TLB, ComObj; 要使用的类。 4. 在form1的OnCreate事件中加入如下: FInternetExplorer := TInternetExplorer.Create(Self); FInternetExplorer.OnDocumentComplete := WebBrowserDocumentComplete; 5. 最后在form1的TForm1.WebBrowserDocumentComplete里面加入如下代码: 1. procedure TForm1.WebBrowserDocumentComplete(Sender: TObject; 2. var pDisp: OleVariant; var URL: OleVariant); 3. var 4. Doc: IHTMLDocument2; 5. ElementCollection: IHTMLElementCollection; 6. HtmlElement: IHTMLElement; 7. I: Integer; 8. AnchorString: string; 9. begin 10. lstbxLinks.Clear; 11. // 在处理网页的时候发现它没有完全下载,将不会进行处理连接 12. Doc := FInternetExplorer.Document as IHTMLDocument2; 13. if Doc = nil then 14. raise Exception.Create('Couldn''t convert the ' + 15. 'FInternetExplorer.Document to an IHTMLDocument2'); 16. // 夺取web上的所有元素。 17. ElementCollection := Doc.all; 18. for I := 0 to ElementCollection.length - 1 do 19. begin 20. file://得到当前的元素 21. HtmlElement := ElementCollection.item(I, '') as IHTMLElement; 22. // 查找网页原代码中的LINK标记。 23. // 发现其它的html标记 (例如: TABLE, FONT, etc.) 24. if HTMLElement.tagName = 'A' then 25. begin 26. // 在详细的link里面抓取innerText,innertext就是标记中<href=后面的东西>例如: 27. // 我们在web里面看见"西南民族学院" 28. // <a href="http://www.swun.edu.cn"><b>西南民族学院</b></a>. 29. AnchorString := HtmlElement.innerText; 30. if AnchorString = '' then 31. AnchorString := '(Empty Name)'; 32. AnchorString := AnchorString + ' - ' + 33. (HtmlElement as IHTMLAnchorElement).href; 34. lstbxLinks.Items.Add(AnchorString); 35. end; 36. end; 37. end; 最后我们在button(btnFindLinks)加入Onclick 事件: 1. // 在被浏览的web里面进行查询连接。 2. FInternetExplorer.Navigate(edtURL.Text, EmptyParam, EmptyParam, EmptyParam, EmptyParam); 从以上的程序里面我们可以看出它的原理了,实际上是很简单的,看过html原代码的人都知道,使网页产生连接的代码就是:<a href="http://www.swun.edu.cn"><b>西南民族学院</b></a>. 我程序的原理就是通过截取href后面的字符串,并在"""号后面截止。 然后把它保存为另外的字符串。然后通过TwebBrowser显示出来。 最后让我们来编译这个程序,的却,编译它很费时间,因为编译多达241,899l行的MSHTML_TLB.pas,是一件很麻烦的事情。其中还包括多达20多个的warning错误,但请放心这是MSHTML_TLB.pas的问题,与其它程序无关。这样一个小型的查找Web页面上所有链接的简单网络浏览器就出现在我们面前。本程序在IE5.0和Delphi6下编译通过。
webbrowser 常用方法示例
var Form : IHTMLFormElement ; D:IHTMLDocument2 ; begin with WebBrowser1 do begin D := Document as IHTMLDocument2; Form := D.Forms.item( 'form1 ',0) as IHTMLFormElement; //form1为表单名 //title为表单中的文本框 (form.item( 'title ',0) as IHTMLElement).setAttribute( 'value ',s_title,0); (form.item( 'content ',0) as IHTMLElement).setAttribute( 'value ',edit1.text,0); (form.item( 'add ',0) as IHTMLElement).click;//add为按钮名称 end; 在delphi的WebBrowser中获取和设置Input表单值 var i:Integer; myole:oleVariant; begin myole := wb1.Document; for i := 0 to myole.all.length - 1 do begin if myole.all.item(i).tagName = 'INPUT' then begin mmo1.Lines.Add(myole.all.item(i).name); mmo1.Lines.Add(myole.all.item(i).value); end; end; end; WebBrowser1.GoHome; //到浏览器默认主页 WebBrowser1.Refresh; //刷新 WebBrowser1.GoBack; //后退 WebBrowser1.GoForward; //前进 WebBrowser1.Navigate('...'); //打开指定页面 WebBrowser1.Navigate('about:blank'); //打开空页面 -------------------------------------------------------------------------------- //打开空页面, 并写入... WebBrowser1.Navigate('about:<head><title>标题></title><body>页面内容</body>'); -------------------------------------------------------------------------------- //读取网页脚本中的变量: procedure TForm1.Button1Click(Sender: TObject); var s: string; i: Integer; begin s := WebBrowser1.OleObject.document.Script.str; i := WebBrowser1.OleObject.document.Script.num; ShowMessage(s); //Hello ShowMessage(IntToStr(i)); //99 //也可以这样读: s := WebBrowser1.OleObject.document.parentWindow.str; i := WebBrowser1.OleObject.document.parentWindow.num; ShowMessage(s); //Hello ShowMessage(IntToStr(i)); //99 end; 假如网页中有这样的语句: <script> var str = "Hello"; i = 99; </script> -------------------------------------------------------------------------------- //调用网页脚本中的函数: procedure TForm1.Button1Click(Sender: TObject); begin WebBrowser1.OleObject.document.parentWindow.MB(); //HTML-Js //如需指定脚本语言, 需要: WebBrowser1.OleObject.document.parentWindow.execScript('MB()','JavaScript'); //HTML-Js end; 假如有这样的脚本: <script> function MB(){ alert('HTML-Js'); } </script> -------------------------------------------------------------------------------- //判断网页及内部框架网页是否全部下载完毕 procedure TForm1.WebBrowser1DocumentComplete(ASender: TObject; const pDisp: IDispatch; var URL: OleVariant); begin if WebBrowser1.Application = pDisp then begin Text := '网页下载完毕!'; end; end; -------------------------------------------------------------------------------- //改变背景色或背景图片: WebBrowser1.OleObject.document.body.bgcolor := '#FF0000'; WebBrowser1.OleObject.document.body.background := '...图片地址'; -------------------------------------------------------------------------------- //操作有 ID 标签的对象: var s: string; begin s := WebBrowser1.OleObject.document.getElementByID('span1').innerText; ShowMessage(s); //这是 span1 标签中的内容 //或者: s := WebBrowser1.OleObject.document.parentWindow.span1.innerText; ShowMessage(s); //这是 span1 标签中的内容 //隐藏它: WebBrowser1.OleObject.document.parentWindow.span1.style.display := 'none'; end; 假如网页中有这样的内容: <span id=span1>这是 span1 标签中的内容</span> -------------------------------------------------------------------------------- //获取网页源代码 var s: string; begin s := WebBrowser1.OleObject.document.body.innerHTML; //body内的所有代码 s := WebBrowser1.OleObject.document.body.outerHTML; //body内的所有代码, 包含body标签 s := WebBrowser1.OleObject.document.documentElement.innerHTML; //html内的所有代码 end; -------------------------------------------------------------------------------- //WebBrowser 中的右键菜单 //先要添加ApplicationEvents1,指定其Message事件 //屏蔽右键菜单 procedure TForm1.ApplicationEvents1Message(var Msg: tagMSG; var Handled: Boolean); begin with Msg do begin if not IsChild(WebBrowser1.Handle, hWnd) then Exit; Handled := (message = WM_RBUTTONDOWN) or (message = WM_RBUTTONUP) or (message = WM_CONTEXTMENU); end; end; //替换右键菜单 procedure TForm1.ApplicationEvents1Message(var Msg: tagMSG; var Handled: Boolean); var mPoint: TPoint; begin if IsChild(WebBrowser1.Handle, Msg.Hwnd) and ((Msg.Message = WM_RBUTTONDOWN) or (Msg.Message = WM_RBUTTONUP)) then begin GetCursorPos(mPoint); //得到光标位置 PopupMenu1.Popup(mPoint.X, mPoint.Y); //弹出popupmenu1的菜单 Handled:=True; end; end; -------------------------------------------------------------------------------- //新页面写入 begin WebBrowser1.Navigate('about:blank'); WebBrowser1.OleObject.Document.Writeln('ok'); end;
Delphi实现网页采集
说到网页采集,通常大家以为到网上偷数据,然后把到收集到的数据挂到自己网上去。其实也可以将采集到的数据做为公司的参考,或把收集的数据跟自己公司的业务做对比等。 目前网页采集多为3P代码为多(3P即ASP、PHP 、JSP)。用得最有代表的就动易科技公司BBS中新闻采集系统,和网上流传的新浪新闻采集系统等都是用ASP程序来使用,但速度从理论上来说不是很好。如果尝试用其它软件的多线程采集是不是更快?答案是肯定的。用DELPHI、VC、VB、JB都可以,PB似乎比较不好做。以下用DELPHI来解释采集网页数据。 一、 简单的新闻采集 新闻采集是最简单的,只要识别标题、副题、作者、出处、日期、新闻主体、分页就可以了。在采集之前肯定要取得网页的内容,所以在DELPHI里加入idHTTP控件(在indy Clients面板),然后用idHTTP1.GET 方法取得网页的内容,声明如下: function Get(AURL: string): string; overload; AURL参数,是string类型,指定一个URL地址字符串。函数返回也是string类型,返回网页的HTML源文件。比如我们可以这样调用: tmpStr:= idHTTP1.Get(‘http://www.163.com’); 调用成功后,tmpstr变量里存储的就是网易主页的代码了。 接下来,讲一下数据的截取,这里,我定义了这么一个函数: function TForm1.GetStr(StrSource,StrBegin,StrEnd:string):string; var in_star,in_end:integer; begin in_star:=AnsiPos(strbegin,strsource)+length(strbegin); in_end:=AnsiPos(strend,strsource); result:=copy(strsource,in_sta,in_end-in_star); end; StrSource:string类型,表示HTML源文件。 StrBegin:string类型,表示截取开始的标记。 StrEnd:string,表示截取结束的标记。 函数返回字符串StrSource中从StrSource到StrBegin之间的一段文本。 比如: strtmp:=TForm1.GetStr(‘A123BCD’,‘A’,‘BC’); 运行后,strtmp的值为:’123’。 关于函数里用到的AnsiPos和copy,都是系统定义的,可以从delphi的帮助文件里找到相关说明,我在这里也简单罗嗦一下: function AnsiPos(const Substr, S: string): Integer 返回Substr在S中第一次出现的位置。 function copy(strsource,in_sta,in_end-in_star): string; 返回字符串strsource中,从in_sta(整型数据)开始到in_end-in_star(整型数据)结束的字符串。 有了以上函数,我们就可以通过设置各种标记,来截取想要的文章内容了。在程序中,比较麻烦的是我们需要设置许多标记,要定位某一项内容,必须设置它的开始和结束标志。比如要取得网页上的文章标题,必须事先查看网页代码,查看出文章标题前边和后边的一些特征代码,通过这些特征代码,来截取文章的标题。 下面我们来实际演示一下,假设要采集的文章地址为http://www.xxx.com/test.htm 代码为: <html> <head> <meta http-equiv="Content-Language" content="zh-cn"> <meta name="GENERATOR" content="Microsoft FrontPage 5.0"> <meta name="ProgId" content="FrontPage.Editor.Document"> <meta http-equiv="Content-Type" content="text/html; charset=gb2312"> <title>新建网页 1</title> </head> <body> <p align="center"><b>文章标题</b></p> <table border="1" cellpadding="0" cellspacing="0" style="border-collapse: collapse" bordercolor="#111111" width="100%" id="AutoNumber1"> <tr><td width="60%">作者</td> <td width="40%">出处</td></tr> </table> <p><font size="2">这里是文章内容正文。</font></p> <a href='..new_pr.asp'>上一页</a> <a href='new_ne.asp'>下一页</a> </body> </html> 第一步,我们用StrSource:= idHTTP1.Get(‘http://www.xxx.com/test.htm ’);将网页代码保存在strsource变量中。 然后定义strTitle、strAuthor、strCopyFrom、strContent: strTitle:= GetStr(StrSource,’ <p align="center"><b>’,’ </b></p>’): strAuthor:= GetStr(StrSource,’ <tr><td width="60%">’,’ </td>’): strCopyFrom:= GetStr(StrSource,’ <td width="40%">’,’ </td></tr>’): strContent:= GetStr(StrSource,’ <p><font size="2">,’ </font></p>’): 这样,就能把文章的标题、副题、作者、出处、日期、内容和分页分别存储在以上变量中。 第二步,用循环的办法,打开下一页,并取得内容,加到strContent变量中。 StrSource:= idHTTP1.Get(‘new_ne.asp’); strContent:= strContent +GetStr(StrSource,’ <p><font size="2">,’ </font></p>’): 然后再判断有没有下一页,如果还有就接着取得下一页的内容。 这样就完成了一个简单的截取过程。从以上的程序代码可以看到,我们使用的截取办法都是找截取内容的头部和尾部的,如果遇到这个头部和尾部有多个怎么办?似乎没办法,只会找到第一个,所以在找之前应该验证一下是不是只有一处有这个截取的内容的前后部。 以上内容没有程序验证,仅供参考,如果认为有用可以试试。