• 获取网页快照


    unit uWebCracker;

    interface

    uses mshtml,SHdocvw,classes,SysUtils,StrUtils;

    const

    MAXPAGECOUNT=20;

    type

    TWebPageRecord=record

    URL:string;

    Title:string;

    Text:string;

    end;

    type

    TWebCracker=class(TObject)

    private

    FWebPageRecordArray:array[0..MAXPAGECOUNT-1] of TWebPageRecord;

    FWebPageCount:integer;

    public

    constructor Create;

    destructor Free;

    procedure SnapShot;

    function GetWebText(AIndex:integer):string;

    function GetWebTitle(AIndex:integer):sttring;

    function GetWebURL(AIndex:integer):string;

    procedure Clear;

    procedure Refresh;

    function GetWebPageCount:integer;

    end;

    implementation

    constructor TWebCracker.Create;

    begin

    inherited Create;

    FWebPageCount:=0;

    end;

    destructor TWebCracker.Free;

    begin

    clear;

    inherited Free;

    end;

    procedure TWebCracker.SnapShot;

    const

    ERRORNOTLOADCOMPLETE='可能打开的网页还没有完全加载,请当所有的网页下载完后再刷新!'

    var

    ShellWindow:IShellWindow;

    WebBrowser:IWebBrower2;

    I,ShellWindowCount:integer;

    HTMLDocument:IHTMLDocument2;

    URL:string;

    WebPageRecord:TWebPageRecord;

    begin

    FWebPageCount :=0;

    ShellWindow:=CoShellWindow.Create;

    ShellWindowCount :=ShellWindow.Create;

    if ShellWindowCount>MAXPAGECOUNT then

    ShellWindowCount:=MAXPAGECOUNT;

    for i:=0 to ShellWindowCount-1 do

    begin

    WebBrowser:=ShellWindow.Item(I) as IWebBrowser2;

    URL:=WebBrowser.LocationURL;

    if (WebBrowser<>nil) and (not IsLocationFile(URL)) then

    begin

    try

    HTMLDocument :=WebBrowser.Document as IHTMLDocument2;

    WebPageRecord.URL :=URL;

    WebPageRecord.Title :=HTMLDocument.title;

    WebPageRecord.Text :=HTMLDocument.body.outerText;

    FWebPageRecordArray[I] :=WebPageRecord;

    Inc(FWebPageCount);

    except

    on Exception do

    raise Exception.Create(ERRORNOTLOADCOMPLETE);

    end;

    end;

    ShellWindow :=nil;

    end;

    end;

    function TWebCracker.GetWebText(AIndex:integer):string;

    begin

    Result :=FWebPageRecordArray[AIndex].Text;

    end;

    function TWebCracker.GetWebTitle(AIndex:integer):string;

    begin

    Result :=FWebPageRecordArray[AIndex].Title;

    end;

    function TWebCracker.GetWebURL(AIndex:integer):string;

    begin

    Result :=FWebPageRecordArray[AIndex].URL;

    end;

    procedureTWebCracker.Clear;

    begin

    FWebPageCount :=0;

    end;

    procedureTWebCracker.Refresh;

    begin

    self.Snapshot;

    end;

    functionTWebCracker.GetWebPageCount:integer;

    begin

    Result :=FWebPageCount;

    end;

  • 相关阅读:
    JavaScript之链式结构序列化
    js 常用的正则表达式
    99%的人都理解错了HTTP中GET与POST的区别
    取代Promise的Generator生成器函数
    深入了解JavaScript权威指南
    限制input输入类型(多种方法实现)
    textillate.js 文字动画
    jQuery如何改变css伪元素样式
    保存android程序崩溃日志到SD卡
    跟我一起学extjs5(19--模块记录的拖放删除、拖放复制新增)
  • 原文地址:https://www.cnblogs.com/djcsch2001/p/2035826.html
Copyright © 2020-2023  润新知