• 获取网页快照


    unit uWebCracker;

    interface

    uses mshtml,SHdocvw,classes,SysUtils,StrUtils;

    const

    MAXPAGECOUNT=20;

    type

    TWebPageRecord=record

    URL:string;

    Title:string;

    Text:string;

    end;

    type

    TWebCracker=class(TObject)

    private

    FWebPageRecordArray:array[0..MAXPAGECOUNT-1] of TWebPageRecord;

    FWebPageCount:integer;

    public

    constructor Create;

    destructor Free;

    procedure SnapShot;

    function GetWebText(AIndex:integer):string;

    function GetWebTitle(AIndex:integer):sttring;

    function GetWebURL(AIndex:integer):string;

    procedure Clear;

    procedure Refresh;

    function GetWebPageCount:integer;

    end;

    implementation

    constructor TWebCracker.Create;

    begin

    inherited Create;

    FWebPageCount:=0;

    end;

    destructor TWebCracker.Free;

    begin

    clear;

    inherited Free;

    end;

    procedure TWebCracker.SnapShot;

    const

    ERRORNOTLOADCOMPLETE='可能打开的网页还没有完全加载,请当所有的网页下载完后再刷新!'

    var

    ShellWindow:IShellWindow;

    WebBrowser:IWebBrower2;

    I,ShellWindowCount:integer;

    HTMLDocument:IHTMLDocument2;

    URL:string;

    WebPageRecord:TWebPageRecord;

    begin

    FWebPageCount :=0;

    ShellWindow:=CoShellWindow.Create;

    ShellWindowCount :=ShellWindow.Create;

    if ShellWindowCount>MAXPAGECOUNT then

    ShellWindowCount:=MAXPAGECOUNT;

    for i:=0 to ShellWindowCount-1 do

    begin

    WebBrowser:=ShellWindow.Item(I) as IWebBrowser2;

    URL:=WebBrowser.LocationURL;

    if (WebBrowser<>nil) and (not IsLocationFile(URL)) then

    begin

    try

    HTMLDocument :=WebBrowser.Document as IHTMLDocument2;

    WebPageRecord.URL :=URL;

    WebPageRecord.Title :=HTMLDocument.title;

    WebPageRecord.Text :=HTMLDocument.body.outerText;

    FWebPageRecordArray[I] :=WebPageRecord;

    Inc(FWebPageCount);

    except

    on Exception do

    raise Exception.Create(ERRORNOTLOADCOMPLETE);

    end;

    end;

    ShellWindow :=nil;

    end;

    end;

    function TWebCracker.GetWebText(AIndex:integer):string;

    begin

    Result :=FWebPageRecordArray[AIndex].Text;

    end;

    function TWebCracker.GetWebTitle(AIndex:integer):string;

    begin

    Result :=FWebPageRecordArray[AIndex].Title;

    end;

    function TWebCracker.GetWebURL(AIndex:integer):string;

    begin

    Result :=FWebPageRecordArray[AIndex].URL;

    end;

    procedureTWebCracker.Clear;

    begin

    FWebPageCount :=0;

    end;

    procedureTWebCracker.Refresh;

    begin

    self.Snapshot;

    end;

    functionTWebCracker.GetWebPageCount:integer;

    begin

    Result :=FWebPageCount;

    end;

  • 相关阅读:
    linux查看CPU和内存信息
    linux yum命令详解
    查看文件中关键字前后几行的内容
    vue.js+web storm安装及第一个vue.js
    android GPS: code should explicitly check to see if permission is available
    ASP.NET MVC Identity 使用自己的SQL Server数据库
    阿里云服务器,tomcat启动,一直卡在At least one JAR was scanned for TLDs yet contained no TLDs就不动了
    ASP.NET MVC4 MVC 当前上下文中不存在名称“Scripts”
    python 将windows字体中的汉字生成图片的方法
    Java android DES+Base64加密解密
  • 原文地址:https://www.cnblogs.com/djcsch2001/p/2035826.html
Copyright © 2020-2023  润新知