• html网页采集


    UI_Less.pas:

      1 unit UI_Less;
      2 
      3 interface
      4 
      5 uses
      6   Windows, Classes, Messages, Forms, MsHtml, Urlmon, ActiveX;
      7 
      8 const
      9   WM_USER_STARTWALKING = WM_USER + 1;
     10   DISPID_AMBIENT_DLCONTROL = (-5512);
     11   READYSTATE_COMPLETE = $00000004;
     12 
     13   DLCTL_DLIMAGES = $00000010;
     14   DLCTL_VIDEOS = $00000020;
     15   DLCTL_BGSOUNDS = $00000040;
     16   DLCTL_NO_SCRIPTS = $00000080;
     17   DLCTL_NO_JAVA = $00000100;
     18   DLCTL_NO_RUNACTIVEXCTLS = $00000200;
     19   DLCTL_NO_DLACTIVEXCTLS = $00000400;
     20   DLCTL_DOWNLOADONLY = $00000800;
     21   DLCTL_NO_FRAMEDOWNLOAD = $00001000;
     22   DLCTL_RESYNCHRONIZE = $00002000;
     23   DLCTL_PRAGMA_NO_CACHE = $00004000;
     24   DLCTL_NO_BEHAVIORS = $00008000;
     25   DLCTL_NO_METACHARSET = $00010000;
     26   DLCTL_URL_ENCODING_DISABLE_UTF8 = $00020000;
     27   DLCTL_URL_ENCODING_ENABLE_UTF8 = $00040000;
     28   DLCTL_FORCEOFFLINE = $10000000;
     29   DLCTL_NO_CLIENTPULL = $20000000;
     30   DLCTL_SILENT = $40000000;
     31   DLCTL_OFFLINEIFNOTCONNECTED = $80000000;
     32   DLCTL_OFFLINE = DLCTL_OFFLINEIFNOTCONNECTED;
     33 
     34 type
     35   TUILess = class(TComponent, IUnknown, IDispatch, IPropertyNotifySink,
     36     IOleClientSite)
     37   private
     38     FDocTitle: string;
     39     FBodyText: TStrings;
     40     FBodyHtml: TStrings;
     41   protected
     42     /// IDISPATCH
     43     function Invoke(DispID: Integer; const IID: TGUID; LocaleID: Integer;
     44       Flags: Word; var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult;
     45       stdcall;
     46     /// IPROPERTYNOTIFYSINK
     47     function OnChanged(DispID: TDispID): HResult; stdcall;
     48     function OnRequestEdit(DispID: TDispID): HResult; stdcall;
     49     /// IOLECLIENTSITE
     50     function SaveObject: HResult; stdcall;
     51     function GetMoniker(dwAssign: Longint; dwWhichMoniker: Longint;
     52       out mk: IMoniker): HResult; stdcall;
     53     function GetContainer(out container: IOleContainer): HResult; stdcall;
     54     function ShowObject: HResult; stdcall;
     55     function OnShowWindow(fShow: BOOL): HResult; stdcall;
     56     function RequestNewObjectLayout: HResult; stdcall;
     57     ///
     58     function LoadUrlFromMoniker: HResult;
     59     function LoadUrlFromFile: HResult;
     60     // * We only use LoadUrlFromMoniker, but we could use LoadUrlFromFile instead.
     61 
     62   public
     63     constructor Create(AOwner: TComponent); override;
     64     destructor Destroy; override;
     65     property DocTitle: string read FDocTitle;
     66     property BodyText: TStrings read FBodyText write FBodyText;
     67     property BodyHtml: TStrings read FBodyHtml write FBodyHtml;
     68     function Get(URL: PWidechar; var IsSucceed: Boolean; IsStop: Boolean)
     69       : IHTMLELEMENTCollection;
     70     procedure GetAnchorList(IC: IHTMLELEMENTCollection; Anchorlist: TStrings);
     71     procedure GetImageList(IC: IHTMLELEMENTCollection; ImageList: TStrings);
     72   end;
     73 
     74 implementation
     75 
     76 var
     77   Doc: IhtmlDocument2;
     78   _URL: PWidechar;
     79 
     80 constructor TUILess.Create(AOwner: TComponent);
     81 begin
     82   inherited Create(AOwner);
     83   FBodyText := TStringList.Create;
     84   FBodyHtml := TStringList.Create;
     85 end;
     86 
     87 destructor TUILess.Destroy;
     88 begin
     89   if Assigned(FBodyText) then
     90     FBodyText.Free;
     91   if Assigned(FBodyHtml) then
     92     FBodyHtml.Free;
     93   inherited Destroy;
     94 end;
     95 
     96 /// CORE ---->>>>>>>>>
     97 function TUILess.Get(URL: PWidechar; var IsSucceed: Boolean; IsStop: Boolean)
     98   : IHTMLELEMENTCollection;
     99 var
    100   Cookie: Integer;
    101   CP: IConnectionPoint;
    102   OleObject: IOleObject;
    103   OleControl: IOleControl;
    104   CPC: IConnectionPointContainer;
    105   All: IHTMLElement;
    106   Msg: TMsg;
    107   hr: HResult;
    108 begin
    109   _URL := URL;
    110   IsSucceed := false;
    111   try
    112     CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER,
    113       IID_IHTMLDocument2, Doc);
    114     OleObject := Doc as IOleObject;
    115     OleObject.SetClientSite(self);
    116     OleControl := Doc as IOleControl;
    117     OleControl.OnAmbientPropertyChange(DISPID_AMBIENT_DLCONTROL);
    118     CPC := Doc as IConnectionPointContainer;
    119     CPC.FindConnectionPoint(IPropertyNotifySink, CP);
    120     CP.Advise(self, Cookie);
    121     hr := LoadUrlFromMoniker; // alternative: Hr:= LoadUrlFromFile;
    122     if ((SUCCEEDED(hr)) or (hr = E_PENDING)) then
    123       while (GetMessage(Msg, 0, 0, 0)) do
    124       begin
    125         if ((Msg.message = WM_USER_STARTWALKING) and (Msg.hwnd = 0)) then
    126         begin
    127           PostQuitMessage(0);
    128           result := Doc.Get_all;
    129           All := Doc.Get_body;
    130           FDocTitle := string(Doc.nameProp);
    131           FBodyText.Text := string(All.outerText);
    132           FBodyHtml.Text := string(All.outerHTML);
    133           IsSucceed := true;
    134         end
    135         else
    136           DispatchMessage(Msg);
    137         if IsStop then
    138           Exit;
    139       end;
    140   except
    141     Exit;
    142   end;
    143 end;
    144 
    145 function TUILess.Invoke(DispID: Integer; const IID: TGUID; LocaleID: Integer;
    146   Flags: Word; var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult;
    147 var
    148   I: Integer;
    149 begin
    150   if DISPID_AMBIENT_DLCONTROL = DispID then
    151   begin
    152     I := DLCTL_DOWNLOADONLY + DLCTL_NO_SCRIPTS + DLCTL_NO_JAVA +
    153       DLCTL_NO_DLACTIVEXCTLS + DLCTL_NO_RUNACTIVEXCTLS;
    154     PVariant(VarResult)^ := I;
    155     result := S_OK;
    156   end
    157   else
    158     result := DISP_E_MEMBERNOTFOUND;
    159 end;
    160 
    161 function TUILess.OnChanged(DispID: TDispID): HResult;
    162 var
    163   dp: TDispParams;
    164   vResult: OleVariant;
    165 begin
    166   if (DISPID_READYSTATE = DispID) then
    167     if SUCCEEDED((Doc as IhtmlDocument2).Invoke(DISPID_READYSTATE, GUID_null,
    168         LOCALE_SYSTEM_DEFAULT, DISPATCH_PROPERTYGET, dp, @vResult, nil, nil))
    169       then
    170       if Integer(vResult) = READYSTATE_COMPLETE then
    171         PostThreadMessage(GetCurrentThreadId(), WM_USER_STARTWALKING, 0, 0);
    172 end;
    173 
    174 function TUILess.LoadUrlFromMoniker: HResult;
    175 var
    176   Moniker: IMoniker;
    177   BindCtx: IBindCTX;
    178   PM: IPersistMoniker;
    179 begin
    180   createURLMoniker(nil, _URL, Moniker);
    181   CreateBindCtx(0, BindCtx);
    182   PM := Doc as IPersistMoniker;
    183   result := PM.Load(LongBool(0), Moniker, BindCtx, STGM_READ)
    184 end;
    185 
    186 function TUILess.LoadUrlFromFile: HResult;
    187 var
    188   PF: IPersistfile;
    189 begin
    190   PF := Doc as IPersistfile;
    191   result := PF.Load(_URL, 0);
    192 end;
    193 
    194 // 获取图像链接
    195 procedure TUILess.GetImageList(IC: IHTMLELEMENTCollection; ImageList: TStrings);
    196 var
    197   Image: IHTMLImgElement;
    198   Disp: IDispatch;
    199   x: Integer;
    200 begin
    201   if IC <> nil then
    202   begin
    203     for x := 0 to IC.Length - 1 do
    204     begin
    205       application.ProcessMessages;
    206       Disp := IC.item(x, 0);
    207       if SUCCEEDED(Disp.QueryInterface(IHTMLImgElement, Image)) then
    208         ImageList.add(string(Image.src));
    209     end;
    210   end;
    211 end;
    212 
    213 // 获取链接
    214 procedure TUILess.GetAnchorList(IC: IHTMLELEMENTCollection;
    215   Anchorlist: TStrings);
    216 var
    217   anchor: IHTMLAnchorElement;
    218   Disp: IDispatch;
    219   x: Integer;
    220 begin
    221   if IC <> nil then
    222   begin
    223     for x := 0 to IC.Length - 1 do
    224     begin
    225       application.ProcessMessages;
    226       Disp := IC.item(x, 0);
    227       if (SUCCEEDED(Disp.QueryInterface(IHTMLAnchorElement, anchor)) and
    228           (anchor.href <> '')) then
    229         Anchorlist.add(string(anchor.href));
    230     end;
    231   end;
    232 end;
    233 
    234 /// Don't Care ------>>>>>>>>>>>
    235 function TUILess.OnRequestEdit(DispID: TDispID): HResult;
    236 begin
    237   result := E_NOTIMPL;
    238 end;
    239 
    240 function TUILess.SaveObject: HResult;
    241 begin
    242   result := E_NOTIMPL;
    243 end;
    244 
    245 function TUILess.GetMoniker(dwAssign: Longint; dwWhichMoniker: Longint;
    246   out mk: IMoniker): HResult;
    247 begin
    248   result := E_NOTIMPL;
    249 end;
    250 
    251 function TUILess.GetContainer(out container: IOleContainer): HResult;
    252 begin
    253   result := E_NOTIMPL;
    254 end;
    255 
    256 function TUILess.ShowObject: HResult;
    257 begin
    258   result := E_NOTIMPL;
    259 end;
    260 
    261 function TUILess.OnShowWindow(fShow: BOOL): HResult;
    262 begin
    263   result := E_NOTIMPL;
    264 end;
    265 
    266 function TUILess.RequestNewObjectLayout: HResult;
    267 begin
    268   result := E_NOTIMPL;
    269 end;
    270 
    271 end.
    View Code

    Unit3.pas:

      1 unit Unit3;
      2 
      3 interface
      4 
      5 uses
      6   Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
      7   Dialogs, StdCtrls;
      8 
      9 type
     10   TForm3 = class(TForm)
     11     Button1: TButton;
     12     Edit1: TEdit;
     13     Memo1: TMemo;
     14     Button2: TButton;
     15     Button3: TButton;
     16     Button4: TButton;
     17     procedure Button1Click(Sender: TObject);
     18     procedure Button2Click(Sender: TObject);
     19     procedure Button3Click(Sender: TObject);
     20     procedure Button4Click(Sender: TObject);
     21   private
     22     { Private declarations }
     23     procedure into(i: Word);
     24   public
     25     { Public declarations }
     26   end;
     27 
     28 var
     29   Form3: TForm3;
     30 
     31 implementation
     32 uses UI_Less;
     33 
     34 {$R *.dfm}
     35 
     36 function DoStrToWideChar(s: string): PWideChar;
     37 var
     38  //   s:sting;
     39   pwc: PWidechar;
     40   len: integer;
     41 begin
     42   //  s:= 'abcdefg ';
     43   len := length(s) + 1;
     44   pwc := AllocMem(len * sizeof(widechar));
     45   stringtowidechar(s, pwc, len);
     46    // showmessage(widechartostring(pwc));
     47 
     48   result := pwc;
     49    //  FreeMem(pwc);
     50 end;
     51 
     52 
     53 
     54 procedure TForm3.into(i: Word);
     55 var
     56   sh: TUILess;
     57   su: boolean; // 是否获取成功
     58   // isstop: boolean; //设全局变量可以中断连接 ,避免出错
     59   surl: PWideChar;
     60 begin
     61   surl := DoStrToWideChar(Trim(Edit1.Text));
     62   sh := TUILess.Create(nil);
     63   try
     64     Memo1.Clear;
     65     case i of
     66       1:
     67         sh.GetAnchorList(sh.get(surl, su, False), Memo1.Lines);
     68       2:
     69         sh.GetImageList(sh.get(surl, su, False), Memo1.Lines);
     70       3:
     71         begin
     72           sh.get(surl, su, False);
     73           Memo1.Lines := sh.BodyText;
     74         end;
     75       4:
     76         begin
     77           sh.get(surl, su, False);
     78           Memo1.Lines := sh.BodyHtml;
     79         end;
     80     end;
     81   finally
     82     //sh.Free;
     83   end;
     84 end;
     85 
     86 procedure TForm3.Button1Click(Sender: TObject);
     87 begin
     88   into(1);
     89 end;
     90 
     91 procedure TForm3.Button2Click(Sender: TObject);
     92 begin
     93   into(2);
     94 end;
     95 
     96 procedure TForm3.Button3Click(Sender: TObject);
     97 begin
     98   into(3);
     99 end;
    100 
    101 procedure TForm3.Button4Click(Sender: TObject);
    102 begin
    103   into(4);
    104 end;
    105 
    106 end.
    View Code
  • 相关阅读:
    Gantt/BTS 生产计划电子看板甘特图
    C# DotNetty TCP对接 松下扣料机
    Vue 和 Zebra 打印机连接直接打印条码
    JavaFx 通信ITC数字广播 SAPI 使用NeoSpeech Liang包生成语音
    Docker RabbitMQ 镜像集群
    游戏匹配实现
    Metro 界面设计案例
    JavaFX 集成 Sqlite 和 Hibernate 开发爬虫应用
    Discuz论坛 自动加好友留言程序
    JavaFX Metro UI 和 开发库
  • 原文地址:https://www.cnblogs.com/FKdelphi/p/10357222.html
Copyright © 2020-2023  润新知