• Delphi 解析HTML


    uses mshtml;

    IHTMLEleMent.ID;

    IHTMLEleMent.tagName;

    IHTMLEleMent.title;
    elmt._className;
    elmt.getAttribute('anchor', 0);

    { Dumps every <table> of the page currently loaded in WebBrowser1 into Memo3.
      For each table row one line is added to Memo3 containing the innerText of
      every cell, each followed by a TAB (#9), with CR/LF pairs removed; a dashed
      separator line is added after each row.
      Does nothing when no HTML document is loaded or no table collection is
      available. }
    procedure TForm1.btnphClick(Sender: TObject);
    var
      Document: IHTMLDocument2;
      FTableCollection: IHTMLElementCollection;
      table: IHTMLTABLE;
      TableRow: IHTMLTableRow;
      elmt: IHTMLEleMent;
      I, J, K: integer;
      str: string;
    begin
      // WebBrowser1.Document is nil until a page has been loaded; Supports
      // returns False for nil or for a document that is not IHTMLDocument2.
      if not Supports(WebBrowser1.Document, IHTMLDocument2, Document) then
        Exit;

      // Collect all <table> elements of the document.
      FTableCollection := Document.all.tags('table') as IHTMLElementCollection;
      if FTableCollection = nil then
        Exit;

      for I := 0 to FTableCollection.Length - 1 do
      begin
        table := FTableCollection.item(I, 0) as IHTMLTABLE;

        // One table row per entry (the original note calls them "questions").
        for J := 0 to table.rows.Length - 1 do
        begin
          TableRow := table.rows.item(J, 0) as IHTMLTableRow;

          // Build the text of this row: cell texts, each followed by a TAB.
          str := '';
          for K := 0 to TableRow.cells.Length - 1 do
          begin
            elmt := TableRow.cells.item(K, 0) as IHTMLEleMent;
            str := str + elmt.innerText + #9;
          end;

          // Strip CR/LF pairs so every row ends up on a single memo line.
          str := StringReplace(str, #$D#$A, '', [rfReplaceAll]);
          Memo3.Lines.Add(str);
          Memo3.Lines.Add('------------------------------------------');
        end;
      end;
    end;

      URL := 'http://bbs.csdn.net/forums/Delphi';
      WebBrowser1.Navigate(URL);

    下面是一行tablerow的数据,可以分解出来,<td></td>之间是一个cell列。

    如何解析第一列的href、class之间的3个数据呢,?、delphi7 滚动条颜色、VCL组件开发及应用

              <tr>
        <td class="title">
          <strong class="green"></strong>
          <a href="/topics/390861446" target="_blank" title="delphi7 滚动条颜色">delphi7 滚动条颜色</a>
          <span class="forum_link">[<span class="parent"><a href="/forums/Delphi">Delphi</a></span> <a href="/forums/DelphiVCL">VCL组件开发及应用</a>]</span>
        </td>
        <td class="tc">20</td>
        <td class="tc">
          <a href="http://my.csdn.net/u010745617" rel="nofollow" target="_blank">u010745617</a><br />
          <span class="time">08-15 16:25</span></td>
        <td class="tc">1</td>
        <td class="tc">
          <a href="http://my.csdn.net/NongCunGongLu" rel="nofollow" target="_blank">NongCunGongLu</a><br />
          <span class="time">08-17 13:41</span>
        </td>
        <td class="tc">
          <a href="/topics/390861446/close" target="_blank">管理</a>
        </td>
      </tr>
        <td class="title">
          <strong class="green"></strong>
          <a href="/topics/390861446" target="_blank" title="delphi7 滚动条颜色">delphi7 滚动条颜色</a>
          <span class="forum_link">[<span class="parent"><a href="/forums/Delphi">Delphi</a></span> <a href="/forums/DelphiVCL">VCL组件开发及应用</a>]</span>
        </td>

    把td这一部分IHTMLEleMent当作IHTMLElementCollection解析就可以了,tagName,getAttribute('href',0),title,_className都可以获得正确的值,是6个集合元素。

           // Treat the cell element as a collection of all elements nested in it.
           // q, emt2 and s2 are declared outside this fragment.
           tempCoc := elmt.all as IHTMLElementCollection;
            // Only cells that contain exactly 6 nested elements are processed.
            if (tempCoc.Length = 6) then
            begin
              for q := 0 to tempCoc.Length - 1 do
              begin
                emt2 := tempCoc.item(q, 0) as IHTMLEleMent;
                // Dispatch on the tag name (compared against upper-case names).
                if emt2.tagName = 'STRONG' then
                  s2 := emt2.innerText
                else if emt2.tagName = 'A' then
                begin
                  // Three values readable from an anchor; each assignment
                  // overwrites s2, so only innerText remains afterwards.
                  s2 := emt2.getAttribute('href',0);
                  s2 := emt2.title;
                  s2 := emt2.innerText;
                end
                else if emt2.tagName = 'SPAN' then
                begin
                  // Properties listed for illustration; the values are not stored.
                  emt2.tagName;
                  emt2._className;
                  emt2.title;
                  emt2.innerText;
                end;
              end;
            end;

     elmt: IHTMLEleMent;

      elmt := (WebBrowser1.Document as ihtmldocument3).getElementById('idbtn001');

    getElementsByName

    getElementById

    getElementsByTagName

    doc2: IHTMLDocument2;
    doc3: IHTMLDocument3;

      doc2.forms.item('form1',0) as IHTMLFormElement;//get the form named 'form1'

    elmt := (WebBrowser1.Document as IHTMLDocument3).getElementById('divfirstID');//get a node by its ID
    
    elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;//first child node
    elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;//first child node
    elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;//first child node
    elmt := ((elmt.children as IHTMLElementCollection).item(2, 0)) as IHTMLEleMent;//third child of that child node
    elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;//first child node
  • 相关阅读:
    [ACM_数学] 大菲波数 (hdu oj 1715 ,java 大数)
    [ACM_水题] 不要62(hdu oj 2089, 不含62和4的数字统计)
    [ACM_其他] Square Ice (poj1099 规律)
    [ACM_图论] Domino Effect (POJ1135 Dijkstra算法 SSSP 单源最短路算法 中等 模板)
    [ACM_搜索] Triangles(POJ1471,简单搜索,注意细节)
    [ACM_图论] Sorting Slides(挑选幻灯片,二分匹配,中等)
    [ACM_图论] The Perfect Stall 完美的牛栏(匈牙利算法、最大二分匹配)
    [ACM_几何] Metal Cutting(POJ1514)半平面割与全排暴力切割方案
    [ACM_动态规划] 轮廓线动态规划——铺放骨牌(状态压缩1)
    [ACM_数据结构] 竞赛排名
  • 原文地址:https://www.cnblogs.com/cb168/p/3918237.html
Copyright © 2020-2023  润新知